feat(ci): add github actions

This commit is contained in:
2025-11-17 12:30:44 +02:00
parent 22aab9ffc6
commit 3f53c1e03f
31 changed files with 895 additions and 653 deletions
+2 -1
View File
@@ -4,6 +4,7 @@
"@basango/domain": "workspace:*",
"@basango/encryption": "workspace:*",
"@basango/logger": "workspace:*",
"@devscast/config": "catalog:",
"@hono/node-server": "^1.19.6",
"@hono/trpc-server": "^0.4.0",
"@hono/zod-openapi": "^1.1.4",
@@ -27,7 +28,7 @@
"private": true,
"scripts": {
"dev": "bun run --hot src/index.ts",
"start": "bun run src/index.ts",
"start": "NODE_ENV=production bun run src/index.ts",
"typecheck": "tsc --noEmit"
},
"type": "module"
+4 -29
View File
@@ -105,6 +105,10 @@ bun run crawler:sync -- --sourceId radiookapi.net --dateRange 2024-01-01:2024-01
bun run crawler:sync -- --sourceId example.com --category politics
```
Crawled data will be saved in the `data/` directory as JSONL files
and can be pushed to the database using `bun run crawler:push -- --sourceId radiookapi.net`.
### Asynchronous Crawling
Best for large-scale operations and when you need job queuing:
@@ -148,32 +152,3 @@ bun run crawler:worker -- -q listing -q processing
| Option | Description | Example |
|--------|-------------|---------|
| `--queue`, `-q` | Specify queue(s) to process (can be used multiple times) | `--queue listing --queue details` |
## Project Structure
```
basango/apps/crawler/
├── src/
│ ├── config.ts # Configuration schema and loading
│ ├── constants.ts # Application constants
│ ├── schema.ts # Zod validation schemas
│ ├── utils.ts # Utility functions
│ ├── http/ # HTTP client and utilities
│ ├── process/ # Core crawling logic
│ │ ├── async/ # Async processing (queues, workers)
│ │ ├── sync/ # Synchronous processing
│ │ ├── parsers/ # Content parsers (HTML, WordPress)
│ │ ├── crawler.ts # Main crawler interface
│ │ └── persistence.ts # Data persistence layer
│ ├── scripts/ # CLI entry points
│ │ ├── sync.ts # Sync crawling script
│ │ ├── async.ts # Async job scheduling
│ │ ├── worker.ts # Worker process
│ │ └── utils.ts # CLI utilities
│ └── __tests__/ # Test files
├── config/
│ ├── sources.json # Source configurations
│ └── pipeline.json # Pipeline settings
├── data/ # Output directory for crawled data
└── package.json
```
+2 -2
View File
@@ -3,6 +3,7 @@
"@basango/domain": "workspace:*",
"@basango/encryption": "workspace:*",
"@basango/logger": "workspace:*",
"@devscast/config": "catalog:",
"bullmq": "^4.18.3",
"date-fns": "catalog:",
"ioredis": "^5.8.2",
@@ -11,8 +12,7 @@
"zod": "catalog:"
},
"devDependencies": {
"@types/turndown": "^5.0.6",
"vitest": "^4.0.7"
"@types/turndown": "^5.0.6"
},
"imports": {
"#crawler/*": "./src/*"
-17
View File
@@ -1,17 +0,0 @@
import path from "node:path";
import { defineConfig } from "vitest/config";
// Vitest configuration for this package.
// - resolve.alias maps "@" to the `src` directory next to this file.
// - Tests matching src/**/*.test.ts run with the "node" environment and
//   `globals: true`; ./vitest.setup.ts is listed as a setup file.
export default defineConfig({
resolve: {
alias: {
"@": path.resolve(__dirname, "src"),
},
},
test: {
environment: "node",
globals: true,
include: ["src/**/*.test.ts"],
setupFiles: ["./vitest.setup.ts"],
},
});
-1
View File
@@ -1 +0,0 @@
process.env.NODE_ENV = process.env.NODE_ENV ?? "test";
+1 -1
View File
@@ -46,6 +46,6 @@
"build": "next build",
"clean": "rm -rf .next node_modules",
"dev": "next dev",
"start": "next start"
"start": "NODE_ENV=production next start"
}
}
@@ -11,7 +11,7 @@ export const metadata: Metadata = {
export default async function Page() {
batchPrefetch([
trpc.articles.getPublicationGraph.queryOptions({}),
trpc.articles.getPublications.queryOptions({}),
trpc.articles.getSourceDistribution.queryOptions({ limit: 8 }),
]);
@@ -1,4 +1,3 @@
import { Source } from "@basango/domain/models/sources";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@basango/ui/components/tabs";
import { Metadata } from "next";
@@ -20,11 +19,11 @@ export default async function Page({ params }: { params: Promise<{ id: string }>
batchPrefetch([
trpc.sources.getById.queryOptions({ id }),
trpc.sources.getCategoryShares.queryOptions({ id, limit: 10 }),
trpc.sources.getPublicationGraph.queryOptions({ id }),
trpc.sources.getPublications.queryOptions({ id }),
trpc.articles.list.infiniteQueryOptions({ limit: 12, sourceId: id }),
]);
const source: Source = await queryClient.fetchQuery(trpc.sources.getById.queryOptions({ id }));
const source = await queryClient.fetchQuery(trpc.sources.getById.queryOptions({ id }));
return (
<HydrateClient>
@@ -21,35 +21,7 @@ import { z } from "zod";
import { useZodForm } from "#dashboard/hooks/use-zod-form";
import { useTRPC } from "#dashboard/trpc/client";
// Subset of updateSourceSchema restricted to the description, displayName,
// id and name fields.
const baseSchema = updateSourceSchema.pick({
description: true,
displayName: true,
id: true,
name: true,
});
// Schema for the source edit form values.
// - description / displayName: optional strings that are trimmed; an empty or
//   missing value becomes undefined before being piped into the matching
//   baseSchema field schema.
// - id: uses the baseSchema field schema directly.
// - name: trimmed, then piped into the baseSchema name schema.
const sourceEditSchema = z.object({
description: z
.string()
.optional()
.transform((value) => {
// Whitespace-only input collapses to undefined rather than "".
const trimmed = value?.trim();
return trimmed ? trimmed : undefined;
})
.pipe(baseSchema.shape.description),
displayName: z
.string()
.optional()
.transform((value) => {
// Same normalisation as description: blank input is treated as absent.
const trimmed = value?.trim();
return trimmed ? trimmed : undefined;
})
.pipe(baseSchema.shape.displayName),
id: baseSchema.shape.id,
name: z.string().trim().pipe(baseSchema.shape.name),
});
type SourceEditValues = z.infer<typeof sourceEditSchema>;
type SourceEditValues = z.infer<typeof updateSourceSchema>;
type Props = {
source: RouterOutputs["sources"]["getById"];
@@ -59,12 +31,13 @@ export function SourceEditForm({ source }: Props) {
const trpc = useTRPC();
const queryClient = useQueryClient();
const form = useZodForm(sourceEditSchema, {
const form = useZodForm(updateSourceSchema, {
defaultValues: {
description: source.description ?? "",
displayName: source.displayName ?? "",
id: source.id,
name: source.name,
url: source.url ?? "",
},
mode: "onChange",
});
@@ -1,6 +1,5 @@
"use client";
import { Source } from "@basango/domain/models";
import {
Card,
CardContent,
@@ -11,9 +10,14 @@ import {
import Link from "next/link";
import type { ReactNode } from "react";
import { RouterOutputs } from "#api/trpc/routers/_app";
import { SourceEditForm } from "#dashboard/components/forms/source-edit-form";
export function SourceDetailsTab({ source }: { source: Source }) {
type Props = {
source: RouterOutputs["sources"]["getById"];
};
export function SourceDetailsTab({ source }: Props) {
const credibility = source.credibility;
return (
+3 -4
View File
@@ -5,7 +5,6 @@ import { createI18nClient } from "next-international/client";
// NOTE: Also update middleware.ts to support locale
export const languages = ["en"];
export const { useScopedI18n, I18nProviderClient, useCurrentLocale, useChangeLocale, useI18n } =
createI18nClient({
en: () => import("./translations/en"),
});
export const { I18nProviderClient, useCurrentLocale } = createI18nClient({
en: () => import("./translations/en"),
});
+1 -1
View File
@@ -1,5 +1,5 @@
import { createI18nServer } from "next-international/server";
export const { getI18n, getScopedI18n, getStaticParams } = createI18nServer({
export const { getStaticParams } = createI18nServer({
en: () => import("./translations/en"),
});
+6 -5
View File
@@ -56,21 +56,22 @@ type AnyQueryOptions =
// Starts a prefetch for one set of query options on the client returned by
// getQueryClient(). The prefetch call is prefixed with `void`, so its result
// is discarded rather than awaited.
// NOTE(review): this span looks like a diff hunk with its +/- markers
// stripped, so the removed and added variants of a line appear together (the
// two "infinite" checks, the two prefetchQuery calls) — confirm against the
// real file before editing.
export function prefetch<T extends AnyQueryOptions>(queryOptions: T) {
const queryClient = getQueryClient();
if (queryOptions.queryKey[1]?.type === "infinite") {
// The second queryKey entry is inspected; only a non-array value whose
// `type` is "infinite" selects the infinite-query prefetch.
const meta = queryOptions.queryKey[1];
if (!Array.isArray(meta) && meta?.type === "infinite") {
void queryClient.prefetchInfiniteQuery(queryOptions as any);
} else {
void queryClient.prefetchQuery(queryOptions);
void queryClient.prefetchQuery(queryOptions as any);
}
}
// Starts a prefetch for every entry of queryOptionsArray on the client
// returned by getQueryClient(). Each prefetch call is prefixed with `void`,
// so results are discarded rather than awaited.
// NOTE(review): this span looks like a diff hunk with its +/- markers
// stripped, so the removed and added variants of a line appear together (the
// two "infinite" checks, the two prefetchQuery calls) — confirm against the
// real file before editing.
export function batchPrefetch<T extends AnyQueryOptions>(queryOptionsArray: T[]) {
const queryClient = getQueryClient();
for (const queryOptions of queryOptionsArray) {
if (queryOptions.queryKey[1]?.type === "infinite") {
// The second queryKey entry is inspected; only a non-array value whose
// `type` is "infinite" selects the infinite-query prefetch.
const meta = queryOptions.queryKey[1];
if (!Array.isArray(meta) && meta?.type === "infinite") {
void queryClient.prefetchInfiniteQuery(queryOptions as any);
} else {
void queryClient.prefetchQuery(queryOptions);
void queryClient.prefetchQuery(queryOptions as any);
}
}
}