From f39635e04f91c4adc2280f06432b926e3d78600c Mon Sep 17 00:00:00 2001 From: bernard-ng Date: Mon, 17 Nov 2025 00:04:27 +0200 Subject: [PATCH] feat(domain): centralize data definition --- .vscode/settings.json | 2 +- AGENTS.md | 2 +- apps/api/package.json | 4 + apps/api/src/rest/{types.ts => init.ts} | 0 apps/api/src/rest/routers/articles.ts | 9 +- apps/api/src/schemas/articles.ts | 73 ---- apps/api/src/schemas/sources.ts | 106 ------ apps/api/src/trpc/routers/articles.ts | 28 +- apps/api/src/trpc/routers/sources.ts | 16 +- apps/api/src/utils/parse.ts | 4 - apps/api/src/utils/response.ts | 17 +- apps/api/tsconfig.json | 10 - apps/crawler/README.md | 4 +- apps/crawler/package.json | 10 +- apps/crawler/src/config.ts | 11 +- apps/crawler/src/constants.ts | 29 -- apps/crawler/src/http/http-client.ts | 9 +- apps/crawler/src/http/open-graph.ts | 6 +- apps/crawler/src/http/user-agent.ts | 4 +- apps/crawler/src/process/async/handlers.ts | 30 +- apps/crawler/src/process/async/schemas.ts | 8 +- apps/crawler/src/process/crawler.ts | 6 +- apps/crawler/src/process/parsers/base.ts | 8 +- apps/crawler/src/process/parsers/html.ts | 5 +- apps/crawler/src/process/parsers/wordpress.ts | 5 +- apps/crawler/src/process/persistence.ts | 56 ++- apps/crawler/src/schema.ts | 130 ------- .../src/scripts/{queue.ts => async.ts} | 2 + apps/crawler/src/scripts/crawl.ts | 23 -- apps/crawler/src/scripts/push.ts | 79 ++++ apps/crawler/src/scripts/sync.ts | 106 +----- apps/crawler/src/scripts/worker.ts | 2 + apps/crawler/src/utils.ts | 58 ++- apps/crawler/tsconfig.json | 9 +- apps/dashboard/next.config.ts | 2 +- apps/dashboard/package.json | 8 +- .../(app)/(sidebar)/articles/page.tsx | 19 +- .../(app)/(sidebar)/dashboard/page.tsx | 30 ++ .../(app)/(sidebar)/sources/[id]/page.tsx | 29 +- .../[locale]/(app)/(sidebar)/sources/page.tsx | 23 +- .../dashboard/src/components/article-card.tsx | 138 +++++++ .../src/components/articles-feed.tsx | 94 +++++ .../src/components/charts/area-chart.tsx | 50 +++ .../articles/publication-graph-chart.tsx | 81 +++++ .../articles/source-distribution-chart.tsx | 82 +++++ .../src/components/charts/bar-chart.tsx | 28 ++ .../src/components/charts/chart-filters.tsx | 259 +++++++++++++ .../charts/source-publication-graph-chart.tsx | 109 ------ .../category-shares-chart.tsx} | 38 +- .../sources/publication-graph-chart.tsx | 62 ++++ .../src/components/charts/status.tsx | 38 ++ .../src/components/forms/source-edit-form.tsx | 176 +++++++++ .../src/components/forms/source-form.tsx | 186 ++++++++++ .../components/modals/source-create-modal.tsx | 47 +++ .../src/components/shell/page-layout.tsx | 2 +- .../src/components/sidebar/app-sidebar.tsx | 14 +- apps/dashboard/src/components/source-card.tsx | 86 +++++ .../src/components/source-details-tab.tsx | 72 ++++ .../src/components/widgets/source-card.tsx | 83 ----- apps/dashboard/src/hooks/use-source-params.ts | 12 + apps/dashboard/src/trpc/server.tsx | 14 +- apps/dashboard/src/utils/utils.ts | 4 +- apps/dashboard/tsconfig.json | 13 +- biome.json | 3 +- bun.lock | 33 ++ docs/forms-handling.md | 180 ++++++++++ packages/db/package.json | 4 + packages/db/src/constants.ts | 31 -- packages/db/src/queries/articles.ts | 212 +++++++++-- packages/db/src/queries/sources.ts | 102 ++---- packages/db/src/schema.ts | 99 +---- packages/db/src/types/articles.ts | 27 ++ packages/db/src/types/shared.ts | 26 ++ packages/db/src/types/sources.ts | 21 ++ packages/db/src/utils/computed.ts | 17 +- packages/db/src/utils/filters.ts | 48 +++ packages/db/src/utils/index.ts | 2 +- packages/db/src/utils/pagination.ts | 132 ++++--- packages/db/src/utils/search-query.ts | 16 - packages/db/tsconfig.json | 6 - packages/domain/package.json | 24 ++ packages/domain/src/constants.ts | 25 ++ packages/domain/src/crawler/config.ts | 47 +++ packages/domain/src/crawler/index.ts | 2 + packages/domain/src/crawler/schemas.ts | 66 ++++ packages/domain/src/models/articles.ts | 175 +++++++++ packages/domain/src/models/index.ts | 4 + packages/domain/src/models/shared.ts | 339 ++++++++++++++++++ packages/domain/src/models/sources.ts | 63 ++++ packages/domain/src/models/users.ts | 2 + packages/domain/tsconfig.json | 5 + packages/tsconfig/base.json | 7 + packages/ui/package.json | 3 + packages/ui/src/components/calendar.tsx | 179 +++++++++ packages/ui/src/components/popover.tsx | 41 +++ packages/ui/src/components/submit-button.tsx | 30 ++ 96 files changed, 3474 insertions(+), 1167 deletions(-) rename apps/api/src/rest/{types.ts => init.ts} (100%) delete mode 100644 apps/api/src/schemas/articles.ts delete mode 100644 apps/api/src/schemas/sources.ts delete mode 100644 apps/api/src/utils/parse.ts delete mode 100644 apps/crawler/src/constants.ts delete mode 100644 apps/crawler/src/schema.ts rename apps/crawler/src/scripts/{queue.ts => async.ts} (96%) delete mode 100644 apps/crawler/src/scripts/crawl.ts create mode 100644 apps/crawler/src/scripts/push.ts create mode 100644 apps/dashboard/src/app/[locale]/(app)/(sidebar)/dashboard/page.tsx create mode 100644 apps/dashboard/src/components/article-card.tsx create mode 100644 apps/dashboard/src/components/articles-feed.tsx create mode 100644 apps/dashboard/src/components/charts/area-chart.tsx create mode 100644 apps/dashboard/src/components/charts/articles/publication-graph-chart.tsx create mode 100644 apps/dashboard/src/components/charts/articles/source-distribution-chart.tsx create mode 100644 apps/dashboard/src/components/charts/bar-chart.tsx create mode 100644 apps/dashboard/src/components/charts/chart-filters.tsx delete mode 100644 apps/dashboard/src/components/charts/source-publication-graph-chart.tsx rename apps/dashboard/src/components/charts/{source-category-shares-chart.tsx => sources/category-shares-chart.tsx} (63%) create mode 100644 apps/dashboard/src/components/charts/sources/publication-graph-chart.tsx create mode 100644 apps/dashboard/src/components/charts/status.tsx create mode 100644 apps/dashboard/src/components/forms/source-edit-form.tsx create mode 100644 apps/dashboard/src/components/forms/source-form.tsx create mode 100644 apps/dashboard/src/components/modals/source-create-modal.tsx create mode 100644 apps/dashboard/src/components/source-card.tsx create mode 100644 apps/dashboard/src/components/source-details-tab.tsx delete mode 100644 apps/dashboard/src/components/widgets/source-card.tsx create mode 100644 apps/dashboard/src/hooks/use-source-params.ts create mode 100644 docs/forms-handling.md delete mode 100644 packages/db/src/constants.ts create mode 100644 packages/db/src/types/articles.ts create mode 100644 packages/db/src/types/shared.ts create mode 100644 packages/db/src/types/sources.ts create mode 100644 packages/db/src/utils/filters.ts delete mode 100644 packages/db/src/utils/search-query.ts create mode 100644 packages/domain/package.json create mode 100644 packages/domain/src/constants.ts create mode 100644 packages/domain/src/crawler/config.ts create mode 100644 packages/domain/src/crawler/index.ts create mode 100644 packages/domain/src/crawler/schemas.ts create mode 100644 packages/domain/src/models/articles.ts create mode 100644 packages/domain/src/models/index.ts create mode 100644 packages/domain/src/models/shared.ts create mode 100644 packages/domain/src/models/sources.ts create mode 100644 packages/domain/src/models/users.ts create mode 100644 packages/domain/tsconfig.json create mode 100644 packages/ui/src/components/calendar.tsx create mode 100644 packages/ui/src/components/popover.tsx create mode 100644 packages/ui/src/components/submit-button.tsx diff --git a/.vscode/settings.json b/.vscode/settings.json index 58f6ef6..c47933b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,7 +3,7 @@ "editor.defaultFormatter": "biomejs.biome" }, "[typescript]": { - "editor.defaultFormatter": "biomejs.biome" + "editor.defaultFormatter": "esbenp.prettier-vscode" }, "[typescriptreact]": { "editor.defaultFormatter": "biomejs.biome" diff --git a/AGENTS.md b/AGENTS.md index 90e68fd..7dca36a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -66,5 +66,5 @@ Gotchas Contact Points - Architecture overview: `docs/architecture.md`. -- Architecture map: `docs/architectures/README.md`. +- Forms handling patterns: `docs/forms-handling.md`. diff --git a/apps/api/package.json b/apps/api/package.json index 4ad01b8..0090e51 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -1,6 +1,7 @@ { "dependencies": { "@basango/db": "workspace:*", + "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "@hono/node-server": "^1.19.6", @@ -19,6 +20,9 @@ "exports": { "./trpc/routers/_app": "./src/trpc/routers/_app.ts" }, + "imports": { + "#api/*": "./src/*" + }, "name": "@basango/api", "private": true, "scripts": { diff --git a/apps/api/src/rest/types.ts b/apps/api/src/rest/init.ts similarity index 100% rename from apps/api/src/rest/types.ts rename to apps/api/src/rest/init.ts diff --git a/apps/api/src/rest/routers/articles.ts b/apps/api/src/rest/routers/articles.ts index 33b817d..8424fef 100644 --- a/apps/api/src/rest/routers/articles.ts +++ b/apps/api/src/rest/routers/articles.ts @@ -1,10 +1,10 @@ import { createArticle } from "@basango/db/queries"; +import { createArticleResponseSchema, createArticleSchema } from "@basango/domain/models"; import { OpenAPIHono, createRoute } from "@hono/zod-openapi"; +import type { Context } from "#api/rest/init"; import { withCrawlerAuth } from "#api/rest/middlewares/crawler"; import { withDatabase } from "#api/rest/middlewares/db"; -import type { Context } from "#api/rest/types"; -import { createArticleResponseSchema, createArticleSchema } from "#api/schemas/articles"; import { validateResponse } from "#api/utils/response"; const app = new OpenAPIHono(); @@ -44,10 +44,7 @@ app.openapi( const input = c.req.valid("json"); const result = await createArticle(db, input); - return c.json( - validateResponse(result, createArticleResponseSchema) as { id: string; sourceId: string }, - 201, - ); + return c.json(validateResponse(result, createArticleResponseSchema), 201); }, ); diff --git a/apps/api/src/schemas/articles.ts b/apps/api/src/schemas/articles.ts deleted file mode 100644 index 550d92a..0000000 --- a/apps/api/src/schemas/articles.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { z } from "@hono/zod-openapi"; - -const metadataSchema = z.object({ - description: z.string().optional().openapi({ - description: "A brief description or summary of the article.", - example: "This article discusses the latest advancements in AI technology.", - }), - image: z.url().optional().openapi({ - description: "The URL of the main image associated with the article.", - example: "https://example.com/image.jpg", - }), - title: z.string().optional().openapi({ - description: "The title of the article for metadata purposes.", - example: "The Rise of AI", - }), -}); - -export const createArticleSchema = z - .object({ - body: z.string().min(1).openapi({ - description: "The main content of the article.", - example: "This is the body of the article...", - }), - categories: z - .array(z.string()) - .openapi({ - description: "The categories or tags associated with the article.", - example: ["Technology", "AI"], - }) - .optional() - .default([]), - hash: z.string().min(1).openapi({ - description: "The unique hash of the article link.", - example: "d41d8cd98f00b204e9800998ecf8427e", - }), - link: z.url().openapi({ - description: "The URL of the article.", - example: "https://example.com/article", - }), - metadata: metadataSchema.optional(), - publishedAt: z - .string() - .refine((value) => !Number.isNaN(Date.parse(value)), { - message: "Invalid date format", - }) - .transform((value) => new Date(value)) - .openapi({ - description: "The publication date of the article in ISO 8601 format.", - example: "2023-01-01T00:00:00Z", - }), - sourceId: z.string().openapi({ - description: "The unique identifier of the source from which the article was crawled.", - example: "radiookapi.net", - }), - title: z.string().min(1).openapi({ - description: "The title of the article.", - example: "The Rise of AI", - }), - }) - .openapi("CreateArticle"); - -export const createArticleResponseSchema = z - .object({ - id: z.uuid().openapi({ - description: "The unique identifier of the article.", - example: "b3b7c8e2-1f2a-4c3d-9e4f-5a6b7c8d9e0f", - }), - sourceId: z.uuid().openapi({ - description: "The unique identifier of the source associated with the article.", - example: "a1a2b3c4-d5e6-7f8g-9h0i-j1k2l3m4n5o6", - }), - }) - .openapi("CreateArticleResponse"); diff --git a/apps/api/src/schemas/sources.ts b/apps/api/src/schemas/sources.ts deleted file mode 100644 index 56d8de6..0000000 --- a/apps/api/src/schemas/sources.ts +++ /dev/null @@ -1,106 +0,0 @@ -import { z } from "zod"; - -const idSchema = z.uuid().openapi({ - description: "The unique identifier of the source.", - example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g", -}); - -const biasSchema = z.enum(["neutral", "slightly", "partisan", "extreme"]).openapi({ - description: "The bias level of the source.", - example: "neutral", -}); -const reliabilitySchema = z - .enum(["trusted", "reliable", "average", "low_trust", "unreliable"]) - .openapi({ - description: "The reliability level of the source.", - example: "trusted", - }); - -const transparencySchema = z.enum(["high", "medium", "low"]).openapi({ - description: "The transparency level of the source.", - example: "high", -}); - -const credibilitySchema = z - .object({ - bias: biasSchema.default("neutral"), - reliability: reliabilitySchema.default("average"), - transparency: transparencySchema.default("medium"), - }) - .openapi({ - description: "Credibility information about the source.", - }); - -export const createSourceSchema = z.object({ - credibility: credibilitySchema.optional(), - description: z.string().max(1024).optional().openapi({ - description: "A brief description of the source.", - example: "Radio Okapi is a Congolese radio station that provides news and information.", - }), - displayName: z.string().min(1).max(255).optional().openapi({ - description: "The display name of the source.", - example: "Radio Okapi", - }), - name: z.string().min(1).max(255).openapi({ - description: "The name of the source.", - example: "radiookapi.com", - }), - url: z.url().openapi({ - description: "The URL of the source.", - example: "https://techcrunch.com", - }), -}); - -export const getSourceSchema = z.object({ - id: idSchema, -}); - -export const getSourcePublicationGraphSchema = z.object({ - days: z - .number() - .optional() - .openapi({ - default: 60, - description: "", - example: 60, - }) - .openapi({ - description: "The number of days to include in the publication graph.", - }), - id: idSchema, - range: z - .object({ - from: z.date().openapi({ - description: "The start date of the range.", - }), - to: z.date().openapi({ - description: "The end date of the range.", - }), - }) - .optional() - .openapi({ - description: "The date range for the publication graph.", - }), -}); - -export const getSourceCategorySharesSchema = z.object({ - id: idSchema, - limit: z.number().int().min(1).max(100).optional().openapi({ - default: 10, - description: "The maximum number of categories to return.", - example: 10, - }), -}); - -export const updateSourceSchema = z.object({ - credibility: credibilitySchema.optional(), - description: createSourceSchema.shape.description, - displayName: createSourceSchema.shape.displayName, - id: idSchema, - name: createSourceSchema.shape.name.optional(), -}); - -export const createSourceResponseSchema = z.object({ - id: idSchema, - ...createSourceSchema.shape, -}); diff --git a/apps/api/src/trpc/routers/articles.ts b/apps/api/src/trpc/routers/articles.ts index 05e0e00..f2cb4ef 100644 --- a/apps/api/src/trpc/routers/articles.ts +++ b/apps/api/src/trpc/routers/articles.ts @@ -1,10 +1,34 @@ -import { createArticle } from "@basango/db/queries"; +import { + createArticle, + getArticles, + getArticlesPublicationGraph, + getArticlesSourceDistribution, +} from "@basango/db/queries"; +import { + createArticleSchema, + getArticlesSchema, + getDistributionsSchema, + getPublicationsSchema, +} from "@basango/domain/models"; -import { createArticleSchema } from "#api/schemas/articles"; import { createTRPCRouter, protectedProcedure } from "#api/trpc/init"; export const articlesRouter = createTRPCRouter({ create: protectedProcedure.input(createArticleSchema).mutation(async ({ ctx, input }) => { return createArticle(ctx.db, input); }), + + getPublications: protectedProcedure.input(getPublicationsSchema).query(async ({ ctx, input }) => { + return getArticlesPublicationGraph(ctx.db, input); + }), + + getSourceDistribution: protectedProcedure + .input(getDistributionsSchema) + .query(async ({ ctx, input }) => { + return getArticlesSourceDistribution(ctx.db, input); + }), + + list: protectedProcedure.input(getArticlesSchema).query(async ({ ctx, input }) => { + return getArticles(ctx.db, input); + }), }); diff --git a/apps/api/src/trpc/routers/sources.ts b/apps/api/src/trpc/routers/sources.ts index 654e937..73295c2 100644 --- a/apps/api/src/trpc/routers/sources.ts +++ b/apps/api/src/trpc/routers/sources.ts @@ -6,14 +6,14 @@ import { getSources, updateSource, } from "@basango/db/queries"; - import { createSourceSchema, - getSourceCategorySharesSchema, - getSourcePublicationGraphSchema, + getCategorySharesSchema, + getPublicationsSchema, getSourceSchema, updateSourceSchema, -} from "#api/schemas/sources"; +} from "@basango/domain/models"; + import { createTRPCRouter, protectedProcedure } from "#api/trpc/init"; export const sourcesRouter = createTRPCRouter({ @@ -21,24 +21,24 @@ export const sourcesRouter = createTRPCRouter({ return createSource(ctx.db, input); }), - get: protectedProcedure.query(async ({ ctx }) => getSources(ctx.db)), - getById: protectedProcedure.input(getSourceSchema).query(async ({ ctx, input }) => { return getSourceById(ctx.db, input.id); }), getCategoryShares: protectedProcedure - .input(getSourceCategorySharesSchema) + .input(getCategorySharesSchema) .query(async ({ ctx, input }) => { return getSourceCategoryShares(ctx.db, input); }), getPublicationGraph: protectedProcedure - .input(getSourcePublicationGraphSchema) + .input(getPublicationsSchema) .query(async ({ ctx, input }) => { return getSourcePublicationGraph(ctx.db, input); }), + list: protectedProcedure.query(async ({ ctx }) => getSources(ctx.db)), + update: protectedProcedure.input(updateSourceSchema).mutation(async ({ ctx, input }) => { return updateSource(ctx.db, input); }), diff --git a/apps/api/src/utils/parse.ts b/apps/api/src/utils/parse.ts deleted file mode 100644 index c8750ee..0000000 --- a/apps/api/src/utils/parse.ts +++ /dev/null @@ -1,4 +0,0 @@ -export function parseInputValue(value?: string | null) { - if (value === null) return null; - return value ? JSON.parse(value) : undefined; -} diff --git a/apps/api/src/utils/response.ts b/apps/api/src/utils/response.ts index 9af2bd3..fa2e95a 100644 --- a/apps/api/src/utils/response.ts +++ b/apps/api/src/utils/response.ts @@ -1,20 +1,19 @@ import { logger } from "@basango/logger"; import { z } from "zod"; -export function validateResponse(data: unknown, schema: z.ZodSchema) { +type ValidationSuccess = z.infer; + +export function validateResponse( + data: unknown, + schema: T, +): ValidationSuccess { const result = schema.safeParse(data); if (!result.success) { const cause = z.treeifyError(result.error); + logger.error({ cause }, "Response validation failed"); - logger.error(cause); - - return { - data: null, - details: cause, - error: "Response validation failed", - success: false, - }; + throw new Error("Response validation failed"); } return result.data; diff --git a/apps/api/tsconfig.json b/apps/api/tsconfig.json index 58bae02..3f49a41 100644 --- a/apps/api/tsconfig.json +++ b/apps/api/tsconfig.json @@ -1,14 +1,4 @@ { - "compilerOptions": { - "baseUrl": ".", - "composite": true, - "incremental": true, - "paths": { - "@basango/db": ["../../packages/db/src/*"], - "#api/*": ["./src/*"], - "#db/*": ["../../packages/db/src/*"] - } - }, "extends": "@basango/tsconfig/base.json", "include": ["src"] } diff --git a/apps/crawler/README.md b/apps/crawler/README.md index ffb0bfd..bed9154 100644 --- a/apps/crawler/README.md +++ b/apps/crawler/README.md @@ -166,8 +166,8 @@ basango/apps/crawler/ │ │ ├── crawler.ts # Main crawler interface │ │ └── persistence.ts # Data persistence layer │ ├── scripts/ # CLI entry points -│ │ ├── crawl.ts # Sync crawling script -│ │ ├── queue.ts # Async job scheduling +│ │ ├── sync.ts # Sync crawling script +│ │ ├── async.ts # Async job scheduling │ │ ├── worker.ts # Worker process │ │ └── utils.ts # CLI utilities │ └── __tests__/ # Test files diff --git a/apps/crawler/package.json b/apps/crawler/package.json index 6f56b14..867763d 100644 --- a/apps/crawler/package.json +++ b/apps/crawler/package.json @@ -1,5 +1,6 @@ { "dependencies": { + "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "bullmq": "^4.18.3", @@ -13,13 +14,16 @@ "@types/turndown": "^5.0.6", "vitest": "^4.0.7" }, + "imports": { + "#crawler/*": "./src/*" + }, "name": "@basango/crawler", "private": true, "scripts": { "clean": "rm -rf .turbo node_modules", - "crawler:async": "bun run src/scripts/queue.ts", - "crawler:push": "bun run src/scripts/sync.ts", - "crawler:sync": "bun run src/scripts/crawl.ts", + "crawler:async": "bun run src/scripts/async.ts", + "crawler:push": "bun run src/scripts/push.ts", + "crawler:sync": "bun run src/scripts/sync.ts", "crawler:worker": "bun run src/scripts/worker.ts", "dev": "bun run src/scripts/worker.ts", "test": "vitest --run", diff --git a/apps/crawler/src/config.ts b/apps/crawler/src/config.ts index a94969b..cf80ca7 100644 --- a/apps/crawler/src/config.ts +++ b/apps/crawler/src/config.ts @@ -1,15 +1,14 @@ import path from "node:path"; -import { loadConfig as defineConfig } from "@devscast/config"; -import { z } from "zod"; - import { - DateRangeSchema, HtmlSourceConfigSchema, PageRangeSchema, + TimestampRangeSchema, UpdateDirectionSchema, WordPressSourceConfigSchema, -} from "#crawler/schema"; +} from "@basango/domain/crawler"; +import { loadConfig as defineConfig } from "@devscast/config"; +import { z } from "zod"; export const PROJECT_DIR = path.resolve(__dirname, "../"); @@ -43,7 +42,7 @@ export const PipelineConfigSchema = z.object({ }), crawler: z.object({ category: z.string().optional(), - dateRange: DateRangeSchema.optional(), + dateRange: TimestampRangeSchema.optional(), direction: UpdateDirectionSchema.default("forward"), isUpdate: z.boolean().default(false), maxWorkers: z.number().int().positive().default(5), diff --git a/apps/crawler/src/constants.ts b/apps/crawler/src/constants.ts deleted file mode 100644 index f53f54c..0000000 --- a/apps/crawler/src/constants.ts +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Default date format used for parsing and formatting dates. - * Follows the "yyyy-LL-dd" pattern (e.g., "2024-06-15"). - */ -export const DEFAULT_DATE_FORMAT = "yyyy-LL-dd"; - -/** - * Default User-Agent string for HTTP requests made by the crawler. - * Some websites may block requests with missing or generic User-Agent headers. - */ -export const DEFAULT_USER_AGENT = "Basango/0.1 (+https://github.com/bernard-ng/basango)"; - -/** - * User-Agent string used for Open Graph requests. - * Some services require a specific User-Agent to return Open Graph data. - */ -export const OPEN_GRAPH_USER_AGENT = "facebookexternalhit/1.1"; - -/** - * HTTP status codes considered transient errors. - * Used for retry logic in HTTP clients. - */ -export const TRANSIENT_HTTP_STATUSES = [429, 500, 502, 503, 504]; - -/** - * Default header name for Retry-After responses. - * Used when handling rate limiting. - */ -export const DEFAULT_RETRY_AFTER_HEADER = "retry-after"; diff --git a/apps/crawler/src/http/http-client.ts b/apps/crawler/src/http/http-client.ts index d97c37d..b90ebf5 100644 --- a/apps/crawler/src/http/http-client.ts +++ b/apps/crawler/src/http/http-client.ts @@ -1,11 +1,12 @@ import { setTimeout as delay } from "node:timers/promises"; -import { FetchClientConfig } from "#crawler/config"; import { DEFAULT_RETRY_AFTER_HEADER, + DEFAULT_TRANSIENT_HTTP_STATUSES, DEFAULT_USER_AGENT, - TRANSIENT_HTTP_STATUSES, -} from "#crawler/constants"; +} from "@basango/domain/constants"; + +import { FetchClientConfig } from "#crawler/config"; import { UserAgents } from "#crawler/http/user-agent"; export type HttpHeaders = Record; @@ -187,7 +188,7 @@ export class SyncHttpClient extends BaseHttpClient { const response = await this.fetchImpl(target, init); if ( - TRANSIENT_HTTP_STATUSES.includes(response.status as number) && + DEFAULT_TRANSIENT_HTTP_STATUSES.includes(response.status as number) && attempt < this.config.maxRetries ) { await this.maybeDelay(attempt, response, retryAfterHeader); diff --git a/apps/crawler/src/http/open-graph.ts b/apps/crawler/src/http/open-graph.ts index 008da50..c1c5a8f 100644 --- a/apps/crawler/src/http/open-graph.ts +++ b/apps/crawler/src/http/open-graph.ts @@ -1,10 +1,10 @@ +import { DEFAULT_OPEN_GRAPH_USER_AGENT } from "@basango/domain/constants"; +import { ArticleMetadata } from "@basango/domain/models"; import { parse } from "node-html-parser"; import { config } from "#crawler/config"; -import { OPEN_GRAPH_USER_AGENT } from "#crawler/constants"; import { SyncHttpClient } from "#crawler/http/http-client"; import { UserAgents } from "#crawler/http/user-agent"; -import { ArticleMetadata } from "#crawler/schema"; import { createAbsoluteUrl } from "#crawler/utils"; /** @@ -45,7 +45,7 @@ export class OpenGraph { constructor() { const settings = config.fetch.client; - const provider = new UserAgents(true, OPEN_GRAPH_USER_AGENT); + const provider = new UserAgents(true, DEFAULT_OPEN_GRAPH_USER_AGENT); this.client = new SyncHttpClient(settings, { defaultHeaders: { "User-Agent": provider.og() }, diff --git a/apps/crawler/src/http/user-agent.ts b/apps/crawler/src/http/user-agent.ts index 6b66037..3027e0c 100644 --- a/apps/crawler/src/http/user-agent.ts +++ b/apps/crawler/src/http/user-agent.ts @@ -1,4 +1,4 @@ -import { DEFAULT_USER_AGENT, OPEN_GRAPH_USER_AGENT } from "#crawler/constants"; +import { DEFAULT_OPEN_GRAPH_USER_AGENT, DEFAULT_USER_AGENT } from "@basango/domain/constants"; /** * User agent provider with optional rotation. @@ -30,7 +30,7 @@ export class UserAgents { } og(): string { - return OPEN_GRAPH_USER_AGENT; + return DEFAULT_OPEN_GRAPH_USER_AGENT; } get(): string { diff --git a/apps/crawler/src/process/async/handlers.ts b/apps/crawler/src/process/async/handlers.ts index a9ed9ad..b92c034 100644 --- a/apps/crawler/src/process/async/handlers.ts +++ b/apps/crawler/src/process/async/handlers.ts @@ -1,8 +1,8 @@ +import type { HtmlSourceConfig, WordPressSourceConfig } from "@basango/domain/crawler"; +import { Article } from "@basango/domain/models"; import { logger } from "@basango/logger"; -import { config, env } from "#crawler/config"; import { UnsupportedSourceKindError } from "#crawler/errors"; -import { SyncHttpClient } from "#crawler/http/http-client"; import { QueueManager, createQueueManager } from "#crawler/process/async/queue"; import { DetailsTaskPayload, @@ -12,11 +12,11 @@ import { import { createPersistors, resolveCrawlerConfig } from "#crawler/process/crawler"; import { HtmlCrawler } from "#crawler/process/parsers/html"; import { WordPressCrawler } from "#crawler/process/parsers/wordpress"; -import { Article, HtmlSourceConfig, WordPressSourceConfig } from "#crawler/schema"; +import { forward } from "#crawler/process/persistence"; import { - createDateRange, - formatDateRange, + createTimestampRange, formatPageRange, + formatTimestampRange, resolveSourceConfig, } from "#crawler/utils"; @@ -45,7 +45,7 @@ export const collectHtmlListing = async ( await manager.enqueueArticle({ category: payload.category, - dateRange: createDateRange(payload.dateRange), + dateRange: createTimestampRange(payload.dateRange), sourceId: payload.sourceId, url, } as DetailsTaskPayload); @@ -85,7 +85,7 @@ export const collectWordPressListing = async ( await manager.enqueueArticle({ category: payload.category, data, - dateRange: createDateRange(payload.dateRange), + dateRange: createTimestampRange(payload.dateRange), sourceId: payload.sourceId, url, } as DetailsTaskPayload); @@ -106,7 +106,7 @@ export const collectArticle = async ( const source = resolveSourceConfig(payload.sourceId); const settings = resolveCrawlerConfig(source, { category: payload.category, - dateRange: payload.dateRange ? formatDateRange(payload.dateRange) : undefined, + dateRange: payload.dateRange ? formatTimestampRange(payload.dateRange) : undefined, pageRange: payload.pageRange ? formatPageRange(payload.pageRange) : undefined, sourceId: payload.sourceId, }); @@ -141,19 +141,7 @@ export const forwardForProcessing = async (payload: ProcessingTaskPayload): Prom try { logger.info({ article: payload.article.title }, "Forwarding article to API"); - - const client = new SyncHttpClient(config.fetch.client); - const response = await client.post(env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"), { - headers: { - Authorization: `${env("BASANGO_CRAWLER_TOKEN")}`, - }, - json: payload.article, - }); - - if (response.ok) { - const data = await response.json(); - logger.info({ ...data }, "Article successfully forwarded to API"); - } + await forward(payload.article); } catch (error) { logger.error({ error }, "Failed to forward article to API"); } diff --git a/apps/crawler/src/process/async/schemas.ts b/apps/crawler/src/process/async/schemas.ts index 356fb7c..b492f16 100644 --- a/apps/crawler/src/process/async/schemas.ts +++ b/apps/crawler/src/process/async/schemas.ts @@ -1,7 +1,7 @@ +import { PageRangeSchema, TimestampRangeSchema } from "@basango/domain/crawler"; +import { articleSchema } from "@basango/domain/models"; import { z } from "zod"; -import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "#crawler/schema"; - export const ListingTaskPayloadSchema = z.object({ category: z.string().optional(), dateRange: z.string().optional(), @@ -12,7 +12,7 @@ export const ListingTaskPayloadSchema = z.object({ export const DetailsTaskPayloadSchema = z.object({ category: z.string().optional(), data: z.any().optional(), - dateRange: DateRangeSchema.optional(), + dateRange: TimestampRangeSchema.optional(), page: z.number().int().nonnegative().optional(), pageRange: PageRangeSchema.optional(), sourceId: z.string(), @@ -20,7 +20,7 @@ export const DetailsTaskPayloadSchema = z.object({ }); export const ProcessingTaskPayloadSchema = z.object({ - article: ArticleSchema, + article: articleSchema, sourceId: z.string(), }); diff --git a/apps/crawler/src/process/crawler.ts b/apps/crawler/src/process/crawler.ts index a2ea2dd..dd5595d 100644 --- a/apps/crawler/src/process/crawler.ts +++ b/apps/crawler/src/process/crawler.ts @@ -1,9 +1,9 @@ +import type { AnySourceConfig } from "@basango/domain/crawler"; import logger from "@basango/logger"; import { FetchCrawlerConfig, config } from "#crawler/config"; import { JsonlPersistor, Persistor } from "#crawler/process/persistence"; -import { AnySourceConfig } from "#crawler/schema"; -import { createDateRange, createPageRange } from "#crawler/utils"; +import { createPageRange, createTimestampRange } from "#crawler/utils"; export interface CrawlingOptions { sourceId: string; @@ -19,7 +19,7 @@ export const resolveCrawlerConfig = ( return { ...config.fetch.crawler, category: options.category, - dateRange: createDateRange(options.dateRange), + dateRange: createTimestampRange(options.dateRange), pageRange: createPageRange(options.pageRange), source, }; diff --git a/apps/crawler/src/process/parsers/base.ts b/apps/crawler/src/process/parsers/base.ts index 5a7a5cd..1e19510 100644 --- a/apps/crawler/src/process/parsers/base.ts +++ b/apps/crawler/src/process/parsers/base.ts @@ -1,10 +1,11 @@ +import type { AnySourceConfig } from "@basango/domain/crawler"; +import { Article } from "@basango/domain/models"; import { HTMLElement, parse as parseHtml } from "node-html-parser"; import { FetchCrawlerConfig, config } from "#crawler/config"; import { SyncHttpClient } from "#crawler/http/http-client"; import { OpenGraph } from "#crawler/http/open-graph"; import type { Persistor } from "#crawler/process/persistence"; -import { AnySourceConfig, Article } from "#crawler/schema"; export interface CrawlerOptions { persistors?: Persistor[]; @@ -97,7 +98,10 @@ export abstract class BaseCrawler { * @param record - The article record * @param url - The URL to fetch Open Graph data from */ - protected async enrichWithOpenGraph(record: Article, url?: string): Promise
{ + protected async enrichWithOpenGraph( + record: Partial
, + url?: string, + ): Promise> { try { const metadata = url ? await this.openGraph.consumeUrl(url) : undefined; return { ...record, metadata }; diff --git a/apps/crawler/src/process/parsers/html.ts b/apps/crawler/src/process/parsers/html.ts index dea1da5..826970f 100644 --- a/apps/crawler/src/process/parsers/html.ts +++ b/apps/crawler/src/process/parsers/html.ts @@ -1,3 +1,5 @@ +import type { HtmlSourceConfig, TimestampRange } from "@basango/domain/crawler"; +import { Article } from "@basango/domain/models"; import { logger } from "@basango/logger"; import { fromUnixTime, getUnixTime, isMatch as isDateMatch, parse } from "date-fns"; import { HTMLElement } from "node-html-parser"; @@ -12,7 +14,6 @@ import { } from "#crawler/errors"; import { BaseCrawler } from "#crawler/process/parsers/base"; import { Persistor, persist } from "#crawler/process/persistence"; -import { Article, DateRange, HtmlSourceConfig } from "#crawler/schema"; import { createAbsoluteUrl, isTimestampInRange } from "#crawler/utils"; const md = new TurndownService({ @@ -106,7 +107,7 @@ export class HtmlCrawler extends BaseCrawler { * @param html - The HTML content of the article * @param dateRange - Optional date range for filtering */ - async fetchOne(html: string, dateRange?: DateRange | null): Promise
{ + async fetchOne(html: string, dateRange?: TimestampRange | null): Promise> { const root = this.parseHtml(html); const selectors = this.source.sourceSelectors; diff --git a/apps/crawler/src/process/parsers/wordpress.ts b/apps/crawler/src/process/parsers/wordpress.ts index dd48871..a7a2584 100644 --- a/apps/crawler/src/process/parsers/wordpress.ts +++ b/apps/crawler/src/process/parsers/wordpress.ts @@ -1,3 +1,5 @@ +import type { PageRange, TimestampRange, WordPressSourceConfig } from "@basango/domain/crawler"; +import { Article } from "@basango/domain/models"; import { logger } from "@basango/logger"; import { fromUnixTime } from "date-fns"; import TurndownService from "turndown"; @@ -10,7 +12,6 @@ import { } from "#crawler/errors"; import { BaseCrawler } from "#crawler/process/parsers/base"; import { Persistor, persist } from "#crawler/process/persistence"; -import { Article, DateRange, PageRange, WordPressSourceConfig } from "#crawler/schema"; import { isTimestampInRange } from "#crawler/utils"; const md = new TurndownService({ @@ -107,7 +108,7 @@ export class WordPressCrawler extends BaseCrawler { * @param input - Decoded JSON object or raw JSON string * @param dateRange - Optional date range for filtering */ - async fetchOne(input: unknown, dateRange?: DateRange | null): Promise
{ + async fetchOne(input: unknown, dateRange?: TimestampRange | null): Promise
{ // input can be the decoded JSON object or a raw JSON string let data: WordPressPost | null = null; try { diff --git a/apps/crawler/src/process/persistence.ts b/apps/crawler/src/process/persistence.ts index 43e47ed..ded7dd7 100644 --- a/apps/crawler/src/process/persistence.ts +++ b/apps/crawler/src/process/persistence.ts @@ -1,13 +1,15 @@ import fs from "node:fs"; import path from "node:path"; +import type { Article } from "@basango/domain/models"; import { md5 } from "@basango/encryption"; import logger from "@basango/logger"; -import { Article } from "#crawler/schema"; +import { config, env } from "#crawler/config"; +import { HttpError, SyncHttpClient } from "#crawler/http/http-client"; export interface Persistor { - persist(record: Article): Promise | void; + persist(record: Partial
): Promise | void; close: () => Promise | void; } @@ -35,17 +37,20 @@ const sanitize = (text: string): string => { return s.trim(); }; -export const persist = async (payload: Article, persistors: Persistor[]): Promise
=> { +export const persist = async ( + payload: Partial
, + persistors: Persistor[], +): Promise
=> { const data = { ...payload, - body: sanitize(payload.body), - categories: payload.categories.map(sanitize), - title: sanitize(payload.title), + body: sanitize(payload.body!), + categories: payload.categories!.map(sanitize), + title: sanitize(payload.title!), }; const article = { ...data, - hash: md5(data.link), + hash: md5(data.link!), } as Article; for (const persistor of persistors) { @@ -60,6 +65,37 @@ export const persist = async (payload: Article, persistors: Persistor[]): Promis return article; }; +export const forward = async (payload: Partial
): Promise => { + const client = new SyncHttpClient(config.fetch.client); + const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"); + const token = env("BASANGO_CRAWLER_TOKEN"); + + try { + const response = await client.post(endpoint, { + headers: { + Authorization: `${token}`, + }, + json: payload, + }); + + if (response.ok) { + const data = await response.json(); + logger.info({ ...data }, "Article forwarded"); + return; + } + + logger.error({ status: response.status, url: payload.link }, "Forwarding failed"); + } catch (error) { + if (error instanceof HttpError) { + const data = await error.response.json(); + logger.error({ ...data, url: payload.link }, "Error forwarding article"); + return; + } + + logger.error({ error, url: payload.link }, "Error forwarding article"); + } +}; + export class JsonlPersistor implements Persistor { private readonly filePath: string; private readonly encoding: BufferEncoding; @@ -78,15 +114,15 @@ export class JsonlPersistor implements Persistor { } } - persist(record: Article): Promise { + persist(payload: Partial
): Promise { if (this.closed) { return Promise.reject(new Error("Persistor has been closed")); } - const payload = `${JSON.stringify(record)}\n`; + const record = `${JSON.stringify(payload)}\n`; this.pending = this.pending.then(async () => { - fs.appendFileSync(this.filePath, payload, { encoding: this.encoding }); + fs.appendFileSync(this.filePath, record, { encoding: this.encoding }); }); return this.pending; diff --git a/apps/crawler/src/schema.ts b/apps/crawler/src/schema.ts deleted file mode 100644 index 17a8308..0000000 --- a/apps/crawler/src/schema.ts +++ /dev/null @@ -1,130 +0,0 @@ -import { z } from "zod"; - -export const UpdateDirectionSchema = z.enum(["forward", "backward"]); -export const SourceKindSchema = z.enum(["wordpress", "html"]); - -export const DateRangeSchema = z - .object({ - end: z.number().int(), - start: z.number().int(), - }) - .superRefine((value, ctx) => { - if (value.start === 0 || value.end === 0) { - ctx.addIssue({ - code: "custom", - message: "Timestamp cannot be zero", - }); - } - if (value.end < value.start) { - ctx.addIssue({ - code: "custom", - message: "End timestamp must be greater than or equal to start", - }); - } - }); - -export const PageRangeSchema = z - .object({ - end: z.number().int().min(0), - start: z.number().int().min(0), - }) - .superRefine((value, ctx) => { - if (value.end < value.start) { - ctx.addIssue({ - code: "custom", - message: "End page must be greater than or equal to start page", - }); - } - }); - -export const PageRangeSpecSchema = z - .string() - .regex(/^[0-9]+:[0-9]+$/, "Invalid page range format. Use start:end") - .transform((spec) => { - const [startText, endText] = spec.split(":"); - return { - end: Number.parseInt(String(endText), 10), - start: Number.parseInt(String(startText), 10), - }; - }); - -export const DateRangeSpecSchema = z - .string() - .regex(/.+:.+/, "Expected start:end format") - .transform((spec) => { - const [startRaw, endRaw] = spec.split(":"); - return { endRaw: String(endRaw), startRaw: String(startRaw) }; - }); - -export const SourceDateSchema = z.object({ - format: z.string().default("yyyy-LL-dd HH:mm"), -}); - -const BaseSourceSchema = z.object({ - categories: z.array(z.string()).default([]), - requiresDetails: z.boolean().default(false), - requiresRateLimit: z.boolean().default(false), - sourceDate: SourceDateSchema, - sourceId: z.string(), - sourceKind: SourceKindSchema, - sourceUrl: z.url(), - supportsCategories: z.boolean().default(false), -}); - -export const HtmlSourceConfigSchema = BaseSourceSchema.extend({ - paginationTemplate: z.string(), - sourceKind: z.literal("html"), - sourceSelectors: z.object({ - articleBody: z.string(), - articleCategories: z.string().optional(), - articleDate: z.string(), - articleLink: z.string(), - articles: z.string(), - articleTitle: z.string(), - pagination: z.string().default("ul.pagination > li a"), - }), -}); - -export const WordPressSourceConfigSchema = BaseSourceSchema.extend({ - sourceDate: SourceDateSchema.default(SourceDateSchema.parse({ format: "yyyy-LL-dd'T'HH:mm:ss" })), - sourceKind: z.literal("wordpress"), -}); - -export const ArticleMetadataSchema = z.object({ - description: z.string().optional(), - image: z.string().optional(), - title: z.string().optional(), - url: z.url().optional(), -}); - -export const ArticleTokenStatisticsSchema = z.object({ - body: z.number().int().nonnegative().default(0), - categories: z.number().int().nonnegative().default(0), - excerpt: z.number().int().nonnegative().default(0), - title: z.number().int().nonnegative().default(0), -}); - -export const ArticleSchema = z.object({ - body: z.string(), - categories: z.array(z.string()).default([]), - hash: z.string().optional(), - link: z.url(), - metadata: ArticleMetadataSchema.optional(), - publishedAt: z.date(), - sourceId: z.string(), - title: z.string(), - tokenStatistics: ArticleTokenStatisticsSchema.optional(), -}); - -export type ArticleMetadata = z.infer; -export type Article = z.infer; -export type DateRange = z.infer; -export type PageRange = z.infer; -export type HtmlSourceConfig = z.infer; -export type WordPressSourceConfig = z.infer; -export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig; - -export interface CreateDateRangeOptions { - format?: string; - separator?: string; -} diff --git a/apps/crawler/src/scripts/queue.ts b/apps/crawler/src/scripts/async.ts similarity index 96% rename from apps/crawler/src/scripts/queue.ts rename to apps/crawler/src/scripts/async.ts index f754e19..683b934 100644 --- a/apps/crawler/src/scripts/queue.ts +++ b/apps/crawler/src/scripts/async.ts @@ -1,3 +1,5 @@ +#! /usr/bin/env bun + import { logger } from "@basango/logger"; import { scheduleAsyncCrawl } from "#crawler/process/async/tasks"; diff --git a/apps/crawler/src/scripts/crawl.ts b/apps/crawler/src/scripts/crawl.ts deleted file mode 100644 index 2911eff..0000000 --- a/apps/crawler/src/scripts/crawl.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { logger } from "@basango/logger"; - -import { runSyncCrawl } from "#crawler/process/sync/tasks"; -import { CRAWLING_USAGE, parseCrawlingCliArgs } from "#crawler/scripts/utils"; - -const main = async (): Promise => { - const options = parseCrawlingCliArgs(); - - if (options.sourceId === undefined) { - console.log(CRAWLING_USAGE); - process.exitCode = 1; - return; - } - - try { - await runSyncCrawl({ ...options }); - } catch (error) { - logger.error({ error }, "Synchronous crawl failed"); - process.exitCode = 1; - } -}; - -void main(); diff --git a/apps/crawler/src/scripts/push.ts b/apps/crawler/src/scripts/push.ts new file mode 100644 index 0000000..3bd10a8 --- /dev/null +++ b/apps/crawler/src/scripts/push.ts @@ -0,0 +1,79 @@ +#! /usr/bin/env bun +import fs from "node:fs"; +import path from "node:path"; +import { createInterface } from "node:readline"; +import { parseArgs } from "node:util"; + +import type { Article } from "@basango/domain/models"; +import { logger } from "@basango/logger"; + +import { config } from "#crawler/config"; +import { forward } from "#crawler/process/persistence"; + +const USAGE = ` + Usage: bun run crawler:sync -- --sourceId +`; + +const parseCliArgs = (): { sourceId?: string } => { + const { values } = parseArgs({ + options: { + sourceId: { type: "string" }, + }, + }); + return values as { sourceId?: string }; +}; + +const main = async (): Promise => { + const { sourceId } = parseCliArgs(); + if (!sourceId) { + console.log(USAGE); + process.exitCode = 1; + return; + } + + const filePath = path.join(config.paths.data, `${sourceId}.jsonl`); + + if (!fs.existsSync(filePath)) { + logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found"); + process.exitCode = 1; + return; + } + + const stat = fs.statSync(filePath); + if (stat.size === 0) { + logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL is empty"); + process.exitCode = 1; + return; + } + + logger.info({ filePath, sourceId }, "Syncing articles from JSONL to backend"); + + const stream = fs.createReadStream(filePath, { encoding: "utf-8" }); + const rl = createInterface({ crlfDelay: Infinity, input: stream }); + + let count = 0; + try { + for await (const raw of rl) { + const line = raw.trim(); + if (!line) continue; + + try { + const article = JSON.parse(line) as Article & { publishedAt: string }; + await forward({ + ...article, + publishedAt: new Date(article.publishedAt), + }); + + count += 1; + } catch (error) { + logger.error({ error, linePreview: line.slice(0, 100) }, "Invalid JSONL line"); + } + } + } finally { + rl.close(); + } + + logger.info({ forwarded: count, sourceId }, "Sync completed"); +}; + +void main(); diff --git a/apps/crawler/src/scripts/sync.ts b/apps/crawler/src/scripts/sync.ts index 22bc632..1143406 100644 --- a/apps/crawler/src/scripts/sync.ts +++ b/apps/crawler/src/scripts/sync.ts @@ -1,109 +1,25 @@ -import fs from "node:fs"; -import path from "node:path"; -import { createInterface } from "node:readline"; -import { parseArgs } from "node:util"; +#! /usr/bin/env bun import { logger } from "@basango/logger"; -import { config, env } from "#crawler/config"; -import { HttpError, SyncHttpClient } from "#crawler/http/http-client"; -import type { Article } from "#crawler/schema"; - -const USAGE = ` - Usage: bun run crawler:sync -- --sourceId -`; - -const parseCliArgs = (): { sourceId?: string } => { - const { values } = parseArgs({ - options: { - sourceId: { type: "string" }, - }, - }); - return values as { sourceId?: string }; -}; - -const forwardArticle = async (article: Article): Promise => { - const client = new SyncHttpClient(config.fetch.client); - const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"); - const token = env("BASANGO_CRAWLER_TOKEN"); - - try { - const response = await client.post(endpoint, { - headers: { - Authorization: `${token}`, - }, - json: article, - }); - - if (response.ok) { - const data = await response.json(); - logger.info({ ...data }, "Article forwarded"); - return; - } - - logger.error({ link: article.link, status: response.status }, "Forwarding failed"); - } catch (error) { - if (error instanceof HttpError) { - const data = await error.response.json(); - logger.error({ ...data, link: article.link }, "Error forwarding article"); - return; - } - - logger.error({ error, link: article.link }, "Error forwarding article"); - } -}; +import { runSyncCrawl } from "#crawler/process/sync/tasks"; +import { CRAWLING_USAGE, parseCrawlingCliArgs } from "#crawler/scripts/utils"; const main = async (): Promise => { - const { sourceId } = parseCliArgs(); - if (!sourceId) { - console.log(USAGE); + const options = parseCrawlingCliArgs(); + + if (options.sourceId === undefined) { + console.log(CRAWLING_USAGE); process.exitCode = 1; return; } - const filePath = path.join(config.paths.data, `${sourceId}.jsonl`); - - if (!fs.existsSync(filePath)) { - logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found"); - process.exitCode = 1; - return; - } - - const stat = fs.statSync(filePath); - if (stat.size === 0) { - logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL is empty"); - process.exitCode = 1; - return; - } - - logger.info({ filePath, sourceId }, "Syncing articles from JSONL to backend"); - - const stream = fs.createReadStream(filePath, { encoding: "utf-8" }); - const rl = createInterface({ crlfDelay: Infinity, input: stream }); - - let count = 0; try { - for await (const raw of rl) { - const line = raw.trim(); - if (!line) continue; - - try { - const article = JSON.parse(line) as Article & { publishedAt: string }; - await forwardArticle({ - ...article, - publishedAt: new Date(article.publishedAt), - }); - - count += 1; - } catch (error) { - logger.error({ error, linePreview: line.slice(0, 100) }, "Invalid JSONL line"); - } - } - } finally { - rl.close(); + await runSyncCrawl({ ...options }); + } catch (error) { + logger.error({ error }, "Synchronous crawl failed"); + process.exitCode = 1; } - - logger.info({ forwarded: count, sourceId }, "Sync completed"); }; void main(); diff --git a/apps/crawler/src/scripts/worker.ts b/apps/crawler/src/scripts/worker.ts index b9564f4..fbdf9b4 100644 --- a/apps/crawler/src/scripts/worker.ts +++ b/apps/crawler/src/scripts/worker.ts @@ -1,3 +1,5 @@ +#! /usr/bin/env bun + import { logger } from "@basango/logger"; import { createQueueManager } from "#crawler/process/async/queue"; diff --git a/apps/crawler/src/utils.ts b/apps/crawler/src/utils.ts index 459fc78..8cb827c 100644 --- a/apps/crawler/src/utils.ts +++ b/apps/crawler/src/utils.ts @@ -1,20 +1,19 @@ -import { format, getUnixTime, isMatch, parse } from "date-fns"; -import type { RedisOptions } from "ioredis"; - -import { config } from "#crawler/config"; -import { DEFAULT_DATE_FORMAT } from "#crawler/constants"; +import { DEFAULT_DATE_FORMAT } from "@basango/domain/constants"; import { AnySourceConfig, - CreateDateRangeOptions, - DateRange, - DateRangeSchema, - DateRangeSpecSchema, + DateSpecSchema, HtmlSourceConfig, PageRange, PageRangeSchema, - PageRangeSpecSchema, + PageSpecSchema, + TimestampRange, + TimestampRangeSchema, WordPressSourceConfig, -} from "#crawler/schema"; +} from "@basango/domain/crawler"; +import { format, fromUnixTime, getUnixTime, isMatch, parse } from "date-fns"; +import type { RedisOptions } from "ioredis"; + +import { config } from "#crawler/config"; /** * Resolve a source configuration by its ID. @@ -71,7 +70,7 @@ const parseDate = (value: string, format: string): Date => { */ export const createPageRange = (spec: string | undefined): PageRange | undefined => { if (!spec) return undefined; - const parsed = PageRangeSpecSchema.parse(spec); + const parsed = PageSpecSchema.parse(spec); return PageRangeSchema.parse(parsed); }; @@ -80,10 +79,13 @@ export const createPageRange = (spec: string | undefined): PageRange | undefined * @param spec - The date range specification (e.g., "2023-01-01:2023-12-31") * @param options - Options for date range creation */ -export const createDateRange = ( +export const createTimestampRange = ( spec: string | undefined, - options: CreateDateRangeOptions = {}, -): DateRange | undefined => { + options: { + format?: string; + separator?: string; + } = {}, +): TimestampRange | undefined => { if (!spec) return undefined; const { format = DEFAULT_DATE_FORMAT, separator = ":" } = options; if (!separator) { @@ -91,7 +93,7 @@ export const createDateRange = ( } const normalized = spec.replace(separator, ":"); - const parsedSpec = DateRangeSpecSchema.parse(normalized); + const parsedSpec = DateSpecSchema.parse(normalized); const startDate = parseDate(parsedSpec.startRaw, format); const endDate = parseDate(parsedSpec.endRaw, format); @@ -101,7 +103,7 @@ export const createDateRange = ( start: getUnixTime(startDate), }; - return DateRangeSchema.parse(range); + return TimestampRangeSchema.parse(range); }; /** @@ -109,9 +111,9 @@ export const createDateRange = ( * @param range - The date range * @param fmt - The date format (default: DEFAULT_DATE_FORMAT) */ -export const formatDateRange = (range: DateRange, fmt = DEFAULT_DATE_FORMAT): string => { - const start = format(new Date(range.start * 1000), fmt); - const end = format(new Date(range.end * 1000), fmt); +export const formatTimestampRange = (range: TimestampRange, fmt = DEFAULT_DATE_FORMAT): string => { + const start = format(fromUnixTime(range.start), fmt); + const end = format(fromUnixTime(range.end), fmt); return `${start}:${end}`; }; @@ -128,7 +130,7 @@ export const formatPageRange = (range: PageRange): string => { * @param range - The date range * @param timestamp - The timestamp to check */ -export const isTimestampInRange = (range: DateRange, timestamp: number): boolean => { +export const isTimestampInRange = (range: TimestampRange, timestamp: number): boolean => { return range.start <= timestamp && timestamp <= range.end; }; @@ -145,17 +147,3 @@ export const createAbsoluteUrl = (base: string, href: string): string => { return href; } }; - -/** - * extract the domain name from a URL. - * @param url - The URL string - * @returns The domain name or null if invalid URL - */ -export const extractDomainName = (url: string): string | null => { - try { - const parsed = new URL(url); - return parsed.hostname; - } catch { - return null; - } -}; diff --git a/apps/crawler/tsconfig.json b/apps/crawler/tsconfig.json index 06f4264..3f49a41 100644 --- a/apps/crawler/tsconfig.json +++ b/apps/crawler/tsconfig.json @@ -1,11 +1,4 @@ { - "compilerOptions": { - "baseUrl": ".", - "paths": { - "#crawler/*": ["./src/*"] - } - }, "extends": "@basango/tsconfig/base.json", - "include": ["src"], - "references": [] + "include": ["src"] } diff --git a/apps/dashboard/next.config.ts b/apps/dashboard/next.config.ts index c04a64f..dfe9060 100644 --- a/apps/dashboard/next.config.ts +++ b/apps/dashboard/next.config.ts @@ -16,7 +16,7 @@ const nextConfig = { }, poweredByHeader: false, reactStrictMode: true, - transpilePackages: ["@basango/ui", "@basango/api"], + transpilePackages: ["@basango/ui", "@basango/api", "@basango/domain"], }; export default nextConfig; diff --git a/apps/dashboard/package.json b/apps/dashboard/package.json index 138d49f..e4f6431 100644 --- a/apps/dashboard/package.json +++ b/apps/dashboard/package.json @@ -1,6 +1,7 @@ { "dependencies": { "@basango/api": "workspace:*", + "@basango/domain": "workspace:*", "@basango/ui": "workspace:*", "@date-fns/tz": "^1.4.1", "@hookform/resolvers": "^5.2.2", @@ -18,10 +19,12 @@ "next-themes": "^0.4.6", "nuqs": "^2.7.3", "react": "catalog:", + "react-day-picker": "^9.11.1", "react-dom": "catalog:", "react-hook-form": "^7.66.0", "recharts": "^3.4.1", "server-only": "^0.0.1", + "sonner": "^2.0.7", "superjson": "^2.2.5", "zod": "^4.1.12", "zustand": "^5.0.8" @@ -34,12 +37,15 @@ "@types/react-dom": "catalog:", "typescript": "catalog:" }, + "imports": { + "#dashboard/*": "./src/*" + }, "name": "@basango/dashboard", "private": true, "scripts": { "build": "next build", + "clean": "rm -rf .next node_modules", "dev": "next dev", - "lint": "eslint", "start": "next start" } } diff --git a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/articles/page.tsx b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/articles/page.tsx index 1d7e544..786b089 100644 --- a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/articles/page.tsx +++ b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/articles/page.tsx @@ -1,22 +1,21 @@ import { Metadata } from "next"; +import { ArticlesFeed } from "#dashboard/components/articles-feed"; import { PageLayout } from "#dashboard/components/shell/page-layout"; +import { HydrateClient, batchPrefetch, trpc } from "#dashboard/trpc/server"; export const metadata: Metadata = { title: "Articles | Basango Dashboard", }; export default function Page() { + batchPrefetch([trpc.articles.list.infiniteQueryOptions({ limit: 12 })]); + return ( - -
-
-
-
-
-
-
-
- + + + + + ); } diff --git a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/dashboard/page.tsx b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/dashboard/page.tsx new file mode 100644 index 0000000..dd1f5a5 --- /dev/null +++ b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/dashboard/page.tsx @@ -0,0 +1,30 @@ +import { Metadata } from "next"; + +import { PublicationGraphChart } from "#dashboard/components/charts/articles/publication-graph-chart"; +import { SourceDistributionChart } from "#dashboard/components/charts/articles/source-distribution-chart"; +import { PageLayout } from "#dashboard/components/shell/page-layout"; +import { HydrateClient, batchPrefetch, trpc } from "#dashboard/trpc/server"; + +export const metadata: Metadata = { + title: "Dashboard | Basango", +}; + +export default async function Page() { + batchPrefetch([ + trpc.articles.getPublicationGraph.queryOptions({}), + trpc.articles.getSourceDistribution.queryOptions({ limit: 8 }), + ]); + + return ( + + +
+
+ +
+ +
+
+
+ ); +} diff --git a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/[id]/page.tsx b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/[id]/page.tsx index 6f05550..a34b7d7 100644 --- a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/[id]/page.tsx +++ b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/[id]/page.tsx @@ -1,9 +1,12 @@ +import { Source } from "@basango/domain/models/sources"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@basango/ui/components/tabs"; import { Metadata } from "next"; -import { SourceCategorySharesChart } from "#dashboard/components/charts/source-category-shares-chart"; -import { SourcePublicationgGraphChart } from "#dashboard/components/charts/source-publication-graph-chart"; +import { ArticlesFeed } from "#dashboard/components/articles-feed"; +import { CategorySharesChart } from "#dashboard/components/charts/sources/category-shares-chart"; +import { PublicationGraphChart } from "#dashboard/components/charts/sources/publication-graph-chart"; import { PageLayout } from "#dashboard/components/shell/page-layout"; +import { SourceDetailsTab } from "#dashboard/components/source-details-tab"; import { HydrateClient, batchPrefetch, getQueryClient, trpc } from "#dashboard/trpc/server"; export const metadata: Metadata = { @@ -16,11 +19,12 @@ export default async function Page({ params }: { params: Promise<{ id: string }> batchPrefetch([ trpc.sources.getById.queryOptions({ id }), - trpc.sources.getCategoryShares.queryOptions({ id }), + trpc.sources.getCategoryShares.queryOptions({ id, limit: 10 }), trpc.sources.getPublicationGraph.queryOptions({ id }), + trpc.articles.list.infiniteQueryOptions({ limit: 12, sourceId: id }), ]); - const source = await queryClient.fetchQuery(trpc.sources.getById.queryOptions({ id })); + const source: Source = await queryClient.fetchQuery(trpc.sources.getById.queryOptions({ id })); return ( @@ -29,20 +33,17 @@ export default async function Page({ params }: { params: Promise<{ id: string }> Overview Articles + Details - - + + -
-
-
-
-
-
-
-
+ + + + diff --git a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/page.tsx b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/page.tsx index 22671c6..058212b 100644 --- a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/page.tsx +++ b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/sources/page.tsx @@ -1,33 +1,44 @@ -import { RouterOutputs } from "@basango/api/trpc/routers/_app"; +import { Source } from "@basango/domain/models/sources"; +import { Button } from "@basango/ui/components/button"; +import { PlusIcon } from "lucide-react"; import { Metadata } from "next"; import Link from "next/link"; +import { SourceCreateModal } from "#dashboard/components/modals/source-create-modal"; import { PageLayout } from "#dashboard/components/shell/page-layout"; -import { SourceCard } from "#dashboard/components/widgets/source-card"; +import { SourceCard } from "#dashboard/components/source-card"; import { HydrateClient, getQueryClient, prefetch, trpc } from "#dashboard/trpc/server"; export const metadata: Metadata = { title: "Sources | Basango Dashboard", }; -type SourceDetails = RouterOutputs["sources"]["get"][number]; - export default async function Page() { const queryClient = getQueryClient(); prefetch(trpc.sources.get.queryOptions()); - const sources = await queryClient.fetchQuery(trpc.sources.get.queryOptions()); + const sources: Source[] = await queryClient.fetchQuery(trpc.sources.get.queryOptions()); return ( +
+ + + +
- {sources.map((source: SourceDetails) => ( + {sources.map((source: Source) => ( ))}
+ +
); diff --git a/apps/dashboard/src/components/article-card.tsx b/apps/dashboard/src/components/article-card.tsx new file mode 100644 index 0000000..741b76c --- /dev/null +++ b/apps/dashboard/src/components/article-card.tsx @@ -0,0 +1,138 @@ +"use client"; + +import type { RouterOutputs } from "@basango/api/trpc/routers/_app"; +import { Badge } from "@basango/ui/components/badge"; +import { Button } from "@basango/ui/components/button"; +import { Card, CardContent, CardFooter, CardHeader, CardTitle } from "@basango/ui/components/card"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@basango/ui/components/dropdown-menu"; +import { Skeleton } from "@basango/ui/components/skeleton"; +import { ExternalLink, Link2, MoreHorizontal } from "lucide-react"; +import Link from "next/link"; +import * as React from "react"; + +import { formatDate, formatRelativeTime } from "#dashboard/utils/utils"; + +type Article = RouterOutputs["articles"]["list"]["items"][number]; + +type ArticleCardProps = { + article: Article; +}; + +function getDescription(article: Article) { + return ( + article.metadata?.description ?? + article.excerpt ?? + "No description was provided for this article." + ); +} + +export function ArticleCard({ article }: ArticleCardProps) { + const [copied, setCopied] = React.useState(false); + const description = getDescription(article); + const imageUrl = article.image ?? undefined; + + const copyLink = React.useCallback(async () => { + try { + await navigator.clipboard.writeText(article.link); + setCopied(true); + setTimeout(() => setCopied(false), 1500); + } catch { + setCopied(false); + } + }, [article.link]); + + return ( + + +
+ {imageUrl ? ( + {article.title} + ) : ( +
+ No image available +
+ )} +
+ {article.sourceName} +
+
+ + + + + + + + + Open original + + + + + {copied ? "Copied!" : "Copy link"} + + + +
+
+
+ + + + {article.title} + + +

{description}

+
+ +
+ + {formatDate(article.publishedAt.toISOString(), "PP", false)} + + {formatRelativeTime(new Date(article.publishedAt))} +
+ {article.readingTime} min +
+
+ ); +} + +export function ArticleCardSkeleton() { + return ( + +
+ +
+ + + + + + + + + +
+ ); +} diff --git a/apps/dashboard/src/components/articles-feed.tsx b/apps/dashboard/src/components/articles-feed.tsx new file mode 100644 index 0000000..e12239e --- /dev/null +++ b/apps/dashboard/src/components/articles-feed.tsx @@ -0,0 +1,94 @@ +"use client"; + +import { Alert, AlertDescription, AlertTitle } from "@basango/ui/components/alert"; +import { Button } from "@basango/ui/components/button"; +import { useInfiniteQuery } from "@tanstack/react-query"; +import { Loader2 } from "lucide-react"; +import * as React from "react"; + +import { useTRPC } from "#dashboard/trpc/client"; + +import { ArticleCard, ArticleCardSkeleton } from "./article-card"; + +type ArticlesTableProps = { + sourceId?: string; +}; + +const PLACEHOLDER_COUNT = 8; + +export function ArticlesFeed({ sourceId }: ArticlesTableProps) { + const trpc = useTRPC(); + + const query = useInfiniteQuery( + trpc.articles.list.infiniteQueryOptions( + { + limit: 12, + sourceId, + }, + { + getNextPageParam: (lastPage) => (lastPage.meta.hasNext ? lastPage.meta.nextCursor : null), + initialCursor: null, + }, + ), + ); + + const articles = React.useMemo( + () => query.data?.pages.flatMap((page) => page.items) ?? [], + [query.data], + ); + + const isInitialLoading = query.isLoading && !query.data; + + return ( +
+ {query.isError && ( + + Unable to load articles + + {query.error.message ?? "An unexpected error occurred while fetching articles."} + + + )} + + {isInitialLoading ? ( +
+ {Array.from({ length: PLACEHOLDER_COUNT }).map((_, index) => ( + + ))} +
+ ) : articles.length > 0 ? ( +
+ {articles.map((article) => ( + + ))} +
+ ) : ( +
+ No articles match your filters yet. +
+ )} + +
+ {query.hasNextPage ? ( + + ) : articles.length > 0 ? ( +

You're all caught up.

+ ) : null} +
+
+ ); +} diff --git a/apps/dashboard/src/components/charts/area-chart.tsx b/apps/dashboard/src/components/charts/area-chart.tsx new file mode 100644 index 0000000..ffbbfae --- /dev/null +++ b/apps/dashboard/src/components/charts/area-chart.tsx @@ -0,0 +1,50 @@ +"use client"; + +import { ChartTooltip, ChartTooltipContent } from "@basango/ui/components/chart"; +import { Area, AreaChart as BaseAreachart, CartesianGrid, XAxis, YAxis } from "recharts"; + +import { formatDate, formatNumber } from "#dashboard/utils/utils"; + +type AreaChartProps = { + data: unknown; +}; + +export function AreaChart({ data }: AreaChartProps) { + return ( + + + formatDate(String(value))} + tickLine={false} + tickMargin={8} + /> + formatNumber(Number(value))} + tickLine={false} + width={48} + /> + formatDate(String(value), "PP")} + nameKey="count" + /> + } + cursor={{ stroke: "var(--border)", strokeDasharray: "4 4" }} + /> + + + ); +} diff --git a/apps/dashboard/src/components/charts/articles/publication-graph-chart.tsx b/apps/dashboard/src/components/charts/articles/publication-graph-chart.tsx new file mode 100644 index 0000000..e763c4c --- /dev/null +++ b/apps/dashboard/src/components/charts/articles/publication-graph-chart.tsx @@ -0,0 +1,81 @@ +// @ts-nocheck +"use client"; + +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@basango/ui/components/card"; +import { ChartConfig, ChartContainer } from "@basango/ui/components/chart"; +import { useQuery } from "@tanstack/react-query"; + +import { AreaChart } from "#dashboard/components/charts/area-chart"; +import { + ChartPeriodPicker, + useChartPeriodFilter, +} from "#dashboard/components/charts/chart-filters"; +import { Status } from "#dashboard/components/charts/status"; +import { useTRPC } from "#dashboard/trpc/client"; +import { formatNumber } from "#dashboard/utils/utils"; + +const chartConfig = { + count: { + color: "var(--chart-1)", + label: "Articles", + }, +} satisfies ChartConfig; + +export function PublicationGraphChart() { + const trpc = useTRPC(); + const period = useChartPeriodFilter(); + + const { data } = useQuery( + trpc.articles.getPublicationGraph.queryOptions({ + range: period.range, + }), + ); + + return ( + + +
+ {formatNumber(data?.meta?.current)} articles + +
+ + vs previous +
+
+
+
+ +
+
+ + + + + + + + + + Showing total crawled articles for the selected period, +
+ + {formatNumber(data?.meta?.current)} vs {formatNumber(data?.meta?.previous)} articles + + + period + {data?.meta?.previous === 0 && data?.meta?.current === 0 && ( + (no articles yet) + )} +
+
+
+
+ ); +} diff --git a/apps/dashboard/src/components/charts/articles/source-distribution-chart.tsx b/apps/dashboard/src/components/charts/articles/source-distribution-chart.tsx new file mode 100644 index 0000000..cd97a2a --- /dev/null +++ b/apps/dashboard/src/components/charts/articles/source-distribution-chart.tsx @@ -0,0 +1,82 @@ +// @ts-nocheck +"use client"; + +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@basango/ui/components/card"; +import { + ChartConfig, + ChartContainer, + ChartTooltip, + ChartTooltipContent, +} from "@basango/ui/components/chart"; +import { useQuery } from "@tanstack/react-query"; +import { Cell, Pie, PieChart } from "recharts"; + +import { useTRPC } from "#dashboard/trpc/client"; +import { getColorFromName } from "#dashboard/utils/categories"; +import { formatNumber } from "#dashboard/utils/utils"; + +const chartConfig = {} satisfies ChartConfig; + +export function SourceDistributionChart() { + const trpc = useTRPC(); + + const { data } = useQuery( + trpc.articles.getSourceDistribution.queryOptions({ + limit: 10, + }), + ); + + return ( + + +
+ Source distribution + Share of articles by source +
+
+ + + + } /> + + {data?.items.map((item) => ( + + ))} + + + + +
    + {data?.items.map((item) => ( +
  • + + + {item.name} + + + {formatNumber(item.count)} ({item.percentage}%) + +
  • + ))} +
+
+
+ ); +} diff --git a/apps/dashboard/src/components/charts/bar-chart.tsx b/apps/dashboard/src/components/charts/bar-chart.tsx new file mode 100644 index 0000000..37116fb --- /dev/null +++ b/apps/dashboard/src/components/charts/bar-chart.tsx @@ -0,0 +1,28 @@ +"use client"; + +import { ChartTooltip, ChartTooltipContent } from "@basango/ui/components/chart"; +import { Bar, BarChart as BaseBarChart, CartesianGrid, XAxis } from "recharts"; + +import { formatDate } from "#dashboard/utils/utils"; + +type BarChartProps = { + data: unknown; +}; + +export function BarChart({ data }: BarChartProps) { + return ( + + + formatDate(value)} + tickLine={false} + tickMargin={8} + /> + } cursor={false} /> + + + ); +} diff --git a/apps/dashboard/src/components/charts/chart-filters.tsx b/apps/dashboard/src/components/charts/chart-filters.tsx new file mode 100644 index 0000000..4bda641 --- /dev/null +++ b/apps/dashboard/src/components/charts/chart-filters.tsx @@ -0,0 +1,259 @@ +"use client"; + +import { Button } from "@basango/ui/components/button"; +import { Calendar } from "@basango/ui/components/calendar"; +import { Popover, PopoverContent, PopoverTrigger } from "@basango/ui/components/popover"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@basango/ui/components/select"; +import { ToggleGroup, ToggleGroupItem } from "@basango/ui/components/toggle-group"; +import { differenceInCalendarDays, format, subDays } from "date-fns"; +import { CalendarIcon, ChevronDown } from "lucide-react"; +import { parseAsInteger, parseAsIsoDate, useQueryStates } from "nuqs"; +import { useMemo, useState } from "react"; +import { DateRange } from "react-day-picker"; + +const DEFAULT_PERIOD_OPTIONS = [ + { label: "Last 7 days", value: 7 }, + { label: "Last 30 days", value: 30 }, + { label: "Last 3 months", value: 90 }, + { label: "Last 6 months", value: 180 }, + { label: "Last 12 months", value: 365 }, +] as const; + +type DateInput = number | Date | null | undefined; + +const createRangeFromDays = (days: number): DateRange => { + const end = new Date(); + + return { + from: subDays(end, Math.max(days - 1, 0)), + to: end, + }; +}; + +const DEFAULT_LIMIT_OPTIONS = [ + { label: "Top 10", value: 10 }, + { label: "Top 20", value: 20 }, + { label: "Top 50", value: 50 }, +] as const; + +type ChartPeriodFilterOptions = { + defaultDays?: number; + paramKey?: string; +}; + +type ChartLimitFilterOptions = { + defaultValue?: number; + paramKey?: string; +}; + +export function useChartPeriodFilter(options: ChartPeriodFilterOptions = {}) { + const { defaultDays = 30, paramKey = "chartPeriod" } = options; + const fromKey = `${paramKey}From`; + const toKey = `${paramKey}To`; + + const defaultRange = useMemo(() => createRangeFromDays(defaultDays), [defaultDays]); + + const [state, setState] = useQueryStates({ + [fromKey]: parseAsIsoDate, + [toKey]: parseAsIsoDate, + }); + + const from = state[fromKey] ?? undefined; + const to = state[toKey] ?? undefined; + + const selectedRange = useMemo(() => { + if (from || to) { + return { from, to }; + } + + return undefined; + }, [from, to]); + + const range = useMemo(() => { + if (from && to) { + return { from, to }; + } + + return defaultRange; + }, [defaultRange, from, to]); + + return { + defaultDays, + keys: { fromKey, toKey }, + range, + selectedRange, + setState, + }; +} + +export function useChartLimitFilter(options: ChartLimitFilterOptions = {}) { + const { defaultValue = 10, paramKey = "chartLimit" } = options; + const [state, setState] = useQueryStates({ + [paramKey]: parseAsInteger.withDefault(defaultValue), + }); + + const limit = state[paramKey]; + + return { + limit, + setLimit: (value: number) => { + setState({ [paramKey]: value }); + }, + }; +} + +type ChartPeriodPickerProps = ChartPeriodFilterOptions & { + options?: ReadonlyArray<{ label: string; value: number }>; +}; + +export function ChartPeriodPicker({ + defaultDays = 30, + options = DEFAULT_PERIOD_OPTIONS, + paramKey = "chartPeriod", + disabled, +}: ChartPeriodPickerProps & { disabled?: boolean }) { + const { range, selectedRange, keys, setState } = useChartPeriodFilter({ defaultDays, paramKey }); + const [open, setOpen] = useState(false); + + const selectValue = useMemo(() => { + if (!range?.from || !range?.to) { + return "custom"; + } + + const diff = differenceInCalendarDays(range.to, range.from) + 1; + const match = options.find((option) => option.value === diff); + + return match ? String(match.value) : "custom"; + }, [options, range]); + + const handlePresetChange = (value: string) => { + if (value === "custom") { + return; + } + + const presetRange = createRangeFromDays(Number(value)); + + setState({ + [keys.fromKey]: presetRange.from ?? null, + [keys.toKey]: presetRange.to ?? null, + }); + }; + + const handleCalendarSelect = (value: DateRange | undefined) => { + if (value?.from && value?.to) { + setState({ + [keys.fromKey]: value.from, + [keys.toKey]: value.to, + }); + } else { + setState({ + [keys.fromKey]: null, + [keys.toKey]: null, + }); + } + }; + + const displayLabel = + formatDateRange(range) ?? + options.find((option) => String(option.value) === selectValue)?.label ?? + "Select range"; + + return ( + + + + + + + + + +
+ + +
+
+
+ ); +} + +type ChartLimitToggleProps = ChartLimitFilterOptions & { + options?: ReadonlyArray<{ label: string; value: number }>; +}; + +export function ChartLimitToggle({ + defaultValue = 10, + options = DEFAULT_LIMIT_OPTIONS, + paramKey = "chartLimit", +}: ChartLimitToggleProps) { + const { limit, setLimit } = useChartLimitFilter({ defaultValue, paramKey }); + + return ( + { + if (value) { + setLimit(Number(value)); + } + }} + type="single" + value={String(limit)} + variant="outline" + > + {options.map((option) => ( + + {option.label} + + ))} + + ); +} + +function formatDateRange(range?: { from?: DateInput; to?: DateInput }) { + if (!range?.from || !range?.to) return null; + + return `${format(range.from, "MMM d, yyyy")} - ${format(range.to, "MMM d, yyyy")}`; +} diff --git a/apps/dashboard/src/components/charts/source-publication-graph-chart.tsx b/apps/dashboard/src/components/charts/source-publication-graph-chart.tsx deleted file mode 100644 index aef2ac6..0000000 --- a/apps/dashboard/src/components/charts/source-publication-graph-chart.tsx +++ /dev/null @@ -1,109 +0,0 @@ -"use client"; - -import { - Card, - CardContent, - CardDescription, - CardHeader, - CardTitle, -} from "@basango/ui/components/card"; -import { - ChartConfig, - ChartContainer, - ChartTooltip, - ChartTooltipContent, -} from "@basango/ui/components/chart"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@basango/ui/components/select"; -import { useQuery } from "@tanstack/react-query"; -import * as React from "react"; -import { Bar, BarChart, CartesianGrid, XAxis } from "recharts"; - -import { useTRPC } from "#dashboard/trpc/client"; -import { formatDate } from "#dashboard/utils/utils"; - -const chartConfig = { - count: { - color: "var(--chart-2)", - label: "Articles", - }, - views: { - label: "Articles", - }, -} satisfies ChartConfig; - -type Props = { - sourceId: string; -}; - -export function SourcePublicationgGraphChart({ sourceId }: Props) { - const trpc = useTRPC(); - const [timeRange, setTimeRange] = React.useState("30"); - - const { data } = useQuery( - trpc.sources.getPublicationGraph.queryOptions({ - days: Number(timeRange), - id: sourceId, - }), - ); - - return ( - - -
- Publication Graph - - Showing total crawled articles for the last {timeRange} days - -
- -
- - - - - formatDate(value)} - tickLine={false} - tickMargin={8} - /> - } cursor={false} /> - - - - -
- ); -} diff --git a/apps/dashboard/src/components/charts/source-category-shares-chart.tsx b/apps/dashboard/src/components/charts/sources/category-shares-chart.tsx similarity index 63% rename from apps/dashboard/src/components/charts/source-category-shares-chart.tsx rename to apps/dashboard/src/components/charts/sources/category-shares-chart.tsx index e676074..b7a3dc6 100644 --- a/apps/dashboard/src/components/charts/source-category-shares-chart.tsx +++ b/apps/dashboard/src/components/charts/sources/category-shares-chart.tsx @@ -1,3 +1,4 @@ +// @ts-nocheck "use client"; import { @@ -7,11 +8,10 @@ import { CardHeader, CardTitle, } from "@basango/ui/components/card"; -import { ToggleGroup, ToggleGroupItem } from "@basango/ui/components/toggle-group"; import { useQuery } from "@tanstack/react-query"; -import { useState } from "react"; import { Bar, BarChart, Legend, ResponsiveContainer, XAxis, YAxis } from "recharts"; +import { ChartLimitToggle, useChartLimitFilter } from "#dashboard/components/charts/chart-filters"; import { useTRPC } from "#dashboard/trpc/client"; import { getColorFromName } from "#dashboard/utils/categories"; @@ -19,30 +19,24 @@ type Props = { sourceId: string; }; -export function SourceCategorySharesChart({ sourceId }: Props) { +export function CategorySharesChart({ sourceId }: Props) { const trpc = useTRPC(); - const [limit, setLimit] = useState(10); + const { limit } = useChartLimitFilter(); const { data } = useQuery( trpc.sources.getCategoryShares.queryOptions({ id: sourceId, - limit: limit, + limit, }), ); - const items = data?.items ?? []; const chartData = [ { name: "Total", - ...Object.fromEntries(items.map((item) => [item.category, item.count])), + ...Object.fromEntries(data?.items.map((item) => [item.category, item.count])), }, ]; - const barData = items.map((item) => ({ - fill: getColorFromName(item.category), - name: item.category, - })); - return ( @@ -50,17 +44,7 @@ export function SourceCategorySharesChart({ sourceId }: Props) { Category Shares showing top {limit} categories for this source
- setLimit(Number(v))} - type="single" - value={String(limit)} - variant="outline" - > - Top 10 - Top 20 - Top 50 - +
@@ -80,15 +64,15 @@ export function SourceCategorySharesChart({ sourceId }: Props) { /> - {barData.map((entry, index) => ( + {data?.items.map((entry, index) => ( diff --git a/apps/dashboard/src/components/charts/sources/publication-graph-chart.tsx b/apps/dashboard/src/components/charts/sources/publication-graph-chart.tsx new file mode 100644 index 0000000..4189366 --- /dev/null +++ b/apps/dashboard/src/components/charts/sources/publication-graph-chart.tsx @@ -0,0 +1,62 @@ +// @ts-nocheck +"use client"; + +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@basango/ui/components/card"; +import { ChartConfig, ChartContainer } from "@basango/ui/components/chart"; +import { useQuery } from "@tanstack/react-query"; + +import { AreaChart } from "#dashboard/components/charts/area-chart"; +import { + ChartPeriodPicker, + useChartPeriodFilter, +} from "#dashboard/components/charts/chart-filters"; +import { useTRPC } from "#dashboard/trpc/client"; + +const chartConfig = { + count: { + color: "var(--chart-2)", + label: "Articles", + }, + views: { + label: "Articles", + }, +} satisfies ChartConfig; + +type Props = { + sourceId: string; +}; + +export function PublicationGraphChart({ sourceId }: Props) { + const trpc = useTRPC(); + const period = useChartPeriodFilter(); + + const { data } = useQuery( + trpc.sources.getPublicationGraph.queryOptions({ + id: sourceId, + range: period.range, + }), + ); + + return ( + + +
+ Publication Graph + Showing total crawled articles for the selected period +
+ +
+ + + + + +
+ ); +} diff --git a/apps/dashboard/src/components/charts/status.tsx b/apps/dashboard/src/components/charts/status.tsx new file mode 100644 index 0000000..2c4e003 --- /dev/null +++ b/apps/dashboard/src/components/charts/status.tsx @@ -0,0 +1,38 @@ +import { Delta } from "@basango/domain/models"; +import { cn } from "@basango/ui/lib/utils"; +import { ArrowDownRightIcon, ArrowUpRightIcon } from "lucide-react"; + +import { formatNumber } from "#dashboard/utils/utils"; + +type StatusProps = { + value: Delta | undefined; + percentage?: boolean; + icons?: boolean; + sign?: boolean; +}; + +export function Status({ value, percentage = false, icons = true, sign = true }: StatusProps) { + if (value === undefined) { + return 0; + } + + const color = value.delta >= 0 ? "text-emerald-600" : "text-rose-600"; + const icon = + value.delta >= 0 ? ( + + ) : ( + + ); + + return ( + <> + {icons && icon} + + {sign && value.sign} + {percentage + ? `${formatNumber(Math.abs(value.percentage))}%` + : formatNumber(Math.abs(value.delta))} + + + ); +} diff --git a/apps/dashboard/src/components/forms/source-edit-form.tsx b/apps/dashboard/src/components/forms/source-edit-form.tsx new file mode 100644 index 0000000..f30e19e --- /dev/null +++ b/apps/dashboard/src/components/forms/source-edit-form.tsx @@ -0,0 +1,176 @@ +"use client"; + +import type { RouterOutputs } from "@basango/api/trpc/routers/_app"; +import { updateSourceSchema } from "@basango/domain/models/sources"; +import { + Field, + FieldDescription, + FieldError, + FieldGroup, + FieldLabel, +} from "@basango/ui/components/field"; +import { Input } from "@basango/ui/components/input"; +import { SubmitButton } from "@basango/ui/components/submit-button"; +import { Textarea } from "@basango/ui/components/textarea"; +import { useMutation, useQueryClient } from "@tanstack/react-query"; +import { useCallback, useEffect } from "react"; +import { Controller } from "react-hook-form"; +import { toast } from "sonner"; +import { z } from "zod"; + +import { useZodForm } from "#dashboard/hooks/use-zod-form"; +import { useTRPC } from "#dashboard/trpc/client"; + +const baseSchema = updateSourceSchema.pick({ + description: true, + displayName: true, + id: true, + name: true, +}); + +const sourceEditSchema = z.object({ + description: z + .string() + .optional() + .transform((value) => { + const trimmed = value?.trim(); + return trimmed ? trimmed : undefined; + }) + .pipe(baseSchema.shape.description), + displayName: z + .string() + .optional() + .transform((value) => { + const trimmed = value?.trim(); + return trimmed ? trimmed : undefined; + }) + .pipe(baseSchema.shape.displayName), + id: baseSchema.shape.id, + name: z.string().trim().pipe(baseSchema.shape.name), +}); + +type SourceEditValues = z.infer; + +type Props = { + source: RouterOutputs["sources"]["getById"]; +}; + +export function SourceEditForm({ source }: Props) { + const trpc = useTRPC(); + const queryClient = useQueryClient(); + + const form = useZodForm(sourceEditSchema, { + defaultValues: { + description: source.description ?? "", + displayName: source.displayName ?? "", + id: source.id, + name: source.name, + }, + mode: "onChange", + }); + + useEffect(() => { + form.reset({ + description: source.description ?? "", + displayName: source.displayName ?? "", + id: source.id, + name: source.name, + }); + }, [form, source.description, source.displayName, source.id, source.name]); + + const mutation = useMutation( + trpc.sources.update.mutationOptions({ + onError(error) { + toast.error(error.message ?? "Unable to update source."); + }, + onSuccess() { + toast.success("Source updated successfully."); + void Promise.all([ + queryClient.invalidateQueries({ + queryKey: trpc.sources.list.queryKey(), + }), + queryClient.invalidateQueries({ + queryKey: trpc.sources.getById.queryKey({ id: source.id }), + }), + ]); + }, + }), + ); + + const handleSubmit = useCallback( + (values: SourceEditValues) => { + mutation.mutate(values); + }, + [mutation], + ); + + return ( +
+ + ( + + Name + + Internal identifier of the source. + {fieldState.invalid && } + + )} + /> + + ( + + Display name + + Optional friendly label shown in the dashboard. + {fieldState.invalid && } + + )} + /> + + ( + + Description +