diff --git a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/layout.tsx b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/layout.tsx index 9db22c3..2adffb3 100644 --- a/apps/dashboard/src/app/[locale]/(app)/(sidebar)/layout.tsx +++ b/apps/dashboard/src/app/[locale]/(app)/(sidebar)/layout.tsx @@ -1,4 +1,4 @@ -import { SidebarInset, SidebarProvider } from "@basango/ui/components/sidebar"; +import { SidebarProvider } from "@basango/ui/components/sidebar"; import { AppSidebar } from "#dashboard/components/sidebar/app-sidebar"; import { HydrateClient } from "#dashboard/trpc/server"; @@ -9,7 +9,7 @@ export default async function Layout({ children }: { children: React.ReactNode } - {children} + {children} ); diff --git a/apps/dashboard/src/components/shell/page-header.tsx b/apps/dashboard/src/components/shell/page-header.tsx index 3846cd3..5206cd6 100644 --- a/apps/dashboard/src/components/shell/page-header.tsx +++ b/apps/dashboard/src/components/shell/page-header.tsx @@ -4,11 +4,9 @@ import { BreadcrumbList, BreadcrumbPage, } from "@basango/ui/components/breadcrumb"; -import { Separator } from "@basango/ui/components/separator"; import { SidebarTrigger } from "@basango/ui/components/sidebar"; -import { Show } from "#dashboard/components/shell/show"; -import { ThemeToggle } from "#dashboard/components/theme-toggle"; +import { ThemeToggle } from "../theme-toggle"; type Props = { title?: string | React.ReactNode; @@ -16,20 +14,16 @@ type Props = { export function PageHeader({ title }: Props) { return ( -
+
- - - - - - - {title} - - - - + + + + {title} + + +
diff --git a/apps/dashboard/src/components/shell/page-layout.tsx b/apps/dashboard/src/components/shell/page-layout.tsx index 110b519..fed1e03 100644 --- a/apps/dashboard/src/components/shell/page-layout.tsx +++ b/apps/dashboard/src/components/shell/page-layout.tsx @@ -11,9 +11,13 @@ interface PageProps { export const PageLayout = (props: React.PropsWithChildren) => { const { title, header = , children } = props; return ( -
- {header} - {children} +
+
+ {header} +
+ {children} +
+
); }; diff --git a/bun.lock b/bun.lock index 7f0b81c..0a16672 100644 --- a/bun.lock +++ b/bun.lock @@ -143,10 +143,13 @@ "packages/db": { "name": "@basango/db", "dependencies": { + "@ai-sdk/google": "^2.0.44", + "@ai-sdk/openai": "^2.0.75", "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "@date-fns/utc": "^2.1.1", + "ai": "^5.0.105", "date-fns": "catalog:", "drizzle-orm": "^0.44.7", "mysql2": "^3.15.3", @@ -257,6 +260,16 @@ "packages": { "@0no-co/graphql.web": ["@0no-co/graphql.web@1.2.0", "", { "peerDependencies": { "graphql": "^14.0.0 || ^15.0.0 || ^16.0.0" }, "optionalPeers": ["graphql"] }, "sha512-/1iHy9TTr63gE1YcR5idjx8UREz1s0kFhydf3bBLCXyqjhkIc6igAzTOx3zPifCwFR87tsh/4Pa9cNts6d2otw=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.17", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-oVAG6q72KsjKlrYdLhWjRO7rcqAR8CjokAbYuyVZoCO4Uh2PH/VzZoxZav71w2ipwlXhHCNaInGYWNs889MMDA=="], + + "@ai-sdk/google": ["@ai-sdk/google@2.0.44", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-c5dck36FjqiVoeeMJQLTEmUheoURcGTU/nBT6iJu8/nZiKFT/y8pD85KMDRB7RerRYaaQOtslR2d6/5PditiRw=="], + + "@ai-sdk/openai": ["@ai-sdk/openai@2.0.75", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ThDHg1+Jes7S0AOXa01EyLBSzZiZwzB5do9vAlufNkoiRHGTH1BmoShrCyci/TUsg4ky1HwbK4hPK+Z0isiE6g=="], + + "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], + + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.18", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ypv1xXMsgGcNKUP+hglKqtdDuMg68nWHucPPAhIENrbFAI+xCHiqPVN8Zllxyv1TNZwGWUghPxJXU+Mqps0YRQ=="], + "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], "@asteasolutions/zod-to-openapi": ["@asteasolutions/zod-to-openapi@8.1.0", "", { "dependencies": { "openapi3-ts": "^4.1.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-tQFxVs05J/6QXXqIzj6rTRk3nj1HFs4pe+uThwE95jL5II2JfpVXkK+CqkO7aT0Do5AYqO6LDrKpleLUFXgY+g=="], @@ -1083,6 +1096,8 @@ "@urql/exchange-retry": ["@urql/exchange-retry@1.3.2", "", { "dependencies": { "@urql/core": "^5.1.2", "wonka": "^6.3.2" } }, "sha512-TQMCz2pFJMfpNxmSfX1VSfTjwUIFx/mL+p1bnfM1xjjdla7Z+KnGMW/EhFbpckp3LyWAH4PgOsMwOMnIN+MBFg=="], + "@vercel/oidc": ["@vercel/oidc@3.0.5", "", {}, "sha512-fnYhv671l+eTTp48gB4zEsTW/YtRgRPnkI2nT7x6qw5rkI1Lq2hTmQIpHPgyThI0znLK+vX2n9XxKdXZ7BUbbw=="], + "@xmldom/xmldom": ["@xmldom/xmldom@0.8.11", "", {}, "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw=="], "JSONStream": ["JSONStream@1.3.5", "", { "dependencies": { "jsonparse": "^1.2.0", "through": ">=2.2.7 <3" }, "bin": { "JSONStream": "./bin.js" } }, "sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ=="], @@ -1099,6 +1114,8 @@ "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], + "ai": ["ai@5.0.105", "", { "dependencies": { "@ai-sdk/gateway": "2.0.17", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-waQZAvv44KYzys6S3l25ti2jcSuJnkyWFTliSKy3swASL6w6ttPxJTm80d+v9sLWoIxrqE3OwhTJbweNp065fg=="], + "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], "anser": ["anser@1.4.10", "", {}, "sha512-hCv9AqTQ8ycjpSd3upOJd7vFwW1JaoYQ7tpham03GJ1ca8/65rqn0RpaWpItOAd6ylW9wAw6luXYPJIyPFVOww=="], @@ -1457,6 +1474,8 @@ "eventemitter3": ["eventemitter3@5.0.1", "", {}, "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA=="], + "eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="], + "exec-async": ["exec-async@2.2.0", "", {}, "sha512-87OpwcEiMia/DeiKFzaQNBNFeN3XkkpYIh9FyOqq5mS2oKv3CBE67PXoEKcr6nodWdXNogTiQ0jE2NGuoffXPw=="], "execa": ["execa@5.1.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^6.0.0", "human-signals": "^2.1.0", "is-stream": "^2.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^4.0.1", "onetime": "^5.1.2", "signal-exit": "^3.0.3", "strip-final-newline": "^2.0.0" } }, "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg=="], @@ -1763,6 +1782,8 @@ "json-parse-even-better-errors": ["json-parse-even-better-errors@2.3.1", "", {}, "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="], + "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], + "json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], diff --git a/packages/db/migrations/0003_categories.sql b/packages/db/migrations/0003_categories.sql new file mode 100644 index 0000000..515d6ba --- /dev/null +++ b/packages/db/migrations/0003_categories.sql @@ -0,0 +1,20 @@ +CREATE TABLE "category" ( + "candidates" text[] NOT NULL, + "created_at" timestamp DEFAULT now() NOT NULL, + "description" varchar(512), + "embeddings" jsonb, + "id" uuid PRIMARY KEY NOT NULL, + "name" varchar(255) NOT NULL, + "slug" varchar(255) NOT NULL, + "updated_at" timestamp, + "weight" integer DEFAULT 0 NOT NULL +); +--> statement-breakpoint +ALTER TABLE "article" ADD COLUMN "category_id" uuid;--> statement-breakpoint +ALTER TABLE "article" ADD COLUMN "clustered" boolean DEFAULT false NOT NULL;--> statement-breakpoint +CREATE UNIQUE INDEX "unq_category_name" ON "category" USING btree (lower((name)::text));--> statement-breakpoint +CREATE UNIQUE INDEX "unq_category_slug" ON "category" USING btree (lower((slug)::text));--> statement-breakpoint +CREATE INDEX "idx_category_weight" ON "category" USING btree ("weight");--> statement-breakpoint +ALTER TABLE "article" ADD CONSTRAINT "fk_article_category_id" FOREIGN KEY ("category_id") REFERENCES "public"."category"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint +CREATE INDEX "idx_article_category_id" ON "article" USING btree ("category_id");--> statement-breakpoint +CREATE INDEX "idx_article_clustered" ON "article" USING btree ("clustered"); \ No newline at end of file diff --git a/packages/db/migrations/meta/0003_snapshot.json b/packages/db/migrations/meta/0003_snapshot.json new file mode 100644 index 0000000..fcf1cfe --- /dev/null +++ b/packages/db/migrations/meta/0003_snapshot.json @@ -0,0 +1,1420 @@ +{ + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + }, + "dialect": "postgresql", + "enums": {}, + "id": "9a185b15-4baf-4994-b431-a76884342fa7", + "policies": {}, + "prevId": "842ee997-0c79-4705-8027-02fa9d803227", + "roles": {}, + "schemas": {}, + "sequences": {}, + "tables": { + "public.article": { + "checkConstraints": {}, + "columns": { + "body": { + "name": "body", + "notNull": true, + "primaryKey": false, + "type": "text" + }, + "categories": { + "name": "categories", + "notNull": false, + "primaryKey": false, + "type": "text[]" + }, + "category_id": { + "name": "category_id", + "notNull": false, + "primaryKey": false, + "type": "uuid" + }, + "clustered": { + "default": false, + "name": "clustered", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "crawled_at": { + "default": "now()", + "name": "crawled_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "credibility": { + "name": "credibility", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "excerpt": { + "generated": { + "as": "(\"left\"(body, 200) || '...'::text)", + "type": "stored" + }, + "name": "excerpt", + "notNull": false, + "primaryKey": false, + "type": "varchar(255)" + }, + "hash": { + "name": "hash", + "notNull": true, + "primaryKey": false, + "type": "varchar(32)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "image": { + "generated": { + "as": "(metadata ->> 'image'::text)", + "type": "stored" + }, + "name": "image", + "notNull": false, + "primaryKey": false, + "type": "varchar(1024)" + }, + "link": { + "name": "link", + "notNull": true, + "primaryKey": false, + "type": "varchar(1024)" + }, + "metadata": { + "name": "metadata", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "published_at": { + "name": "published_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "reading_time": { + "default": 1, + "name": "reading_time", + "notNull": false, + "primaryKey": false, + "type": "integer" + }, + "sentiment": { + "name": "sentiment", + "notNull": true, + "primaryKey": false, + "type": "sentiment", + "typeSchema": "public" + }, + "source_id": { + "name": "source_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "title": { + "name": "title", + "notNull": true, + "primaryKey": false, + "type": "varchar(1024)" + }, + "token_statistics": { + "name": "token_statistics", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "tsv": { + "generated": { + "as": "setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::\"char\")", + "type": "stored" + }, + "name": "tsv", + "notNull": false, + "primaryKey": false, + "type": "tsvector" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_article_category_id": { + "columnsFrom": ["category_id"], + "columnsTo": ["id"], + "name": "fk_article_category_id", + "onDelete": "set null", + "onUpdate": "no action", + "tableFrom": "article", + "tableTo": "category" + }, + "fk_article_source_id": { + "columnsFrom": ["source_id"], + "columnsTo": ["id"], + "name": "fk_article_source_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "article", + "tableTo": "source" + } + }, + "indexes": { + "gin_article_categories": { + "columns": [ + { + "asc": true, + "expression": "categories", + "isExpression": false, + "nulls": "last", + "opclass": "array_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_categories", + "with": {} + }, + "gin_article_link_trgm": { + "columns": [ + { + "asc": true, + "expression": "link", + "isExpression": false, + "nulls": "last", + "opclass": "gin_trgm_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_link_trgm", + "with": {} + }, + "gin_article_title_trgm": { + "columns": [ + { + "asc": true, + "expression": "title", + "isExpression": false, + "nulls": "last", + "opclass": "gin_trgm_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_title_trgm", + "with": {} + }, + "gin_article_tsv": { + "columns": [ + { + "asc": true, + "expression": "tsv", + "isExpression": false, + "nulls": "last", + "opclass": "tsvector_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_tsv", + "with": {} + }, + "idx_article_category_id": { + "columns": [ + { + "asc": true, + "expression": "category_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_article_category_id", + "with": {} + }, + "idx_article_clustered": { + "columns": [ + { + "asc": true, + "expression": "clustered", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_article_clustered", + "with": {} + }, + "idx_article_source_published_id": { + "columns": [ + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "published_at", + "isExpression": false, + "nulls": "first" + }, + { + "asc": false, + "expression": "id", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_article_source_published_id", + "with": {} + }, + "unq_article_hash": { + "columns": [ + { + "asc": true, + "expression": "hash", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_article_hash", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "article", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.bookmark": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "description": { + "name": "description", + "notNull": false, + "primaryKey": false, + "type": "varchar(512)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_public": { + "default": false, + "name": "is_public", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_bookmark_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark", + "tableTo": "user" + } + }, + "indexes": { + "idx_bookmark_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_bookmark_user_created", + "with": {} + }, + "unq_bookmark_user_name": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "lower(\"name\")", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_bookmark_user_name", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "bookmark", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.bookmark_article": { + "checkConstraints": {}, + "columns": { + "article_id": { + "name": "article_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "bookmark_id": { + "name": "bookmark_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": { + "bookmark_article_pkey": { + "columns": ["bookmark_id", "article_id"], + "name": "bookmark_article_pkey" + } + }, + "foreignKeys": { + "fk_bookmark_article_article_id": { + "columnsFrom": ["article_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_article_article_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark_article", + "tableTo": "article" + }, + "fk_bookmark_article_bookmark_id": { + "columnsFrom": ["bookmark_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_article_bookmark_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark_article", + "tableTo": "bookmark" + } + }, + "indexes": { + "idx_bookmark_article_bookmark_id": { + "columns": [ + { + "asc": true, + "expression": "bookmark_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_bookmark_article_bookmark_id", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "bookmark_article", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.category": { + "checkConstraints": {}, + "columns": { + "candidates": { + "name": "candidates", + "notNull": true, + "primaryKey": false, + "type": "text[]" + }, + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "description": { + "name": "description", + "notNull": false, + "primaryKey": false, + "type": "varchar(512)" + }, + "embeddings": { + "name": "embeddings", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "slug": { + "name": "slug", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + }, + "weight": { + "default": 0, + "name": "weight", + "notNull": true, + "primaryKey": false, + "type": "integer" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "idx_category_weight": { + "columns": [ + { + "asc": true, + "expression": "weight", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_category_weight", + "with": {} + }, + "unq_category_name": { + "columns": [ + { + "asc": true, + "expression": "lower((name)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_category_name", + "with": {} + }, + "unq_category_slug": { + "columns": [ + { + "asc": true, + "expression": "lower((slug)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_category_slug", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "category", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.comment": { + "checkConstraints": {}, + "columns": { + "article_id": { + "name": "article_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "content": { + "name": "content", + "notNull": true, + "primaryKey": false, + "type": "varchar(512)" + }, + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_spam": { + "default": false, + "name": "is_spam", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "sentiment": { + "name": "sentiment", + "notNull": true, + "primaryKey": false, + "type": "sentiment", + "typeSchema": "public" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_comment_article_id": { + "columnsFrom": ["article_id"], + "columnsTo": ["id"], + "name": "fk_comment_article_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "comment", + "tableTo": "article" + }, + "fk_comment_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_comment_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "comment", + "tableTo": "user" + } + }, + "indexes": { + "idx_comment_article_created": { + "columns": [ + { + "asc": true, + "expression": "article_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_article_created", + "with": {} + }, + "idx_comment_article_id": { + "columns": [ + { + "asc": true, + "expression": "article_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_article_id", + "with": {} + }, + "idx_comment_user_id": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_user_id", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "comment", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.followed_source": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "follower_id": { + "name": "follower_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "source_id": { + "name": "source_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_followed_source_follower_id": { + "columnsFrom": ["follower_id"], + "columnsTo": ["id"], + "name": "fk_followed_source_follower_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "followed_source", + "tableTo": "user" + }, + "fk_followed_source_source_id": { + "columnsFrom": ["source_id"], + "columnsTo": ["id"], + "name": "fk_followed_source_source_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "followed_source", + "tableTo": "source" + } + }, + "indexes": { + "idx_followed_source_follower_created": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_follower_created", + "with": {} + }, + "idx_followed_source_follower_id": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_follower_id", + "with": {} + }, + "idx_followed_source_source_id": { + "columns": [ + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_source_id", + "with": {} + }, + "unq_followed_source_user_source": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_followed_source_user_source", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "followed_source", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.login_attempt": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_login_attempt_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_login_attempt_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "login_attempt", + "tableTo": "user" + } + }, + "indexes": { + "idx_login_attempt_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_attempt_user_created", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "login_attempt", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.login_history": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "device": { + "name": "device", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "ip_address": { + "name": "ip_address", + "notNull": false, + "primaryKey": false, + "type": "inet" + }, + "location": { + "name": "location", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_login_history_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_login_history_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "login_history", + "tableTo": "user" + } + }, + "indexes": { + "idx_login_history_ip_address": { + "columns": [ + { + "asc": true, + "expression": "ip_address", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_history_ip_address", + "with": {} + }, + "idx_login_history_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_history_user_created", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "login_history", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.refresh_token": { + "checkConstraints": {}, + "columns": { + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "token": { + "name": "token", + "notNull": true, + "primaryKey": false, + "type": "varchar(128)" + }, + "username": { + "name": "username", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "valid": { + "name": "valid", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "idx_refresh_token_username": { + "columns": [ + { + "asc": true, + "expression": "lower(\"username\")", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_refresh_token_username", + "with": {} + }, + "idx_refresh_token_valid": { + "columns": [ + { + "asc": true, + "expression": "valid", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_refresh_token_valid", + "with": {} + }, + "uniq_refresh_token_token": { + "columns": [ + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "uniq_refresh_token_token", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "refresh_token", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.source": { + "checkConstraints": {}, + "columns": { + "credibility": { + "name": "credibility", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "description": { + "name": "description", + "notNull": false, + "primaryKey": false, + "type": "varchar(1024)" + }, + "display_name": { + "name": "display_name", + "notNull": false, + "primaryKey": false, + "type": "varchar(255)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + }, + "url": { + "name": "url", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "unq_source_name": { + "columns": [ + { + "asc": true, + "expression": "lower((name)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_source_name", + "with": {} + }, + "unq_source_url": { + "columns": [ + { + "asc": true, + "expression": "lower((url)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_source_url", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "source", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.user": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "email": { + "name": "email", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_confirmed": { + "default": false, + "name": "is_confirmed", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "is_locked": { + "default": false, + "name": "is_locked", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "password": { + "name": "password", + "notNull": true, + "primaryKey": false, + "type": "varchar(512)" + }, + "roles": { + "default": "'{\"ROLE_USER\"}'", + "name": "roles", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)[]" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "idx_user_created_at": { + "columns": [ + { + "asc": true, + "expression": "created_at", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_user_created_at", + "with": {} + }, + "unq_user_email": { + "columns": [ + { + "asc": true, + "expression": "lower((email)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_user_email", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "user", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.verification_token": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "purpose": { + "name": "purpose", + "notNull": true, + "primaryKey": false, + "type": "token_purpose", + "typeSchema": "public" + }, + "token": { + "name": "token", + "notNull": false, + "primaryKey": false, + "type": "varchar(60)" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_verification_token_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_verification_token_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "verification_token", + "tableTo": "user" + } + }, + "indexes": { + "idx_verif_token_created_at": { + "columns": [ + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_verif_token_created_at", + "with": {} + }, + "unq_verif_token_token": { + "columns": [ + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_verif_token_token", + "where": "\"verification_token\".\"token\" IS NOT NULL", + "with": {} + }, + "unq_verif_user_purpose_token": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "purpose", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_verif_user_purpose_token", + "where": "\"verification_token\".\"token\" IS NOT NULL", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "verification_token", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + } + }, + "version": "7", + "views": {} +} diff --git a/packages/db/migrations/meta/_journal.json b/packages/db/migrations/meta/_journal.json index 71c1f92..5808a28 100644 --- a/packages/db/migrations/meta/_journal.json +++ b/packages/db/migrations/meta/_journal.json @@ -21,6 +21,13 @@ "tag": "0002_modern_joseph", "version": "7", "when": 1763920009482 + }, + { + "breakpoints": true, + "idx": 3, + "tag": "0003_categories", + "version": "7", + "when": 1764767993880 } ], "version": "7" diff --git a/packages/db/package.json b/packages/db/package.json index 45808a0..f377b82 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -1,9 +1,12 @@ { "dependencies": { + "@ai-sdk/google": "^2.0.44", + "@ai-sdk/openai": "^2.0.75", "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "@date-fns/utc": "^2.1.1", + "ai": "^5.0.105", "date-fns": "catalog:", "drizzle-orm": "^0.44.7", "mysql2": "^3.15.3", @@ -31,6 +34,7 @@ "private": true, "scripts": { "clean": "rm -rf .turbo node_modules", + "sync:categories": "bun ./src/synchronizers/categories.ts", "sync:data": "bun ./src/synchronizers/data.ts", "sync:tokens": "bun ./src/synchronizers/tokens.ts", "typecheck": "tsc --noEmit" diff --git a/packages/db/src/queries/articles.ts b/packages/db/src/queries/articles.ts index 30b11d6..8a3fe92 100644 --- a/packages/db/src/queries/articles.ts +++ b/packages/db/src/queries/articles.ts @@ -11,12 +11,12 @@ import { } from "@basango/domain/models"; import { md5 } from "@basango/encryption"; import type { SQL } from "drizzle-orm"; -import { count, desc, eq, getTableColumns, sql } from "drizzle-orm"; +import { count, desc, eq, getTableColumns, or, sql } from "drizzle-orm"; import * as uuid from "uuid"; import { Database } from "#db/client"; import { getSourceIdByName } from "#db/queries/sources"; -import { articles, sources } from "#db/schema"; +import { articles, categories, sources } from "#db/schema"; import { CreateArticleParams, GetArticlesParams } from "#db/types/articles"; import { GetDistributionsParams, GetPublicationsParams } from "#db/types/shared"; import { @@ -41,15 +41,17 @@ export async function createArticle(db: Database, params: CreateArticleParams) { }; } + const categoryList = params.categories ?? []; const data = { ...params, + categories: categoryList, hash: md5(params.link), readingTime: computeReadingTime(params.body), - sentiment: "neutral" as Sentiment, + sentiment: (params.sentiment ?? "neutral") as Sentiment, sourceId: await getSourceIdByName(db, params.sourceId), tokenStatistics: computeTokenStatistics({ body: params.body, - categories: params.categories, + categories: categoryList, title: params.title, }), }; @@ -103,7 +105,14 @@ function buildFilters(params: GetArticlesParams, pagination: PaginationState) { } if (params.category) { - filters.push(sql`${params.category} = ANY(${articles.categories})`); + const categoryFilter = or( + eq(categories.slug, params.category), + eq(articles.categoryId, params.category), + ); + + if (categoryFilter) { + filters.push(categoryFilter); + } } if (params.search?.trim()) { @@ -133,11 +142,15 @@ export async function getArticles(db: Database, params: GetArticlesParams) { const query = db .select({ ...getTableColumns(articles), + category: { + ...getTableColumns(categories), + }, source: { ...getTableColumns(sources), }, }) .from(articles) + .leftJoin(categories, eq(articles.categoryId, categories.id)) .innerJoin(sources, eq(articles.sourceId, sources.id)); const rows = await applyFilters(query, filters) diff --git a/packages/db/src/queries/sources.ts b/packages/db/src/queries/sources.ts index 30a388a..6e6fa2b 100644 --- a/packages/db/src/queries/sources.ts +++ b/packages/db/src/queries/sources.ts @@ -5,7 +5,7 @@ import * as uuid from "uuid"; import { Database } from "#db/client"; import { NotFoundError } from "#db/errors"; -import { articles, sources } from "#db/schema"; +import { articles, categories, sources } from "#db/schema"; import { CategoryShare, CategoryShares, @@ -144,17 +144,18 @@ export async function getSourceCategoryShares( ): Promise { const data = await db.execute(sql` SELECT - cat AS category, - COUNT(*)::int AS count, - ROUND((COUNT(*)::numeric / SUM(COUNT(*)) OVER ()) * 100, 2) AS percentage - FROM ( - SELECT NULLIF(BTRIM(c), '') AS cat - FROM ${articles} - CROSS JOIN LATERAL UNNEST(COALESCE(${articles.categories}, ARRAY[]::text[])) AS c - WHERE ${articles.sourceId} = ${params.id} - ) t - WHERE cat IS NOT NULL - GROUP BY cat + ${categories.id}::text AS "categoryId", + ${categories.slug} AS slug, + ${categories.name} AS category, + COUNT(${articles.id})::int AS count, + COALESCE( + ROUND((COUNT(*)::numeric / NULLIF(SUM(COUNT(*)) OVER (), 0)) * 100, 2), + 0 + )::float AS percentage + FROM ${articles} + JOIN ${categories} ON ${categories.id} = ${articles.categoryId} + WHERE ${articles.sourceId} = ${params.id} AND ${articles.clustered} = true + GROUP BY ${categories.id}, ${categories.slug}, ${categories.name} ORDER BY count DESC LIMIT ${params.limit ?? DEFAULT_CATEGORY_SHARES_LIMIT} `); diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index efa09c4..686d650 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -94,11 +94,33 @@ export const sources = pgTable( ], ); +export const categories = pgTable( + "category", + { + candidates: text().array().notNull(), + createdAt: timestamp("created_at").defaultNow().notNull(), + description: varchar({ length: 512 }), + embeddings: jsonb("embeddings").$type(), + id: uuid().primaryKey().notNull(), + name: varchar({ length: 255 }).notNull(), + slug: varchar({ length: 255 }).notNull(), + updatedAt: timestamp("updated_at"), + weight: integer().default(0).notNull(), + }, + (table) => [ + uniqueIndex("unq_category_name").using("btree", sql`lower((name)::text)`), + uniqueIndex("unq_category_slug").using("btree", sql`lower((slug)::text)`), + index("idx_category_weight").using("btree", table.weight.asc().nullsLast()), + ], +); + export const articles = pgTable( "article", { body: text().notNull(), categories: text().array(), + categoryId: uuid("category_id"), + clustered: boolean("clustered").default(false).notNull(), crawledAt: timestamp("crawled_at").defaultNow().notNull(), credibility: jsonb("credibility").$type(), excerpt: varchar({ length: 255 }).generatedAlwaysAs(sql`("left"(body, 200) || '...'::text)`), @@ -123,6 +145,8 @@ export const articles = pgTable( "gin", table.categories.asc().nullsLast().op("array_ops"), ), + index("idx_article_category_id").using("btree", table.categoryId.asc().nullsLast()), + index("idx_article_clustered").using("btree", table.clustered.asc().nullsLast()), index("gin_article_link_trgm").using("gin", table.link.asc().nullsLast().op("gin_trgm_ops")), index("gin_article_title_trgm").using("gin", table.title.asc().nullsLast().op("gin_trgm_ops")), index("gin_article_tsv").using("gin", table.tsv.asc().nullsLast().op("tsvector_ops")), @@ -133,6 +157,11 @@ export const articles = pgTable( table.id.desc().nullsFirst(), ), uniqueIndex("unq_article_hash").using("btree", table.hash.asc().nullsLast()), + foreignKey({ + columns: [table.categoryId], + foreignColumns: [categories.id], + name: "fk_article_category_id", + }).onDelete("set null"), foreignKey({ columns: [table.sourceId], foreignColumns: [sources.id], @@ -425,6 +454,10 @@ export const commentRelations = relations(comments, ({ one }) => ({ export const articleRelations = relations(articles, ({ one, many }) => ({ bookmarkArticles: many(bookmarkArticles), + category: one(categories, { + fields: [articles.categoryId], + references: [categories.id], + }), comments: many(comments), source: one(sources, { fields: [articles.sourceId], @@ -432,6 +465,10 @@ export const articleRelations = relations(articles, ({ one, many }) => ({ }), })); +export const categoryRelations = relations(categories, ({ many }) => ({ + articles: many(articles), +})); + export const bookmarkArticleRelations = relations(bookmarkArticles, ({ one }) => ({ article: one(articles, { fields: [bookmarkArticles.articleId], diff --git a/packages/db/src/services/category-classifier.ts b/packages/db/src/services/category-classifier.ts new file mode 100644 index 0000000..819ce3e --- /dev/null +++ b/packages/db/src/services/category-classifier.ts @@ -0,0 +1,218 @@ +import { logger } from "@basango/logger"; +import { desc, eq, inArray, sql } from "drizzle-orm"; + +import { Database } from "#db/client"; +import { articles, categories } from "#db/schema"; +import { DEFAULT_CATEGORY } from "#domain/constants"; +import { Categories } from "#domain/models"; + +type CategoryRow = typeof categories.$inferSelect; +type ArticleCategories = Pick; + +type CategoryScore = { + category: (typeof Categories)[number]; + matches: number; + score: number; +}; + +const BATCH_SIZE = 50_000; +const CATEGORY_MAP = new Map(Categories.map((category, index) => [category.slug, index])); +const CANDIDATE_MAP = buildCandidateMap(); +const FALLBACK_CATEGORY = Categories.find((category) => category.slug === DEFAULT_CATEGORY)!; + +export class CategoryClassifier { + constructor(private readonly db: Database) {} + + async classifyPendingArticles(limit: number = BATCH_SIZE) { + const canonical = await this.ensureCanonicalCategories(); + + if (canonical.size === 0) { + logger.warn("No canonical categories available for clustering"); + return { matched: 0, processed: 0, unmatched: 0 }; + } + + const pending = await this.db + .select({ + categories: articles.categories, + id: articles.id, + }) + .from(articles) + .where(eq(articles.clustered, false)) + .orderBy(desc(articles.publishedAt), desc(articles.id)) + .limit(limit); + + if (pending.length === 0) { + logger.info("No articles to cluster"); + return { matched: 0, processed: 0, unmatched: 0 }; + } + + let matched = 0; + let unmatched = 0; + + const fallbackRow = canonical.get(FALLBACK_CATEGORY.slug); + + for (const article of pending) { + const best = classifyCategory(article); + + const targetRow = canonical.get(best.category.slug) ?? fallbackRow; + + await this.db + .update(articles) + .set({ + categoryId: targetRow?.id ?? null, + clustered: true, + updatedAt: sql`now()`, + }) + .where(eq(articles.id, article.id)); + + if (targetRow) { + matched++; + logger.debug( + { + articleId: article.id, + category: best.category.slug, + matches: best.matches, + score: best.score, + }, + "Clustered article", + ); + } else { + unmatched++; + logger.debug({ articleId: article.id }, "No category match found"); + } + } + + const processed = pending.length; + logger.info({ matched, processed, unmatched }, "Category clustering run completed"); + return { matched, processed, unmatched }; + } + + private async ensureCanonicalCategories(): Promise> { + const payload = Categories.map( + (category) => + ({ + candidates: category.candidates, + description: category.description ?? null, + embeddings: null, + id: category.id, + name: category.name, + slug: category.slug, + weight: category.weight, + }) satisfies typeof categories.$inferInsert, + ); + + await this.db.insert(categories).values(payload).onConflictDoNothing(); + + const existing = await this.db.query.categories.findMany({ + where: inArray( + categories.slug, + Categories.map((category) => category.slug), + ), + }); + + const map = new Map(); + + for (const row of existing) { + map.set(row.slug, row); + } + + if (!map.has(FALLBACK_CATEGORY.slug)) { + logger.warn("Fallback main category is missing from canonical categories"); + } + + return map; + } +} + +function classifyCategory(article: ArticleCategories): CategoryScore { + const rawCategories = article.categories ?? []; + const normalizedCategories = Array.from( + new Set( + rawCategories + .map((value) => normalizeCategory(value)) + .filter((value): value is string => Boolean(value)), + ), + ); + + const scores = new Map(); + + for (const normalized of normalizedCategories) { + const categories = CANDIDATE_MAP.get(normalized); + if (!categories) continue; + + for (const category of categories) { + const current = + scores.get(category.slug) ?? + ({ + category, + matches: 0, + score: 0, + } satisfies CategoryScore); + + current.matches += 1; + current.score += category.weight; + scores.set(category.slug, current); + } + } + + if (scores.size === 0) { + return { category: FALLBACK_CATEGORY, matches: 0, score: 0 }; + } + + const [first, ...rest] = Array.from(scores.values()); + + const best = rest.reduce((winner, candidate) => { + if (candidate.score !== winner.score) { + return candidate.score > winner.score ? candidate : winner; + } + + if (candidate.category.weight !== winner.category.weight) { + return candidate.category.weight > winner.category.weight ? candidate : winner; + } + + if (candidate.matches !== winner.matches) { + return candidate.matches > winner.matches ? candidate : winner; + } + + const winnerOrder = CATEGORY_MAP.get(winner.category.slug) ?? Number.MAX_SAFE_INTEGER; + const candidateOrder = CATEGORY_MAP.get(candidate.category.slug) ?? Number.MAX_SAFE_INTEGER; + + return candidateOrder < winnerOrder ? candidate : winner; + }, first ?? { category: FALLBACK_CATEGORY, matches: 0, score: 0 }); + + return best; +} + +function buildCandidateMap(): Map { + const map = new Map(); + + for (const category of Categories) { + for (const candidate of category.candidates) { + const normalized = normalizeCategory(candidate); + if (!normalized) continue; + + const existing = map.get(normalized) ?? []; + if (!existing.some((item) => item.slug === category.slug)) { + existing.push(category); + } + map.set(normalized, existing); + } + } + + return map; +} + +export function normalizeCategory(value?: string | null): string | null { + const trimmed = value?.trim(); + if (!trimmed) return null; + + const normalized = trimmed + .normalize("NFD") + .replace(/\p{Diacritic}/gu, "") + .toLowerCase() + .replace(/[^a-z0-9]+/g, " ") + .trim() + .replace(/\s+/g, " "); + + return normalized.length > 0 ? normalized : null; +} diff --git a/packages/db/src/synchronizers/categories.ts b/packages/db/src/synchronizers/categories.ts new file mode 100644 index 0000000..7484088 --- /dev/null +++ b/packages/db/src/synchronizers/categories.ts @@ -0,0 +1,18 @@ +#!/usr/bin/env bun + +import { logger } from "@basango/logger"; + +import { connectDb } from "#db/client"; +import { CategoryClassifier } from "#db/services/category-classifier.js"; + +async function main() { + const db = await connectDb(); + const service = new CategoryClassifier(db); + + await service.classifyPendingArticles(); +} + +main().catch((error) => { + logger.error({ error }, "Category clustering failed"); + process.exit(1); +}); diff --git a/packages/db/src/types/articles.ts b/packages/db/src/types/articles.ts index e7532b5..d50aba1 100644 --- a/packages/db/src/types/articles.ts +++ b/packages/db/src/types/articles.ts @@ -1,9 +1,11 @@ import { ArticleMetadata, ID, Sentiment, TokenStatistics } from "@basango/domain/models"; export type CreateArticleParams = { + categoryId?: string | null; + clustered?: boolean; title: string; body: string; - categories: string[]; + categories?: string[]; link: string; sourceId: string; publishedAt: Date; diff --git a/packages/db/src/types/shared.ts b/packages/db/src/types/shared.ts index 8b739ff..6092a5a 100644 --- a/packages/db/src/types/shared.ts +++ b/packages/db/src/types/shared.ts @@ -1,9 +1,11 @@ import { DateRange, ID } from "@basango/domain/models"; export type CategoryShare = { + categoryId: string; category: string; count: number; percentage: number; + slug: string; }; export type CategoryShares = { diff --git a/packages/db/src/utils/computed.ts b/packages/db/src/utils/computed.ts index 8ddcb2f..2124090 100644 --- a/packages/db/src/utils/computed.ts +++ b/packages/db/src/utils/computed.ts @@ -28,21 +28,20 @@ export const computeTokenCount = ( export const computeTokenStatistics = (data: { title: string; body: string; - categories: string[]; + categories?: string[]; }): TokenStatistics => { - const [title, body, categories, excerpt] = [ - computeTokenCount(data.title), - computeTokenCount(data.body), - computeTokenCount(data.categories.join(",")), - computeTokenCount(data.body.substring(0, 200)), - ]; + const normalizedCategories = data.categories ?? []; + const titleTokens = computeTokenCount(data.title); + const bodyTokens = computeTokenCount(data.body); + const categoryTokens = computeTokenCount(normalizedCategories.join(",")); + const excerptTokens = computeTokenCount(data.body.substring(0, 200)); return { - body, - categories, - excerpt, - title, - total: title + body + categories + excerpt, + body: bodyTokens, + categories: categoryTokens, + excerpt: excerptTokens, + title: titleTokens, + total: titleTokens + bodyTokens + categoryTokens + excerptTokens, }; }; diff --git a/packages/domain/src/constants.ts b/packages/domain/src/constants.ts index 76f0111..bc6293f 100644 --- a/packages/domain/src/constants.ts +++ b/packages/domain/src/constants.ts @@ -32,3 +32,4 @@ export const DEFAULT_TOKEN_AUDIENCE = "basango_dashboard"; export const DEFAULT_TOKEN_ISSUER = "basango_api"; export const DEFAULT_ACCESS_TOKEN_TTL = "35m"; export const DEFAULT_REFRESH_TOKEN_TTL = "7d"; +export const DEFAULT_CATEGORY = "divers-autres"; diff --git a/packages/domain/src/models/articles.ts b/packages/domain/src/models/articles.ts index b197937..b96aaab 100644 --- a/packages/domain/src/models/articles.ts +++ b/packages/domain/src/models/articles.ts @@ -1,5 +1,6 @@ import z from "zod"; +import { categorySchema } from "./categories"; import { idSchema, sentimentSchema } from "./shared"; import { sourceSchema } from "./sources"; @@ -24,7 +25,10 @@ export const tokenStatisticsSchema = z.object({ export const articleSchema = z.object({ body: z.string().min(1), - categories: z.array(z.string()), + categories: z.array(z.string()).optional().default([]), + category: categorySchema.optional(), + categoryId: idSchema.optional(), + clustered: z.boolean().default(false), createdAt: z.coerce.date(), excerpt: z.string().optional(), hash: z.string().min(1), diff --git a/packages/domain/src/models/categories.ts b/packages/domain/src/models/categories.ts new file mode 100644 index 0000000..3f957c1 --- /dev/null +++ b/packages/domain/src/models/categories.ts @@ -0,0 +1,296 @@ +import z from "zod"; + +import { idSchema } from "./shared"; + +export const categorySchema = z.object({ + candidates: z.array(z.string()), + createdAt: z.coerce.date(), + description: z.string().max(512).optional(), + embeddings: z.array(z.number()).optional(), + id: idSchema, + name: z.string().min(1).max(255), + slug: z.string().min(1).max(255), + updatedAt: z.coerce.date().optional(), + weight: z.number().int(), +}); + +export type Category = z.infer; + +export const Categories: Category[] = [ + { + candidates: [ + "accident", + "actualite", + "actualité", + "a-la-une", + "en bref", + "en-clair", + "encontinu", + "flash", + "faits-divers", + "drame", + "enquetes", + "desintox", + "archives", + "insolite", + "national", + "featured", + "related-featured", + "top-featured", + "top-trending", + "news-dont-miss", + "news-just-in", + "la-rdc-a-la-une", + "example-1", + "example-2", + "example-3", + "example-4", + "beto-premium", + "fiches", + "suspension", + ], + createdAt: new Date(), + description: "Nouvelles de dernière minute, faits divers et informations générales.", + id: "06930299-71a3-735e-9dcd-055c321f2ca9", + name: "Actualités & Faits Divers", + slug: "actualites-faits-divers", + weight: 4, + }, + { + candidates: [ + "democratie", + "dialogue entre congolais", + "diplomatie", + "diplomatie-et-securite", + "election", + "élections", + "elections-2023", + "legislatives", + "presidentielle", + "parlement", + "politique", + "serment", + "si j'étais président", + "spécial elections", + "us-politics", + "ukraine-conflict", + "conférence des nations unies", + "nations unies", + "rebellion", + ], + createdAt: new Date(), + description: "Élections, gouvernance, institutions, diplomatie et conflits politiques.", + id: "06930299-71a3-7aa5-95a4-a7b39c421255", + name: "Politique & Gouvernement", + slug: "politique-gouvernement", + weight: 10, + }, + { + candidates: [ + "agrobusiness", + "banking", + "banques-et-finances", + "economico", + "economie", + "économie", + "finances", + "industrie", + "investments", + "mines", + "pme-entrepreneuriat", + "featured-economy", + "featured-markets", + "intl-markets", + "us-business", + "la-une-eco", + "emploi", + ], + createdAt: new Date(), + description: "Affaires, marchés financiers, entreprises, banques, emplois et entrepreneuriat.", + id: "06930299-71a3-7c5b-98b0-d58c8308496d", + name: "Économie & Finances", + slug: "economie-finances", + weight: 9, + }, + { + candidates: [ + "arts", + "culture", + "musique", + "livre", + "livres", + "patrimoine-traditions", + "identité culturelle", + "caricature", + "histoire", + ], + createdAt: new Date(), + description: "Art, musique, patrimoine, histoire, littérature et expression culturelle.", + id: "06930299-71a3-7d47-8df2-b201975437f4", + name: "Culture & Arts", + slug: "culture-arts", + weight: 2, + }, + { + candidates: ["sport", "sports", "football", "boxe", "can", "okapi sports"], + createdAt: new Date(), + description: "Compétitions sportives nationales et internationales, analyses et résultats.", + id: "06930299-71a3-7e65-9421-b418c8a161b7", + name: "Sports", + slug: "sports", + weight: 5, + }, + { + candidates: [ + "famille-genre", + "femme", + "jeunes", + "justice", + "criminalite", + "arrestation", + "kidnapping", + "viol", + "vol", + "manifestation", + "marche", + "salubrite", + "denonciation", + "evasion", + "sante", + "santé", + "necrologie", + "education", + "éducation", + "enseignement", + "religion", + "religion-spiritualite", + "message-des-voeux", + "style et beauté", + "societe", + "société", + ], + createdAt: new Date(), + description: "Questions sociales, éducation, santé, justice, genre et vie quotidienne.", + id: "06930299-71a3-7f8b-b5a3-413f512ec6d8", + name: "Société & Vie Quotidienne", + slug: "societe-vie-quotidienne", + weight: 6, + }, + { + candidates: [ + "climat-et-environnement", + "developpement-durable", + "biodiversite", + "ecologico", + "environnement", + "nature", + "eau", + "electricite", + "energie", + "inondation", + "science & env.", + "sciences", + "technologie", + "technologie-innovation", + "mc geek !", + "sur le net", + ], + createdAt: new Date(), + description: + "Recherche scientifique, innovation technologique, climat, environnement et énergie.", + id: "06930299-71a4-7096-8a7f-d69920882d95", + name: "Sciences, Technologies & Environnement", + slug: "sciences-technologies-environnement", + weight: 7, + }, + { + candidates: [ + "afrique", + "congo-brazzaville", + "congolais de l'étranger", + "diaspora", + "euro-zone", + "se-asia", + "middle-east", + "monde", + "world-news", + "grands-lacs", + "bandundu", + "bukavu", + "bunia", + "ituri", + "katanga", + "kinshasa", + "maniema", + "mbujimayi", + "provinces", + "info kin", + "tourisme", + "transport", + "route", + "infrastructures", + "ukraine-conflict", + ], + createdAt: new Date(), + description: "Actualités internationales, régions du monde et provinces locales.", + id: "06930299-71a4-724a-8975-ea7d21286c22", + name: "International & Régions", + slug: "international-regions", + weight: 8, + }, + { + candidates: [ + "analyses", + "opinion", + "opinions", + "tribune", + "grand-angle", + "grande interview", + "le débat", + "lettre-ouverte", + "l'invité de la campagne", + "l'invité du jour", + "émissions", + "magazine", + "magazine un", + "medias", + "communication", + "communications", + "parole aux auditeurs", + "parole d'enfant", + "revue de presse", + "tele-medias", + "multimedia", + "tv", + ], + createdAt: new Date(), + description: "Chroniques, analyses, tribunes, programmes et contenus médiatiques.", + id: "06930299-71a4-745b-8813-6bca9c6b3c56", + name: "Opinions & Médias", + slug: "opinions-medias", + weight: 3, + }, + { + candidates: [ + "beto-premium", + "example-1", + "example-2", + "example-3", + "example-4", + "fiches", + "publicite", + "okapi service", + "petro-chem-example-3", + "sans catégorie", + "uncategorized", + "lefonde", + "jdc", + ], + createdAt: new Date(), + description: "Rubriques expérimentales, catégories indéterminées et éléments divers.", + id: "06930299-71a4-756a-948b-e4a244b5887e", + name: "Divers & Autres", + slug: "divers-autres", + weight: 1, + }, +]; diff --git a/packages/domain/src/models/index.ts b/packages/domain/src/models/index.ts index cbd73c1..9cf9079 100644 --- a/packages/domain/src/models/index.ts +++ b/packages/domain/src/models/index.ts @@ -1,5 +1,6 @@ export * from "./articles"; export * from "./auth"; +export * from "./categories"; export * from "./crawler"; export * from "./reports"; export * from "./shared";