diff --git a/.docker/nginx/Dockerfile b/.docker/nginx/Dockerfile deleted file mode 100644 index 4520133..0000000 --- a/.docker/nginx/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM nginx:1.27.1-alpine - -COPY default.conf /etc/nginx/conf.d/default.conf - diff --git a/.docker/nginx/default.conf b/.docker/nginx/default.conf deleted file mode 100644 index 933ce34..0000000 --- a/.docker/nginx/default.conf +++ /dev/null @@ -1,37 +0,0 @@ -server { - listen 80; - server_name localhost; - root /var/www/public; - - add_header X-Frame-Options "SAMEORIGIN"; - add_header X-XSS-Protection "1; mode=block"; - add_header X-Content-Type-Options "nosniff"; - - index index.html index.htm index.php; - - charset utf-8; - - location / { - root /var/www/; - try_files /public/$uri /public/$uri /assets/$uri /index.php?$query_string; - } - - location = /favicon.ico { access_log off; log_not_found off; } - location = /robots.txt { access_log off; log_not_found off; } - - error_page 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 421 422 423 424 425 426 428 429 431 451 500 501 502 503 504 505 506 507 508 510 511 /error.html; - - location ~ \.php$ { - fastcgi_pass php:9000; - fastcgi_index index.php; - fastcgi_param SCRIPT_FILENAME $realpath_root$fastcgi_script_name; - include fastcgi_params; - fastcgi_buffers 16 16k; - fastcgi_buffer_size 32k; - } - - location ~ /\.(?!well-known).* { - deny all; - } -} - diff --git a/.docker/php/Dockerfile b/.docker/php/Dockerfile deleted file mode 100644 index 1c40bb1..0000000 --- a/.docker/php/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM php:8.4-fpm-alpine - -# Install dependencies -RUN apk --no-cache add curl git wget bash dpkg - -# Add PHP extensions -ADD https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions /usr/local/bin/ -RUN chmod +x /usr/local/bin/install-php-extensions - -RUN install-php-extensions opcache iconv soap -RUN install-php-extensions zip intl fileinfo -RUN install-php-extensions pdo redis mysqli pdo_mysql -RUN install-php-extensions gd -RUN install-php-extensions pgsql pdo_pgsql - -# Composer -RUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/bin/ --filename=composer - -WORKDIR /var/www - diff --git a/.env b/.env new file mode 100644 index 0000000..5ec6e5b --- /dev/null +++ b/.env @@ -0,0 +1,40 @@ +# api +BASANGO_API_HOST=localhost +BASANGO_API_PORT=3080 +BASANGO_API_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000 +BASANGO_API_KEY=your_api_key_here +BASANGO_API_CRAWLER_TOKEN=dev +BASANGO_API_CRAWLER_ENDPOINT="http://localhost:3080/articles" +BASANGO_API_JWT_SECRET=your_jwt_secret_here + +# db +BASANGO_DATABASE_URL="postgresql://postgres:postgres@localhost:5432/app?serverVersion=16&charset=utf8" +BASANGO_DATABASE_LEGACY_HOST="localhost" +BASANGO_DATABASE_LEGACY_PASSWORD="root" +BASANGO_DATABASE_LEGACY_NAME="app" +BASANGO_DATABASE_LEGACY_USER="root" +BASANGO_DATABASE_LEGACY_PORT=3306 + +# logger +BASANGO_LOGGER_LEVEL=debug + +# crawler +BASANGO_CRAWLER_ROOT_PATH= +BASANGO_CRAWLER_DATA_PATH= +BASANGO_CRAWLER_LOGS_PATH= +BASANGO_CRAWLER_CONFIG_PATH= + +BASANGO_CRAWLER_UPDATE_DIRECTION=forward +BASANGO_CRAWLER_FETCH_USER_AGENT="Basango/0.1 (+https://github.com/bernard-ng/basango)" +BASANGO_CRAWLER_FETCH_MAX_RETRIES=3 +BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER=true + +BASANGO_CRAWLER_ASYNC_REDIS_URL="redis://localhost:6379/0" +BASANGO_CRAWLER_ASYNC_TTL_RESULT=3600 +BASANGO_CRAWLER_ASYNC_TTL_FAILURE=3600 +BASANGO_CRAWLER_ASYNC_QUEUE_LISTING="listing" +BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS="details" +BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING="processing" + +# encryption +BASANGO_ENCRYPTION_KEY=testkey diff --git a/Makefile b/Makefile index 2921bb7..09508ac 100644 --- a/Makefile +++ b/Makefile @@ -6,39 +6,35 @@ help: @echo Tasks: @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) -# ----------------------------------- -# Variables -# ----------------------------------- -BUN ?= bun -BUNX ?= bunx -PM2 ?= pm2 -PWD := $(shell pwd) -DRIZZLE_CONFIG ?= packages/db/drizzle.config.ts - # ----------------------------------- # Deployment # ----------------------------------- .PHONY: deploy deploy: - $(BUN) install --frozen-lockfile. # Install dependencies - $(BUN) run build:dashboard # Build dashboard app - cd packages/db # Change directory to packages/db - $(BUNX) drizzle-kit migrate # Run database migrations - cd $(PWD) # Change back to root directory - $(PM2) reload ecosystem.config.js --env production # Reload PM2 processes + ~/.bun/bin/bun install --frozen-lockfile. + ~/.bun/bin/bun run build:database + ~/.bun/bin/bun run migrate + pm2 reload ecosystem.config.js --env production - -# ----------------------------------- +# ----------------------------------- # PM2 Commands # ----------------------------------- .PHONY: start start: - $(PM2) start ecosystem.config.js --env production + pm2 start ecosystem.config.js --env production .PHONY: restart restart: - $(PM2) reload ecosystem.config.js --env production + pm2 reload ecosystem.config.js --env production .PHONY: stop stop: - $(PM2) stop ecosystem.config.js --env production + pm2 stop ecosystem.config.js --env production + +.PHONY: logs +logs: + pm2 logs --lines 100 --env production + +.PHONY: monit +monit: + pm2 monit --env production diff --git a/apps/api/.env b/apps/api/.env deleted file mode 100644 index bc898b8..0000000 --- a/apps/api/.env +++ /dev/null @@ -1,7 +0,0 @@ -NODE_ENV=development -BASANGO_API_HOST=localhost -BASANGO_API_PORT=3080 -BASANGO_API_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000 -BASANGO_API_KEY=your_api_key_here -BASANGO_CRAWLER_TOKEN=dev -BASANGO_JWT_SECRET=your_jwt_secret_here diff --git a/apps/api/config/cors.json b/apps/api/config/cors.json deleted file mode 100644 index 8be0c02..0000000 --- a/apps/api/config/cors.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "cors": { - "allowedHeaders": [ - "Authorization", - "Content-Type", - "accept-language", - "x-trpc-source", - "x-user-locale", - "x-user-timezone", - "x-user-country" - ], - "allowMethods": ["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"], - "exposeHeaders": ["Content-Length"], - "maxAge": 86400 - } -} diff --git a/apps/api/config/server.json b/apps/api/config/server.json deleted file mode 100644 index 5a46462..0000000 --- a/apps/api/config/server.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "server": { - "host": "%env(BASANGO_API_HOST)%", - "port": "%env(number:BASANGO_API_PORT)%", - "version": "1.0.0" - } -} diff --git a/apps/api/package.json b/apps/api/package.json index b47c36b..a0c7e5d 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -4,13 +4,10 @@ "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", - "@devscast/config": "catalog:", "@hono/node-server": "^1.19.6", "@hono/trpc-server": "^0.4.0", "@hono/zod-openapi": "^1.1.4", - "@scalar/hono-api-reference": "^0.9.24", "@trpc/server": "^11.7.1", - "ai": "^5.0.89", "camelcase-keys": "^10.0.1", "date-fns": "catalog:", "hono-rate-limiter": "^0.4.2", diff --git a/apps/api/src/config.ts b/apps/api/src/config.ts deleted file mode 100644 index f2352ca..0000000 --- a/apps/api/src/config.ts +++ /dev/null @@ -1,45 +0,0 @@ -import path from "node:path"; - -import { loadConfig as defineConfig } from "@devscast/config"; -import { z } from "zod"; - -export const PROJECT_DIR = path.resolve(__dirname, "../"); - -const ServerConfigurationSchema = z.object({ - cors: z.object({ - allowedHeaders: z.array(z.string()).optional(), - allowMethods: z.array(z.string()).optional(), - exposeHeaders: z.array(z.string()).optional(), - maxAge: z.number().int().min(0).optional(), - origin: z - .array(z.string()) - .optional() - .default(["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"]), - }), - server: z.object({ - host: z.string().default("localhost"), - port: z.number().int().min(1).max(65535).default(4000), - version: z.string().default("1.0.0"), - }), -}); - -export const { env, config } = defineConfig({ - env: { - knownKeys: [ - "BASANGO_API_HOST", - "BASANGO_API_PORT", - "BASANGO_API_ALLOWED_ORIGINS", - "BASANGO_API_KEY", - "BASANGO_CRAWLER_TOKEN", - "BASANGO_JWT_SECRET", - ], - path: path.join(PROJECT_DIR, ".env"), - }, - schema: ServerConfigurationSchema, - sources: [ - path.join(PROJECT_DIR, "config", "server.json"), - path.join(PROJECT_DIR, "config", "cors.json"), - ], -}); - -export type ServerConfiguration = z.infer; diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 0b695a1..6fd599e 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -1,11 +1,10 @@ +import { config } from "@basango/domain/config"; import { trpcServer } from "@hono/trpc-server"; import { OpenAPIHono } from "@hono/zod-openapi"; -import { Scalar } from "@scalar/hono-api-reference"; import { cors } from "hono/cors"; import { logger } from "hono/logger"; import { secureHeaders } from "hono/secure-headers"; -import { config, env } from "#api/config"; import { routers } from "#api/rest/routers"; import { createTRPCContext } from "#api/trpc/init"; import { appRouter } from "#api/trpc/routers/_app"; @@ -18,11 +17,11 @@ app.use(secureHeaders()); app.use( "*", cors({ - allowHeaders: config.cors.allowedHeaders, - allowMethods: config.cors.allowMethods, - exposeHeaders: config.cors.exposeHeaders, - maxAge: config.cors.maxAge, - origin: ["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"], + allowHeaders: config.api.cors.allowedHeaders, + allowMethods: config.api.cors.allowMethods, + exposeHeaders: config.api.cors.exposeHeaders, + maxAge: config.api.cors.maxAge, + origin: config.api.cors.origin, }), ); @@ -34,49 +33,10 @@ app.use( }), ); -app.doc("/openapi", { - info: { - contact: { - email: "engineering@basango.io", - name: "Basango", - url: "https://basango.io", - }, - description: "Basango is a platform that leverages AI to revolutionize news curation.", - license: { - name: "AGPL-3.0 license", - url: "https://github.com/bernard-ng/basango/blob/main/LICENSE", - }, - title: "Basango API", - version: "0.0.1", - }, - openapi: "3.1.0", - security: [ - { - oauth2: [], - }, - { token: [] }, - ], - servers: [ - { - description: "Production API", - url: "https://api.basango.io", - }, - ], -}); - -// Register security scheme -app.openAPIRegistry.registerComponent("securitySchemes", "token", { - description: "Default authentication mechanism", - scheme: "bearer", - type: "http", - "x-speakeasy-example": env("BASANGO_API_KEY"), -}); - -app.get("/", Scalar({ pageTitle: "Basango API", theme: "saturn", url: "/openapi" })); app.route("/", routers); export default { fetch: app.fetch, - hostname: config.server.host, - port: config.server.port, + hostname: config.api.server.host, + port: config.api.server.port, }; diff --git a/apps/api/src/rest/middlewares/crawler.ts b/apps/api/src/rest/middlewares/crawler.ts index e8551ee..c865949 100644 --- a/apps/api/src/rest/middlewares/crawler.ts +++ b/apps/api/src/rest/middlewares/crawler.ts @@ -1,8 +1,7 @@ +import { config } from "@basango/domain/config"; import type { MiddlewareHandler } from "hono"; import { HTTPException } from "hono/http-exception"; -import { env } from "#api/config"; - export const withCrawlerAuth: MiddlewareHandler = async (c, next) => { const token = c.req.header("Authorization"); @@ -10,7 +9,7 @@ export const withCrawlerAuth: MiddlewareHandler = async (c, next) => { throw new HTTPException(401, { message: "Authorization header required" }); } - if (token !== env("BASANGO_CRAWLER_TOKEN")) { + if (token !== config.api.security.crawlerToken) { throw new HTTPException(403, { message: "Invalid token" }); } diff --git a/apps/api/src/trpc/routers/auth.ts b/apps/api/src/trpc/routers/auth.ts index e92646b..6d69bad 100644 --- a/apps/api/src/trpc/routers/auth.ts +++ b/apps/api/src/trpc/routers/auth.ts @@ -13,7 +13,7 @@ export const authRouter = createTRPCRouter({ if (!user || user.isLocked) { throw new TRPCError({ code: "UNAUTHORIZED", - message: "Invalid credentials.", + message: "Account is locked", }); } diff --git a/apps/api/src/utils/auth.ts b/apps/api/src/utils/auth.ts index 3474bcb..7155aa7 100644 --- a/apps/api/src/utils/auth.ts +++ b/apps/api/src/utils/auth.ts @@ -1,15 +1,8 @@ import { Database } from "@basango/db/client"; import { getUserById } from "@basango/db/queries"; -import { - DEFAULT_ACCESS_TOKEN_TTL, - DEFAULT_REFRESH_TOKEN_TTL, - DEFAULT_TOKEN_AUDIENCE, - DEFAULT_TOKEN_ISSUER, -} from "@basango/domain/constants"; +import { config } from "@basango/domain/config"; import { type JWTPayload, SignJWT, jwtVerify } from "jose"; -import { env } from "#api/config"; - export type Session = { user: { id: string; @@ -39,7 +32,7 @@ export type SessionTokens = { const encoder = new TextEncoder(); function getSecretKey() { - return encoder.encode(env("BASANGO_JWT_SECRET")); + return encoder.encode(config.api.security.jwtSecret); } export async function getSession(db: Database, accessToken?: string): Promise { @@ -74,24 +67,24 @@ async function createToken(session: Session, tokenType: TokenType, expiresIn: st }) .setProtectedHeader({ alg: "HS256" }) .setIssuedAt() - .setAudience(DEFAULT_TOKEN_AUDIENCE) - .setIssuer(DEFAULT_TOKEN_ISSUER) + .setAudience(config.api.security.audience) + .setIssuer(config.api.security.issuer) .setExpirationTime(expiresIn) .sign(getSecretKey()); } export async function createSessionTokens(session: Session): Promise { const [accessToken, refreshToken] = await Promise.all([ - createToken(session, "access", DEFAULT_ACCESS_TOKEN_TTL), - createToken(session, "refresh", DEFAULT_REFRESH_TOKEN_TTL), + createToken(session, "access", config.api.security.accessTokenTtl), + createToken(session, "refresh", config.api.security.refreshTokenTtl), ]); const issuedAt = Date.now(); const accessTokenExpiresAt = new Date( - issuedAt + formatTTL(DEFAULT_ACCESS_TOKEN_TTL), + issuedAt + formatTTL(config.api.security.accessTokenTtl), ).toISOString(); const refreshTokenExpiresAt = new Date( - issuedAt + formatTTL(DEFAULT_REFRESH_TOKEN_TTL), + issuedAt + formatTTL(config.api.security.refreshTokenTtl), ).toISOString(); return { @@ -118,8 +111,8 @@ async function verifyToken( try { const { payload } = await jwtVerify(token, getSecretKey(), { - audience: DEFAULT_TOKEN_AUDIENCE, - issuer: DEFAULT_TOKEN_ISSUER, + audience: config.api.security.audience, + issuer: config.api.security.issuer, }); if (payload.tokenType !== expectedType) { diff --git a/apps/crawler/.env b/apps/crawler/.env deleted file mode 100644 index e69875d..0000000 --- a/apps/crawler/.env +++ /dev/null @@ -1,21 +0,0 @@ -# paths -BASANGO_CRAWLER_ROOT_PATH= -BASANGO_CRAWLER_DATA_PATH= -BASANGO_CRAWLER_LOGS_PATH= -BASANGO_CRAWLER_CONFIG_PATH= - -# crawler settings -BASANGO_CRAWLER_UPDATE_DIRECTION=forward -BASANGO_CRAWLER_FETCH_USER_AGENT="Basango/0.1 (+https://github.com/bernard-ng/basango)" -BASANGO_CRAWLER_FETCH_MAX_RETRIES=3 -BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER=true - -BASANGO_CRAWLER_ASYNC_REDIS_URL="redis://localhost:6379/0" -BASANGO_CRAWLER_ASYNC_TTL_RESULT=3600 -BASANGO_CRAWLER_ASYNC_TTL_FAILURE=3600 -BASANGO_CRAWLER_ASYNC_QUEUE_LISTING="listing" -BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS="details" -BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING="processing" - -BASANGO_CRAWLER_TOKEN="dev" -BASANGO_CRAWLER_BACKEND_API_ENDPOINT="http://localhost:3080/articles" diff --git a/apps/crawler/config/pipeline.json b/apps/crawler/config/pipeline.json deleted file mode 100644 index bf2753b..0000000 --- a/apps/crawler/config/pipeline.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "fetch": { - "async": { - "prefix": "basango:crawler", - "queues": { - "details": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS)%", - "listing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_LISTING)%", - "processing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING)%" - }, - "redisUrl": "%env(BASANGO_CRAWLER_ASYNC_REDIS_URL)%", - "ttl": { - "default": 600, - "failure": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_FAILURE)%", - "result": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_RESULT)%" - } - }, - "client": { - "backoffInitial": 1, - "backoffMax": 30, - "backoffMultiplier": 2, - "followRedirects": true, - "maxRetries": "%env(number:BASANGO_CRAWLER_FETCH_MAX_RETRIES)%", - "respectRetryAfter": "%env(boolean:BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER)%", - "rotate": true, - "timeout": 20, - "userAgent": "%env(BASANGO_CRAWLER_FETCH_USER_AGENT)%", - "verifySsl": true - }, - "crawler": { - "direction": "%env(BASANGO_CRAWLER_UPDATE_DIRECTION)%", - "maxWorkers": 5, - "notify": false, - "useMultiThreading": false - } - }, - "paths": { - "config": "%env(BASANGO_CRAWLER_CONFIG_PATH)%", - "data": "%env(BASANGO_CRAWLER_DATA_PATH)%", - "root": "%env(BASANGO_CRAWLER_ROOT_PATH)%" - } -} diff --git a/apps/crawler/config/sources.json b/apps/crawler/config/sources.json deleted file mode 100644 index 6d838d0..0000000 --- a/apps/crawler/config/sources.json +++ /dev/null @@ -1,210 +0,0 @@ -{ - "sources": { - "html": [ - { - "paginationTemplate": "actualite", - "requiresDetails": true, - "requiresRateLimit": false, - "sourceDate": {}, - "sourceId": "radiookapi.net", - "sourceKind": "html", - "sourceSelectors": { - "articleBody": ".field-name-body", - "articleCategories": ".views-field-field-cat-gorie a", - "articleDate": "head > meta[property=\"article:published_time\"]", - "articleLink": ".views-field-title a", - "articles": ".view-content > .views-row.content-row", - "articleTitle": "h1.page-header", - "pagination": "ul.pagination > li.pager-last > a" - }, - "sourceUrl": "https://www.radiookapi.net", - "supportsCategories": false - }, - { - "categories": ["politique", "economie", "culture", "sport", "societe"], - "paginationTemplate": "index.php/category/{category}", - "requiresDetails": true, - "requiresRateLimit": false, - "sourceDate": {}, - "sourceId": "7sur7.cd", - "sourceKind": "html", - "sourceSelectors": { - "articleBody": "div[property=\"schema:text\"].field.field--name-body", - "articleDate": "head > meta[property=\"article:published_time\"]", - "articleLink": ".views-field-title a", - "articles": ".view-content > .row.views-row", - "articleTitle": ".views-field-title a", - "pagination": "ul.pagination > li.pager__item.pager__item--last > a" - }, - "sourceUrl": "https://7sur7.cd", - "supportsCategories": true - }, - { - "paginationTemplate": "articles.html", - "requiresDetails": true, - "requiresRateLimit": false, - "sourceDate": { - "format": "dd.MM.yyyy" - }, - "sourceId": "mediacongo.net", - "sourceKind": "html", - "sourceSelectors": { - "articleBody": ".article_ttext", - "articleCategories": "a.color_link", - "articleDate": ".article_other_about", - "articleLink": "a:first-child", - "articles": ".for_aitems > .article_other_item", - "articleTitle": "h1", - "pagination": "div.pagination > div > a:last-child" - }, - "sourceUrl": "https://www.mediacongo.net", - "supportsCategories": false - }, - { - "paginationTemplate": "actualite", - "requiresDetails": true, - "requiresRateLimit": false, - "sourceDate": {}, - "sourceId": "actualite.cd", - "sourceKind": "html", - "sourceSelectors": { - "articleBody": ".views-field.views-field-body .field-content", - "articleCategories": "#actu-cat", - "articleDate": "head > meta[property=\"article:published_time\"]", - "articleLink": "#actu-titre a", - "articles": "#views-bootstrap-taxonomy-term-page-2 > div > div", - "articleTitle": "h1.page-title" - }, - "sourceUrl": "https://actualite.cd", - "supportsCategories": false - } - ], - "wordpress": [ - { - "requiresRateLimit": true, - "sourceId": "beto.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://beto.cd" - }, - { "sourceId": "newscd.net", "sourceKind": "wordpress", "sourceUrl": "https://newscd.net" }, - { - "sourceId": "africanewsrdc.net", - "sourceKind": "wordpress", - "sourceUrl": "https://www.africanewsrdc.net" - }, - { - "sourceId": "angazainstitute.ac.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://angazainstitute.ac.cd" - }, - { "sourceId": "b-onetv.cd", "sourceKind": "wordpress", "sourceUrl": "https://b-onetv.cd" }, - { - "sourceId": "bukavufm.com", - "sourceKind": "wordpress", - "sourceUrl": "https://bukavufm.com" - }, - { - "sourceId": "changement7.net", - "sourceKind": "wordpress", - "sourceUrl": "https://changement7.net" - }, - { - "sourceId": "congoactu.net", - "sourceKind": "wordpress", - "sourceUrl": "https://congoactu.net" - }, - { - "sourceId": "congoindependant.com", - "sourceKind": "wordpress", - "sourceUrl": "https://www.congoindependant.com" - }, - { - "sourceId": "congoquotidien.com", - "sourceKind": "wordpress", - "sourceUrl": "https://www.congoquotidien.com" - }, - { - "sourceId": "cumulard.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://www.cumulard.cd" - }, - { - "sourceId": "environews-rdc.net", - "sourceKind": "wordpress", - "sourceUrl": "https://environews-rdc.net" - }, - { - "sourceId": "freemediardc.info", - "sourceKind": "wordpress", - "sourceUrl": "https://www.freemediardc.info" - }, - { - "sourceId": "geopolismagazine.org", - "sourceKind": "wordpress", - "sourceUrl": "https://geopolismagazine.org" - }, - { - "sourceId": "habarirdc.net", - "sourceKind": "wordpress", - "sourceUrl": "https://habarirdc.net" - }, - { "sourceId": "infordc.com", "sourceKind": "wordpress", "sourceUrl": "https://infordc.com" }, - { - "sourceId": "kilalopress.net", - "sourceKind": "wordpress", - "sourceUrl": "https://kilalopress.net" - }, - { - "sourceId": "laprosperiteonline.net", - "sourceKind": "wordpress", - "sourceUrl": "https://laprosperiteonline.net" - }, - { - "sourceId": "laprunellerdc.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://laprunellerdc.cd" - }, - { - "sourceId": "lesmedias.net", - "sourceKind": "wordpress", - "sourceUrl": "https://lesmedias.net" - }, - { - "sourceId": "lesvolcansnews.net", - "sourceKind": "wordpress", - "sourceUrl": "https://lesvolcansnews.net" - }, - { - "sourceId": "netic-news.net", - "sourceKind": "wordpress", - "sourceUrl": "https://www.netic-news.net" - }, - { - "sourceId": "objectif-infos.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://objectif-infos.cd" - }, - { - "sourceId": "scooprdc.net", - "sourceKind": "wordpress", - "sourceUrl": "https://scooprdc.net" - }, - { - "sourceId": "journaldekinshasa.com", - "sourceKind": "wordpress", - "sourceUrl": "https://www.journaldekinshasa.com" - }, - { - "sourceId": "lepotentiel.cd", - "sourceKind": "wordpress", - "sourceUrl": "https://lepotentiel.cd" - }, - { "sourceId": "acturdc.com", "sourceKind": "wordpress", "sourceUrl": "https://acturdc.com" }, - { - "sourceId": "matininfos.net", - "sourceKind": "wordpress", - "sourceUrl": "https://matininfos.net" - } - ] - } -} diff --git a/apps/crawler/src/config.ts b/apps/crawler/src/config.ts deleted file mode 100644 index cf80ca7..0000000 --- a/apps/crawler/src/config.ts +++ /dev/null @@ -1,81 +0,0 @@ -import path from "node:path"; - -import { - HtmlSourceConfigSchema, - PageRangeSchema, - TimestampRangeSchema, - UpdateDirectionSchema, - WordPressSourceConfigSchema, -} from "@basango/domain/crawler"; -import { loadConfig as defineConfig } from "@devscast/config"; -import { z } from "zod"; - -export const PROJECT_DIR = path.resolve(__dirname, "../"); - -export const PipelineConfigSchema = z.object({ - fetch: z.object({ - async: z.object({ - prefix: z.string().default("basango:crawler:queue"), - queues: z.object({ - details: z.string().default("details"), - listing: z.string().default("listing"), - processing: z.string().default("processing"), - }), - redisUrl: z.string().default("redis://localhost:6379/0"), - ttl: z.object({ - default: z.number().int().positive().default(600), - failure: z.number().int().nonnegative().default(3600), - result: z.number().int().nonnegative().default(3600), - }), - }), - client: z.object({ - backoffInitial: z.number().nonnegative().default(1), - backoffMax: z.number().nonnegative().default(30), - backoffMultiplier: z.number().positive().default(2), - followRedirects: z.boolean().default(true), - maxRetries: z.number().int().nonnegative().default(3), - respectRetryAfter: z.boolean().default(true), - rotate: z.boolean().default(true), - timeout: z.number().positive().default(20), - userAgent: z.string().default("Basango/0.1 (+https://github.com/bernard-ng/basango)"), - verifySsl: z.boolean().default(true), - }), - crawler: z.object({ - category: z.string().optional(), - dateRange: TimestampRangeSchema.optional(), - direction: UpdateDirectionSchema.default("forward"), - isUpdate: z.boolean().default(false), - maxWorkers: z.number().int().positive().default(5), - notify: z.boolean().default(false), - pageRange: PageRangeSchema.optional(), - source: z.union([HtmlSourceConfigSchema, WordPressSourceConfigSchema]).optional(), - useMultiThreading: z.boolean().default(false), - }), - }), - paths: z.object({ - config: z.string().default(path.join(PROJECT_DIR, "config")), - data: z.string().default(path.join(PROJECT_DIR, "data", "datasets")), - root: z.string().default(PROJECT_DIR), - }), - sources: z.object({ - html: z.array(HtmlSourceConfigSchema).default([]), - wordpress: z.array(WordPressSourceConfigSchema).default([]), - }), -}); - -export const { config, env } = defineConfig({ - cwd: process.cwd(), - env: { - path: path.join(PROJECT_DIR, ".env"), - }, - schema: PipelineConfigSchema, - sources: [ - path.join(PROJECT_DIR, "config", "pipeline.json"), - path.join(PROJECT_DIR, "config", "sources.json"), - ], -}); - -export type PipelineConfig = z.infer; -export type FetchClientConfig = PipelineConfig["fetch"]["client"]; -export type FetchCrawlerConfig = PipelineConfig["fetch"]["crawler"]; -export type FetchAsyncConfig = PipelineConfig["fetch"]["async"]; diff --git a/apps/crawler/src/http/http-client.ts b/apps/crawler/src/http/http-client.ts index b90ebf5..99d2f9f 100644 --- a/apps/crawler/src/http/http-client.ts +++ b/apps/crawler/src/http/http-client.ts @@ -1,12 +1,12 @@ import { setTimeout as delay } from "node:timers/promises"; +import type { CrawlerHttpOptions } from "@basango/domain/config"; import { DEFAULT_RETRY_AFTER_HEADER, DEFAULT_TRANSIENT_HTTP_STATUSES, DEFAULT_USER_AGENT, } from "@basango/domain/constants"; -import { FetchClientConfig } from "#crawler/config"; import { UserAgents } from "#crawler/http/user-agent"; export type HttpHeaders = Record; @@ -71,7 +71,7 @@ const buildUrl = (url: string, params?: HttpParams): string => { * @param config - Fetch client configuration * @param attempt - Current attempt number */ -const computeBackoff = (config: FetchClientConfig, attempt: number): number => { +const computeBackoff = (config: CrawlerHttpOptions, attempt: number): number => { const base = Math.min( config.backoffInitial * config.backoffMultiplier ** attempt, config.backoffMax, @@ -101,26 +101,26 @@ const parseRetryAfter = (header: string): number => { * @author Bernard Ngandu */ export class BaseHttpClient { - protected readonly config: FetchClientConfig; + protected readonly options: CrawlerHttpOptions; protected readonly fetchImpl: typeof fetch; protected readonly sleep: (ms: number) => Promise; protected readonly headers: HttpHeaders; - constructor(config: FetchClientConfig, options: HttpClientOptions = {}) { - this.config = config; + constructor(options: CrawlerHttpOptions, clientOptions: HttpClientOptions = {}) { + this.options = options; const provider = - options.userAgentProvider ?? - new UserAgents(config.rotate, config.userAgent ?? DEFAULT_USER_AGENT); - const userAgent = provider.get() ?? config.userAgent ?? DEFAULT_USER_AGENT; + clientOptions.userAgentProvider ?? + new UserAgents(options.rotate, options.userAgent ?? DEFAULT_USER_AGENT); + const userAgent = provider.get() ?? options.userAgent ?? DEFAULT_USER_AGENT; const baseHeaders: HttpHeaders = { "User-Agent": userAgent }; - if (options.defaultHeaders) { - Object.assign(baseHeaders, options.defaultHeaders); + if (clientOptions.defaultHeaders) { + Object.assign(baseHeaders, clientOptions.defaultHeaders); } this.headers = baseHeaders; - this.fetchImpl = options.fetchImpl ?? fetch; - this.sleep = options.sleep ?? defaultSleep; + this.fetchImpl = clientOptions.fetchImpl ?? fetch; + this.sleep = clientOptions.sleep ?? defaultSleep; } protected buildHeaders(headers?: HttpHeaders): HeadersInit { @@ -136,13 +136,13 @@ export class BaseHttpClient { if (response) { const retryAfter = response.headers.get(retryAfterHeader); - if (retryAfter && this.config.respectRetryAfter) { + if (retryAfter && this.options.respectRetryAfter) { waitMs = parseRetryAfter(retryAfter); } } if (waitMs === 0) { - waitMs = computeBackoff(this.config, attempt); + waitMs = computeBackoff(this.options, attempt); } if (waitMs > 0) { @@ -161,7 +161,7 @@ export class SyncHttpClient extends BaseHttpClient { const retryAfterHeader = options.retryAfterHeader ?? DEFAULT_RETRY_AFTER_HEADER; const target = buildUrl(url, options.params); - const maxAttempts = this.config.maxRetries + 1; + const maxAttempts = this.options.maxRetries + 1; let attempt = 0; let lastError: unknown; @@ -169,14 +169,14 @@ export class SyncHttpClient extends BaseHttpClient { const controller = new AbortController(); let timeoutHandle: ReturnType | undefined; try { - timeoutHandle = setTimeout(() => controller.abort(), this.config.timeout * 1000); + timeoutHandle = setTimeout(() => controller.abort(), this.options.timeout * 1000); const headers = this.buildHeaders(options.headers); const init: RequestInit = { body: options.data as BodyInit | undefined, headers, method, - redirect: this.config.followRedirects ? "follow" : "manual", + redirect: this.options.followRedirects ? "follow" : "manual", signal: controller.signal, }; @@ -189,7 +189,7 @@ export class SyncHttpClient extends BaseHttpClient { if ( DEFAULT_TRANSIENT_HTTP_STATUSES.includes(response.status as number) && - attempt < this.config.maxRetries + attempt < this.options.maxRetries ) { await this.maybeDelay(attempt, response, retryAfterHeader); attempt += 1; @@ -209,12 +209,12 @@ export class SyncHttpClient extends BaseHttpClient { if (error instanceof DOMException && error.name === "AbortError") { lastError = error; - if (attempt >= this.config.maxRetries) { + if (attempt >= this.options.maxRetries) { throw error; } } else { lastError = error; - if (attempt >= this.config.maxRetries) { + if (attempt >= this.options.maxRetries) { throw error; } } diff --git a/apps/crawler/src/http/open-graph.ts b/apps/crawler/src/http/open-graph.ts index c1c5a8f..2af241b 100644 --- a/apps/crawler/src/http/open-graph.ts +++ b/apps/crawler/src/http/open-graph.ts @@ -1,8 +1,8 @@ +import { config } from "@basango/domain/config"; import { DEFAULT_OPEN_GRAPH_USER_AGENT } from "@basango/domain/constants"; import { ArticleMetadata } from "@basango/domain/models"; import { parse } from "node-html-parser"; -import { config } from "#crawler/config"; import { SyncHttpClient } from "#crawler/http/http-client"; import { UserAgents } from "#crawler/http/user-agent"; import { createAbsoluteUrl } from "#crawler/utils"; @@ -44,7 +44,7 @@ export class OpenGraph { private readonly client: Pick; constructor() { - const settings = config.fetch.client; + const settings = config.crawler.fetch.client; const provider = new UserAgents(true, DEFAULT_OPEN_GRAPH_USER_AGENT); this.client = new SyncHttpClient(settings, { @@ -89,16 +89,28 @@ export class OpenGraph { root.querySelector("link[rel='canonical']")?.getAttribute("href") ?? null, url ?? null, ]); + const author = pick([extract(root, "article:author"), extract(root, "og:article:author")]); + const publishedAt = pick([ + extract(root, "article:published_time"), + extract(root, "og:article:published_time"), + ]); + const updatedAt = pick([ + extract(root, "article:modified_time"), + extract(root, "og:article:modified_time"), + ]); if (!title && !description && !image && !canonical) { return undefined; } return { + author, description, image: createAbsoluteUrl(url, image ?? "") || undefined, + publishedAt, title, + updatedAt, url: createAbsoluteUrl(url, canonical ?? "") || undefined, - }; + } as ArticleMetadata; } } diff --git a/apps/crawler/src/process/async/handlers.ts b/apps/crawler/src/process/async/handlers.ts index b92c034..20c35ca 100644 --- a/apps/crawler/src/process/async/handlers.ts +++ b/apps/crawler/src/process/async/handlers.ts @@ -1,4 +1,4 @@ -import type { HtmlSourceConfig, WordPressSourceConfig } from "@basango/domain/crawler"; +import type { HtmlSourceOptions, WordPressSourceOptions } from "@basango/domain/config"; import { Article } from "@basango/domain/models"; import { logger } from "@basango/logger"; @@ -24,7 +24,7 @@ export const collectHtmlListing = async ( payload: ListingTaskPayload, manager: QueueManager = createQueueManager(), ): Promise => { - const source = resolveSourceConfig(payload.sourceId) as HtmlSourceConfig; + const source = resolveSourceConfig(payload.sourceId) as HtmlSourceOptions; if (source.sourceKind !== "html") { return await collectWordPressListing(payload, manager); } @@ -63,7 +63,7 @@ export const collectWordPressListing = async ( payload: ListingTaskPayload, manager: QueueManager = createQueueManager(), ): Promise => { - const source = resolveSourceConfig(payload.sourceId) as WordPressSourceConfig; + const source = resolveSourceConfig(payload.sourceId) as WordPressSourceOptions; if (source.sourceKind !== "wordpress") { return await collectHtmlListing(payload, manager); } diff --git a/apps/crawler/src/process/async/queue.ts b/apps/crawler/src/process/async/queue.ts index 9d8afaf..55795ec 100644 --- a/apps/crawler/src/process/async/queue.ts +++ b/apps/crawler/src/process/async/queue.ts @@ -1,9 +1,9 @@ import { randomUUID } from "node:crypto"; -import { JobsOptions, Queue, QueueOptions } from "bullmq"; +import { type CrawlerAsyncOptions, config } from "@basango/domain/config"; +import { JobsOptions, Queue } from "bullmq"; import IORedis from "ioredis"; -import { FetchAsyncConfig, config } from "#crawler/config"; import { DetailsTaskPayload, DetailsTaskPayloadSchema, @@ -20,28 +20,27 @@ export interface QueueBackend { export type QueueFactory = ( queueName: string, - settings: FetchAsyncConfig, + options: CrawlerAsyncOptions, connection?: IORedis, ) => QueueBackend; -const defaultQueueFactory: QueueFactory = (queueName, settings, connection) => { +const defaultQueueFactory: QueueFactory = (queueName, options, connection) => { const redisConnection = connection ?? - new IORedis(settings.redisUrl, { - ...parseRedisUrl(settings.redisUrl), + new IORedis(options.redisUrl, { + ...parseRedisUrl(options.redisUrl), maxRetriesPerRequest: null, }); - const options: QueueOptions = { - connection: redisConnection, - prefix: settings.prefix, - }; - const queue = new Queue(queueName, options); + const queue = new Queue(queueName, { + connection: redisConnection, + prefix: options.prefix, + }); return { add: async (name, data, opts) => { const job = await queue.add(name, data, { - removeOnComplete: settings.ttl.result === 0 ? true : undefined, - removeOnFail: settings.ttl.failure === 0 ? true : undefined, + removeOnComplete: options.ttl.result === 0 ? true : undefined, + removeOnFail: options.ttl.failure === 0 ? true : undefined, ...opts, }); return { id: job.id ?? randomUUID() }; @@ -55,7 +54,7 @@ export interface CreateQueueManagerOptions { } export interface QueueManager { - readonly settings: FetchAsyncConfig; + readonly options: CrawlerAsyncOptions; readonly connection: IORedis; enqueueListing: (payload: ListingTaskPayload) => Promise<{ id: string }>; enqueueArticle: (payload: DetailsTaskPayload) => Promise<{ id: string }>; @@ -66,17 +65,17 @@ export interface QueueManager { } export const createQueueManager = (options: CreateQueueManagerOptions = {}): QueueManager => { - const settings = config.fetch.async; + const asyncOptions = config.crawler.fetch.async; const connection = options.connection ?? - new IORedis(settings.redisUrl, { - ...parseRedisUrl(settings.redisUrl), + new IORedis(asyncOptions.redisUrl, { + ...parseRedisUrl(asyncOptions.redisUrl), maxRetriesPerRequest: null, }); const factory = options.queueFactory ?? defaultQueueFactory; - const ensureQueue = (queueName: string) => factory(queueName, settings, connection); + const ensureQueue = (queueName: string) => factory(queueName, asyncOptions, connection); return { close: async () => { @@ -85,25 +84,25 @@ export const createQueueManager = (options: CreateQueueManagerOptions = {}): Que connection, enqueueArticle: (payload) => { const data = DetailsTaskPayloadSchema.parse(payload); - const queue = ensureQueue(settings.queues.details); + const queue = ensureQueue(asyncOptions.queues.details); return queue.add("collect_article", data); }, enqueueListing: (payload) => { const data = ListingTaskPayloadSchema.parse(payload); - const queue = ensureQueue(settings.queues.listing); + const queue = ensureQueue(asyncOptions.queues.listing); return queue.add("collect_listing", data); }, enqueueProcessed: (payload) => { const data = ProcessingTaskPayloadSchema.parse(payload); - const queue = ensureQueue(settings.queues.processing); + const queue = ensureQueue(asyncOptions.queues.processing); return queue.add("forward_for_processing", data); }, iterQueueNames: () => [ - settings.queues.listing, - settings.queues.details, - settings.queues.processing, + asyncOptions.queues.listing, + asyncOptions.queues.details, + asyncOptions.queues.processing, ], - queueName: (suffix: string) => `${settings.prefix}:${suffix}`, - settings, + options: asyncOptions, + queueName: (suffix: string) => `${asyncOptions.prefix}:${suffix}`, }; }; diff --git a/apps/crawler/src/process/async/schemas.ts b/apps/crawler/src/process/async/schemas.ts index b492f16..5b51b2b 100644 --- a/apps/crawler/src/process/async/schemas.ts +++ b/apps/crawler/src/process/async/schemas.ts @@ -1,5 +1,4 @@ -import { PageRangeSchema, TimestampRangeSchema } from "@basango/domain/crawler"; -import { articleSchema } from "@basango/domain/models"; +import { PageRangeSchema, TimestampRangeSchema, articleSchema } from "@basango/domain/models"; import { z } from "zod"; export const ListingTaskPayloadSchema = z.object({ diff --git a/apps/crawler/src/process/async/worker.ts b/apps/crawler/src/process/async/worker.ts index 85155db..284afb5 100644 --- a/apps/crawler/src/process/async/worker.ts +++ b/apps/crawler/src/process/async/worker.ts @@ -45,7 +45,7 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => { { concurrency: options.concurrency ?? 5, connection, - prefix: manager.settings.prefix, + prefix: manager.options.prefix, }, ); @@ -56,7 +56,7 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => { const queueEvents = new QueueEvents(queueName, { connection, - prefix: manager.settings.prefix, + prefix: manager.options.prefix, }); workers.push(worker); diff --git a/apps/crawler/src/process/crawler.ts b/apps/crawler/src/process/crawler.ts index dd5595d..f9831e1 100644 --- a/apps/crawler/src/process/crawler.ts +++ b/apps/crawler/src/process/crawler.ts @@ -1,7 +1,6 @@ -import type { AnySourceConfig } from "@basango/domain/crawler"; +import { AnySourceOptions, CrawlerFetchingOptions, config } from "@basango/domain/config"; import logger from "@basango/logger"; -import { FetchCrawlerConfig, config } from "#crawler/config"; import { JsonlPersistor, Persistor } from "#crawler/process/persistence"; import { createPageRange, createTimestampRange } from "#crawler/utils"; @@ -13,11 +12,11 @@ export interface CrawlingOptions { } export const resolveCrawlerConfig = ( - source: AnySourceConfig, + source: AnySourceOptions, options: CrawlingOptions, -): FetchCrawlerConfig => { +): CrawlerFetchingOptions => { return { - ...config.fetch.crawler, + ...config.crawler.fetch.crawler, category: options.category, dateRange: createTimestampRange(options.dateRange), pageRange: createPageRange(options.pageRange), @@ -25,10 +24,10 @@ export const resolveCrawlerConfig = ( }; }; -export const createPersistors = (source: AnySourceConfig): Persistor[] => { +export const createPersistors = (source: AnySourceOptions): Persistor[] => { return [ new JsonlPersistor({ - directory: config.paths.data, + directory: config.crawler.paths.data, sourceId: source.sourceId, }), ]; diff --git a/apps/crawler/src/process/parsers/base.ts b/apps/crawler/src/process/parsers/base.ts index 1e19510..9d92268 100644 --- a/apps/crawler/src/process/parsers/base.ts +++ b/apps/crawler/src/process/parsers/base.ts @@ -1,8 +1,7 @@ -import type { AnySourceConfig } from "@basango/domain/crawler"; +import { AnySourceOptions, CrawlerFetchingOptions, config } from "@basango/domain/config"; import { Article } from "@basango/domain/models"; import { HTMLElement, parse as parseHtml } from "node-html-parser"; -import { FetchCrawlerConfig, config } from "#crawler/config"; import { SyncHttpClient } from "#crawler/http/http-client"; import { OpenGraph } from "#crawler/http/open-graph"; import type { Persistor } from "#crawler/process/persistence"; @@ -12,23 +11,23 @@ export interface CrawlerOptions { } export abstract class BaseCrawler { - protected readonly settings: FetchCrawlerConfig; - protected readonly source: AnySourceConfig; + protected readonly options: CrawlerFetchingOptions; + protected readonly source: AnySourceOptions; protected readonly http: SyncHttpClient; protected readonly persistors: Persistor[]; protected readonly openGraph: OpenGraph; - protected constructor(settings: FetchCrawlerConfig, options: CrawlerOptions = {}) { - if (!settings.source) { + protected constructor(options: CrawlerFetchingOptions, crawlerOptions: CrawlerOptions = {}) { + if (!options.source) { throw new Error("Crawler requires a bound source"); } - this.http = new SyncHttpClient(config.fetch.client); - this.persistors = options.persistors ?? []; + this.http = new SyncHttpClient(config.crawler.fetch.client); + this.persistors = crawlerOptions.persistors ?? []; this.openGraph = new OpenGraph(); - this.settings = settings; - this.source = settings.source as AnySourceConfig; + this.options = options; + this.source = options.source as AnySourceOptions; } /** diff --git a/apps/crawler/src/process/parsers/html.ts b/apps/crawler/src/process/parsers/html.ts index 826970f..92a0912 100644 --- a/apps/crawler/src/process/parsers/html.ts +++ b/apps/crawler/src/process/parsers/html.ts @@ -1,11 +1,10 @@ -import type { HtmlSourceConfig, TimestampRange } from "@basango/domain/crawler"; -import { Article } from "@basango/domain/models"; +import { CrawlerFetchingOptions, HtmlSourceOptions } from "@basango/domain/config"; +import { Article, TimestampRange } from "@basango/domain/models"; import { logger } from "@basango/logger"; import { fromUnixTime, getUnixTime, isMatch as isDateMatch, parse } from "date-fns"; import { HTMLElement } from "node-html-parser"; import TurndownService from "turndown"; -import { FetchCrawlerConfig } from "#crawler/config"; import { ArticleOutOfDateRangeError, InvalidArticleError, @@ -26,21 +25,21 @@ const md = new TurndownService({ * Crawler for generic HTML pages. */ export class HtmlCrawler extends BaseCrawler { - readonly source: HtmlSourceConfig; + readonly source: HtmlSourceOptions; private currentNode: string | null = null; - constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) { + constructor(settings: CrawlerFetchingOptions, options: { persistors?: Persistor[] } = {}) { super(settings, options); if (!settings.source || settings.source.sourceKind !== "html") { throw new UnsupportedSourceKindError("HtmlCrawler requires a source of kind 'html'"); } - this.source = this.settings.source as HtmlSourceConfig; + this.source = this.options.source as HtmlSourceOptions; } async fetch(): Promise { - const pageRange = this.settings.pageRange ?? (await this.getPagination()); - const dateRange = this.settings.dateRange; + const pageRange = this.options.pageRange ?? (await this.getPagination()); + const dateRange = this.options.dateRange; const selectors = this.source.sourceSelectors; if (!selectors.articles) { @@ -218,7 +217,7 @@ export class HtmlCrawler extends BaseCrawler { */ private applyCategory(template: string): string { if (template.includes("{category}")) { - const replacement = this.settings.category ?? ""; + const replacement = this.options.category ?? ""; return template.replace("{category}", replacement); } return template; @@ -297,7 +296,7 @@ export class HtmlCrawler extends BaseCrawler { * @param selector - The CSS selector */ private extractCategories(root: HTMLElement, selector?: string | null): string[] { - if (!selector && this.settings.category) return [this.settings.category.toLowerCase()]; + if (!selector && this.options.category) return [this.options.category.toLowerCase()]; if (!selector) return []; const values: string[] = []; diff --git a/apps/crawler/src/process/parsers/wordpress.ts b/apps/crawler/src/process/parsers/wordpress.ts index 5a56248..315fd72 100644 --- a/apps/crawler/src/process/parsers/wordpress.ts +++ b/apps/crawler/src/process/parsers/wordpress.ts @@ -1,10 +1,9 @@ -import type { PageRange, TimestampRange, WordPressSourceConfig } from "@basango/domain/crawler"; -import { Article } from "@basango/domain/models"; +import { CrawlerFetchingOptions, WordPressSourceOptions } from "@basango/domain/config"; +import { Article, PageRange, TimestampRange } from "@basango/domain/models"; import { logger } from "@basango/logger"; import { fromUnixTime } from "date-fns"; import TurndownService from "turndown"; -import { FetchCrawlerConfig } from "#crawler/config"; import { ArticleOutOfDateRangeError, InvalidArticleError, @@ -33,7 +32,7 @@ interface WordPressPost { * Crawler for WordPress sites using the REST API. */ export class WordPressCrawler extends BaseCrawler { - readonly source: WordPressSourceConfig; + readonly source: WordPressSourceOptions; private categoryMap: Map = new Map(); public static readonly POST_QUERY = @@ -43,7 +42,7 @@ export class WordPressCrawler extends BaseCrawler { public static readonly TOTAL_PAGES_HEADER = "x-wp-totalpages"; public static readonly TOTAL_POSTS_HEADER = "x-wp-total"; - constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) { + constructor(settings: CrawlerFetchingOptions, options: { persistors?: Persistor[] } = {}) { super(settings, options); if (!settings.source || settings.source.sourceKind !== "wordpress") { @@ -51,15 +50,15 @@ export class WordPressCrawler extends BaseCrawler { "WordPressCrawler requires a source of kind 'wordpress'", ); } - this.source = this.settings.source as WordPressSourceConfig; + this.source = this.options.source as WordPressSourceOptions; } /** * Fetch and process WordPress posts. */ async fetch(): Promise { - const pageRange = this.settings.pageRange ?? (await this.getPagination()); - const dateRange = this.settings.dateRange; + const pageRange = this.options.pageRange ?? (await this.getPagination()); + const dateRange = this.options.dateRange; for (let page = pageRange.start; page <= pageRange.end; page += 1) { const endpoint = this.buildEndpointUrl(page); diff --git a/apps/crawler/src/process/persistence.ts b/apps/crawler/src/process/persistence.ts index ded7dd7..e376f91 100644 --- a/apps/crawler/src/process/persistence.ts +++ b/apps/crawler/src/process/persistence.ts @@ -1,11 +1,11 @@ import fs from "node:fs"; import path from "node:path"; +import { config } from "@basango/domain/config"; import type { Article } from "@basango/domain/models"; import { md5 } from "@basango/encryption"; import logger from "@basango/logger"; -import { config, env } from "#crawler/config"; import { HttpError, SyncHttpClient } from "#crawler/http/http-client"; export interface Persistor { @@ -66,9 +66,9 @@ export const persist = async ( }; export const forward = async (payload: Partial
): Promise => { - const client = new SyncHttpClient(config.fetch.client); - const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"); - const token = env("BASANGO_CRAWLER_TOKEN"); + const client = new SyncHttpClient(config.crawler.fetch.client); + const endpoint = config.crawler.backend.endpoint; + const token = config.crawler.backend.token; try { const response = await client.post(endpoint, { diff --git a/apps/crawler/src/scripts/push.ts b/apps/crawler/src/scripts/push.ts index 3bd10a8..2dd3f04 100644 --- a/apps/crawler/src/scripts/push.ts +++ b/apps/crawler/src/scripts/push.ts @@ -1,13 +1,13 @@ -#! /usr/bin/env bun +#!/usr/bin/env bun import fs from "node:fs"; import path from "node:path"; import { createInterface } from "node:readline"; import { parseArgs } from "node:util"; +import { config } from "@basango/domain/config"; import type { Article } from "@basango/domain/models"; import { logger } from "@basango/logger"; -import { config } from "#crawler/config"; import { forward } from "#crawler/process/persistence"; const USAGE = ` @@ -31,7 +31,7 @@ const main = async (): Promise => { return; } - const filePath = path.join(config.paths.data, `${sourceId}.jsonl`); + const filePath = path.join(config.crawler.paths.data, `${sourceId}.jsonl`); if (!fs.existsSync(filePath)) { logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found"); diff --git a/apps/crawler/src/scripts/sync.ts b/apps/crawler/src/scripts/sync.ts index 1143406..780d1c6 100644 --- a/apps/crawler/src/scripts/sync.ts +++ b/apps/crawler/src/scripts/sync.ts @@ -1,4 +1,4 @@ -#! /usr/bin/env bun +#!/usr/bin/env bun import { logger } from "@basango/logger"; diff --git a/apps/crawler/src/scripts/worker.ts b/apps/crawler/src/scripts/worker.ts index fbdf9b4..c61fae8 100644 --- a/apps/crawler/src/scripts/worker.ts +++ b/apps/crawler/src/scripts/worker.ts @@ -1,4 +1,4 @@ -#! /usr/bin/env bun +#!/usr/bin/env bun import { logger } from "@basango/logger"; diff --git a/apps/crawler/src/utils.ts b/apps/crawler/src/utils.ts index 8cb827c..3e646aa 100644 --- a/apps/crawler/src/utils.ts +++ b/apps/crawler/src/utils.ts @@ -1,28 +1,29 @@ +import { + AnySourceOptions, + HtmlSourceOptions, + WordPressSourceOptions, + config, +} from "@basango/domain/config"; import { DEFAULT_DATE_FORMAT } from "@basango/domain/constants"; import { - AnySourceConfig, DateSpecSchema, - HtmlSourceConfig, PageRange, PageRangeSchema, PageSpecSchema, TimestampRange, TimestampRangeSchema, - WordPressSourceConfig, -} from "@basango/domain/crawler"; +} from "@basango/domain/models"; import { format, fromUnixTime, getUnixTime, isMatch, parse } from "date-fns"; import type { RedisOptions } from "ioredis"; -import { config } from "#crawler/config"; - /** * Resolve a source configuration by its ID. * @param id - The source ID */ -export const resolveSourceConfig = (id: string): AnySourceConfig => { +export const resolveSourceConfig = (id: string): AnySourceOptions => { const source = - config.sources.html.find((s: HtmlSourceConfig) => s.sourceId === id) || - config.sources.wordpress.find((s: WordPressSourceConfig) => s.sourceId === id); + config.crawler.sources.html.find((s: HtmlSourceOptions) => s.sourceId === id) || + config.crawler.sources.wordpress.find((s: WordPressSourceOptions) => s.sourceId === id); if (source === undefined) { throw new Error(`Source '${id}' not found in configuration`); diff --git a/bun.lock b/bun.lock index abf2645..7f0b81c 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "basango", @@ -25,13 +26,10 @@ "@basango/domain": "workspace:*", "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", - "@devscast/config": "catalog:", "@hono/node-server": "^1.19.6", "@hono/trpc-server": "^0.4.0", "@hono/zod-openapi": "^1.1.4", - "@scalar/hono-api-reference": "^0.9.24", "@trpc/server": "^11.7.1", - "ai": "^5.0.89", "camelcase-keys": "^10.0.1", "date-fns": "catalog:", "hono-rate-limiter": "^0.4.2", @@ -149,7 +147,6 @@ "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "@date-fns/utc": "^2.1.1", - "@devscast/config": "catalog:", "date-fns": "catalog:", "drizzle-orm": "^0.44.7", "mysql2": "^3.15.3", @@ -188,7 +185,7 @@ "packages/logger": { "name": "@basango/logger", "dependencies": { - "@devscast/config": "catalog:", + "@basango/domain": "workspace:*", "pino": "^10.1.0", "pino-pretty": "^13.1.2", }, @@ -245,7 +242,7 @@ }, }, "catalog": { - "@devscast/config": "^1.0.3", + "@devscast/config": "^1.1.1", "@types/bun": "^1.3.1", "@types/node": "^24.10.0", "@types/react": "^19.2.0", @@ -260,12 +257,6 @@ "packages": { "@0no-co/graphql.web": ["@0no-co/graphql.web@1.2.0", "", { "peerDependencies": { "graphql": "^14.0.0 || ^15.0.0 || ^16.0.0" }, "optionalPeers": ["graphql"] }, "sha512-/1iHy9TTr63gE1YcR5idjx8UREz1s0kFhydf3bBLCXyqjhkIc6igAzTOx3zPifCwFR87tsh/4Pa9cNts6d2otw=="], - "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.7", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.16", "@vercel/oidc": "3.0.3" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-/AI5AKi4vOK9SEb8Z1dfXkhsJ5NAfWsoJQc96B/mzn2KIrjw5occOjIwD06scuhV9xWlghCoXJT1sQD9QH/tyg=="], - - "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], - - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.16", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-lsWQY9aDXHitw7C1QRYIbVGmgwyT98TF3MfM8alNIXKpdJdi+W782Rzd9f1RyOfgRmZ08gJ2EYNDhWNK7RqpEA=="], - "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], "@asteasolutions/zod-to-openapi": ["@asteasolutions/zod-to-openapi@8.1.0", "", { "dependencies": { "openapi3-ts": "^4.1.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-tQFxVs05J/6QXXqIzj6rTRk3nj1HFs4pe+uThwE95jL5II2JfpVXkK+CqkO7aT0Do5AYqO6LDrKpleLUFXgY+g=="], @@ -538,7 +529,7 @@ "@date-fns/utc": ["@date-fns/utc@2.1.1", "", {}, "sha512-SlJDfG6RPeEX8wEVv6ZB3kak4MmbtyiI2qX/5zuKdordbrhB/iaJ58GVMZgJ6P1sJaM1gMgENFYYeg1JWrCFrA=="], - "@devscast/config": ["@devscast/config@1.0.3", "", { "peerDependencies": { "ini": "^6.0.0", "yaml": "^2.8.1", "zod": "^4.1.12" }, "optionalPeers": ["ini", "yaml"] }, "sha512-/FjCA/MV1KR2tY44YBA4tdXNzQgoF75O+RQ4fbzvVWY77PXOama2Hf6YXeLcQsvxfItaXi2cFz8BaaVdqZYS8w=="], + "@devscast/config": ["@devscast/config@1.1.1", "", { "peerDependencies": { "ini": "^6.0.0", "yaml": "^2.8.1", "zod": "^4.1.12" }, "optionalPeers": ["ini", "yaml"] }, "sha512-PyGV43m6V8sO66EOsKXWkohisH90rQZIcEgbGB2yVJ+BAfwj1P3rUx3DifpndX/Go8Ng9YbkjCNYBKYk5FwSgQ=="], "@drizzle-team/brocli": ["@drizzle-team/brocli@0.10.2", "", {}, "sha512-z33Il7l5dKjUgGULTqBsQBQwckHh5AbIuxhdsIxDDiZAzBOrZO6q9ogcWC65kU382AfynTfgNumVcNIjuIua6w=="], @@ -944,14 +935,6 @@ "@reduxjs/toolkit": ["@reduxjs/toolkit@2.10.1", "", { "dependencies": { "@standard-schema/spec": "^1.0.0", "@standard-schema/utils": "^0.3.0", "immer": "^10.2.0", "redux": "^5.0.1", "redux-thunk": "^3.1.0", "reselect": "^5.1.0" }, "peerDependencies": { "react": "^16.9.0 || ^17.0.0 || ^18 || ^19", "react-redux": "^7.2.1 || ^8.1.3 || ^9.0.0" }, "optionalPeers": ["react", "react-redux"] }, "sha512-/U17EXQ9Do9Yx4DlNGU6eVNfZvFJfYpUtRRdLf19PbPjdWBxNlxGZXywQZ1p1Nz8nMkWplTI7iD/23m07nolDA=="], - "@scalar/core": ["@scalar/core@0.3.22", "", { "dependencies": { "@scalar/types": "0.4.0" } }, "sha512-6lzeRkvgkukSgge35kvxJKiJBny4rdGSaLTNzn/sF1F6JRfUo7I0AgqFxxSZWMD+EG4kGyNxAz0zciDSx2Cjvw=="], - - "@scalar/hono-api-reference": ["@scalar/hono-api-reference@0.9.24", "", { "dependencies": { "@scalar/core": "0.3.22" }, "peerDependencies": { "hono": "^4.10.3" } }, "sha512-NjPY3iMm/FqYRXAgr6V7qBhJGbSUQ8hbijFUMuqZo4pIjGEUNLeB5L9U2Gh4cDIPPWeso8mlc16jaX7dV0FrPw=="], - - "@scalar/openapi-types": ["@scalar/openapi-types@0.5.1", "", { "dependencies": { "zod": "4.1.11" } }, "sha512-8g7s9lPolyDFtijyh3Ob459tpezPuZbkXoFgJwBTHjPZ7ap+TvOJTvLk56CFwxVBVz2BxCzWJqxYyy3FUdeLoA=="], - - "@scalar/types": ["@scalar/types@0.4.0", "", { "dependencies": { "@scalar/openapi-types": "0.5.1", "nanoid": "5.1.5", "type-fest": "5.0.0", "zod": "4.1.11" } }, "sha512-vOD1GZez7kPdVA+UQit05QE9dbALfevhK9kqRTsqcPX7FvvZ9eQWSNl1GKmKtmRiAZGThv2agM5AvHRxkH2JSw=="], - "@sinclair/typebox": ["@sinclair/typebox@0.27.8", "", {}, "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA=="], "@sinonjs/commons": ["@sinonjs/commons@3.0.1", "", { "dependencies": { "type-detect": "4.0.8" } }, "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ=="], @@ -1100,8 +1083,6 @@ "@urql/exchange-retry": ["@urql/exchange-retry@1.3.2", "", { "dependencies": { "@urql/core": "^5.1.2", "wonka": "^6.3.2" } }, "sha512-TQMCz2pFJMfpNxmSfX1VSfTjwUIFx/mL+p1bnfM1xjjdla7Z+KnGMW/EhFbpckp3LyWAH4PgOsMwOMnIN+MBFg=="], - "@vercel/oidc": ["@vercel/oidc@3.0.3", "", {}, "sha512-yNEQvPcVrK9sIe637+I0jD6leluPxzwJKx/Haw6F4H77CdDsszUn5V3o96LPziXkSNE2B83+Z3mjqGKBK/R6Gg=="], - "@xmldom/xmldom": ["@xmldom/xmldom@0.8.11", "", {}, "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw=="], "JSONStream": ["JSONStream@1.3.5", "", { "dependencies": { "jsonparse": "^1.2.0", "through": ">=2.2.7 <3" }, "bin": { "JSONStream": "./bin.js" } }, "sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ=="], @@ -1118,8 +1099,6 @@ "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], - "ai": ["ai@5.0.89", "", { "dependencies": { "@ai-sdk/gateway": "2.0.7", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.16", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8Nq+ZojGacQrupoJEQLrTDzT5VtR3gyp5AaqFSV3tzsAXlYQ9Igb7QE3yeoEdzOk5IRfDwWL7mDCUD+oBg1hDA=="], - "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], "anser": ["anser@1.4.10", "", {}, "sha512-hCv9AqTQ8ycjpSd3upOJd7vFwW1JaoYQ7tpham03GJ1ca8/65rqn0RpaWpItOAd6ylW9wAw6luXYPJIyPFVOww=="], @@ -1478,8 +1457,6 @@ "eventemitter3": ["eventemitter3@5.0.1", "", {}, "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA=="], - "eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="], - "exec-async": ["exec-async@2.2.0", "", {}, "sha512-87OpwcEiMia/DeiKFzaQNBNFeN3XkkpYIh9FyOqq5mS2oKv3CBE67PXoEKcr6nodWdXNogTiQ0jE2NGuoffXPw=="], "execa": ["execa@5.1.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^6.0.0", "human-signals": "^2.1.0", "is-stream": "^2.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^4.0.1", "onetime": "^5.1.2", "signal-exit": "^3.0.3", "strip-final-newline": "^2.0.0" } }, "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg=="], @@ -1786,8 +1763,6 @@ "json-parse-even-better-errors": ["json-parse-even-better-errors@2.3.1", "", {}, "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="], - "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], - "json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], @@ -2410,8 +2385,6 @@ "swap-case": ["swap-case@1.1.2", "", { "dependencies": { "lower-case": "^1.1.1", "upper-case": "^1.1.1" } }, "sha512-BAmWG6/bx8syfc6qXPprof3Mn5vQgf5dwdUNJhsNqU9WdPt5P+ES/wQ5bxfijy8zwZgZZHslC3iAsxsuQMCzJQ=="], - "tagged-tag": ["tagged-tag@1.0.0", "", {}, "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng=="], - "tailwind-merge": ["tailwind-merge@3.3.1", "", {}, "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g=="], "tailwindcss": ["tailwindcss@4.1.17", "", {}, "sha512-j9Ee2YjuQqYT9bbRTfTZht9W/ytp5H+jJpZKiYdP/bpnXARAuELt9ofP0lPnmHjbga7SNQIxdTAXCmtKVYjN+Q=="], @@ -2816,14 +2789,6 @@ "@radix-ui/react-visually-hidden/@radix-ui/react-primitive": ["@radix-ui/react-primitive@2.1.3", "", { "dependencies": { "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ=="], - "@scalar/openapi-types/zod": ["zod@4.1.11", "", {}, "sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg=="], - - "@scalar/types/nanoid": ["nanoid@5.1.5", "", { "bin": { "nanoid": "bin/nanoid.js" } }, "sha512-Ir/+ZpE9fDsNH0hQ3C68uyThDXzYcim2EqcZ8zn8Chtt1iylPT9xXJB0kPCnqzgcEGikO9RxSrh63MsmVCU7Fw=="], - - "@scalar/types/type-fest": ["type-fest@5.0.0", "", { "dependencies": { "tagged-tag": "^1.0.0" } }, "sha512-GeJop7+u7BYlQ6yQCAY1nBQiRSHR+6OdCEtd8Bwp9a3NK3+fWAVjOaPKJDteB9f6cIJ0wt4IfnScjLG450EpXA=="], - - "@scalar/types/zod": ["zod@4.1.11", "", {}, "sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg=="], - "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.7.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" }, "bundled": true }, "sha512-pJdKGq/1iquWYtv1RRSljZklxHCOCAJFJrImO5ZLKPJVJlVUcs8yFwNQlqS0Lo8xT1VAXXTCZocF9n26FWEKsw=="], "@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.7.0", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-oAYoQnCYaQZKVS53Fq23ceWMRxq5EhQsE0x0RdQ55jT7wagMu5k+fS39v1fiSLrtrLQlXwVINenqhLMtTrV/1Q=="], diff --git a/compose.yaml b/compose.yaml index eea4d92..93444b3 100644 --- a/compose.yaml +++ b/compose.yaml @@ -48,29 +48,6 @@ services: networks: - basango_network - nginx: - build: .docker/nginx - ports: - - "8000:80" - volumes: - - ./projects/api-legacy/public:/var/www/public:delegated - depends_on: - - php - networks: - - basango_network - - php: - user: '${USER_ID:-1000}:${GROUP_ID:-1000}' - build: .docker/php - volumes: - - ./projects/api-legacy:/var/www:delegated - depends_on: - - mariadb - - postgres - - redis - networks: - - basango_network - adminer: image: adminer:latest depends_on: diff --git a/package.json b/package.json index 9601a37..0c1a3a8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "catalog": { - "@devscast/config": "^1.0.3", + "@devscast/config": "^1.1.1", "@types/bun": "^1.3.1", "@types/node": "^24.10.0", "@types/react": "^19.2.0", @@ -52,6 +52,7 @@ "dev:dashboard": "turbo dev --filter=@basango/dashboard", "format": "biome format --write && biome check --write && biome lint --write", "lint": "biome check && biome lint && manypkg check", + "migrate": "cd packages/db && bunx drizzle-kit migrate", "prepare": "husky", "start:api": "turbo start --filter=@basango/api", "start:dashboard": "turbo start --filter=@basango/dashboard", diff --git a/packages/db/.env b/packages/db/.env deleted file mode 100644 index e1b025c..0000000 --- a/packages/db/.env +++ /dev/null @@ -1,6 +0,0 @@ -BASANGO_DATABASE_URL="postgresql://postgres:postgres@localhost:5432/app?serverVersion=16&charset=utf8" - -BASANGO_SOURCE_DATABASE_HOST="localhost" -BASANGO_SOURCE_DATABASE_PASS="root" -BASANGO_SOURCE_DATABASE_NAME="app" -BASANGO_SOURCE_DATABASE_USER="root" diff --git a/packages/db/drizzle.config.ts b/packages/db/drizzle.config.ts index 27ce76f..54eab49 100644 --- a/packages/db/drizzle.config.ts +++ b/packages/db/drizzle.config.ts @@ -1,10 +1,9 @@ +import { config } from "@basango/domain/config"; import { defineConfig } from "drizzle-kit"; -import { env } from "./src/config"; - export default defineConfig({ dbCredentials: { - url: env("BASANGO_DATABASE_URL"), + url: config.database.url, }, dialect: "postgresql", out: "./migrations", diff --git a/packages/db/migrations/0002_modern_joseph.sql b/packages/db/migrations/0002_modern_joseph.sql new file mode 100644 index 0000000..3ee0bce --- /dev/null +++ b/packages/db/migrations/0002_modern_joseph.sql @@ -0,0 +1,2 @@ +ALTER TABLE "article" drop column "tsv";--> statement-breakpoint +ALTER TABLE "article" ADD COLUMN "tsv" "tsvector" GENERATED ALWAYS AS (setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char")) STORED;--> statement-breakpoint diff --git a/packages/db/migrations/meta/0002_snapshot.json b/packages/db/migrations/meta/0002_snapshot.json new file mode 100644 index 0000000..ec8943b --- /dev/null +++ b/packages/db/migrations/meta/0002_snapshot.json @@ -0,0 +1,1253 @@ +{ + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + }, + "dialect": "postgresql", + "enums": {}, + "id": "842ee997-0c79-4705-8027-02fa9d803227", + "policies": {}, + "prevId": "2b230826-2e33-4eb8-9d90-f5b15f260fc2", + "roles": {}, + "schemas": {}, + "sequences": {}, + "tables": { + "public.article": { + "checkConstraints": {}, + "columns": { + "body": { + "name": "body", + "notNull": true, + "primaryKey": false, + "type": "text" + }, + "categories": { + "name": "categories", + "notNull": false, + "primaryKey": false, + "type": "text[]" + }, + "crawled_at": { + "default": "now()", + "name": "crawled_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "credibility": { + "name": "credibility", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "excerpt": { + "generated": { + "as": "(\"left\"(body, 200) || '...'::text)", + "type": "stored" + }, + "name": "excerpt", + "notNull": false, + "primaryKey": false, + "type": "varchar(255)" + }, + "hash": { + "name": "hash", + "notNull": true, + "primaryKey": false, + "type": "varchar(32)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "image": { + "generated": { + "as": "(metadata ->> 'image'::text)", + "type": "stored" + }, + "name": "image", + "notNull": false, + "primaryKey": false, + "type": "varchar(1024)" + }, + "link": { + "name": "link", + "notNull": true, + "primaryKey": false, + "type": "varchar(1024)" + }, + "metadata": { + "name": "metadata", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "published_at": { + "name": "published_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "reading_time": { + "default": 1, + "name": "reading_time", + "notNull": false, + "primaryKey": false, + "type": "integer" + }, + "sentiment": { + "name": "sentiment", + "notNull": true, + "primaryKey": false, + "type": "sentiment", + "typeSchema": "public" + }, + "source_id": { + "name": "source_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "title": { + "name": "title", + "notNull": true, + "primaryKey": false, + "type": "varchar(1024)" + }, + "token_statistics": { + "name": "token_statistics", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "tsv": { + "generated": { + "as": "setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::\"char\")", + "type": "stored" + }, + "name": "tsv", + "notNull": false, + "primaryKey": false, + "type": "tsvector" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_article_source_id": { + "columnsFrom": ["source_id"], + "columnsTo": ["id"], + "name": "fk_article_source_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "article", + "tableTo": "source" + } + }, + "indexes": { + "gin_article_categories": { + "columns": [ + { + "asc": true, + "expression": "categories", + "isExpression": false, + "nulls": "last", + "opclass": "array_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_categories", + "with": {} + }, + "gin_article_link_trgm": { + "columns": [ + { + "asc": true, + "expression": "link", + "isExpression": false, + "nulls": "last", + "opclass": "gin_trgm_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_link_trgm", + "with": {} + }, + "gin_article_title_trgm": { + "columns": [ + { + "asc": true, + "expression": "title", + "isExpression": false, + "nulls": "last", + "opclass": "gin_trgm_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_title_trgm", + "with": {} + }, + "gin_article_tsv": { + "columns": [ + { + "asc": true, + "expression": "tsv", + "isExpression": false, + "nulls": "last", + "opclass": "tsvector_ops" + } + ], + "concurrently": false, + "isUnique": false, + "method": "gin", + "name": "gin_article_tsv", + "with": {} + }, + "idx_article_source_published_id": { + "columns": [ + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "published_at", + "isExpression": false, + "nulls": "first" + }, + { + "asc": false, + "expression": "id", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_article_source_published_id", + "with": {} + }, + "unq_article_hash": { + "columns": [ + { + "asc": true, + "expression": "hash", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_article_hash", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "article", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.bookmark": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "description": { + "name": "description", + "notNull": false, + "primaryKey": false, + "type": "varchar(512)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_public": { + "default": false, + "name": "is_public", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_bookmark_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark", + "tableTo": "user" + } + }, + "indexes": { + "idx_bookmark_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_bookmark_user_created", + "with": {} + }, + "unq_bookmark_user_name": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "lower(\"name\")", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_bookmark_user_name", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "bookmark", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.bookmark_article": { + "checkConstraints": {}, + "columns": { + "article_id": { + "name": "article_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "bookmark_id": { + "name": "bookmark_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": { + "bookmark_article_pkey": { + "columns": ["bookmark_id", "article_id"], + "name": "bookmark_article_pkey" + } + }, + "foreignKeys": { + "fk_bookmark_article_article_id": { + "columnsFrom": ["article_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_article_article_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark_article", + "tableTo": "article" + }, + "fk_bookmark_article_bookmark_id": { + "columnsFrom": ["bookmark_id"], + "columnsTo": ["id"], + "name": "fk_bookmark_article_bookmark_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "bookmark_article", + "tableTo": "bookmark" + } + }, + "indexes": { + "idx_bookmark_article_bookmark_id": { + "columns": [ + { + "asc": true, + "expression": "bookmark_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_bookmark_article_bookmark_id", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "bookmark_article", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.comment": { + "checkConstraints": {}, + "columns": { + "article_id": { + "name": "article_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "content": { + "name": "content", + "notNull": true, + "primaryKey": false, + "type": "varchar(512)" + }, + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_spam": { + "default": false, + "name": "is_spam", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "sentiment": { + "name": "sentiment", + "notNull": true, + "primaryKey": false, + "type": "sentiment", + "typeSchema": "public" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_comment_article_id": { + "columnsFrom": ["article_id"], + "columnsTo": ["id"], + "name": "fk_comment_article_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "comment", + "tableTo": "article" + }, + "fk_comment_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_comment_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "comment", + "tableTo": "user" + } + }, + "indexes": { + "idx_comment_article_created": { + "columns": [ + { + "asc": true, + "expression": "article_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_article_created", + "with": {} + }, + "idx_comment_article_id": { + "columns": [ + { + "asc": true, + "expression": "article_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_article_id", + "with": {} + }, + "idx_comment_user_id": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_comment_user_id", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "comment", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.followed_source": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "follower_id": { + "name": "follower_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "source_id": { + "name": "source_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_followed_source_follower_id": { + "columnsFrom": ["follower_id"], + "columnsTo": ["id"], + "name": "fk_followed_source_follower_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "followed_source", + "tableTo": "user" + }, + "fk_followed_source_source_id": { + "columnsFrom": ["source_id"], + "columnsTo": ["id"], + "name": "fk_followed_source_source_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "followed_source", + "tableTo": "source" + } + }, + "indexes": { + "idx_followed_source_follower_created": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_follower_created", + "with": {} + }, + "idx_followed_source_follower_id": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_follower_id", + "with": {} + }, + "idx_followed_source_source_id": { + "columns": [ + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_followed_source_source_id", + "with": {} + }, + "unq_followed_source_user_source": { + "columns": [ + { + "asc": true, + "expression": "follower_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "source_id", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_followed_source_user_source", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "followed_source", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.login_attempt": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_login_attempt_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_login_attempt_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "login_attempt", + "tableTo": "user" + } + }, + "indexes": { + "idx_login_attempt_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_attempt_user_created", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "login_attempt", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.login_history": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "device": { + "name": "device", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "ip_address": { + "name": "ip_address", + "notNull": false, + "primaryKey": false, + "type": "inet" + }, + "location": { + "name": "location", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_login_history_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_login_history_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "login_history", + "tableTo": "user" + } + }, + "indexes": { + "idx_login_history_ip_address": { + "columns": [ + { + "asc": true, + "expression": "ip_address", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_history_ip_address", + "with": {} + }, + "idx_login_history_user_created": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_login_history_user_created", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "login_history", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.refresh_token": { + "checkConstraints": {}, + "columns": { + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "token": { + "name": "token", + "notNull": true, + "primaryKey": false, + "type": "varchar(128)" + }, + "username": { + "name": "username", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "valid": { + "name": "valid", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "idx_refresh_token_username": { + "columns": [ + { + "asc": true, + "expression": "lower(\"username\")", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_refresh_token_username", + "with": {} + }, + "idx_refresh_token_valid": { + "columns": [ + { + "asc": true, + "expression": "valid", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_refresh_token_valid", + "with": {} + }, + "uniq_refresh_token_token": { + "columns": [ + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "uniq_refresh_token_token", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "refresh_token", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.source": { + "checkConstraints": {}, + "columns": { + "credibility": { + "name": "credibility", + "notNull": false, + "primaryKey": false, + "type": "jsonb" + }, + "description": { + "name": "description", + "notNull": false, + "primaryKey": false, + "type": "varchar(1024)" + }, + "display_name": { + "name": "display_name", + "notNull": false, + "primaryKey": false, + "type": "varchar(255)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + }, + "url": { + "name": "url", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "unq_source_name": { + "columns": [ + { + "asc": true, + "expression": "lower((name)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_source_name", + "with": {} + }, + "unq_source_url": { + "columns": [ + { + "asc": true, + "expression": "lower((url)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_source_url", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "source", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.user": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "email": { + "name": "email", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "is_confirmed": { + "default": false, + "name": "is_confirmed", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "is_locked": { + "default": false, + "name": "is_locked", + "notNull": true, + "primaryKey": false, + "type": "boolean" + }, + "name": { + "name": "name", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)" + }, + "password": { + "name": "password", + "notNull": true, + "primaryKey": false, + "type": "varchar(512)" + }, + "roles": { + "default": "'{\"ROLE_USER\"}'", + "name": "roles", + "notNull": true, + "primaryKey": false, + "type": "varchar(255)[]" + }, + "updated_at": { + "name": "updated_at", + "notNull": false, + "primaryKey": false, + "type": "timestamp" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": {}, + "indexes": { + "idx_user_created_at": { + "columns": [ + { + "asc": true, + "expression": "created_at", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_user_created_at", + "with": {} + }, + "unq_user_email": { + "columns": [ + { + "asc": true, + "expression": "lower((email)::text)", + "isExpression": true, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_user_email", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "user", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + }, + "public.verification_token": { + "checkConstraints": {}, + "columns": { + "created_at": { + "default": "now()", + "name": "created_at", + "notNull": true, + "primaryKey": false, + "type": "timestamp" + }, + "id": { + "name": "id", + "notNull": true, + "primaryKey": true, + "type": "uuid" + }, + "purpose": { + "name": "purpose", + "notNull": true, + "primaryKey": false, + "type": "token_purpose", + "typeSchema": "public" + }, + "token": { + "name": "token", + "notNull": false, + "primaryKey": false, + "type": "varchar(60)" + }, + "user_id": { + "name": "user_id", + "notNull": true, + "primaryKey": false, + "type": "uuid" + } + }, + "compositePrimaryKeys": {}, + "foreignKeys": { + "fk_verification_token_user_id": { + "columnsFrom": ["user_id"], + "columnsTo": ["id"], + "name": "fk_verification_token_user_id", + "onDelete": "cascade", + "onUpdate": "no action", + "tableFrom": "verification_token", + "tableTo": "user" + } + }, + "indexes": { + "idx_verif_token_created_at": { + "columns": [ + { + "asc": false, + "expression": "created_at", + "isExpression": false, + "nulls": "first" + } + ], + "concurrently": false, + "isUnique": false, + "method": "btree", + "name": "idx_verif_token_created_at", + "with": {} + }, + "unq_verif_token_token": { + "columns": [ + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_verif_token_token", + "where": "\"verification_token\".\"token\" IS NOT NULL", + "with": {} + }, + "unq_verif_user_purpose_token": { + "columns": [ + { + "asc": true, + "expression": "user_id", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "purpose", + "isExpression": false, + "nulls": "last" + }, + { + "asc": true, + "expression": "token", + "isExpression": false, + "nulls": "last" + } + ], + "concurrently": false, + "isUnique": true, + "method": "btree", + "name": "unq_verif_user_purpose_token", + "where": "\"verification_token\".\"token\" IS NOT NULL", + "with": {} + } + }, + "isRLSEnabled": false, + "name": "verification_token", + "policies": {}, + "schema": "", + "uniqueConstraints": {} + } + }, + "version": "7", + "views": {} +} diff --git a/packages/db/migrations/meta/_journal.json b/packages/db/migrations/meta/_journal.json index b49dac3..71c1f92 100644 --- a/packages/db/migrations/meta/_journal.json +++ b/packages/db/migrations/meta/_journal.json @@ -14,6 +14,13 @@ "tag": "0001_init", "version": "7", "when": 1762775267679 + }, + { + "breakpoints": true, + "idx": 2, + "tag": "0002_modern_joseph", + "version": "7", + "when": 1763920009482 } ], "version": "7" diff --git a/packages/db/package.json b/packages/db/package.json index 2cec4db..45808a0 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -4,7 +4,6 @@ "@basango/encryption": "workspace:*", "@basango/logger": "workspace:*", "@date-fns/utc": "^2.1.1", - "@devscast/config": "catalog:", "date-fns": "catalog:", "drizzle-orm": "^0.44.7", "mysql2": "^3.15.3", diff --git a/packages/db/src/client.ts b/packages/db/src/client.ts index dc42ac1..f3014aa 100644 --- a/packages/db/src/client.ts +++ b/packages/db/src/client.ts @@ -1,14 +1,14 @@ +import { config } from "@basango/domain/config"; import { drizzle } from "drizzle-orm/node-postgres"; import { Pool } from "pg"; -import { env } from "#db/config"; import * as schema from "#db/schema"; const isDevelopment = process.env.NODE_ENV === "development"; const pool = new Pool({ allowExitOnIdle: true, - connectionString: env("BASANGO_DATABASE_URL"), + connectionString: config.database.url, connectionTimeoutMillis: 15_000, idleTimeoutMillis: isDevelopment ? 5_000 : 60_000, max: isDevelopment ? 8 : 12, diff --git a/packages/db/src/config.ts b/packages/db/src/config.ts deleted file mode 100644 index df46ad7..0000000 --- a/packages/db/src/config.ts +++ /dev/null @@ -1,20 +0,0 @@ -import path from "node:path"; - -import { loadConfig } from "@devscast/config"; -import { z } from "zod"; - -const PROJECT_DIR = path.resolve(__dirname, "../"); - -export const { env, config } = loadConfig({ - env: { - knownKeys: [ - "BASANGO_DATABASE_URL", - "BASANGO_SOURCE_DATABASE_HOST", - "BASANGO_SOURCE_DATABASE_USER", - "BASANGO_SOURCE_DATABASE_PASS", - "BASANGO_SOURCE_DATABASE_NAME", - ] as const, - path: path.join(PROJECT_DIR, ".env"), - }, - schema: z.object({}), -}); diff --git a/packages/db/src/queries/articles.ts b/packages/db/src/queries/articles.ts index 4099560..30b11d6 100644 --- a/packages/db/src/queries/articles.ts +++ b/packages/db/src/queries/articles.ts @@ -12,7 +12,7 @@ import { import { md5 } from "@basango/encryption"; import type { SQL } from "drizzle-orm"; import { count, desc, eq, getTableColumns, sql } from "drizzle-orm"; -import { v7 as uuidV7 } from "uuid"; +import * as uuid from "uuid"; import { Database } from "#db/client"; import { getSourceIdByName } from "#db/queries/sources"; @@ -56,7 +56,7 @@ export async function createArticle(db: Database, params: CreateArticleParams) { const [result] = await db .insert(articles) - .values({ id: uuidV7(), ...data }) + .values({ id: uuid.v7(), ...data }) .returning({ id: articles.id, sourceId: articles.sourceId, diff --git a/packages/db/src/queries/sources.ts b/packages/db/src/queries/sources.ts index 6949faa..415d6e3 100644 --- a/packages/db/src/queries/sources.ts +++ b/packages/db/src/queries/sources.ts @@ -1,7 +1,7 @@ import { DEFAULT_CATEGORY_SHARES_LIMIT, DEFAULT_TIMEZONE } from "@basango/domain/constants"; import { ID, Publication, Publications } from "@basango/domain/models"; import { eq, sql } from "drizzle-orm"; -import { v7 as uuidV7 } from "uuid"; +import * as uuid from "uuid"; import { Database } from "#db/client"; import { NotFoundError } from "#db/errors"; @@ -32,7 +32,7 @@ export async function getSources(db: Database) { export async function createSource(db: Database, params: CreateSourceParams) { const [result] = await db .insert(sources) - .values({ id: uuidV7(), ...params }) + .values({ id: uuid.v7(), ...params }) .returning(); return result; diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 5a5e200..efa09c4 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -114,10 +114,7 @@ export const articles = pgTable( title: varchar({ length: 1024 }).notNull(), tokenStatistics: jsonb("token_statistics").$type(), tsv: tsvector("tsv").generatedAlwaysAs( - sql`( - setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char") - || setweight(to_tsvector('french'::regconfig, COALESCE(body, ''::text)), 'B'::"char") - )`, + sql`setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char")`, ), updatedAt: timestamp("updated_at"), }, diff --git a/packages/db/src/synchronizers/data.ts b/packages/db/src/synchronizers/data.ts index f8273eb..89a9201 100644 --- a/packages/db/src/synchronizers/data.ts +++ b/packages/db/src/synchronizers/data.ts @@ -2,10 +2,10 @@ /** biome-ignore-all lint/correctness/noUnusedPrivateClassMembers: false positive */ +import { config } from "@basango/domain/config"; import { RowDataPacket } from "mysql2/promise"; import { Pool, PoolClient } from "pg"; -import { env } from "#db/config"; import { computeReadingTime } from "#db/utils/computed"; type SourceOptions = { @@ -598,13 +598,13 @@ async function main() { const engine = new Engine( { - database: env("BASANGO_SOURCE_DATABASE_NAME"), - host: env("BASANGO_SOURCE_DATABASE_HOST"), - password: env("BASANGO_SOURCE_DATABASE_PASS"), - user: env("BASANGO_SOURCE_DATABASE_USER"), + database: config.database.legacy.name, + host: config.database.legacy.host, + password: config.database.legacy.password, + user: config.database.legacy.user, }, { - database: env("BASANGO_DATABASE_URL"), + database: config.database.url, }, ); diff --git a/packages/db/src/synchronizers/tokens.ts b/packages/db/src/synchronizers/tokens.ts index 7c26a84..3619d88 100644 --- a/packages/db/src/synchronizers/tokens.ts +++ b/packages/db/src/synchronizers/tokens.ts @@ -1,8 +1,8 @@ #!/usr/bin/env bun +import { config } from "@basango/domain/config"; import { Pool } from "pg"; -import { env } from "#db/config"; import { computeTokenStatistics } from "#db/utils/computed"; type ArticleRow = { @@ -114,7 +114,7 @@ class Engine { } async function main() { - const engine = new Engine(env("BASANGO_DATABASE_URL")); + const engine = new Engine(config.database.url); try { await engine.synchronize(); diff --git a/packages/domain/config/api.json b/packages/domain/config/api.json new file mode 100644 index 0000000..f46a9d1 --- /dev/null +++ b/packages/domain/config/api.json @@ -0,0 +1,31 @@ +{ + "api": { + "cors": { + "allowedHeaders": [ + "Authorization", + "Content-Type", + "accept-language", + "x-trpc-source", + "x-user-locale", + "x-user-timezone", + "x-user-country" + ], + "allowMethods": ["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"], + "exposeHeaders": ["Content-Length"], + "maxAge": 86400 + }, + "security": { + "accessTokenTtl": "15m", + "audience": "basango_dashboard", + "crawlerToken": "%env(BASANGO_API_CRAWLER_TOKEN)%", + "issuer": "basango_api", + "jwtSecret": "%env(BASANGO_API_JWT_SECRET)%", + "refreshTokenTtl": "7d" + }, + "server": { + "host": "%env(BASANGO_API_HOST)%", + "port": "%env(number:BASANGO_API_PORT)%", + "version": "1.0.0" + } + } +} diff --git a/packages/domain/config/crawler.json b/packages/domain/config/crawler.json new file mode 100644 index 0000000..8e2456c --- /dev/null +++ b/packages/domain/config/crawler.json @@ -0,0 +1,262 @@ +{ + "crawler": { + "backend": { + "endpoint": "%env(BASANGO_API_CRAWLER_ENDPOINT)%", + "token": "%env(BASANGO_API_CRAWLER_TOKEN)%" + }, + "fetch": { + "async": { + "prefix": "basango:crawler", + "queues": { + "details": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS)%", + "listing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_LISTING)%", + "processing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING)%" + }, + "redisUrl": "%env(BASANGO_CRAWLER_ASYNC_REDIS_URL)%", + "ttl": { + "default": 600, + "failure": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_FAILURE)%", + "result": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_RESULT)%" + } + }, + "client": { + "backoffInitial": 1, + "backoffMax": 30, + "backoffMultiplier": 2, + "followRedirects": true, + "maxRetries": "%env(number:BASANGO_CRAWLER_FETCH_MAX_RETRIES)%", + "respectRetryAfter": "%env(boolean:BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER)%", + "rotate": true, + "timeout": 20, + "userAgent": "%env(BASANGO_CRAWLER_FETCH_USER_AGENT)%", + "verifySsl": true + }, + "crawler": { + "direction": "%env(BASANGO_CRAWLER_UPDATE_DIRECTION)%", + "maxWorkers": 5, + "notify": false, + "useMultiThreading": false + } + }, + "paths": { + "data": "%env(BASANGO_CRAWLER_DATA_PATH)%", + "root": "%env(BASANGO_CRAWLER_ROOT_PATH)%" + }, + "sources": { + "html": [ + { + "paginationTemplate": "actualite", + "requiresDetails": true, + "requiresRateLimit": false, + "sourceDate": {}, + "sourceId": "radiookapi.net", + "sourceKind": "html", + "sourceSelectors": { + "articleBody": ".field-name-body", + "articleCategories": ".views-field-field-cat-gorie a", + "articleDate": "head > meta[property=\"article:published_time\"]", + "articleLink": ".views-field-title a", + "articles": ".view-content > .views-row.content-row", + "articleTitle": "h1.page-header", + "pagination": "ul.pagination > li.pager-last > a" + }, + "sourceUrl": "https://www.radiookapi.net", + "supportsCategories": false + }, + { + "categories": ["politique", "economie", "culture", "sport", "societe"], + "paginationTemplate": "index.php/category/{category}", + "requiresDetails": true, + "requiresRateLimit": false, + "sourceDate": {}, + "sourceId": "7sur7.cd", + "sourceKind": "html", + "sourceSelectors": { + "articleBody": "div[property=\"schema:text\"].field.field--name-body", + "articleDate": "head > meta[property=\"article:published_time\"]", + "articleLink": ".views-field-title a", + "articles": ".view-content > .row.views-row", + "articleTitle": ".views-field-title a", + "pagination": "ul.pagination > li.pager__item.pager__item--last > a" + }, + "sourceUrl": "https://7sur7.cd", + "supportsCategories": true + }, + { + "paginationTemplate": "articles.html", + "requiresDetails": true, + "requiresRateLimit": false, + "sourceDate": { + "format": "dd.MM.yyyy" + }, + "sourceId": "mediacongo.net", + "sourceKind": "html", + "sourceSelectors": { + "articleBody": ".article_ttext", + "articleCategories": "a.color_link", + "articleDate": ".article_other_about", + "articleLink": "a:first-child", + "articles": ".for_aitems > .article_other_item", + "articleTitle": "h1", + "pagination": "div.pagination > div > a:last-child" + }, + "sourceUrl": "https://www.mediacongo.net", + "supportsCategories": false + }, + { + "paginationTemplate": "actualite", + "requiresDetails": true, + "requiresRateLimit": false, + "sourceDate": {}, + "sourceId": "actualite.cd", + "sourceKind": "html", + "sourceSelectors": { + "articleBody": ".views-field.views-field-body .field-content", + "articleCategories": "#actu-cat", + "articleDate": "head > meta[property=\"article:published_time\"]", + "articleLink": "#actu-titre a", + "articles": "#views-bootstrap-taxonomy-term-page-2 > div > div", + "articleTitle": "h1.page-title" + }, + "sourceUrl": "https://actualite.cd", + "supportsCategories": false + } + ], + "wordpress": [ + { + "requiresRateLimit": true, + "sourceId": "beto.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://beto.cd" + }, + { "sourceId": "newscd.net", "sourceKind": "wordpress", "sourceUrl": "https://newscd.net" }, + { + "sourceId": "africanewsrdc.net", + "sourceKind": "wordpress", + "sourceUrl": "https://www.africanewsrdc.net" + }, + { + "sourceId": "angazainstitute.ac.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://angazainstitute.ac.cd" + }, + { "sourceId": "b-onetv.cd", "sourceKind": "wordpress", "sourceUrl": "https://b-onetv.cd" }, + { + "sourceId": "bukavufm.com", + "sourceKind": "wordpress", + "sourceUrl": "https://bukavufm.com" + }, + { + "sourceId": "changement7.net", + "sourceKind": "wordpress", + "sourceUrl": "https://changement7.net" + }, + { + "sourceId": "congoactu.net", + "sourceKind": "wordpress", + "sourceUrl": "https://congoactu.net" + }, + { + "sourceId": "congoindependant.com", + "sourceKind": "wordpress", + "sourceUrl": "https://www.congoindependant.com" + }, + { + "sourceId": "congoquotidien.com", + "sourceKind": "wordpress", + "sourceUrl": "https://www.congoquotidien.com" + }, + { + "sourceId": "cumulard.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://www.cumulard.cd" + }, + { + "sourceId": "environews-rdc.net", + "sourceKind": "wordpress", + "sourceUrl": "https://environews-rdc.net" + }, + { + "sourceId": "freemediardc.info", + "sourceKind": "wordpress", + "sourceUrl": "https://www.freemediardc.info" + }, + { + "sourceId": "geopolismagazine.org", + "sourceKind": "wordpress", + "sourceUrl": "https://geopolismagazine.org" + }, + { + "sourceId": "habarirdc.net", + "sourceKind": "wordpress", + "sourceUrl": "https://habarirdc.net" + }, + { + "sourceId": "infordc.com", + "sourceKind": "wordpress", + "sourceUrl": "https://infordc.com" + }, + { + "sourceId": "kilalopress.net", + "sourceKind": "wordpress", + "sourceUrl": "https://kilalopress.net" + }, + { + "sourceId": "laprosperiteonline.net", + "sourceKind": "wordpress", + "sourceUrl": "https://laprosperiteonline.net" + }, + { + "sourceId": "laprunellerdc.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://laprunellerdc.cd" + }, + { + "sourceId": "lesmedias.net", + "sourceKind": "wordpress", + "sourceUrl": "https://lesmedias.net" + }, + { + "sourceId": "lesvolcansnews.net", + "sourceKind": "wordpress", + "sourceUrl": "https://lesvolcansnews.net" + }, + { + "sourceId": "netic-news.net", + "sourceKind": "wordpress", + "sourceUrl": "https://www.netic-news.net" + }, + { + "sourceId": "objectif-infos.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://objectif-infos.cd" + }, + { + "sourceId": "scooprdc.net", + "sourceKind": "wordpress", + "sourceUrl": "https://scooprdc.net" + }, + { + "sourceId": "journaldekinshasa.com", + "sourceKind": "wordpress", + "sourceUrl": "https://www.journaldekinshasa.com" + }, + { + "sourceId": "lepotentiel.cd", + "sourceKind": "wordpress", + "sourceUrl": "https://lepotentiel.cd" + }, + { + "sourceId": "acturdc.com", + "sourceKind": "wordpress", + "sourceUrl": "https://acturdc.com" + }, + { + "sourceId": "matininfos.net", + "sourceKind": "wordpress", + "sourceUrl": "https://matininfos.net" + } + ] + } + } +} diff --git a/packages/domain/config/database.json b/packages/domain/config/database.json new file mode 100644 index 0000000..f16a2a8 --- /dev/null +++ b/packages/domain/config/database.json @@ -0,0 +1,12 @@ +{ + "database": { + "legacy": { + "host": "%env(BASANGO_DATABASE_LEGACY_HOST)%", + "name": "%env(BASANGO_DATABASE_LEGACY_NAME)%", + "password": "%env(BASANGO_DATABASE_LEGACY_PASSWORD)%", + "port": "%env(number:BASANGO_DATABASE_LEGACY_PORT)%", + "user": "%env(BASANGO_DATABASE_LEGACY_USER)%" + }, + "url": "%env(BASANGO_DATABASE_URL)%" + } +} diff --git a/packages/domain/config/encryption.json b/packages/domain/config/encryption.json new file mode 100644 index 0000000..3af5c28 --- /dev/null +++ b/packages/domain/config/encryption.json @@ -0,0 +1,9 @@ +{ + "encryption": { + "algorithm": "aes-256-gcm", + "authTagLength": 16, + "bcryptSaltRounds": 12, + "ivLength": 16, + "key": "%env(BASANGO_ENCRYPTION_KEY)%" + } +} diff --git a/packages/domain/config/logger.json b/packages/domain/config/logger.json new file mode 100644 index 0000000..d8c6254 --- /dev/null +++ b/packages/domain/config/logger.json @@ -0,0 +1,5 @@ +{ + "logger": { + "level": "%env(BASANGO_LOGGER_LEVEL)%" + } +} diff --git a/packages/domain/config/shared.json b/packages/domain/config/shared.json new file mode 100644 index 0000000..9809fcd --- /dev/null +++ b/packages/domain/config/shared.json @@ -0,0 +1,15 @@ +{ + "shared": { + "categorySharesLimit": 10, + "dateFormat": "yyyy-LL-dd", + "dateTimeFormat": "yyyy-LL-dd'T'HH:mm:ss", + "name": "Basango", + "pagination": { + "defaultLimit": 20, + "maxLimit": 100, + "page": 1 + }, + "publicationGraphDays": 30, + "timezone": "Africa/Lubumbashi" + } +} diff --git a/packages/domain/package.json b/packages/domain/package.json index 3f1dc9a..efd2ae0 100644 --- a/packages/domain/package.json +++ b/packages/domain/package.json @@ -7,6 +7,7 @@ "@basango/tsconfig": "workspace:*" }, "exports": { + "./config": "./src/config/index.ts", "./constants": "./src/constants.ts", "./crawler": "./src/crawler/index.ts", "./models": "./src/models/index.ts" diff --git a/packages/domain/src/config/api.ts b/packages/domain/src/config/api.ts new file mode 100644 index 0000000..2f10e3a --- /dev/null +++ b/packages/domain/src/config/api.ts @@ -0,0 +1,29 @@ +import z from "zod"; + +export const ApiConfigurationSchema = z.object({ + cors: z.object({ + allowedHeaders: z.array(z.string()).default([]), + allowMethods: z.array(z.string()).default([]), + exposeHeaders: z.array(z.string()).default([]), + maxAge: z.number().int().min(0).optional(), + origin: z + .array(z.string()) + .optional() + .default(["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"]), + }), + security: z.object({ + accessTokenTtl: z.string(), + audience: z.string(), + crawlerToken: z.string(), + issuer: z.string(), + jwtSecret: z.string(), + refreshTokenTtl: z.string(), + }), + server: z.object({ + host: z.string().default("localhost"), + port: z.number().int().min(1).max(65535).default(3080), + version: z.string().default("1.0.0"), + }), +}); + +export type ApiConfiguration = z.infer; diff --git a/packages/domain/src/config/crawler.ts b/packages/domain/src/config/crawler.ts new file mode 100644 index 0000000..6d80679 --- /dev/null +++ b/packages/domain/src/config/crawler.ts @@ -0,0 +1,107 @@ +import { z } from "zod"; + +import { SOURCE_KINDS } from "../constants"; +import { PageRangeSchema, TimestampRangeSchema, UpdateDirectionSchema } from "../models"; + +export const SourceKindSchema = z.enum(SOURCE_KINDS); + +export const SourceDateSchema = z.object({ + format: z.string().default("yyyy-LL-dd HH:mm"), +}); + +const SourceOptionsSchema = z.object({ + categories: z.array(z.string()).default([]), + requiresDetails: z.boolean().default(false), + requiresRateLimit: z.boolean().default(false), + sourceDate: SourceDateSchema, + sourceId: z.string(), + sourceKind: SourceKindSchema, + sourceUrl: z.url(), + supportsCategories: z.boolean().default(false), +}); + +export const HtmlSourceOptionsSchema = SourceOptionsSchema.extend({ + paginationTemplate: z.string(), + sourceKind: z.literal("html"), + sourceSelectors: z.object({ + articleBody: z.string(), + articleCategories: z.string().optional(), + articleDate: z.string(), + articleLink: z.string(), + articles: z.string(), + articleTitle: z.string(), + pagination: z.string().default("ul.pagination > li a"), + }), +}); + +export const WordPressSourceOptionsSchema = SourceOptionsSchema.extend({ + sourceDate: SourceDateSchema.default(SourceDateSchema.parse({ format: "yyyy-LL-dd'T'HH:mm:ss" })), + sourceKind: z.literal("wordpress"), +}); + +export const CrawlerConfigurationSchema = z.object({ + backend: z.object({ + endpoint: z.url(), + token: z.string(), + }), + fetch: z.object({ + async: z.object({ + prefix: z.string().default("basango:crawler:queue"), + queues: z.object({ + details: z.string().default("details"), + listing: z.string().default("listing"), + processing: z.string().default("processing"), + }), + redisUrl: z.string().default("redis://localhost:6379/0"), + ttl: z.object({ + default: z.number().int().positive().default(600), + failure: z.number().int().nonnegative().default(3600), + result: z.number().int().nonnegative().default(3600), + }), + }), + client: z.object({ + backoffInitial: z.number().nonnegative().default(1), + backoffMax: z.number().nonnegative().default(30), + backoffMultiplier: z.number().positive().default(2), + followRedirects: z.boolean().default(true), + maxRetries: z.number().int().nonnegative().default(3), + respectRetryAfter: z.boolean().default(true), + rotate: z.boolean().default(true), + timeout: z.number().positive().default(20), + userAgent: z.string().default("Basango/0.1 (+https://github.com/bernard-ng/basango)"), + verifySsl: z.boolean().default(true), + }), + crawler: z.object({ + category: z.string().optional(), + dateRange: TimestampRangeSchema.optional(), + direction: UpdateDirectionSchema.default("forward"), + isUpdate: z.boolean().default(false), + maxWorkers: z.number().int().positive().default(5), + notify: z.boolean().default(false), + pageRange: PageRangeSchema.optional(), + source: z.union([HtmlSourceOptionsSchema, WordPressSourceOptionsSchema]).optional(), + useMultiThreading: z.boolean().default(false), + }), + }), + paths: z.object({ + data: z.string(), + root: z.string(), + }), + sources: z.object({ + html: z.array(HtmlSourceOptionsSchema).default([]), + wordpress: z.array(WordPressSourceOptionsSchema).default([]), + }), +}); + +// types +export type SourceKind = z.infer; +export type SourceDate = z.infer; +export type HtmlSourceOptions = z.infer; +export type WordPressSourceOptions = z.infer; +export type AnySourceOptions = HtmlSourceOptions | WordPressSourceOptions; + +export type CrawlerConfiguration = z.infer; +export type CrawlerHttpOptions = CrawlerConfiguration["fetch"]["client"]; +export type CrawlerFetchingOptions = CrawlerConfiguration["fetch"]["crawler"]; +export type CrawlerAsyncOptions = CrawlerConfiguration["fetch"]["async"]; +export type CrawlerBackendOptions = CrawlerConfiguration["backend"]; diff --git a/packages/domain/src/config/database.ts b/packages/domain/src/config/database.ts new file mode 100644 index 0000000..5c31b9d --- /dev/null +++ b/packages/domain/src/config/database.ts @@ -0,0 +1,15 @@ +import z from "zod"; + +export const DatabaseConfigurationSchema = z.object({ + legacy: z.object({ + host: z.string().min(1), + name: z.string().min(1), + password: z.string().min(1), + port: z.number().optional(), + user: z.string().min(1), + }), + url: z.string().min(1), +}); + +// types +export type DatabaseConfiguration = z.infer; diff --git a/packages/domain/src/config/encryption.ts b/packages/domain/src/config/encryption.ts new file mode 100644 index 0000000..869b6a5 --- /dev/null +++ b/packages/domain/src/config/encryption.ts @@ -0,0 +1,18 @@ +import z from "zod"; + +import { + DEFAULT_AUTH_TAG_LENGTH, + DEFAULT_BCRYPT_SALT_ROUNDS, + DEFAULT_IV_LENGTH, +} from "../constants"; + +export const EncryptionConfigurationSchema = z.object({ + algorithm: z.enum(["aes-128-gcm", "aes-192-gcm", "aes-256-gcm"]), + authTagLength: z.number().nonnegative().default(DEFAULT_AUTH_TAG_LENGTH), + bcryptSaltRounds: z.number().nonnegative().default(DEFAULT_BCRYPT_SALT_ROUNDS), + ivLength: z.number().nonnegative().default(DEFAULT_IV_LENGTH), + key: z.string(), +}); + +// types +export type EncryptionConfiguration = z.infer; diff --git a/packages/domain/src/config/index.ts b/packages/domain/src/config/index.ts new file mode 100644 index 0000000..ccf3b95 --- /dev/null +++ b/packages/domain/src/config/index.ts @@ -0,0 +1,72 @@ +import path from "node:path"; + +import { defineConfig } from "@devscast/config"; +import z from "zod"; + +import { ApiConfigurationSchema } from "./api"; +import { CrawlerConfigurationSchema } from "./crawler"; +import { DatabaseConfigurationSchema } from "./database"; +import { EncryptionConfigurationSchema } from "./encryption"; +import { LoggerConfigurationSchema } from "./logger"; +import { SharedConfigurationSchema } from "./shared"; + +export * from "./api"; +export * from "./crawler"; +export * from "./database"; +export * from "./encryption"; +export * from "./logger"; +export * from "./shared"; + +const root = path.resolve(__dirname, "../../../../"); +const domain = path.join(root, "packages", "domain", "config"); + +export const { env, config } = defineConfig({ + env: { + knownKeys: [ + "NODE_ENV", + "BASANGO_API_HOST", + "BASANGO_API_PORT", + "BASANGO_API_ALLOWED_ORIGINS", + "BASANGO_API_KEY", + "BASANGO_API_CRAWLER_TOKEN", + "BASANGO_API_JWT_SECRET", + "BASANGO_DATABASE_URL", + "BASANGO_DATABASE_LEGACY_HOST", + "BASANGO_DATABASE_LEGACY_PASSWORD", + "BASANGO_DATABASE_LEGACY_NAME", + "BASANGO_DATABASE_LEGACY_USER", + "BASANGO_CRAWLER_ROOT_PATH", + "BASANGO_CRAWLER_DATA_PATH", + "BASANGO_CRAWLER_LOGS_PATH", + "BASANGO_CRAWLER_CONFIG_PATH", + "BASANGO_CRAWLER_UPDATE_DIRECTION", + "BASANGO_CRAWLER_FETCH_USER_AGENT", + "BASANGO_CRAWLER_FETCH_MAX_RETRIES", + "BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER", + "BASANGO_CRAWLER_ASYNC_REDIS_URL", + "BASANGO_CRAWLER_ASYNC_TTL_RESULT", + "BASANGO_CRAWLER_ASYNC_TTL_FAILURE", + "BASANGO_CRAWLER_ASYNC_QUEUE_LISTING", + "BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS", + "BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING", + "BASANGO_ENCRYPTION_KEY", + ] as const, + path: path.join(root, ".env"), + }, + schema: z.object({ + api: ApiConfigurationSchema, + crawler: CrawlerConfigurationSchema, + database: DatabaseConfigurationSchema, + encryption: EncryptionConfigurationSchema, + logger: LoggerConfigurationSchema, + shared: SharedConfigurationSchema, + }), + sources: [ + path.join(domain, "api.json"), + path.join(domain, "crawler.json"), + path.join(domain, "database.json"), + path.join(domain, "encryption.json"), + path.join(domain, "logger.json"), + path.join(domain, "shared.json"), + ], +}); diff --git a/packages/domain/src/config/logger.ts b/packages/domain/src/config/logger.ts new file mode 100644 index 0000000..ecad1ba --- /dev/null +++ b/packages/domain/src/config/logger.ts @@ -0,0 +1,8 @@ +import z from "zod"; + +export const LoggerConfigurationSchema = z.object({ + level: z.string().default("info"), +}); + +// types +export type LoggerConfiguration = z.infer; diff --git a/packages/domain/src/config/shared.ts b/packages/domain/src/config/shared.ts new file mode 100644 index 0000000..ee3906b --- /dev/null +++ b/packages/domain/src/config/shared.ts @@ -0,0 +1,17 @@ +import z from "zod"; + +export const SharedConfigurationSchema = z.object({ + categorySharesLimit: z.number().int().min(1).default(10), + dateFormat: z.string(), + dateTimeFormat: z.string(), + name: z.string().default("Basango"), + pagination: z.object({ + defaultLimit: z.number().int().min(1).max(100), + maxLimit: z.number().int().min(1).max(100), + page: z.number().int().min(1), + }), + publicationGraphDays: z.number().int().min(1), + timezone: z.string(), +}); + +export type SharedConfiguration = z.infer; diff --git a/packages/domain/src/constants.ts b/packages/domain/src/constants.ts index e044a75..76f0111 100644 --- a/packages/domain/src/constants.ts +++ b/packages/domain/src/constants.ts @@ -1,10 +1,8 @@ -// Domain-specific constants and types export const BIAS = ["neutral", "slightly", "partisan", "extreme"] as const; export const RELIABILITY = ["trusted", "reliable", "average", "low_trust", "unreliable"] as const; export const TRANSPARENCY = ["high", "medium", "low"] as const; export const SENTIMENT = ["positive", "neutral", "negative"] as const; -// Crawler-related constants and types export const UPDATE_DIRECTIONS = ["forward", "backward"] as const; export const SOURCE_KINDS = ["wordpress", "html"] as const; @@ -32,5 +30,5 @@ export const DEFAULT_AUTH_TAG_LENGTH = 16; export const DEFAULT_BCRYPT_SALT_ROUNDS = 12; export const DEFAULT_TOKEN_AUDIENCE = "basango_dashboard"; export const DEFAULT_TOKEN_ISSUER = "basango_api"; -export const DEFAULT_ACCESS_TOKEN_TTL = "15m"; +export const DEFAULT_ACCESS_TOKEN_TTL = "35m"; export const DEFAULT_REFRESH_TOKEN_TTL = "7d"; diff --git a/packages/domain/src/crawler/config.ts b/packages/domain/src/crawler/config.ts deleted file mode 100644 index 8224955..0000000 --- a/packages/domain/src/crawler/config.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { z } from "zod"; - -import { SOURCE_KINDS } from "#domain/constants"; - -// schemas -export const SourceKindSchema = z.enum(SOURCE_KINDS); - -export const SourceDateSchema = z.object({ - format: z.string().default("yyyy-LL-dd HH:mm"), -}); - -const SourceConfigSchema = z.object({ - categories: z.array(z.string()).default([]), - requiresDetails: z.boolean().default(false), - requiresRateLimit: z.boolean().default(false), - sourceDate: SourceDateSchema, - sourceId: z.string(), - sourceKind: SourceKindSchema, - sourceUrl: z.url(), - supportsCategories: z.boolean().default(false), -}); - -export const HtmlSourceConfigSchema = SourceConfigSchema.extend({ - paginationTemplate: z.string(), - sourceKind: z.literal("html"), - sourceSelectors: z.object({ - articleBody: z.string(), - articleCategories: z.string().optional(), - articleDate: z.string(), - articleLink: z.string(), - articles: z.string(), - articleTitle: z.string(), - pagination: z.string().default("ul.pagination > li a"), - }), -}); - -export const WordPressSourceConfigSchema = SourceConfigSchema.extend({ - sourceDate: SourceDateSchema.default(SourceDateSchema.parse({ format: "yyyy-LL-dd'T'HH:mm:ss" })), - sourceKind: z.literal("wordpress"), -}); - -// types -export type SourceKind = z.infer; -export type SourceDate = z.infer; -export type HtmlSourceConfig = z.infer; -export type WordPressSourceConfig = z.infer; -export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig; diff --git a/packages/domain/src/crawler/index.ts b/packages/domain/src/crawler/index.ts deleted file mode 100644 index e8d9c43..0000000 --- a/packages/domain/src/crawler/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export * from "./config"; -export * from "./schemas"; diff --git a/packages/domain/src/models/articles.ts b/packages/domain/src/models/articles.ts index aeef020..8614e8e 100644 --- a/packages/domain/src/models/articles.ts +++ b/packages/domain/src/models/articles.ts @@ -1,185 +1,65 @@ -import { z } from "@hono/zod-openapi"; - -import { idSchema, sentimentSchema } from "#domain/models/shared"; +import z from "zod"; +import { idSchema, sentimentSchema } from "./shared"; import { sourceSchema } from "./sources"; // schemas export const articleMetadataSchema = z.object({ - author: z.string().optional().openapi({ - description: "The author of the article.", - example: "John Doe", - }), - description: z.string().optional().openapi({ - description: "A brief description or summary of the article.", - example: "This article discusses the latest advancements in AI technology.", - }), - image: z.url().optional().openapi({ - description: "The URL of the main image associated with the article.", - example: "https://example.com/image.jpg", - }), - publishedAt: z.date().optional().openapi({ - description: "The publication date of the article as a Date object.", - example: "2023-01-01T00:00:00Z", - }), - title: z.string().optional().openapi({ - description: "The title of the article for metadata purposes.", - example: "The Rise of AI", - }), - updatedAt: z.date().optional().openapi({ - description: "The last updated date of the article as a Date object.", - example: "2023-01-02T12:00:00Z", - }), - url: z.url().optional().openapi({ - description: "The canonical URL of the article.", - example: "https://example.com/article", - }), + author: z.string().optional(), + description: z.string().optional(), + image: z.url().optional(), + publishedAt: z.date().optional(), + title: z.string().optional(), + updatedAt: z.date().optional(), + url: z.url().optional(), }); export const tokenStatisticsSchema = z.object({ - body: z.number().optional().default(0).openapi({ - description: "The number of tokens in the article body.", - example: 250, - }), - categories: z.number().optional().default(0).openapi({ - description: "The number of tokens in the article categories.", - example: 3, - }), - excerpt: z.number().optional().default(0).openapi({ - description: "The number of tokens in the article excerpt.", - example: 50, - }), - title: z.number().optional().default(0).openapi({ - description: "The number of tokens in the article title.", - example: 10, - }), - total: z.number().optional().default(0).openapi({ - description: "The total number of tokens in the article.", - example: 313, - }), + body: z.number().optional().default(0), + categories: z.number().optional().default(0), + excerpt: z.number().optional().default(0), + title: z.number().optional().default(0), + total: z.number().optional().default(0), }); export const articleSchema = z.object({ - body: z.string().min(1).openapi({ - description: "The main content of the article.", - example: "This is the body of the article...", - }), - categories: z.array(z.string()).openapi({ - description: "The categories or tags associated with the article.", - example: ["Technology", "AI"], - }), - createdAt: z.date().openapi({ - description: "The date and time when the article was created in the system.", - example: "2023-01-01T12:00:00Z", - }), - excerpt: z.string().optional().openapi({ - description: "A brief excerpt or summary of the article.", - example: "This article discusses the latest advancements in AI technology.", - }), - hash: z.string().min(1).openapi({ - description: "The unique hash of the article link.", - example: "d41d8cd98f00b204e9800998ecf8427e", - }), + body: z.string().min(1), + categories: z.array(z.string()), + createdAt: z.date(), + excerpt: z.string().optional(), + hash: z.string().min(1), id: idSchema, - image: z.url().optional().openapi({ - description: "The URL of the main image associated with the article.", - example: "https://example.com/image.jpg", - }), - link: z.string().url().openapi({ - description: "The URL of the article.", - example: "https://example.com/article", - }), + image: z.url().optional(), + link: z.url(), metadata: articleMetadataSchema.optional(), - publishedAt: z.date().openapi({ - description: "The publication date of the article as a Date object.", - example: "2023-01-01T00:00:00Z", - }), - readingTime: z.number().int().min(1).openapi({ - description: "Estimated reading time of the article in minutes.", - example: 5, - }), + publishedAt: z.date(), + readingTime: z.number().int().min(1), source: sourceSchema.optional(), - sourceId: z.union([z.uuid(), z.string().min(1)]).openapi({ - description: "The unique identifier of the source from which the article was crawled.", - example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g", - }), - title: z.string().min(1).openapi({ - description: "The title of the article.", - example: "The Rise of AI", - }), + sourceId: z.union([z.uuid(), z.string().min(1)]), + title: z.string().min(1), tokenStatistics: tokenStatisticsSchema.optional(), - updatedAt: z.date().optional().openapi({ - description: "The date and time when the article was last updated in the system.", - example: "2023-01-02T12:00:00Z", - }), + updatedAt: z.date().optional(), }); // API -export const createArticleSchema = z - .object({ - body: z.string().min(1).openapi({ - description: "The main content of the article.", - example: "This is the body of the article...", - }), - categories: z - .array(z.string()) - .openapi({ - description: "The categories or tags associated with the article.", - example: ["Technology", "AI"], - }) - .optional() - .default([]), - hash: z.string().min(1).openapi({ - description: "The unique hash of the article link.", - example: "d41d8cd98f00b204e9800998ecf8427e", - }), - link: z.string().url().openapi({ - description: "The URL of the article.", - example: "https://example.com/article", - }), - metadata: articleMetadataSchema.optional(), - publishedAt: z - .string() - .refine((value) => !Number.isNaN(Date.parse(value)), { - message: "Invalid date format", - }) - .transform((value) => new Date(value)) - .openapi({ - description: "The publication date of the article in ISO 8601 format.", - example: "2023-01-01T00:00:00Z", - }), - sourceId: z.string().openapi({ - description: "The unique identifier of the source from which the article was crawled.", - example: "radiookapi.net", - }), - title: z.string().min(1).openapi({ - description: "The title of the article.", - example: "The Rise of AI", - }), - }) - .openapi("CreateArticle"); +export const createArticleSchema = z.object({ + body: z.string().min(1), + categories: z.array(z.string()).optional().default([]), + hash: z.string().min(1), + link: z.url(), + metadata: articleMetadataSchema.optional(), + publishedAt: z.coerce.date(), + sourceId: z.string(), + title: z.string().min(1), +}); -export const createArticleResponseSchema = z - .object({ id: idSchema, sourceId: idSchema }) - .openapi("CreateArticleResponse"); +export const createArticleResponseSchema = z.object({ id: idSchema, sourceId: idSchema }); export const getArticlesSchema = z.object({ - category: z.string().min(1).max(255).optional().openapi({ - description: "Filter articles by a specific category.", - example: "Technology", - }), - cursor: z.string().nullable().optional().openapi({ - description: "Optional cursor for fetching the next page of articles.", - }), - limit: z.number().int().min(1).max(100).optional().openapi({ - default: 10, - description: "Maximum number of articles to return per page.", - example: 20, - }), - search: z.string().max(512).optional().openapi({ - description: "Full-text search query applied to article titles and bodies.", - example: "gouvernement congolais", - }), + category: z.string().min(1).max(255).optional(), + cursor: z.string().nullable().optional(), + limit: z.number().int().min(1).max(100).optional(), + search: z.string().max(512).optional(), sentiment: sentimentSchema.optional(), sourceId: idSchema.optional(), }); diff --git a/packages/domain/src/models/auth.ts b/packages/domain/src/models/auth.ts index 610a63a..dea516f 100644 --- a/packages/domain/src/models/auth.ts +++ b/packages/domain/src/models/auth.ts @@ -1,18 +1,10 @@ -import { z } from "@hono/zod-openapi"; +import z from "zod"; export const loginSchema = z.object({ - email: z.email().openapi({ - description: "Email address used to authenticate the user.", - example: "user@example.com", - }), - password: z.string().min(8).openapi({ - description: "Account password.", - example: "••••••••", - }), + email: z.email(), + password: z.string().min(8), }); export const refreshSessionSchema = z.object({ - refreshToken: z.string().min(1).openapi({ - description: "Refresh token returned when logging in.", - }), + refreshToken: z.string().min(1), }); diff --git a/packages/domain/src/crawler/schemas.ts b/packages/domain/src/models/crawler.ts similarity index 95% rename from packages/domain/src/crawler/schemas.ts rename to packages/domain/src/models/crawler.ts index 75cc8ac..5a86518 100644 --- a/packages/domain/src/crawler/schemas.ts +++ b/packages/domain/src/models/crawler.ts @@ -1,6 +1,6 @@ -import { z } from "zod"; +import z from "zod"; -import { UPDATE_DIRECTIONS } from "#domain/constants"; +import { UPDATE_DIRECTIONS } from "../constants"; // schemas export const UpdateDirectionSchema = z.enum(UPDATE_DIRECTIONS); diff --git a/packages/domain/src/models/index.ts b/packages/domain/src/models/index.ts index c163752..cbd73c1 100644 --- a/packages/domain/src/models/index.ts +++ b/packages/domain/src/models/index.ts @@ -1,5 +1,6 @@ export * from "./articles"; export * from "./auth"; +export * from "./crawler"; export * from "./reports"; export * from "./shared"; export * from "./sources"; diff --git a/packages/domain/src/models/reports.ts b/packages/domain/src/models/reports.ts index 01831f9..57875ec 100644 --- a/packages/domain/src/models/reports.ts +++ b/packages/domain/src/models/reports.ts @@ -1,30 +1,17 @@ -import { z } from "@hono/zod-openapi"; +import z from "zod"; -import { deltaSchema } from "#domain/models/shared"; +import { deltaSchema } from "./shared"; -export const overviewMetricSchema = z - .object({ - delta: deltaSchema.openapi({ - description: "Change measured over the last 30 days compared to the previous 30-day window.", - }), - total: z.number().int().nonnegative().openapi({ - description: "Total count across the entire dataset.", - example: 12584, - }), - }) - .openapi({ - description: "Aggregated metric with total count and delta metadata.", - }); +export const overviewMetricSchema = z.object({ + delta: deltaSchema, + total: z.number().int().nonnegative(), +}); -export const dashboardOverviewSchema = z - .object({ - articles: overviewMetricSchema, - sources: overviewMetricSchema, - users: overviewMetricSchema, - }) - .openapi({ - description: "Dashboard overview metrics for key entities.", - }); +export const dashboardOverviewSchema = z.object({ + articles: overviewMetricSchema, + sources: overviewMetricSchema, + users: overviewMetricSchema, +}); export type OverviewMetric = z.infer; export type DashboardOverview = z.infer; diff --git a/packages/domain/src/models/shared.ts b/packages/domain/src/models/shared.ts index 31f88b3..f5a7679 100644 --- a/packages/domain/src/models/shared.ts +++ b/packages/domain/src/models/shared.ts @@ -1,138 +1,50 @@ import { z } from "@hono/zod-openapi"; -import { BIAS, RELIABILITY, SENTIMENT, TRANSPARENCY } from "#domain/constants"; +import { BIAS, RELIABILITY, SENTIMENT, TRANSPARENCY } from "../constants"; // schemas -export const idSchema = z.uuid().openapi({ - description: "The unique identifier of the resource.", - example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g", +export const idSchema = z.uuid(); + +export const dateRangeSchema = z.object({ + end: z.coerce.date(), + start: z.coerce.date(), }); -export const dateRangeSchema = z - .object({ - end: z.date().openapi({ - description: "The end date of the range.", - example: "2023-01-30T23:59:59Z", - }), - start: z.date().openapi({ - description: "The start date of the range.", - example: "2023-01-01T00:00:00Z", - }), - }) - .openapi({ - description: "Inclusive date range for publication metrics.", - }); +export const limitSchema = z.number().int().min(1).max(100); +export const sentimentSchema = z.enum(SENTIMENT); +export const biasSchema = z.enum(BIAS); +export const reliabilitySchema = z.enum(RELIABILITY); +export const transparencySchema = z.enum(TRANSPARENCY); -export const limitSchema = z.number().int().min(1).max(100).openapi({ - default: 10, - description: "The maximum number of items to return.", - example: 10, +export const credibilitySchema = z.object({ + bias: biasSchema.default("neutral"), + reliability: reliabilitySchema.default("average"), + transparency: transparencySchema.default("medium"), }); -export const sentimentSchema = z.enum(SENTIMENT).openapi({ - description: "Sentiment detected for the article.", - example: "positive", +export const deviceSchema = z.object({ + client: z.string().optional(), + device: z.string().optional(), + isBot: z.boolean(), + operatingSystem: z.string().optional(), }); -export const biasSchema = z.enum(BIAS).openapi({ - description: "The bias level of the source.", - example: "neutral", +export const geoLocationSchema = z.object({ + accuracyRadius: z.number().optional(), + city: z.string().optional(), + country: z.string().optional(), + latitude: z.number().optional(), + longitude: z.number().optional(), + timeZone: z.string().optional(), }); -export const reliabilitySchema = z.enum(RELIABILITY).openapi({ - description: "The reliability level of the source.", - example: "trusted", +export const distrubtionSchema = z.object({ + count: z.number().int(), + id: idSchema, + name: z.string(), + percentage: z.number(), }); -export const transparencySchema = z.enum(TRANSPARENCY).openapi({ - description: "The transparency level of the source.", - example: "high", -}); - -export const credibilitySchema = z - .object({ - bias: biasSchema.default("neutral"), - reliability: reliabilitySchema.default("average"), - transparency: transparencySchema.default("medium"), - }) - .openapi({ - description: "Credibility information about the resource.", - }); - -export const deviceSchema = z - .object({ - client: z.string().optional().openapi({ - description: "The client software of the device.", - example: "Chrome 90", - }), - device: z.string().optional().openapi({ - description: "The device model.", - example: "Dell XPS 13", - }), - isBot: z.boolean().openapi({ - description: "Indicates if the device is a bot.", - example: false, - }), - operatingSystem: z.string().optional().openapi({ - description: "The operating system of the device.", - example: "Windows 10", - }), - }) - .openapi({ - description: "Information about the user's device.", - }); - -export const geoLocationSchema = z - .object({ - accuracyRadius: z.number().optional().openapi({ - description: "The accuracy radius in kilometers.", - example: 50, - }), - city: z.string().optional().openapi({ - description: "The city of the user.", - example: "San Francisco", - }), - country: z.string().optional().openapi({ - description: "The country of the user.", - example: "United States", - }), - latitude: z.number().optional().openapi({ - description: "The latitude of the user's location.", - example: 37.7749, - }), - longitude: z.number().optional().openapi({ - description: "The longitude of the user's location.", - example: -122.4194, - }), - timeZone: z.string().optional().openapi({ - description: "The time zone of the user.", - example: "America/Los_Angeles", - }), - }) - .openapi({ - description: "Geolocation information about the user.", - }); - -export const distrubtionSchema = z - .object({ - count: z.number().int().openapi({ - description: "The count of items in the distribution.", - example: 42, - }), - id: idSchema, - name: z.string().openapi({ - description: "The name of the distribution.", - example: "Technology", - }), - percentage: z.number().openapi({ - description: "The percentage of items in the distribution.", - example: 12.5, - }), - }) - .openapi({ - description: "Distribution information.", - }); - export const getDistributionsSchema = z.object({ id: idSchema.optional(), limit: limitSchema.optional(), @@ -143,172 +55,60 @@ export const getPublicationsSchema = z.object({ range: dateRangeSchema.optional(), }); -export const distributionsSchema = z - .object({ - items: z.array(distrubtionSchema).openapi({ - description: "List of distributions.", - }), - total: z.number().int().openapi({ - description: "Total number of distributions.", - example: 100, - }), - }) - .openapi({ - description: "Distributions data.", - }); +export const distributionsSchema = z.object({ + items: z.array(distrubtionSchema), + total: z.number().int(), +}); -export const publicationSchema = z - .object({ - count: z.number().int().openapi({ - description: "The number of articles published on that date.", - example: 42, - }), - date: z.string().openapi({ - description: "The date of the publication.", - example: "2023-01-15", - }), - }) - .openapi({ - description: "Publication metrics for a specific date.", - }); +export const publicationSchema = z.object({ + count: z.number().int(), + date: z.string(), +}); -export const deltaSchema = z - .object({ - delta: z.number().openapi({ - description: "The absolute change in value.", - example: 10, - }), - percentage: z.number().openapi({ - description: "The percentage change in value.", - example: 25.0, - }), - sign: z.enum(["+", "-"]).openapi({ - description: "The sign of the change.", - example: "+", - }), - variant: z.enum(["increase", "decrease", "positive"]).openapi({ - description: "The variant of the change.", - example: "increase", - }), - }) - .openapi({ - description: "Delta information representing change over time.", - }); +export const deltaSchema = z.object({ + delta: z.number(), + percentage: z.number(), + sign: z.enum(["+", "-"]), + variant: z.enum(["increase", "decrease", "positive"]), +}); -export const publicationMetaSchema = z - .object({ - current: z.number().openapi({ - description: "The current total value.", - example: 150, - }), - delta: deltaSchema, - previous: z.number().openapi({ - description: "The previous total value.", - example: 120, - }), - }) - .openapi({ - description: "Metadata for publication metrics.", - }); +export const publicationMetaSchema = z.object({ + current: z.number(), + delta: deltaSchema, + previous: z.number(), +}); -export const publicationsSchema = z - .object({ - items: z.array(publicationSchema).openapi({ - description: "List of publication metrics for the source.", - }), - meta: publicationMetaSchema.optional(), - }) - .openapi({ - description: "Publication metrics for the source.", - }); +export const publicationsSchema = z.object({ + items: z.array(publicationSchema), + meta: publicationMetaSchema.optional(), +}); -export const paginationCursorSchema = z - .object({ - date: z.string().openapi({ - description: "The date associated with the last item in the current page.", - example: "2023-01-15", - }), - id: z.string().openapi({ - description: "The unique identifier of the last item in the current page.", - example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g", - }), - }) - .openapi({ - description: "Cursor information for pagination.", - }); +export const paginationCursorSchema = z.object({ + date: z.string(), + id: z.string(), +}); -export const paginationRequestSchema = z - .object({ - cursor: z.string().nullable().optional().openapi({ - description: "The pagination cursor for cursor-based pagination.", - example: - "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==", - }), - limit: limitSchema.optional(), - page: z.number().int().min(1).optional().openapi({ - description: "The page number to retrieve.", - example: 1, - }), - }) - .openapi({ - description: "Pagination request parameters.", - }); +export const paginationRequestSchema = z.object({ + cursor: z.string().nullable().optional(), + limit: limitSchema.optional(), + page: z.number().nonnegative().default(1).optional(), +}); -export const paginationStateSchema = z - .object({ - cursor: z.string().nullable().openapi({ - description: "The current pagination cursor.", - example: - "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==", - }), - limit: z.number().int().openapi({ - description: "The number of items per page.", - example: 10, - }), - offset: z.number().int().openapi({ - description: "The offset for the current page.", - example: 0, - }), - page: z.number().int().openapi({ - description: "The current page number.", - example: 1, - }), - payload: paginationCursorSchema.nullable().openapi({ - description: "The decoded payload from the pagination cursor.", - }), - }) - .openapi({ - description: "Internal pagination state.", - }); +export const paginationStateSchema = z.object({ + cursor: z.string().nullable(), + limit: z.number().int(), + offset: z.number().int(), + page: z.number().int(), + payload: paginationCursorSchema.nullable(), +}); -export const paginationMetaSchema = z - .object({ - current: z.number().int().openapi({ - description: "The current page number or offset.", - example: 1, - }), - cursor: z.string().nullable().openapi({ - description: "The current pagination cursor.", - example: - "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==", - }), - hasNext: z.boolean().openapi({ - description: "Indicates if there is a next page available.", - example: true, - }), - limit: z.number().int().openapi({ - description: "The number of items per page.", - example: 10, - }), - nextCursor: z.string().nullable().openapi({ - description: "The next pagination cursor, if available.", - example: - "eyJkYXRlIjoiMjAyMy0wMS0yMCIsImlkIjoiZDRmNWU2ZTAtNzY4Ny00Y2E3LTg5ZTItYjY0ZGI3Y2E3ZGIifQ==", - }), - }) - .openapi({ - description: "Pagination metadata.", - }); +export const paginationMetaSchema = z.object({ + current: z.number().int(), + cursor: z.string().nullable(), + hasNext: z.boolean(), + limit: z.number().int(), + nextCursor: z.string().nullable(), +}); // types export type PaginatedResult = { diff --git a/packages/domain/src/models/sources.ts b/packages/domain/src/models/sources.ts index ff5cef4..5150588 100644 --- a/packages/domain/src/models/sources.ts +++ b/packages/domain/src/models/sources.ts @@ -1,37 +1,17 @@ -import { z } from "@hono/zod-openapi"; +import z from "zod"; -import { - credibilitySchema, - idSchema, - limitSchema, - publicationsSchema, -} from "#domain/models/shared"; +import { credibilitySchema, idSchema, limitSchema, publicationsSchema } from "./shared"; // schemas export const sourceSchema = z.object({ - articles: z.number().int().min(0).optional().openapi({ - description: "The total number of articles from this source.", - example: 1250, - }), + articles: z.number().int().min(0).optional(), credibility: credibilitySchema.optional(), - description: z.string().max(1024).optional().openapi({ - description: "A brief description of the source.", - example: "Radio Okapi is a Congolese radio station that provides news and information.", - }), - displayName: z.string().min(1).max(255).optional().openapi({ - description: "The display name of the source.", - example: "Radio Okapi", - }), + description: z.string().max(1024).optional(), + displayName: z.string().min(1).max(255).optional(), id: idSchema, - name: z.string().min(1).max(255).openapi({ - description: "The name of the source.", - example: "radiookapi.com", - }), + name: z.string().min(1).max(255), publications: publicationsSchema.optional(), - url: z.url().max(255).openapi({ - description: "The URL of the source.", - example: "https://techcrunch.com", - }), + url: z.url().max(255), }); export const createSourceSchema = sourceSchema.pick({ diff --git a/packages/encryption/.env b/packages/encryption/.env deleted file mode 100644 index e365018..0000000 --- a/packages/encryption/.env +++ /dev/null @@ -1 +0,0 @@ -BASANGO_ENCRYPTION_KEY=testkey diff --git a/packages/encryption/src/index.ts b/packages/encryption/src/index.ts index 3ed7cc3..6b3e8d5 100644 --- a/packages/encryption/src/index.ts +++ b/packages/encryption/src/index.ts @@ -1,18 +1,10 @@ import crypto from "node:crypto"; -import { - DEFAULT_AUTH_TAG_LENGTH, - DEFAULT_BCRYPT_SALT_ROUNDS, - DEFAULT_ENCRYPTION_ALGORITHM, - DEFAULT_IV_LENGTH, -} from "@basango/domain/constants"; -import { createEnvAccessor } from "@devscast/config"; +import { config } from "@basango/domain/config"; import * as bcrypt from "bcrypt"; -export const env = createEnvAccessor(["BASANGO_ENCRYPTION_KEY"] as const); - function getKey(): Buffer { - const key = env("BASANGO_ENCRYPTION_KEY"); + const key = config.encryption.key; if (Buffer.from(key, "hex").length !== 32) { throw new Error("BASANGO_ENCRYPTION_KEY must be a 64-character hex string (32 bytes)."); @@ -20,6 +12,12 @@ function getKey(): Buffer { return Buffer.from(key, "hex"); } +const getEncryptionSettings = () => ({ + algorithm: config.encryption.algorithm as crypto.CipherGCMTypes, + authTagLength: config.encryption.authTagLength, + ivLength: config.encryption.ivLength, +}); + /** * Encrypts a plaintext string using AES-256-GCM. * @param text The plaintext string to encrypt. @@ -27,8 +25,9 @@ function getKey(): Buffer { */ export function encrypt(text: string): string { const key = getKey(); - const iv = crypto.randomBytes(DEFAULT_IV_LENGTH); - const cipher = crypto.createCipheriv(DEFAULT_ENCRYPTION_ALGORITHM, key, iv); + const { algorithm, ivLength } = getEncryptionSettings(); + const iv = crypto.randomBytes(ivLength); + const cipher = crypto.createCipheriv(algorithm, key, iv); let encrypted = cipher.update(text, "utf8", "hex"); encrypted += cipher.final("hex"); @@ -50,17 +49,15 @@ export function encrypt(text: string): string { */ export function decrypt(encryptedPayload: string): string { const key = getKey(); + const { algorithm, authTagLength, ivLength } = getEncryptionSettings(); const dataBuffer = Buffer.from(encryptedPayload, "base64"); // Extract IV, auth tag, and encrypted data - const iv = dataBuffer.subarray(0, DEFAULT_IV_LENGTH); - const authTag = dataBuffer.subarray( - DEFAULT_IV_LENGTH, - DEFAULT_IV_LENGTH + DEFAULT_AUTH_TAG_LENGTH, - ); - const encryptedText = dataBuffer.subarray(DEFAULT_IV_LENGTH + DEFAULT_AUTH_TAG_LENGTH); + const iv = dataBuffer.subarray(0, ivLength); + const authTag = dataBuffer.subarray(ivLength, ivLength + authTagLength); + const encryptedText = dataBuffer.subarray(ivLength + authTagLength); - const decipher = crypto.createDecipheriv(DEFAULT_ENCRYPTION_ALGORITHM, key, iv); + const decipher = crypto.createDecipheriv(algorithm, key, iv); decipher.setAuthTag(authTag); let decrypted = decipher.update(encryptedText.toString("hex"), "hex", "utf8"); @@ -82,7 +79,8 @@ export function generateRandomBytes(size: number): string { } export async function hashPassword(password: string): Promise { - return bcrypt.hash(password, DEFAULT_BCRYPT_SALT_ROUNDS); + const rounds = config.encryption.bcryptSaltRounds; + return bcrypt.hash(password, rounds); } export async function verifyPassword(password: string, hashed: string): Promise { diff --git a/packages/logger/package.json b/packages/logger/package.json index 9c5ab91..96c57f0 100644 --- a/packages/logger/package.json +++ b/packages/logger/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "@devscast/config": "catalog:", + "@basango/domain": "workspace:*", "pino": "^10.1.0", "pino-pretty": "^13.1.2" }, diff --git a/packages/logger/src/index.ts b/packages/logger/src/index.ts index 38540e7..4bf6993 100644 --- a/packages/logger/src/index.ts +++ b/packages/logger/src/index.ts @@ -1,12 +1,9 @@ -import { createEnvAccessor } from "@devscast/config"; +import { config } from "@basango/domain/config"; import pino from "pino"; -const env = createEnvAccessor(["LOG_LEVEL", "NODE_ENV"] as const); - export const logger = pino({ - level: env("LOG_LEVEL", { default: "info" }), - // Use pretty printing in development, structured JSON in production - ...(env("NODE_ENV") !== "production" && { + level: config.logger.level, + ...(process.env.NODE_ENV !== "production" && { transport: { options: { colorize: true, diff --git a/packages/logger/tsconfig.json b/packages/logger/tsconfig.json index c2116b5..3d60716 100644 --- a/packages/logger/tsconfig.json +++ b/packages/logger/tsconfig.json @@ -1,4 +1,10 @@ { + "compilerOptions": { + "paths": { + "#domain/*": ["../domain/src/*"], + "#logger/*": ["./src/*"] + } + }, "exclude": ["node_modules"], "extends": "@basango/tsconfig/base.json", "include": ["src"]