refactor: centralize configuration
This commit is contained in:
@@ -1,4 +0,0 @@
|
||||
FROM nginx:1.27.1-alpine
|
||||
|
||||
COPY default.conf /etc/nginx/conf.d/default.conf
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name localhost;
|
||||
root /var/www/public;
|
||||
|
||||
add_header X-Frame-Options "SAMEORIGIN";
|
||||
add_header X-XSS-Protection "1; mode=block";
|
||||
add_header X-Content-Type-Options "nosniff";
|
||||
|
||||
index index.html index.htm index.php;
|
||||
|
||||
charset utf-8;
|
||||
|
||||
location / {
|
||||
root /var/www/;
|
||||
try_files /public/$uri /public/$uri /assets/$uri /index.php?$query_string;
|
||||
}
|
||||
|
||||
location = /favicon.ico { access_log off; log_not_found off; }
|
||||
location = /robots.txt { access_log off; log_not_found off; }
|
||||
|
||||
error_page 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 421 422 423 424 425 426 428 429 431 451 500 501 502 503 504 505 506 507 508 510 511 /error.html;
|
||||
|
||||
location ~ \.php$ {
|
||||
fastcgi_pass php:9000;
|
||||
fastcgi_index index.php;
|
||||
fastcgi_param SCRIPT_FILENAME $realpath_root$fastcgi_script_name;
|
||||
include fastcgi_params;
|
||||
fastcgi_buffers 16 16k;
|
||||
fastcgi_buffer_size 32k;
|
||||
}
|
||||
|
||||
location ~ /\.(?!well-known).* {
|
||||
deny all;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
FROM php:8.4-fpm-alpine
|
||||
|
||||
# Install dependencies
|
||||
RUN apk --no-cache add curl git wget bash dpkg
|
||||
|
||||
# Add PHP extensions
|
||||
ADD https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/install-php-extensions
|
||||
|
||||
RUN install-php-extensions opcache iconv soap
|
||||
RUN install-php-extensions zip intl fileinfo
|
||||
RUN install-php-extensions pdo redis mysqli pdo_mysql
|
||||
RUN install-php-extensions gd
|
||||
RUN install-php-extensions pgsql pdo_pgsql
|
||||
|
||||
# Composer
|
||||
RUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/bin/ --filename=composer
|
||||
|
||||
WORKDIR /var/www
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
# api
|
||||
BASANGO_API_HOST=localhost
|
||||
BASANGO_API_PORT=3080
|
||||
BASANGO_API_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
|
||||
BASANGO_API_KEY=your_api_key_here
|
||||
BASANGO_API_CRAWLER_TOKEN=dev
|
||||
BASANGO_API_CRAWLER_ENDPOINT="http://localhost:3080/articles"
|
||||
BASANGO_API_JWT_SECRET=your_jwt_secret_here
|
||||
|
||||
# db
|
||||
BASANGO_DATABASE_URL="postgresql://postgres:postgres@localhost:5432/app?serverVersion=16&charset=utf8"
|
||||
BASANGO_DATABASE_LEGACY_HOST="localhost"
|
||||
BASANGO_DATABASE_LEGACY_PASSWORD="root"
|
||||
BASANGO_DATABASE_LEGACY_NAME="app"
|
||||
BASANGO_DATABASE_LEGACY_USER="root"
|
||||
BASANGO_DATABASE_LEGACY_PORT=3306
|
||||
|
||||
# logger
|
||||
BASANGO_LOGGER_LEVEL=debug
|
||||
|
||||
# crawler
|
||||
BASANGO_CRAWLER_ROOT_PATH=
|
||||
BASANGO_CRAWLER_DATA_PATH=
|
||||
BASANGO_CRAWLER_LOGS_PATH=
|
||||
BASANGO_CRAWLER_CONFIG_PATH=
|
||||
|
||||
BASANGO_CRAWLER_UPDATE_DIRECTION=forward
|
||||
BASANGO_CRAWLER_FETCH_USER_AGENT="Basango/0.1 (+https://github.com/bernard-ng/basango)"
|
||||
BASANGO_CRAWLER_FETCH_MAX_RETRIES=3
|
||||
BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER=true
|
||||
|
||||
BASANGO_CRAWLER_ASYNC_REDIS_URL="redis://localhost:6379/0"
|
||||
BASANGO_CRAWLER_ASYNC_TTL_RESULT=3600
|
||||
BASANGO_CRAWLER_ASYNC_TTL_FAILURE=3600
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_LISTING="listing"
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS="details"
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING="processing"
|
||||
|
||||
# encryption
|
||||
BASANGO_ENCRYPTION_KEY=testkey
|
||||
@@ -1,7 +0,0 @@
|
||||
NODE_ENV=development
|
||||
BASANGO_API_HOST=localhost
|
||||
BASANGO_API_PORT=3080
|
||||
BASANGO_API_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
|
||||
BASANGO_API_KEY=your_api_key_here
|
||||
BASANGO_CRAWLER_TOKEN=dev
|
||||
BASANGO_JWT_SECRET=your_jwt_secret_here
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"cors": {
|
||||
"allowedHeaders": [
|
||||
"Authorization",
|
||||
"Content-Type",
|
||||
"accept-language",
|
||||
"x-trpc-source",
|
||||
"x-user-locale",
|
||||
"x-user-timezone",
|
||||
"x-user-country"
|
||||
],
|
||||
"allowMethods": ["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"],
|
||||
"exposeHeaders": ["Content-Length"],
|
||||
"maxAge": 86400
|
||||
}
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"server": {
|
||||
"host": "%env(BASANGO_API_HOST)%",
|
||||
"port": "%env(number:BASANGO_API_PORT)%",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
}
|
||||
@@ -4,13 +4,10 @@
|
||||
"@basango/domain": "workspace:*",
|
||||
"@basango/encryption": "workspace:*",
|
||||
"@basango/logger": "workspace:*",
|
||||
"@devscast/config": "catalog:",
|
||||
"@hono/node-server": "^1.19.6",
|
||||
"@hono/trpc-server": "^0.4.0",
|
||||
"@hono/zod-openapi": "^1.1.4",
|
||||
"@scalar/hono-api-reference": "^0.9.24",
|
||||
"@trpc/server": "^11.7.1",
|
||||
"ai": "^5.0.89",
|
||||
"camelcase-keys": "^10.0.1",
|
||||
"date-fns": "catalog:",
|
||||
"hono-rate-limiter": "^0.4.2",
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
import path from "node:path";
|
||||
|
||||
import { loadConfig as defineConfig } from "@devscast/config";
|
||||
import { z } from "zod";
|
||||
|
||||
export const PROJECT_DIR = path.resolve(__dirname, "../");
|
||||
|
||||
const ServerConfigurationSchema = z.object({
|
||||
cors: z.object({
|
||||
allowedHeaders: z.array(z.string()).optional(),
|
||||
allowMethods: z.array(z.string()).optional(),
|
||||
exposeHeaders: z.array(z.string()).optional(),
|
||||
maxAge: z.number().int().min(0).optional(),
|
||||
origin: z
|
||||
.array(z.string())
|
||||
.optional()
|
||||
.default(["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"]),
|
||||
}),
|
||||
server: z.object({
|
||||
host: z.string().default("localhost"),
|
||||
port: z.number().int().min(1).max(65535).default(4000),
|
||||
version: z.string().default("1.0.0"),
|
||||
}),
|
||||
});
|
||||
|
||||
export const { env, config } = defineConfig({
|
||||
env: {
|
||||
knownKeys: [
|
||||
"BASANGO_API_HOST",
|
||||
"BASANGO_API_PORT",
|
||||
"BASANGO_API_ALLOWED_ORIGINS",
|
||||
"BASANGO_API_KEY",
|
||||
"BASANGO_CRAWLER_TOKEN",
|
||||
"BASANGO_JWT_SECRET",
|
||||
],
|
||||
path: path.join(PROJECT_DIR, ".env"),
|
||||
},
|
||||
schema: ServerConfigurationSchema,
|
||||
sources: [
|
||||
path.join(PROJECT_DIR, "config", "server.json"),
|
||||
path.join(PROJECT_DIR, "config", "cors.json"),
|
||||
],
|
||||
});
|
||||
|
||||
export type ServerConfiguration = z.infer<typeof ServerConfigurationSchema>;
|
||||
+8
-48
@@ -1,11 +1,10 @@
|
||||
import { config } from "@basango/domain/config";
|
||||
import { trpcServer } from "@hono/trpc-server";
|
||||
import { OpenAPIHono } from "@hono/zod-openapi";
|
||||
import { Scalar } from "@scalar/hono-api-reference";
|
||||
import { cors } from "hono/cors";
|
||||
import { logger } from "hono/logger";
|
||||
import { secureHeaders } from "hono/secure-headers";
|
||||
|
||||
import { config, env } from "#api/config";
|
||||
import { routers } from "#api/rest/routers";
|
||||
import { createTRPCContext } from "#api/trpc/init";
|
||||
import { appRouter } from "#api/trpc/routers/_app";
|
||||
@@ -18,11 +17,11 @@ app.use(secureHeaders());
|
||||
app.use(
|
||||
"*",
|
||||
cors({
|
||||
allowHeaders: config.cors.allowedHeaders,
|
||||
allowMethods: config.cors.allowMethods,
|
||||
exposeHeaders: config.cors.exposeHeaders,
|
||||
maxAge: config.cors.maxAge,
|
||||
origin: ["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"],
|
||||
allowHeaders: config.api.cors.allowedHeaders,
|
||||
allowMethods: config.api.cors.allowMethods,
|
||||
exposeHeaders: config.api.cors.exposeHeaders,
|
||||
maxAge: config.api.cors.maxAge,
|
||||
origin: config.api.cors.origin,
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -34,49 +33,10 @@ app.use(
|
||||
}),
|
||||
);
|
||||
|
||||
app.doc("/openapi", {
|
||||
info: {
|
||||
contact: {
|
||||
email: "engineering@basango.io",
|
||||
name: "Basango",
|
||||
url: "https://basango.io",
|
||||
},
|
||||
description: "Basango is a platform that leverages AI to revolutionize news curation.",
|
||||
license: {
|
||||
name: "AGPL-3.0 license",
|
||||
url: "https://github.com/bernard-ng/basango/blob/main/LICENSE",
|
||||
},
|
||||
title: "Basango API",
|
||||
version: "0.0.1",
|
||||
},
|
||||
openapi: "3.1.0",
|
||||
security: [
|
||||
{
|
||||
oauth2: [],
|
||||
},
|
||||
{ token: [] },
|
||||
],
|
||||
servers: [
|
||||
{
|
||||
description: "Production API",
|
||||
url: "https://api.basango.io",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Register security scheme
|
||||
app.openAPIRegistry.registerComponent("securitySchemes", "token", {
|
||||
description: "Default authentication mechanism",
|
||||
scheme: "bearer",
|
||||
type: "http",
|
||||
"x-speakeasy-example": env("BASANGO_API_KEY"),
|
||||
});
|
||||
|
||||
app.get("/", Scalar({ pageTitle: "Basango API", theme: "saturn", url: "/openapi" }));
|
||||
app.route("/", routers);
|
||||
|
||||
export default {
|
||||
fetch: app.fetch,
|
||||
hostname: config.server.host,
|
||||
port: config.server.port,
|
||||
hostname: config.api.server.host,
|
||||
port: config.api.server.port,
|
||||
};
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { config } from "@basango/domain/config";
|
||||
import type { MiddlewareHandler } from "hono";
|
||||
import { HTTPException } from "hono/http-exception";
|
||||
|
||||
import { env } from "#api/config";
|
||||
|
||||
export const withCrawlerAuth: MiddlewareHandler = async (c, next) => {
|
||||
const token = c.req.header("Authorization");
|
||||
|
||||
@@ -10,7 +9,7 @@ export const withCrawlerAuth: MiddlewareHandler = async (c, next) => {
|
||||
throw new HTTPException(401, { message: "Authorization header required" });
|
||||
}
|
||||
|
||||
if (token !== env("BASANGO_CRAWLER_TOKEN")) {
|
||||
if (token !== config.api.security.crawlerToken) {
|
||||
throw new HTTPException(403, { message: "Invalid token" });
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ export const authRouter = createTRPCRouter({
|
||||
if (!user || user.isLocked) {
|
||||
throw new TRPCError({
|
||||
code: "UNAUTHORIZED",
|
||||
message: "Invalid credentials.",
|
||||
message: "Account is locked",
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
+10
-17
@@ -1,15 +1,8 @@
|
||||
import { Database } from "@basango/db/client";
|
||||
import { getUserById } from "@basango/db/queries";
|
||||
import {
|
||||
DEFAULT_ACCESS_TOKEN_TTL,
|
||||
DEFAULT_REFRESH_TOKEN_TTL,
|
||||
DEFAULT_TOKEN_AUDIENCE,
|
||||
DEFAULT_TOKEN_ISSUER,
|
||||
} from "@basango/domain/constants";
|
||||
import { config } from "@basango/domain/config";
|
||||
import { type JWTPayload, SignJWT, jwtVerify } from "jose";
|
||||
|
||||
import { env } from "#api/config";
|
||||
|
||||
export type Session = {
|
||||
user: {
|
||||
id: string;
|
||||
@@ -39,7 +32,7 @@ export type SessionTokens = {
|
||||
const encoder = new TextEncoder();
|
||||
|
||||
function getSecretKey() {
|
||||
return encoder.encode(env("BASANGO_JWT_SECRET"));
|
||||
return encoder.encode(config.api.security.jwtSecret);
|
||||
}
|
||||
|
||||
export async function getSession(db: Database, accessToken?: string): Promise<Session | null> {
|
||||
@@ -74,24 +67,24 @@ async function createToken(session: Session, tokenType: TokenType, expiresIn: st
|
||||
})
|
||||
.setProtectedHeader({ alg: "HS256" })
|
||||
.setIssuedAt()
|
||||
.setAudience(DEFAULT_TOKEN_AUDIENCE)
|
||||
.setIssuer(DEFAULT_TOKEN_ISSUER)
|
||||
.setAudience(config.api.security.audience)
|
||||
.setIssuer(config.api.security.issuer)
|
||||
.setExpirationTime(expiresIn)
|
||||
.sign(getSecretKey());
|
||||
}
|
||||
|
||||
export async function createSessionTokens(session: Session): Promise<SessionTokens> {
|
||||
const [accessToken, refreshToken] = await Promise.all([
|
||||
createToken(session, "access", DEFAULT_ACCESS_TOKEN_TTL),
|
||||
createToken(session, "refresh", DEFAULT_REFRESH_TOKEN_TTL),
|
||||
createToken(session, "access", config.api.security.accessTokenTtl),
|
||||
createToken(session, "refresh", config.api.security.refreshTokenTtl),
|
||||
]);
|
||||
|
||||
const issuedAt = Date.now();
|
||||
const accessTokenExpiresAt = new Date(
|
||||
issuedAt + formatTTL(DEFAULT_ACCESS_TOKEN_TTL),
|
||||
issuedAt + formatTTL(config.api.security.accessTokenTtl),
|
||||
).toISOString();
|
||||
const refreshTokenExpiresAt = new Date(
|
||||
issuedAt + formatTTL(DEFAULT_REFRESH_TOKEN_TTL),
|
||||
issuedAt + formatTTL(config.api.security.refreshTokenTtl),
|
||||
).toISOString();
|
||||
|
||||
return {
|
||||
@@ -118,8 +111,8 @@ async function verifyToken(
|
||||
|
||||
try {
|
||||
const { payload } = await jwtVerify<VerifiedJWTPayload>(token, getSecretKey(), {
|
||||
audience: DEFAULT_TOKEN_AUDIENCE,
|
||||
issuer: DEFAULT_TOKEN_ISSUER,
|
||||
audience: config.api.security.audience,
|
||||
issuer: config.api.security.issuer,
|
||||
});
|
||||
|
||||
if (payload.tokenType !== expectedType) {
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# paths
|
||||
BASANGO_CRAWLER_ROOT_PATH=
|
||||
BASANGO_CRAWLER_DATA_PATH=
|
||||
BASANGO_CRAWLER_LOGS_PATH=
|
||||
BASANGO_CRAWLER_CONFIG_PATH=
|
||||
|
||||
# crawler settings
|
||||
BASANGO_CRAWLER_UPDATE_DIRECTION=forward
|
||||
BASANGO_CRAWLER_FETCH_USER_AGENT="Basango/0.1 (+https://github.com/bernard-ng/basango)"
|
||||
BASANGO_CRAWLER_FETCH_MAX_RETRIES=3
|
||||
BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER=true
|
||||
|
||||
BASANGO_CRAWLER_ASYNC_REDIS_URL="redis://localhost:6379/0"
|
||||
BASANGO_CRAWLER_ASYNC_TTL_RESULT=3600
|
||||
BASANGO_CRAWLER_ASYNC_TTL_FAILURE=3600
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_LISTING="listing"
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS="details"
|
||||
BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING="processing"
|
||||
|
||||
BASANGO_CRAWLER_TOKEN="dev"
|
||||
BASANGO_CRAWLER_BACKEND_API_ENDPOINT="http://localhost:3080/articles"
|
||||
@@ -1,41 +0,0 @@
|
||||
{
|
||||
"fetch": {
|
||||
"async": {
|
||||
"prefix": "basango:crawler",
|
||||
"queues": {
|
||||
"details": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS)%",
|
||||
"listing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_LISTING)%",
|
||||
"processing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING)%"
|
||||
},
|
||||
"redisUrl": "%env(BASANGO_CRAWLER_ASYNC_REDIS_URL)%",
|
||||
"ttl": {
|
||||
"default": 600,
|
||||
"failure": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_FAILURE)%",
|
||||
"result": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_RESULT)%"
|
||||
}
|
||||
},
|
||||
"client": {
|
||||
"backoffInitial": 1,
|
||||
"backoffMax": 30,
|
||||
"backoffMultiplier": 2,
|
||||
"followRedirects": true,
|
||||
"maxRetries": "%env(number:BASANGO_CRAWLER_FETCH_MAX_RETRIES)%",
|
||||
"respectRetryAfter": "%env(boolean:BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER)%",
|
||||
"rotate": true,
|
||||
"timeout": 20,
|
||||
"userAgent": "%env(BASANGO_CRAWLER_FETCH_USER_AGENT)%",
|
||||
"verifySsl": true
|
||||
},
|
||||
"crawler": {
|
||||
"direction": "%env(BASANGO_CRAWLER_UPDATE_DIRECTION)%",
|
||||
"maxWorkers": 5,
|
||||
"notify": false,
|
||||
"useMultiThreading": false
|
||||
}
|
||||
},
|
||||
"paths": {
|
||||
"config": "%env(BASANGO_CRAWLER_CONFIG_PATH)%",
|
||||
"data": "%env(BASANGO_CRAWLER_DATA_PATH)%",
|
||||
"root": "%env(BASANGO_CRAWLER_ROOT_PATH)%"
|
||||
}
|
||||
}
|
||||
@@ -1,210 +0,0 @@
|
||||
{
|
||||
"sources": {
|
||||
"html": [
|
||||
{
|
||||
"paginationTemplate": "actualite",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "radiookapi.net",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".field-name-body",
|
||||
"articleCategories": ".views-field-field-cat-gorie a",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": ".views-field-title a",
|
||||
"articles": ".view-content > .views-row.content-row",
|
||||
"articleTitle": "h1.page-header",
|
||||
"pagination": "ul.pagination > li.pager-last > a"
|
||||
},
|
||||
"sourceUrl": "https://www.radiookapi.net",
|
||||
"supportsCategories": false
|
||||
},
|
||||
{
|
||||
"categories": ["politique", "economie", "culture", "sport", "societe"],
|
||||
"paginationTemplate": "index.php/category/{category}",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "7sur7.cd",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": "div[property=\"schema:text\"].field.field--name-body",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": ".views-field-title a",
|
||||
"articles": ".view-content > .row.views-row",
|
||||
"articleTitle": ".views-field-title a",
|
||||
"pagination": "ul.pagination > li.pager__item.pager__item--last > a"
|
||||
},
|
||||
"sourceUrl": "https://7sur7.cd",
|
||||
"supportsCategories": true
|
||||
},
|
||||
{
|
||||
"paginationTemplate": "articles.html",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {
|
||||
"format": "dd.MM.yyyy"
|
||||
},
|
||||
"sourceId": "mediacongo.net",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".article_ttext",
|
||||
"articleCategories": "a.color_link",
|
||||
"articleDate": ".article_other_about",
|
||||
"articleLink": "a:first-child",
|
||||
"articles": ".for_aitems > .article_other_item",
|
||||
"articleTitle": "h1",
|
||||
"pagination": "div.pagination > div > a:last-child"
|
||||
},
|
||||
"sourceUrl": "https://www.mediacongo.net",
|
||||
"supportsCategories": false
|
||||
},
|
||||
{
|
||||
"paginationTemplate": "actualite",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "actualite.cd",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".views-field.views-field-body .field-content",
|
||||
"articleCategories": "#actu-cat",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": "#actu-titre a",
|
||||
"articles": "#views-bootstrap-taxonomy-term-page-2 > div > div",
|
||||
"articleTitle": "h1.page-title"
|
||||
},
|
||||
"sourceUrl": "https://actualite.cd",
|
||||
"supportsCategories": false
|
||||
}
|
||||
],
|
||||
"wordpress": [
|
||||
{
|
||||
"requiresRateLimit": true,
|
||||
"sourceId": "beto.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://beto.cd"
|
||||
},
|
||||
{ "sourceId": "newscd.net", "sourceKind": "wordpress", "sourceUrl": "https://newscd.net" },
|
||||
{
|
||||
"sourceId": "africanewsrdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.africanewsrdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "angazainstitute.ac.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://angazainstitute.ac.cd"
|
||||
},
|
||||
{ "sourceId": "b-onetv.cd", "sourceKind": "wordpress", "sourceUrl": "https://b-onetv.cd" },
|
||||
{
|
||||
"sourceId": "bukavufm.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://bukavufm.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "changement7.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://changement7.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoactu.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://congoactu.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoindependant.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.congoindependant.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoquotidien.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.congoquotidien.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "cumulard.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.cumulard.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "environews-rdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://environews-rdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "freemediardc.info",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.freemediardc.info"
|
||||
},
|
||||
{
|
||||
"sourceId": "geopolismagazine.org",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://geopolismagazine.org"
|
||||
},
|
||||
{
|
||||
"sourceId": "habarirdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://habarirdc.net"
|
||||
},
|
||||
{ "sourceId": "infordc.com", "sourceKind": "wordpress", "sourceUrl": "https://infordc.com" },
|
||||
{
|
||||
"sourceId": "kilalopress.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://kilalopress.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "laprosperiteonline.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://laprosperiteonline.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "laprunellerdc.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://laprunellerdc.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "lesmedias.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lesmedias.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "lesvolcansnews.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lesvolcansnews.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "netic-news.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.netic-news.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "objectif-infos.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://objectif-infos.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "scooprdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://scooprdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "journaldekinshasa.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.journaldekinshasa.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "lepotentiel.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lepotentiel.cd"
|
||||
},
|
||||
{ "sourceId": "acturdc.com", "sourceKind": "wordpress", "sourceUrl": "https://acturdc.com" },
|
||||
{
|
||||
"sourceId": "matininfos.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://matininfos.net"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
import path from "node:path";
|
||||
|
||||
import {
|
||||
HtmlSourceConfigSchema,
|
||||
PageRangeSchema,
|
||||
TimestampRangeSchema,
|
||||
UpdateDirectionSchema,
|
||||
WordPressSourceConfigSchema,
|
||||
} from "@basango/domain/crawler";
|
||||
import { loadConfig as defineConfig } from "@devscast/config";
|
||||
import { z } from "zod";
|
||||
|
||||
export const PROJECT_DIR = path.resolve(__dirname, "../");
|
||||
|
||||
export const PipelineConfigSchema = z.object({
|
||||
fetch: z.object({
|
||||
async: z.object({
|
||||
prefix: z.string().default("basango:crawler:queue"),
|
||||
queues: z.object({
|
||||
details: z.string().default("details"),
|
||||
listing: z.string().default("listing"),
|
||||
processing: z.string().default("processing"),
|
||||
}),
|
||||
redisUrl: z.string().default("redis://localhost:6379/0"),
|
||||
ttl: z.object({
|
||||
default: z.number().int().positive().default(600),
|
||||
failure: z.number().int().nonnegative().default(3600),
|
||||
result: z.number().int().nonnegative().default(3600),
|
||||
}),
|
||||
}),
|
||||
client: z.object({
|
||||
backoffInitial: z.number().nonnegative().default(1),
|
||||
backoffMax: z.number().nonnegative().default(30),
|
||||
backoffMultiplier: z.number().positive().default(2),
|
||||
followRedirects: z.boolean().default(true),
|
||||
maxRetries: z.number().int().nonnegative().default(3),
|
||||
respectRetryAfter: z.boolean().default(true),
|
||||
rotate: z.boolean().default(true),
|
||||
timeout: z.number().positive().default(20),
|
||||
userAgent: z.string().default("Basango/0.1 (+https://github.com/bernard-ng/basango)"),
|
||||
verifySsl: z.boolean().default(true),
|
||||
}),
|
||||
crawler: z.object({
|
||||
category: z.string().optional(),
|
||||
dateRange: TimestampRangeSchema.optional(),
|
||||
direction: UpdateDirectionSchema.default("forward"),
|
||||
isUpdate: z.boolean().default(false),
|
||||
maxWorkers: z.number().int().positive().default(5),
|
||||
notify: z.boolean().default(false),
|
||||
pageRange: PageRangeSchema.optional(),
|
||||
source: z.union([HtmlSourceConfigSchema, WordPressSourceConfigSchema]).optional(),
|
||||
useMultiThreading: z.boolean().default(false),
|
||||
}),
|
||||
}),
|
||||
paths: z.object({
|
||||
config: z.string().default(path.join(PROJECT_DIR, "config")),
|
||||
data: z.string().default(path.join(PROJECT_DIR, "data", "datasets")),
|
||||
root: z.string().default(PROJECT_DIR),
|
||||
}),
|
||||
sources: z.object({
|
||||
html: z.array(HtmlSourceConfigSchema).default([]),
|
||||
wordpress: z.array(WordPressSourceConfigSchema).default([]),
|
||||
}),
|
||||
});
|
||||
|
||||
export const { config, env } = defineConfig({
|
||||
cwd: process.cwd(),
|
||||
env: {
|
||||
path: path.join(PROJECT_DIR, ".env"),
|
||||
},
|
||||
schema: PipelineConfigSchema,
|
||||
sources: [
|
||||
path.join(PROJECT_DIR, "config", "pipeline.json"),
|
||||
path.join(PROJECT_DIR, "config", "sources.json"),
|
||||
],
|
||||
});
|
||||
|
||||
export type PipelineConfig = z.infer<typeof PipelineConfigSchema>;
|
||||
export type FetchClientConfig = PipelineConfig["fetch"]["client"];
|
||||
export type FetchCrawlerConfig = PipelineConfig["fetch"]["crawler"];
|
||||
export type FetchAsyncConfig = PipelineConfig["fetch"]["async"];
|
||||
@@ -1,12 +1,12 @@
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
|
||||
import type { CrawlerHttpOptions } from "@basango/domain/config";
|
||||
import {
|
||||
DEFAULT_RETRY_AFTER_HEADER,
|
||||
DEFAULT_TRANSIENT_HTTP_STATUSES,
|
||||
DEFAULT_USER_AGENT,
|
||||
} from "@basango/domain/constants";
|
||||
|
||||
import { FetchClientConfig } from "#crawler/config";
|
||||
import { UserAgents } from "#crawler/http/user-agent";
|
||||
|
||||
export type HttpHeaders = Record<string, string>;
|
||||
@@ -71,7 +71,7 @@ const buildUrl = (url: string, params?: HttpParams): string => {
|
||||
* @param config - Fetch client configuration
|
||||
* @param attempt - Current attempt number
|
||||
*/
|
||||
const computeBackoff = (config: FetchClientConfig, attempt: number): number => {
|
||||
const computeBackoff = (config: CrawlerHttpOptions, attempt: number): number => {
|
||||
const base = Math.min(
|
||||
config.backoffInitial * config.backoffMultiplier ** attempt,
|
||||
config.backoffMax,
|
||||
@@ -101,26 +101,26 @@ const parseRetryAfter = (header: string): number => {
|
||||
* @author Bernard Ngandu <bernard@devscast.tech>
|
||||
*/
|
||||
export class BaseHttpClient {
|
||||
protected readonly config: FetchClientConfig;
|
||||
protected readonly options: CrawlerHttpOptions;
|
||||
protected readonly fetchImpl: typeof fetch;
|
||||
protected readonly sleep: (ms: number) => Promise<void>;
|
||||
protected readonly headers: HttpHeaders;
|
||||
|
||||
constructor(config: FetchClientConfig, options: HttpClientOptions = {}) {
|
||||
this.config = config;
|
||||
constructor(options: CrawlerHttpOptions, clientOptions: HttpClientOptions = {}) {
|
||||
this.options = options;
|
||||
const provider =
|
||||
options.userAgentProvider ??
|
||||
new UserAgents(config.rotate, config.userAgent ?? DEFAULT_USER_AGENT);
|
||||
const userAgent = provider.get() ?? config.userAgent ?? DEFAULT_USER_AGENT;
|
||||
clientOptions.userAgentProvider ??
|
||||
new UserAgents(options.rotate, options.userAgent ?? DEFAULT_USER_AGENT);
|
||||
const userAgent = provider.get() ?? options.userAgent ?? DEFAULT_USER_AGENT;
|
||||
|
||||
const baseHeaders: HttpHeaders = { "User-Agent": userAgent };
|
||||
if (options.defaultHeaders) {
|
||||
Object.assign(baseHeaders, options.defaultHeaders);
|
||||
if (clientOptions.defaultHeaders) {
|
||||
Object.assign(baseHeaders, clientOptions.defaultHeaders);
|
||||
}
|
||||
|
||||
this.headers = baseHeaders;
|
||||
this.fetchImpl = options.fetchImpl ?? fetch;
|
||||
this.sleep = options.sleep ?? defaultSleep;
|
||||
this.fetchImpl = clientOptions.fetchImpl ?? fetch;
|
||||
this.sleep = clientOptions.sleep ?? defaultSleep;
|
||||
}
|
||||
|
||||
protected buildHeaders(headers?: HttpHeaders): HeadersInit {
|
||||
@@ -136,13 +136,13 @@ export class BaseHttpClient {
|
||||
|
||||
if (response) {
|
||||
const retryAfter = response.headers.get(retryAfterHeader);
|
||||
if (retryAfter && this.config.respectRetryAfter) {
|
||||
if (retryAfter && this.options.respectRetryAfter) {
|
||||
waitMs = parseRetryAfter(retryAfter);
|
||||
}
|
||||
}
|
||||
|
||||
if (waitMs === 0) {
|
||||
waitMs = computeBackoff(this.config, attempt);
|
||||
waitMs = computeBackoff(this.options, attempt);
|
||||
}
|
||||
|
||||
if (waitMs > 0) {
|
||||
@@ -161,7 +161,7 @@ export class SyncHttpClient extends BaseHttpClient {
|
||||
const retryAfterHeader = options.retryAfterHeader ?? DEFAULT_RETRY_AFTER_HEADER;
|
||||
const target = buildUrl(url, options.params);
|
||||
|
||||
const maxAttempts = this.config.maxRetries + 1;
|
||||
const maxAttempts = this.options.maxRetries + 1;
|
||||
let attempt = 0;
|
||||
let lastError: unknown;
|
||||
|
||||
@@ -169,14 +169,14 @@ export class SyncHttpClient extends BaseHttpClient {
|
||||
const controller = new AbortController();
|
||||
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
||||
try {
|
||||
timeoutHandle = setTimeout(() => controller.abort(), this.config.timeout * 1000);
|
||||
timeoutHandle = setTimeout(() => controller.abort(), this.options.timeout * 1000);
|
||||
|
||||
const headers = this.buildHeaders(options.headers);
|
||||
const init: RequestInit = {
|
||||
body: options.data as BodyInit | undefined,
|
||||
headers,
|
||||
method,
|
||||
redirect: this.config.followRedirects ? "follow" : "manual",
|
||||
redirect: this.options.followRedirects ? "follow" : "manual",
|
||||
signal: controller.signal,
|
||||
};
|
||||
|
||||
@@ -189,7 +189,7 @@ export class SyncHttpClient extends BaseHttpClient {
|
||||
|
||||
if (
|
||||
DEFAULT_TRANSIENT_HTTP_STATUSES.includes(response.status as number) &&
|
||||
attempt < this.config.maxRetries
|
||||
attempt < this.options.maxRetries
|
||||
) {
|
||||
await this.maybeDelay(attempt, response, retryAfterHeader);
|
||||
attempt += 1;
|
||||
@@ -209,12 +209,12 @@ export class SyncHttpClient extends BaseHttpClient {
|
||||
|
||||
if (error instanceof DOMException && error.name === "AbortError") {
|
||||
lastError = error;
|
||||
if (attempt >= this.config.maxRetries) {
|
||||
if (attempt >= this.options.maxRetries) {
|
||||
throw error;
|
||||
}
|
||||
} else {
|
||||
lastError = error;
|
||||
if (attempt >= this.config.maxRetries) {
|
||||
if (attempt >= this.options.maxRetries) {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { config } from "@basango/domain/config";
|
||||
import { DEFAULT_OPEN_GRAPH_USER_AGENT } from "@basango/domain/constants";
|
||||
import { ArticleMetadata } from "@basango/domain/models";
|
||||
import { parse } from "node-html-parser";
|
||||
|
||||
import { config } from "#crawler/config";
|
||||
import { SyncHttpClient } from "#crawler/http/http-client";
|
||||
import { UserAgents } from "#crawler/http/user-agent";
|
||||
import { createAbsoluteUrl } from "#crawler/utils";
|
||||
@@ -44,7 +44,7 @@ export class OpenGraph {
|
||||
private readonly client: Pick<SyncHttpClient, "get">;
|
||||
|
||||
constructor() {
|
||||
const settings = config.fetch.client;
|
||||
const settings = config.crawler.fetch.client;
|
||||
const provider = new UserAgents(true, DEFAULT_OPEN_GRAPH_USER_AGENT);
|
||||
|
||||
this.client = new SyncHttpClient(settings, {
|
||||
@@ -89,16 +89,28 @@ export class OpenGraph {
|
||||
root.querySelector("link[rel='canonical']")?.getAttribute("href") ?? null,
|
||||
url ?? null,
|
||||
]);
|
||||
const author = pick([extract(root, "article:author"), extract(root, "og:article:author")]);
|
||||
const publishedAt = pick([
|
||||
extract(root, "article:published_time"),
|
||||
extract(root, "og:article:published_time"),
|
||||
]);
|
||||
const updatedAt = pick([
|
||||
extract(root, "article:modified_time"),
|
||||
extract(root, "og:article:modified_time"),
|
||||
]);
|
||||
|
||||
if (!title && !description && !image && !canonical) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return {
|
||||
author,
|
||||
description,
|
||||
image: createAbsoluteUrl(url, image ?? "") || undefined,
|
||||
publishedAt,
|
||||
title,
|
||||
updatedAt,
|
||||
url: createAbsoluteUrl(url, canonical ?? "") || undefined,
|
||||
};
|
||||
} as ArticleMetadata;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { HtmlSourceConfig, WordPressSourceConfig } from "@basango/domain/crawler";
|
||||
import type { HtmlSourceOptions, WordPressSourceOptions } from "@basango/domain/config";
|
||||
import { Article } from "@basango/domain/models";
|
||||
import { logger } from "@basango/logger";
|
||||
|
||||
@@ -24,7 +24,7 @@ export const collectHtmlListing = async (
|
||||
payload: ListingTaskPayload,
|
||||
manager: QueueManager = createQueueManager(),
|
||||
): Promise<number> => {
|
||||
const source = resolveSourceConfig(payload.sourceId) as HtmlSourceConfig;
|
||||
const source = resolveSourceConfig(payload.sourceId) as HtmlSourceOptions;
|
||||
if (source.sourceKind !== "html") {
|
||||
return await collectWordPressListing(payload, manager);
|
||||
}
|
||||
@@ -63,7 +63,7 @@ export const collectWordPressListing = async (
|
||||
payload: ListingTaskPayload,
|
||||
manager: QueueManager = createQueueManager(),
|
||||
): Promise<number> => {
|
||||
const source = resolveSourceConfig(payload.sourceId) as WordPressSourceConfig;
|
||||
const source = resolveSourceConfig(payload.sourceId) as WordPressSourceOptions;
|
||||
if (source.sourceKind !== "wordpress") {
|
||||
return await collectHtmlListing(payload, manager);
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import { JobsOptions, Queue, QueueOptions } from "bullmq";
|
||||
import { type CrawlerAsyncOptions, config } from "@basango/domain/config";
|
||||
import { JobsOptions, Queue } from "bullmq";
|
||||
import IORedis from "ioredis";
|
||||
|
||||
import { FetchAsyncConfig, config } from "#crawler/config";
|
||||
import {
|
||||
DetailsTaskPayload,
|
||||
DetailsTaskPayloadSchema,
|
||||
@@ -20,28 +20,27 @@ export interface QueueBackend<T = unknown> {
|
||||
|
||||
export type QueueFactory = (
|
||||
queueName: string,
|
||||
settings: FetchAsyncConfig,
|
||||
options: CrawlerAsyncOptions,
|
||||
connection?: IORedis,
|
||||
) => QueueBackend;
|
||||
|
||||
const defaultQueueFactory: QueueFactory = (queueName, settings, connection) => {
|
||||
const defaultQueueFactory: QueueFactory = (queueName, options, connection) => {
|
||||
const redisConnection =
|
||||
connection ??
|
||||
new IORedis(settings.redisUrl, {
|
||||
...parseRedisUrl(settings.redisUrl),
|
||||
new IORedis(options.redisUrl, {
|
||||
...parseRedisUrl(options.redisUrl),
|
||||
maxRetriesPerRequest: null,
|
||||
});
|
||||
const options: QueueOptions = {
|
||||
connection: redisConnection,
|
||||
prefix: settings.prefix,
|
||||
};
|
||||
|
||||
const queue = new Queue(queueName, options);
|
||||
const queue = new Queue(queueName, {
|
||||
connection: redisConnection,
|
||||
prefix: options.prefix,
|
||||
});
|
||||
return {
|
||||
add: async (name, data, opts) => {
|
||||
const job = await queue.add(name, data, {
|
||||
removeOnComplete: settings.ttl.result === 0 ? true : undefined,
|
||||
removeOnFail: settings.ttl.failure === 0 ? true : undefined,
|
||||
removeOnComplete: options.ttl.result === 0 ? true : undefined,
|
||||
removeOnFail: options.ttl.failure === 0 ? true : undefined,
|
||||
...opts,
|
||||
});
|
||||
return { id: job.id ?? randomUUID() };
|
||||
@@ -55,7 +54,7 @@ export interface CreateQueueManagerOptions {
|
||||
}
|
||||
|
||||
export interface QueueManager {
|
||||
readonly settings: FetchAsyncConfig;
|
||||
readonly options: CrawlerAsyncOptions;
|
||||
readonly connection: IORedis;
|
||||
enqueueListing: (payload: ListingTaskPayload) => Promise<{ id: string }>;
|
||||
enqueueArticle: (payload: DetailsTaskPayload) => Promise<{ id: string }>;
|
||||
@@ -66,17 +65,17 @@ export interface QueueManager {
|
||||
}
|
||||
|
||||
export const createQueueManager = (options: CreateQueueManagerOptions = {}): QueueManager => {
|
||||
const settings = config.fetch.async;
|
||||
const asyncOptions = config.crawler.fetch.async;
|
||||
|
||||
const connection =
|
||||
options.connection ??
|
||||
new IORedis(settings.redisUrl, {
|
||||
...parseRedisUrl(settings.redisUrl),
|
||||
new IORedis(asyncOptions.redisUrl, {
|
||||
...parseRedisUrl(asyncOptions.redisUrl),
|
||||
maxRetriesPerRequest: null,
|
||||
});
|
||||
const factory = options.queueFactory ?? defaultQueueFactory;
|
||||
|
||||
const ensureQueue = (queueName: string) => factory(queueName, settings, connection);
|
||||
const ensureQueue = (queueName: string) => factory(queueName, asyncOptions, connection);
|
||||
|
||||
return {
|
||||
close: async () => {
|
||||
@@ -85,25 +84,25 @@ export const createQueueManager = (options: CreateQueueManagerOptions = {}): Que
|
||||
connection,
|
||||
enqueueArticle: (payload) => {
|
||||
const data = DetailsTaskPayloadSchema.parse(payload);
|
||||
const queue = ensureQueue(settings.queues.details);
|
||||
const queue = ensureQueue(asyncOptions.queues.details);
|
||||
return queue.add("collect_article", data);
|
||||
},
|
||||
enqueueListing: (payload) => {
|
||||
const data = ListingTaskPayloadSchema.parse(payload);
|
||||
const queue = ensureQueue(settings.queues.listing);
|
||||
const queue = ensureQueue(asyncOptions.queues.listing);
|
||||
return queue.add("collect_listing", data);
|
||||
},
|
||||
enqueueProcessed: (payload) => {
|
||||
const data = ProcessingTaskPayloadSchema.parse(payload);
|
||||
const queue = ensureQueue(settings.queues.processing);
|
||||
const queue = ensureQueue(asyncOptions.queues.processing);
|
||||
return queue.add("forward_for_processing", data);
|
||||
},
|
||||
iterQueueNames: () => [
|
||||
settings.queues.listing,
|
||||
settings.queues.details,
|
||||
settings.queues.processing,
|
||||
asyncOptions.queues.listing,
|
||||
asyncOptions.queues.details,
|
||||
asyncOptions.queues.processing,
|
||||
],
|
||||
queueName: (suffix: string) => `${settings.prefix}:${suffix}`,
|
||||
settings,
|
||||
options: asyncOptions,
|
||||
queueName: (suffix: string) => `${asyncOptions.prefix}:${suffix}`,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { PageRangeSchema, TimestampRangeSchema } from "@basango/domain/crawler";
|
||||
import { articleSchema } from "@basango/domain/models";
|
||||
import { PageRangeSchema, TimestampRangeSchema, articleSchema } from "@basango/domain/models";
|
||||
import { z } from "zod";
|
||||
|
||||
export const ListingTaskPayloadSchema = z.object({
|
||||
|
||||
@@ -45,7 +45,7 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => {
|
||||
{
|
||||
concurrency: options.concurrency ?? 5,
|
||||
connection,
|
||||
prefix: manager.settings.prefix,
|
||||
prefix: manager.options.prefix,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -56,7 +56,7 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => {
|
||||
|
||||
const queueEvents = new QueueEvents(queueName, {
|
||||
connection,
|
||||
prefix: manager.settings.prefix,
|
||||
prefix: manager.options.prefix,
|
||||
});
|
||||
|
||||
workers.push(worker);
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import type { AnySourceConfig } from "@basango/domain/crawler";
|
||||
import { AnySourceOptions, CrawlerFetchingOptions, config } from "@basango/domain/config";
|
||||
import logger from "@basango/logger";
|
||||
|
||||
import { FetchCrawlerConfig, config } from "#crawler/config";
|
||||
import { JsonlPersistor, Persistor } from "#crawler/process/persistence";
|
||||
import { createPageRange, createTimestampRange } from "#crawler/utils";
|
||||
|
||||
@@ -13,11 +12,11 @@ export interface CrawlingOptions {
|
||||
}
|
||||
|
||||
export const resolveCrawlerConfig = (
|
||||
source: AnySourceConfig,
|
||||
source: AnySourceOptions,
|
||||
options: CrawlingOptions,
|
||||
): FetchCrawlerConfig => {
|
||||
): CrawlerFetchingOptions => {
|
||||
return {
|
||||
...config.fetch.crawler,
|
||||
...config.crawler.fetch.crawler,
|
||||
category: options.category,
|
||||
dateRange: createTimestampRange(options.dateRange),
|
||||
pageRange: createPageRange(options.pageRange),
|
||||
@@ -25,10 +24,10 @@ export const resolveCrawlerConfig = (
|
||||
};
|
||||
};
|
||||
|
||||
export const createPersistors = (source: AnySourceConfig): Persistor[] => {
|
||||
export const createPersistors = (source: AnySourceOptions): Persistor[] => {
|
||||
return [
|
||||
new JsonlPersistor({
|
||||
directory: config.paths.data,
|
||||
directory: config.crawler.paths.data,
|
||||
sourceId: source.sourceId,
|
||||
}),
|
||||
];
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import type { AnySourceConfig } from "@basango/domain/crawler";
|
||||
import { AnySourceOptions, CrawlerFetchingOptions, config } from "@basango/domain/config";
|
||||
import { Article } from "@basango/domain/models";
|
||||
import { HTMLElement, parse as parseHtml } from "node-html-parser";
|
||||
|
||||
import { FetchCrawlerConfig, config } from "#crawler/config";
|
||||
import { SyncHttpClient } from "#crawler/http/http-client";
|
||||
import { OpenGraph } from "#crawler/http/open-graph";
|
||||
import type { Persistor } from "#crawler/process/persistence";
|
||||
@@ -12,23 +11,23 @@ export interface CrawlerOptions {
|
||||
}
|
||||
|
||||
export abstract class BaseCrawler {
|
||||
protected readonly settings: FetchCrawlerConfig;
|
||||
protected readonly source: AnySourceConfig;
|
||||
protected readonly options: CrawlerFetchingOptions;
|
||||
protected readonly source: AnySourceOptions;
|
||||
protected readonly http: SyncHttpClient;
|
||||
protected readonly persistors: Persistor[];
|
||||
protected readonly openGraph: OpenGraph;
|
||||
|
||||
protected constructor(settings: FetchCrawlerConfig, options: CrawlerOptions = {}) {
|
||||
if (!settings.source) {
|
||||
protected constructor(options: CrawlerFetchingOptions, crawlerOptions: CrawlerOptions = {}) {
|
||||
if (!options.source) {
|
||||
throw new Error("Crawler requires a bound source");
|
||||
}
|
||||
|
||||
this.http = new SyncHttpClient(config.fetch.client);
|
||||
this.persistors = options.persistors ?? [];
|
||||
this.http = new SyncHttpClient(config.crawler.fetch.client);
|
||||
this.persistors = crawlerOptions.persistors ?? [];
|
||||
this.openGraph = new OpenGraph();
|
||||
|
||||
this.settings = settings;
|
||||
this.source = settings.source as AnySourceConfig;
|
||||
this.options = options;
|
||||
this.source = options.source as AnySourceOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import type { HtmlSourceConfig, TimestampRange } from "@basango/domain/crawler";
|
||||
import { Article } from "@basango/domain/models";
|
||||
import { CrawlerFetchingOptions, HtmlSourceOptions } from "@basango/domain/config";
|
||||
import { Article, TimestampRange } from "@basango/domain/models";
|
||||
import { logger } from "@basango/logger";
|
||||
import { fromUnixTime, getUnixTime, isMatch as isDateMatch, parse } from "date-fns";
|
||||
import { HTMLElement } from "node-html-parser";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
import { FetchCrawlerConfig } from "#crawler/config";
|
||||
import {
|
||||
ArticleOutOfDateRangeError,
|
||||
InvalidArticleError,
|
||||
@@ -26,21 +25,21 @@ const md = new TurndownService({
|
||||
* Crawler for generic HTML pages.
|
||||
*/
|
||||
export class HtmlCrawler extends BaseCrawler {
|
||||
readonly source: HtmlSourceConfig;
|
||||
readonly source: HtmlSourceOptions;
|
||||
private currentNode: string | null = null;
|
||||
|
||||
constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) {
|
||||
constructor(settings: CrawlerFetchingOptions, options: { persistors?: Persistor[] } = {}) {
|
||||
super(settings, options);
|
||||
|
||||
if (!settings.source || settings.source.sourceKind !== "html") {
|
||||
throw new UnsupportedSourceKindError("HtmlCrawler requires a source of kind 'html'");
|
||||
}
|
||||
this.source = this.settings.source as HtmlSourceConfig;
|
||||
this.source = this.options.source as HtmlSourceOptions;
|
||||
}
|
||||
|
||||
async fetch(): Promise<void> {
|
||||
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
||||
const dateRange = this.settings.dateRange;
|
||||
const pageRange = this.options.pageRange ?? (await this.getPagination());
|
||||
const dateRange = this.options.dateRange;
|
||||
const selectors = this.source.sourceSelectors;
|
||||
|
||||
if (!selectors.articles) {
|
||||
@@ -218,7 +217,7 @@ export class HtmlCrawler extends BaseCrawler {
|
||||
*/
|
||||
private applyCategory(template: string): string {
|
||||
if (template.includes("{category}")) {
|
||||
const replacement = this.settings.category ?? "";
|
||||
const replacement = this.options.category ?? "";
|
||||
return template.replace("{category}", replacement);
|
||||
}
|
||||
return template;
|
||||
@@ -297,7 +296,7 @@ export class HtmlCrawler extends BaseCrawler {
|
||||
* @param selector - The CSS selector
|
||||
*/
|
||||
private extractCategories(root: HTMLElement, selector?: string | null): string[] {
|
||||
if (!selector && this.settings.category) return [this.settings.category.toLowerCase()];
|
||||
if (!selector && this.options.category) return [this.options.category.toLowerCase()];
|
||||
if (!selector) return [];
|
||||
|
||||
const values: string[] = [];
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import type { PageRange, TimestampRange, WordPressSourceConfig } from "@basango/domain/crawler";
|
||||
import { Article } from "@basango/domain/models";
|
||||
import { CrawlerFetchingOptions, WordPressSourceOptions } from "@basango/domain/config";
|
||||
import { Article, PageRange, TimestampRange } from "@basango/domain/models";
|
||||
import { logger } from "@basango/logger";
|
||||
import { fromUnixTime } from "date-fns";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
import { FetchCrawlerConfig } from "#crawler/config";
|
||||
import {
|
||||
ArticleOutOfDateRangeError,
|
||||
InvalidArticleError,
|
||||
@@ -33,7 +32,7 @@ interface WordPressPost {
|
||||
* Crawler for WordPress sites using the REST API.
|
||||
*/
|
||||
export class WordPressCrawler extends BaseCrawler {
|
||||
readonly source: WordPressSourceConfig;
|
||||
readonly source: WordPressSourceOptions;
|
||||
private categoryMap: Map<number, string> = new Map();
|
||||
|
||||
public static readonly POST_QUERY =
|
||||
@@ -43,7 +42,7 @@ export class WordPressCrawler extends BaseCrawler {
|
||||
public static readonly TOTAL_PAGES_HEADER = "x-wp-totalpages";
|
||||
public static readonly TOTAL_POSTS_HEADER = "x-wp-total";
|
||||
|
||||
constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) {
|
||||
constructor(settings: CrawlerFetchingOptions, options: { persistors?: Persistor[] } = {}) {
|
||||
super(settings, options);
|
||||
|
||||
if (!settings.source || settings.source.sourceKind !== "wordpress") {
|
||||
@@ -51,15 +50,15 @@ export class WordPressCrawler extends BaseCrawler {
|
||||
"WordPressCrawler requires a source of kind 'wordpress'",
|
||||
);
|
||||
}
|
||||
this.source = this.settings.source as WordPressSourceConfig;
|
||||
this.source = this.options.source as WordPressSourceOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch and process WordPress posts.
|
||||
*/
|
||||
async fetch(): Promise<void> {
|
||||
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
||||
const dateRange = this.settings.dateRange;
|
||||
const pageRange = this.options.pageRange ?? (await this.getPagination());
|
||||
const dateRange = this.options.dateRange;
|
||||
|
||||
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
||||
const endpoint = this.buildEndpointUrl(page);
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
import { config } from "@basango/domain/config";
|
||||
import type { Article } from "@basango/domain/models";
|
||||
import { md5 } from "@basango/encryption";
|
||||
import logger from "@basango/logger";
|
||||
|
||||
import { config, env } from "#crawler/config";
|
||||
import { HttpError, SyncHttpClient } from "#crawler/http/http-client";
|
||||
|
||||
export interface Persistor {
|
||||
@@ -66,9 +66,9 @@ export const persist = async (
|
||||
};
|
||||
|
||||
export const forward = async (payload: Partial<Article>): Promise<void> => {
|
||||
const client = new SyncHttpClient(config.fetch.client);
|
||||
const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT");
|
||||
const token = env("BASANGO_CRAWLER_TOKEN");
|
||||
const client = new SyncHttpClient(config.crawler.fetch.client);
|
||||
const endpoint = config.crawler.backend.endpoint;
|
||||
const token = config.crawler.backend.token;
|
||||
|
||||
try {
|
||||
const response = await client.post(endpoint, {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#! /usr/bin/env bun
|
||||
#!/usr/bin/env bun
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { createInterface } from "node:readline";
|
||||
import { parseArgs } from "node:util";
|
||||
|
||||
import { config } from "@basango/domain/config";
|
||||
import type { Article } from "@basango/domain/models";
|
||||
import { logger } from "@basango/logger";
|
||||
|
||||
import { config } from "#crawler/config";
|
||||
import { forward } from "#crawler/process/persistence";
|
||||
|
||||
const USAGE = `
|
||||
@@ -31,7 +31,7 @@ const main = async (): Promise<void> => {
|
||||
return;
|
||||
}
|
||||
|
||||
const filePath = path.join(config.paths.data, `${sourceId}.jsonl`);
|
||||
const filePath = path.join(config.crawler.paths.data, `${sourceId}.jsonl`);
|
||||
|
||||
if (!fs.existsSync(filePath)) {
|
||||
logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found");
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#! /usr/bin/env bun
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { logger } from "@basango/logger";
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#! /usr/bin/env bun
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { logger } from "@basango/logger";
|
||||
|
||||
|
||||
@@ -1,28 +1,29 @@
|
||||
import {
|
||||
AnySourceOptions,
|
||||
HtmlSourceOptions,
|
||||
WordPressSourceOptions,
|
||||
config,
|
||||
} from "@basango/domain/config";
|
||||
import { DEFAULT_DATE_FORMAT } from "@basango/domain/constants";
|
||||
import {
|
||||
AnySourceConfig,
|
||||
DateSpecSchema,
|
||||
HtmlSourceConfig,
|
||||
PageRange,
|
||||
PageRangeSchema,
|
||||
PageSpecSchema,
|
||||
TimestampRange,
|
||||
TimestampRangeSchema,
|
||||
WordPressSourceConfig,
|
||||
} from "@basango/domain/crawler";
|
||||
} from "@basango/domain/models";
|
||||
import { format, fromUnixTime, getUnixTime, isMatch, parse } from "date-fns";
|
||||
import type { RedisOptions } from "ioredis";
|
||||
|
||||
import { config } from "#crawler/config";
|
||||
|
||||
/**
|
||||
* Resolve a source configuration by its ID.
|
||||
* @param id - The source ID
|
||||
*/
|
||||
export const resolveSourceConfig = (id: string): AnySourceConfig => {
|
||||
export const resolveSourceConfig = (id: string): AnySourceOptions => {
|
||||
const source =
|
||||
config.sources.html.find((s: HtmlSourceConfig) => s.sourceId === id) ||
|
||||
config.sources.wordpress.find((s: WordPressSourceConfig) => s.sourceId === id);
|
||||
config.crawler.sources.html.find((s: HtmlSourceOptions) => s.sourceId === id) ||
|
||||
config.crawler.sources.wordpress.find((s: WordPressSourceOptions) => s.sourceId === id);
|
||||
|
||||
if (source === undefined) {
|
||||
throw new Error(`Source '${id}' not found in configuration`);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 0,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "basango",
|
||||
@@ -25,13 +26,10 @@
|
||||
"@basango/domain": "workspace:*",
|
||||
"@basango/encryption": "workspace:*",
|
||||
"@basango/logger": "workspace:*",
|
||||
"@devscast/config": "catalog:",
|
||||
"@hono/node-server": "^1.19.6",
|
||||
"@hono/trpc-server": "^0.4.0",
|
||||
"@hono/zod-openapi": "^1.1.4",
|
||||
"@scalar/hono-api-reference": "^0.9.24",
|
||||
"@trpc/server": "^11.7.1",
|
||||
"ai": "^5.0.89",
|
||||
"camelcase-keys": "^10.0.1",
|
||||
"date-fns": "catalog:",
|
||||
"hono-rate-limiter": "^0.4.2",
|
||||
@@ -149,7 +147,6 @@
|
||||
"@basango/encryption": "workspace:*",
|
||||
"@basango/logger": "workspace:*",
|
||||
"@date-fns/utc": "^2.1.1",
|
||||
"@devscast/config": "catalog:",
|
||||
"date-fns": "catalog:",
|
||||
"drizzle-orm": "^0.44.7",
|
||||
"mysql2": "^3.15.3",
|
||||
@@ -188,7 +185,7 @@
|
||||
"packages/logger": {
|
||||
"name": "@basango/logger",
|
||||
"dependencies": {
|
||||
"@devscast/config": "catalog:",
|
||||
"@basango/domain": "workspace:*",
|
||||
"pino": "^10.1.0",
|
||||
"pino-pretty": "^13.1.2",
|
||||
},
|
||||
@@ -245,7 +242,7 @@
|
||||
},
|
||||
},
|
||||
"catalog": {
|
||||
"@devscast/config": "^1.0.3",
|
||||
"@devscast/config": "^1.1.1",
|
||||
"@types/bun": "^1.3.1",
|
||||
"@types/node": "^24.10.0",
|
||||
"@types/react": "^19.2.0",
|
||||
@@ -260,12 +257,6 @@
|
||||
"packages": {
|
||||
"@0no-co/graphql.web": ["@0no-co/graphql.web@1.2.0", "", { "peerDependencies": { "graphql": "^14.0.0 || ^15.0.0 || ^16.0.0" }, "optionalPeers": ["graphql"] }, "sha512-/1iHy9TTr63gE1YcR5idjx8UREz1s0kFhydf3bBLCXyqjhkIc6igAzTOx3zPifCwFR87tsh/4Pa9cNts6d2otw=="],
|
||||
|
||||
"@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.7", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.16", "@vercel/oidc": "3.0.3" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-/AI5AKi4vOK9SEb8Z1dfXkhsJ5NAfWsoJQc96B/mzn2KIrjw5occOjIwD06scuhV9xWlghCoXJT1sQD9QH/tyg=="],
|
||||
|
||||
"@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="],
|
||||
|
||||
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.16", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-lsWQY9aDXHitw7C1QRYIbVGmgwyT98TF3MfM8alNIXKpdJdi+W782Rzd9f1RyOfgRmZ08gJ2EYNDhWNK7RqpEA=="],
|
||||
|
||||
"@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="],
|
||||
|
||||
"@asteasolutions/zod-to-openapi": ["@asteasolutions/zod-to-openapi@8.1.0", "", { "dependencies": { "openapi3-ts": "^4.1.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-tQFxVs05J/6QXXqIzj6rTRk3nj1HFs4pe+uThwE95jL5II2JfpVXkK+CqkO7aT0Do5AYqO6LDrKpleLUFXgY+g=="],
|
||||
@@ -538,7 +529,7 @@
|
||||
|
||||
"@date-fns/utc": ["@date-fns/utc@2.1.1", "", {}, "sha512-SlJDfG6RPeEX8wEVv6ZB3kak4MmbtyiI2qX/5zuKdordbrhB/iaJ58GVMZgJ6P1sJaM1gMgENFYYeg1JWrCFrA=="],
|
||||
|
||||
"@devscast/config": ["@devscast/config@1.0.3", "", { "peerDependencies": { "ini": "^6.0.0", "yaml": "^2.8.1", "zod": "^4.1.12" }, "optionalPeers": ["ini", "yaml"] }, "sha512-/FjCA/MV1KR2tY44YBA4tdXNzQgoF75O+RQ4fbzvVWY77PXOama2Hf6YXeLcQsvxfItaXi2cFz8BaaVdqZYS8w=="],
|
||||
"@devscast/config": ["@devscast/config@1.1.1", "", { "peerDependencies": { "ini": "^6.0.0", "yaml": "^2.8.1", "zod": "^4.1.12" }, "optionalPeers": ["ini", "yaml"] }, "sha512-PyGV43m6V8sO66EOsKXWkohisH90rQZIcEgbGB2yVJ+BAfwj1P3rUx3DifpndX/Go8Ng9YbkjCNYBKYk5FwSgQ=="],
|
||||
|
||||
"@drizzle-team/brocli": ["@drizzle-team/brocli@0.10.2", "", {}, "sha512-z33Il7l5dKjUgGULTqBsQBQwckHh5AbIuxhdsIxDDiZAzBOrZO6q9ogcWC65kU382AfynTfgNumVcNIjuIua6w=="],
|
||||
|
||||
@@ -944,14 +935,6 @@
|
||||
|
||||
"@reduxjs/toolkit": ["@reduxjs/toolkit@2.10.1", "", { "dependencies": { "@standard-schema/spec": "^1.0.0", "@standard-schema/utils": "^0.3.0", "immer": "^10.2.0", "redux": "^5.0.1", "redux-thunk": "^3.1.0", "reselect": "^5.1.0" }, "peerDependencies": { "react": "^16.9.0 || ^17.0.0 || ^18 || ^19", "react-redux": "^7.2.1 || ^8.1.3 || ^9.0.0" }, "optionalPeers": ["react", "react-redux"] }, "sha512-/U17EXQ9Do9Yx4DlNGU6eVNfZvFJfYpUtRRdLf19PbPjdWBxNlxGZXywQZ1p1Nz8nMkWplTI7iD/23m07nolDA=="],
|
||||
|
||||
"@scalar/core": ["@scalar/core@0.3.22", "", { "dependencies": { "@scalar/types": "0.4.0" } }, "sha512-6lzeRkvgkukSgge35kvxJKiJBny4rdGSaLTNzn/sF1F6JRfUo7I0AgqFxxSZWMD+EG4kGyNxAz0zciDSx2Cjvw=="],
|
||||
|
||||
"@scalar/hono-api-reference": ["@scalar/hono-api-reference@0.9.24", "", { "dependencies": { "@scalar/core": "0.3.22" }, "peerDependencies": { "hono": "^4.10.3" } }, "sha512-NjPY3iMm/FqYRXAgr6V7qBhJGbSUQ8hbijFUMuqZo4pIjGEUNLeB5L9U2Gh4cDIPPWeso8mlc16jaX7dV0FrPw=="],
|
||||
|
||||
"@scalar/openapi-types": ["@scalar/openapi-types@0.5.1", "", { "dependencies": { "zod": "4.1.11" } }, "sha512-8g7s9lPolyDFtijyh3Ob459tpezPuZbkXoFgJwBTHjPZ7ap+TvOJTvLk56CFwxVBVz2BxCzWJqxYyy3FUdeLoA=="],
|
||||
|
||||
"@scalar/types": ["@scalar/types@0.4.0", "", { "dependencies": { "@scalar/openapi-types": "0.5.1", "nanoid": "5.1.5", "type-fest": "5.0.0", "zod": "4.1.11" } }, "sha512-vOD1GZez7kPdVA+UQit05QE9dbALfevhK9kqRTsqcPX7FvvZ9eQWSNl1GKmKtmRiAZGThv2agM5AvHRxkH2JSw=="],
|
||||
|
||||
"@sinclair/typebox": ["@sinclair/typebox@0.27.8", "", {}, "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA=="],
|
||||
|
||||
"@sinonjs/commons": ["@sinonjs/commons@3.0.1", "", { "dependencies": { "type-detect": "4.0.8" } }, "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ=="],
|
||||
@@ -1100,8 +1083,6 @@
|
||||
|
||||
"@urql/exchange-retry": ["@urql/exchange-retry@1.3.2", "", { "dependencies": { "@urql/core": "^5.1.2", "wonka": "^6.3.2" } }, "sha512-TQMCz2pFJMfpNxmSfX1VSfTjwUIFx/mL+p1bnfM1xjjdla7Z+KnGMW/EhFbpckp3LyWAH4PgOsMwOMnIN+MBFg=="],
|
||||
|
||||
"@vercel/oidc": ["@vercel/oidc@3.0.3", "", {}, "sha512-yNEQvPcVrK9sIe637+I0jD6leluPxzwJKx/Haw6F4H77CdDsszUn5V3o96LPziXkSNE2B83+Z3mjqGKBK/R6Gg=="],
|
||||
|
||||
"@xmldom/xmldom": ["@xmldom/xmldom@0.8.11", "", {}, "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw=="],
|
||||
|
||||
"JSONStream": ["JSONStream@1.3.5", "", { "dependencies": { "jsonparse": "^1.2.0", "through": ">=2.2.7 <3" }, "bin": { "JSONStream": "./bin.js" } }, "sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ=="],
|
||||
@@ -1118,8 +1099,6 @@
|
||||
|
||||
"aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="],
|
||||
|
||||
"ai": ["ai@5.0.89", "", { "dependencies": { "@ai-sdk/gateway": "2.0.7", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.16", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8Nq+ZojGacQrupoJEQLrTDzT5VtR3gyp5AaqFSV3tzsAXlYQ9Igb7QE3yeoEdzOk5IRfDwWL7mDCUD+oBg1hDA=="],
|
||||
|
||||
"ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="],
|
||||
|
||||
"anser": ["anser@1.4.10", "", {}, "sha512-hCv9AqTQ8ycjpSd3upOJd7vFwW1JaoYQ7tpham03GJ1ca8/65rqn0RpaWpItOAd6ylW9wAw6luXYPJIyPFVOww=="],
|
||||
@@ -1478,8 +1457,6 @@
|
||||
|
||||
"eventemitter3": ["eventemitter3@5.0.1", "", {}, "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA=="],
|
||||
|
||||
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
|
||||
|
||||
"exec-async": ["exec-async@2.2.0", "", {}, "sha512-87OpwcEiMia/DeiKFzaQNBNFeN3XkkpYIh9FyOqq5mS2oKv3CBE67PXoEKcr6nodWdXNogTiQ0jE2NGuoffXPw=="],
|
||||
|
||||
"execa": ["execa@5.1.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^6.0.0", "human-signals": "^2.1.0", "is-stream": "^2.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^4.0.1", "onetime": "^5.1.2", "signal-exit": "^3.0.3", "strip-final-newline": "^2.0.0" } }, "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg=="],
|
||||
@@ -1786,8 +1763,6 @@
|
||||
|
||||
"json-parse-even-better-errors": ["json-parse-even-better-errors@2.3.1", "", {}, "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="],
|
||||
|
||||
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
|
||||
|
||||
"json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="],
|
||||
|
||||
"json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="],
|
||||
@@ -2410,8 +2385,6 @@
|
||||
|
||||
"swap-case": ["swap-case@1.1.2", "", { "dependencies": { "lower-case": "^1.1.1", "upper-case": "^1.1.1" } }, "sha512-BAmWG6/bx8syfc6qXPprof3Mn5vQgf5dwdUNJhsNqU9WdPt5P+ES/wQ5bxfijy8zwZgZZHslC3iAsxsuQMCzJQ=="],
|
||||
|
||||
"tagged-tag": ["tagged-tag@1.0.0", "", {}, "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng=="],
|
||||
|
||||
"tailwind-merge": ["tailwind-merge@3.3.1", "", {}, "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g=="],
|
||||
|
||||
"tailwindcss": ["tailwindcss@4.1.17", "", {}, "sha512-j9Ee2YjuQqYT9bbRTfTZht9W/ytp5H+jJpZKiYdP/bpnXARAuELt9ofP0lPnmHjbga7SNQIxdTAXCmtKVYjN+Q=="],
|
||||
@@ -2816,14 +2789,6 @@
|
||||
|
||||
"@radix-ui/react-visually-hidden/@radix-ui/react-primitive": ["@radix-ui/react-primitive@2.1.3", "", { "dependencies": { "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ=="],
|
||||
|
||||
"@scalar/openapi-types/zod": ["zod@4.1.11", "", {}, "sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg=="],
|
||||
|
||||
"@scalar/types/nanoid": ["nanoid@5.1.5", "", { "bin": { "nanoid": "bin/nanoid.js" } }, "sha512-Ir/+ZpE9fDsNH0hQ3C68uyThDXzYcim2EqcZ8zn8Chtt1iylPT9xXJB0kPCnqzgcEGikO9RxSrh63MsmVCU7Fw=="],
|
||||
|
||||
"@scalar/types/type-fest": ["type-fest@5.0.0", "", { "dependencies": { "tagged-tag": "^1.0.0" } }, "sha512-GeJop7+u7BYlQ6yQCAY1nBQiRSHR+6OdCEtd8Bwp9a3NK3+fWAVjOaPKJDteB9f6cIJ0wt4IfnScjLG450EpXA=="],
|
||||
|
||||
"@scalar/types/zod": ["zod@4.1.11", "", {}, "sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg=="],
|
||||
|
||||
"@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.7.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" }, "bundled": true }, "sha512-pJdKGq/1iquWYtv1RRSljZklxHCOCAJFJrImO5ZLKPJVJlVUcs8yFwNQlqS0Lo8xT1VAXXTCZocF9n26FWEKsw=="],
|
||||
|
||||
"@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.7.0", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-oAYoQnCYaQZKVS53Fq23ceWMRxq5EhQsE0x0RdQ55jT7wagMu5k+fS39v1fiSLrtrLQlXwVINenqhLMtTrV/1Q=="],
|
||||
|
||||
@@ -48,29 +48,6 @@ services:
|
||||
networks:
|
||||
- basango_network
|
||||
|
||||
nginx:
|
||||
build: .docker/nginx
|
||||
ports:
|
||||
- "8000:80"
|
||||
volumes:
|
||||
- ./projects/api-legacy/public:/var/www/public:delegated
|
||||
depends_on:
|
||||
- php
|
||||
networks:
|
||||
- basango_network
|
||||
|
||||
php:
|
||||
user: '${USER_ID:-1000}:${GROUP_ID:-1000}'
|
||||
build: .docker/php
|
||||
volumes:
|
||||
- ./projects/api-legacy:/var/www:delegated
|
||||
depends_on:
|
||||
- mariadb
|
||||
- postgres
|
||||
- redis
|
||||
networks:
|
||||
- basango_network
|
||||
|
||||
adminer:
|
||||
image: adminer:latest
|
||||
depends_on:
|
||||
|
||||
+2
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"catalog": {
|
||||
"@devscast/config": "^1.0.3",
|
||||
"@devscast/config": "^1.1.1",
|
||||
"@types/bun": "^1.3.1",
|
||||
"@types/node": "^24.10.0",
|
||||
"@types/react": "^19.2.0",
|
||||
@@ -52,6 +52,7 @@
|
||||
"dev:dashboard": "turbo dev --filter=@basango/dashboard",
|
||||
"format": "biome format --write && biome check --write && biome lint --write",
|
||||
"lint": "biome check && biome lint && manypkg check",
|
||||
"migrate": "cd packages/db && bunx drizzle-kit migrate",
|
||||
"prepare": "husky",
|
||||
"start:api": "turbo start --filter=@basango/api",
|
||||
"start:dashboard": "turbo start --filter=@basango/dashboard",
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
BASANGO_DATABASE_URL="postgresql://postgres:postgres@localhost:5432/app?serverVersion=16&charset=utf8"
|
||||
|
||||
BASANGO_SOURCE_DATABASE_HOST="localhost"
|
||||
BASANGO_SOURCE_DATABASE_PASS="root"
|
||||
BASANGO_SOURCE_DATABASE_NAME="app"
|
||||
BASANGO_SOURCE_DATABASE_USER="root"
|
||||
@@ -1,10 +1,9 @@
|
||||
import { config } from "@basango/domain/config";
|
||||
import { defineConfig } from "drizzle-kit";
|
||||
|
||||
import { env } from "./src/config";
|
||||
|
||||
export default defineConfig({
|
||||
dbCredentials: {
|
||||
url: env("BASANGO_DATABASE_URL"),
|
||||
url: config.database.url,
|
||||
},
|
||||
dialect: "postgresql",
|
||||
out: "./migrations",
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
-- Rebuilds the "tsv" column of "article": the column is dropped, then re-added as a
-- stored generated tsvector built from the French-configured title only, with weight 'A'.
-- NOTE(review): PostgreSQL automatically drops indexes that involve a dropped column;
-- if an index exists on "tsv" it presumably has to be recreated after this step - confirm.
ALTER TABLE "article" drop column "tsv";--> statement-breakpoint
|
||||
ALTER TABLE "article" ADD COLUMN "tsv" "tsvector" GENERATED ALWAYS AS (setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char")) STORED;--> statement-breakpoint
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,6 +14,13 @@
|
||||
"tag": "0001_init",
|
||||
"version": "7",
|
||||
"when": 1762775267679
|
||||
},
|
||||
{
|
||||
"breakpoints": true,
|
||||
"idx": 2,
|
||||
"tag": "0002_modern_joseph",
|
||||
"version": "7",
|
||||
"when": 1763920009482
|
||||
}
|
||||
],
|
||||
"version": "7"
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
"@basango/encryption": "workspace:*",
|
||||
"@basango/logger": "workspace:*",
|
||||
"@date-fns/utc": "^2.1.1",
|
||||
"@devscast/config": "catalog:",
|
||||
"date-fns": "catalog:",
|
||||
"drizzle-orm": "^0.44.7",
|
||||
"mysql2": "^3.15.3",
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
import { config } from "@basango/domain/config";
|
||||
import { drizzle } from "drizzle-orm/node-postgres";
|
||||
import { Pool } from "pg";
|
||||
|
||||
import { env } from "#db/config";
|
||||
import * as schema from "#db/schema";
|
||||
|
||||
const isDevelopment = process.env.NODE_ENV === "development";
|
||||
|
||||
const pool = new Pool({
|
||||
allowExitOnIdle: true,
|
||||
connectionString: env("BASANGO_DATABASE_URL"),
|
||||
connectionString: config.database.url,
|
||||
connectionTimeoutMillis: 15_000,
|
||||
idleTimeoutMillis: isDevelopment ? 5_000 : 60_000,
|
||||
max: isDevelopment ? 8 : 12,
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
import path from "node:path";
|
||||
|
||||
import { loadConfig } from "@devscast/config";
|
||||
import { z } from "zod";
|
||||
|
||||
const PROJECT_DIR = path.resolve(__dirname, "../");
|
||||
|
||||
export const { env, config } = loadConfig({
|
||||
env: {
|
||||
knownKeys: [
|
||||
"BASANGO_DATABASE_URL",
|
||||
"BASANGO_SOURCE_DATABASE_HOST",
|
||||
"BASANGO_SOURCE_DATABASE_USER",
|
||||
"BASANGO_SOURCE_DATABASE_PASS",
|
||||
"BASANGO_SOURCE_DATABASE_NAME",
|
||||
] as const,
|
||||
path: path.join(PROJECT_DIR, ".env"),
|
||||
},
|
||||
schema: z.object({}),
|
||||
});
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
import { md5 } from "@basango/encryption";
|
||||
import type { SQL } from "drizzle-orm";
|
||||
import { count, desc, eq, getTableColumns, sql } from "drizzle-orm";
|
||||
import { v7 as uuidV7 } from "uuid";
|
||||
import * as uuid from "uuid";
|
||||
|
||||
import { Database } from "#db/client";
|
||||
import { getSourceIdByName } from "#db/queries/sources";
|
||||
@@ -56,7 +56,7 @@ export async function createArticle(db: Database, params: CreateArticleParams) {
|
||||
|
||||
const [result] = await db
|
||||
.insert(articles)
|
||||
.values({ id: uuidV7(), ...data })
|
||||
.values({ id: uuid.v7(), ...data })
|
||||
.returning({
|
||||
id: articles.id,
|
||||
sourceId: articles.sourceId,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { DEFAULT_CATEGORY_SHARES_LIMIT, DEFAULT_TIMEZONE } from "@basango/domain/constants";
|
||||
import { ID, Publication, Publications } from "@basango/domain/models";
|
||||
import { eq, sql } from "drizzle-orm";
|
||||
import { v7 as uuidV7 } from "uuid";
|
||||
import * as uuid from "uuid";
|
||||
|
||||
import { Database } from "#db/client";
|
||||
import { NotFoundError } from "#db/errors";
|
||||
@@ -32,7 +32,7 @@ export async function getSources(db: Database) {
|
||||
export async function createSource(db: Database, params: CreateSourceParams) {
|
||||
const [result] = await db
|
||||
.insert(sources)
|
||||
.values({ id: uuidV7(), ...params })
|
||||
.values({ id: uuid.v7(), ...params })
|
||||
.returning();
|
||||
|
||||
return result;
|
||||
|
||||
@@ -114,10 +114,7 @@ export const articles = pgTable(
|
||||
title: varchar({ length: 1024 }).notNull(),
|
||||
tokenStatistics: jsonb("token_statistics").$type<TokenStatistics>(),
|
||||
tsv: tsvector("tsv").generatedAlwaysAs(
|
||||
sql`(
|
||||
setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char")
|
||||
|| setweight(to_tsvector('french'::regconfig, COALESCE(body, ''::text)), 'B'::"char")
|
||||
)`,
|
||||
sql`setweight(to_tsvector('french'::regconfig, COALESCE(title, '')::text), 'A'::"char")`,
|
||||
),
|
||||
updatedAt: timestamp("updated_at"),
|
||||
},
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
/** biome-ignore-all lint/correctness/noUnusedPrivateClassMembers: false positive */
|
||||
|
||||
import { config } from "@basango/domain/config";
|
||||
import { RowDataPacket } from "mysql2/promise";
|
||||
import { Pool, PoolClient } from "pg";
|
||||
|
||||
import { env } from "#db/config";
|
||||
import { computeReadingTime } from "#db/utils/computed";
|
||||
|
||||
type SourceOptions = {
|
||||
@@ -598,13 +598,13 @@ async function main() {
|
||||
|
||||
const engine = new Engine(
|
||||
{
|
||||
database: env("BASANGO_SOURCE_DATABASE_NAME"),
|
||||
host: env("BASANGO_SOURCE_DATABASE_HOST"),
|
||||
password: env("BASANGO_SOURCE_DATABASE_PASS"),
|
||||
user: env("BASANGO_SOURCE_DATABASE_USER"),
|
||||
database: config.database.legacy.name,
|
||||
host: config.database.legacy.host,
|
||||
password: config.database.legacy.password,
|
||||
user: config.database.legacy.user,
|
||||
},
|
||||
{
|
||||
database: env("BASANGO_DATABASE_URL"),
|
||||
database: config.database.url,
|
||||
},
|
||||
);
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { config } from "@basango/domain/config";
|
||||
import { Pool } from "pg";
|
||||
|
||||
import { env } from "#db/config";
|
||||
import { computeTokenStatistics } from "#db/utils/computed";
|
||||
|
||||
type ArticleRow = {
|
||||
@@ -114,7 +114,7 @@ class Engine {
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const engine = new Engine(env("BASANGO_DATABASE_URL"));
|
||||
const engine = new Engine(config.database.url);
|
||||
|
||||
try {
|
||||
await engine.synchronize();
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"api": {
|
||||
"cors": {
|
||||
"allowedHeaders": [
|
||||
"Authorization",
|
||||
"Content-Type",
|
||||
"accept-language",
|
||||
"x-trpc-source",
|
||||
"x-user-locale",
|
||||
"x-user-timezone",
|
||||
"x-user-country"
|
||||
],
|
||||
"allowMethods": ["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"],
|
||||
"exposeHeaders": ["Content-Length"],
|
||||
"maxAge": 86400
|
||||
},
|
||||
"security": {
|
||||
"accessTokenTtl": "15m",
|
||||
"audience": "basango_dashboard",
|
||||
"crawlerToken": "%env(BASANGO_API_CRAWLER_TOKEN)%",
|
||||
"issuer": "basango_api",
|
||||
"jwtSecret": "%env(BASANGO_API_JWT_SECRET)%",
|
||||
"refreshTokenTtl": "7d"
|
||||
},
|
||||
"server": {
|
||||
"host": "%env(BASANGO_API_HOST)%",
|
||||
"port": "%env(number:BASANGO_API_PORT)%",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,262 @@
|
||||
{
|
||||
"crawler": {
|
||||
"backend": {
|
||||
"endpoint": "%env(BASANGO_API_CRAWLER_ENDPOINT)%",
|
||||
"token": "%env(BASANGO_API_CRAWLER_TOKEN)%"
|
||||
},
|
||||
"fetch": {
|
||||
"async": {
|
||||
"prefix": "basango:crawler",
|
||||
"queues": {
|
||||
"details": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS)%",
|
||||
"listing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_LISTING)%",
|
||||
"processing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING)%"
|
||||
},
|
||||
"redisUrl": "%env(BASANGO_CRAWLER_ASYNC_REDIS_URL)%",
|
||||
"ttl": {
|
||||
"default": 600,
|
||||
"failure": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_FAILURE)%",
|
||||
"result": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_RESULT)%"
|
||||
}
|
||||
},
|
||||
"client": {
|
||||
"backoffInitial": 1,
|
||||
"backoffMax": 30,
|
||||
"backoffMultiplier": 2,
|
||||
"followRedirects": true,
|
||||
"maxRetries": "%env(number:BASANGO_CRAWLER_FETCH_MAX_RETRIES)%",
|
||||
"respectRetryAfter": "%env(boolean:BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER)%",
|
||||
"rotate": true,
|
||||
"timeout": 20,
|
||||
"userAgent": "%env(BASANGO_CRAWLER_FETCH_USER_AGENT)%",
|
||||
"verifySsl": true
|
||||
},
|
||||
"crawler": {
|
||||
"direction": "%env(BASANGO_CRAWLER_UPDATE_DIRECTION)%",
|
||||
"maxWorkers": 5,
|
||||
"notify": false,
|
||||
"useMultiThreading": false
|
||||
}
|
||||
},
|
||||
"paths": {
|
||||
"data": "%env(BASANGO_CRAWLER_DATA_PATH)%",
|
||||
"root": "%env(BASANGO_CRAWLER_ROOT_PATH)%"
|
||||
},
|
||||
"sources": {
|
||||
"html": [
|
||||
{
|
||||
"paginationTemplate": "actualite",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "radiookapi.net",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".field-name-body",
|
||||
"articleCategories": ".views-field-field-cat-gorie a",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": ".views-field-title a",
|
||||
"articles": ".view-content > .views-row.content-row",
|
||||
"articleTitle": "h1.page-header",
|
||||
"pagination": "ul.pagination > li.pager-last > a"
|
||||
},
|
||||
"sourceUrl": "https://www.radiookapi.net",
|
||||
"supportsCategories": false
|
||||
},
|
||||
{
|
||||
"categories": ["politique", "economie", "culture", "sport", "societe"],
|
||||
"paginationTemplate": "index.php/category/{category}",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "7sur7.cd",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": "div[property=\"schema:text\"].field.field--name-body",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": ".views-field-title a",
|
||||
"articles": ".view-content > .row.views-row",
|
||||
"articleTitle": ".views-field-title a",
|
||||
"pagination": "ul.pagination > li.pager__item.pager__item--last > a"
|
||||
},
|
||||
"sourceUrl": "https://7sur7.cd",
|
||||
"supportsCategories": true
|
||||
},
|
||||
{
|
||||
"paginationTemplate": "articles.html",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {
|
||||
"format": "dd.MM.yyyy"
|
||||
},
|
||||
"sourceId": "mediacongo.net",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".article_ttext",
|
||||
"articleCategories": "a.color_link",
|
||||
"articleDate": ".article_other_about",
|
||||
"articleLink": "a:first-child",
|
||||
"articles": ".for_aitems > .article_other_item",
|
||||
"articleTitle": "h1",
|
||||
"pagination": "div.pagination > div > a:last-child"
|
||||
},
|
||||
"sourceUrl": "https://www.mediacongo.net",
|
||||
"supportsCategories": false
|
||||
},
|
||||
{
|
||||
"paginationTemplate": "actualite",
|
||||
"requiresDetails": true,
|
||||
"requiresRateLimit": false,
|
||||
"sourceDate": {},
|
||||
"sourceId": "actualite.cd",
|
||||
"sourceKind": "html",
|
||||
"sourceSelectors": {
|
||||
"articleBody": ".views-field.views-field-body .field-content",
|
||||
"articleCategories": "#actu-cat",
|
||||
"articleDate": "head > meta[property=\"article:published_time\"]",
|
||||
"articleLink": "#actu-titre a",
|
||||
"articles": "#views-bootstrap-taxonomy-term-page-2 > div > div",
|
||||
"articleTitle": "h1.page-title"
|
||||
},
|
||||
"sourceUrl": "https://actualite.cd",
|
||||
"supportsCategories": false
|
||||
}
|
||||
],
|
||||
"wordpress": [
|
||||
{
|
||||
"requiresRateLimit": true,
|
||||
"sourceId": "beto.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://beto.cd"
|
||||
},
|
||||
{ "sourceId": "newscd.net", "sourceKind": "wordpress", "sourceUrl": "https://newscd.net" },
|
||||
{
|
||||
"sourceId": "africanewsrdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.africanewsrdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "angazainstitute.ac.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://angazainstitute.ac.cd"
|
||||
},
|
||||
{ "sourceId": "b-onetv.cd", "sourceKind": "wordpress", "sourceUrl": "https://b-onetv.cd" },
|
||||
{
|
||||
"sourceId": "bukavufm.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://bukavufm.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "changement7.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://changement7.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoactu.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://congoactu.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoindependant.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.congoindependant.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "congoquotidien.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.congoquotidien.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "cumulard.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.cumulard.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "environews-rdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://environews-rdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "freemediardc.info",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.freemediardc.info"
|
||||
},
|
||||
{
|
||||
"sourceId": "geopolismagazine.org",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://geopolismagazine.org"
|
||||
},
|
||||
{
|
||||
"sourceId": "habarirdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://habarirdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "infordc.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://infordc.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "kilalopress.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://kilalopress.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "laprosperiteonline.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://laprosperiteonline.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "laprunellerdc.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://laprunellerdc.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "lesmedias.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lesmedias.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "lesvolcansnews.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lesvolcansnews.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "netic-news.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.netic-news.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "objectif-infos.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://objectif-infos.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "scooprdc.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://scooprdc.net"
|
||||
},
|
||||
{
|
||||
"sourceId": "journaldekinshasa.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://www.journaldekinshasa.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "lepotentiel.cd",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://lepotentiel.cd"
|
||||
},
|
||||
{
|
||||
"sourceId": "acturdc.com",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://acturdc.com"
|
||||
},
|
||||
{
|
||||
"sourceId": "matininfos.net",
|
||||
"sourceKind": "wordpress",
|
||||
"sourceUrl": "https://matininfos.net"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"database": {
|
||||
"legacy": {
|
||||
"host": "%env(BASANGO_DATABASE_LEGACY_HOST)%",
|
||||
"name": "%env(BASANGO_DATABASE_LEGACY_NAME)%",
|
||||
"password": "%env(BASANGO_DATABASE_LEGACY_PASSWORD)%",
|
||||
"port": "%env(number:BASANGO_DATABASE_LEGACY_PORT)%",
|
||||
"user": "%env(BASANGO_DATABASE_LEGACY_USER)%"
|
||||
},
|
||||
"url": "%env(BASANGO_DATABASE_URL)%"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"encryption": {
|
||||
"algorithm": "aes-256-gcm",
|
||||
"authTagLength": 16,
|
||||
"bcryptSaltRounds": 12,
|
||||
"ivLength": 16,
|
||||
"key": "%env(BASANGO_ENCRYPTION_KEY)%"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"logger": {
|
||||
"level": "%env(BASANGO_LOGGER_LEVEL)%"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"shared": {
|
||||
"categorySharesLimit": 10,
|
||||
"dateFormat": "yyyy-LL-dd",
|
||||
"dateTimeFormat": "yyyy-LL-dd'T'HH:mm:ss",
|
||||
"name": "Basango",
|
||||
"pagination": {
|
||||
"defaultLimit": 20,
|
||||
"maxLimit": 100,
|
||||
"page": 1
|
||||
},
|
||||
"publicationGraphDays": 30,
|
||||
"timezone": "Africa/Lubumbashi"
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@
|
||||
"@basango/tsconfig": "workspace:*"
|
||||
},
|
||||
"exports": {
|
||||
"./config": "./src/config/index.ts",
|
||||
"./constants": "./src/constants.ts",
|
||||
"./crawler": "./src/crawler/index.ts",
|
||||
"./models": "./src/models/index.ts"
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
import z from "zod";
|
||||
|
||||
/** Creates a fresh `string[]` schema that resolves to an empty list when the key is absent. */
const stringList = () => z.array(z.string()).default([]);

/** `api.cors` section: header/method lists, optional max age and the allowed origins. */
const CorsSchema = z.object({
  allowedHeaders: stringList(),
  allowMethods: stringList(),
  exposeHeaders: stringList(),
  maxAge: z.number().int().min(0).optional(),
  // When no origin list is configured, these three origins are used.
  origin: z
    .array(z.string())
    .optional()
    .default(["http://localhost:3000", "http://127.0.0.1:3000", "https://dashboard.basango.io"]),
});

/** `api.security` section: every entry is a required string. */
const SecuritySchema = z.object({
  accessTokenTtl: z.string(),
  audience: z.string(),
  crawlerToken: z.string(),
  issuer: z.string(),
  jwtSecret: z.string(),
  refreshTokenTtl: z.string(),
});

/** `api.server` section: host, port (integer in 1..65535) and version, each with a default. */
const ServerSchema = z.object({
  host: z.string().default("localhost"),
  port: z.number().int().min(1).max(65535).default(3080),
  version: z.string().default("1.0.0"),
});

/** Validation schema for the `api` configuration section. */
export const ApiConfigurationSchema = z.object({
  cors: CorsSchema,
  security: SecuritySchema,
  server: ServerSchema,
});

/** Parsed shape of the `api` configuration section. */
export type ApiConfiguration = z.infer<typeof ApiConfigurationSchema>;
|
||||
@@ -0,0 +1,107 @@
|
||||
import { z } from "zod";
|
||||
|
||||
import { SOURCE_KINDS } from "../constants";
|
||||
import { PageRangeSchema, TimestampRangeSchema, UpdateDirectionSchema } from "../models";
|
||||
|
||||
/** Source kind, restricted to the values listed in `SOURCE_KINDS`. */
export const SourceKindSchema = z.enum(SOURCE_KINDS);

// Date format used when a source does not declare its own.
const FALLBACK_SOURCE_DATE_FORMAT = "yyyy-LL-dd HH:mm";

/** Date options of a source; `format` falls back to the format above when omitted. */
export const SourceDateSchema = z.object({
  format: z.string().default(FALLBACK_SOURCE_DATE_FORMAT),
});
|
||||
|
||||
/** Creates a boolean schema that resolves to `false` when the key is absent. */
const offByDefault = () => z.boolean().default(false);

/**
 * Options common to every source kind. The kind-specific schemas extend this
 * object and narrow `sourceKind` to a single literal.
 */
const SourceOptionsSchema = z.object({
  categories: z.array(z.string()).default([]),
  requiresDetails: offByDefault(),
  requiresRateLimit: offByDefault(),
  sourceDate: SourceDateSchema,
  sourceId: z.string(),
  sourceKind: SourceKindSchema,
  sourceUrl: z.url(),
  supportsCategories: offByDefault(),
});
|
||||
|
||||
/**
 * Selector strings of an HTML source. `articleCategories` may be omitted and
 * `pagination` falls back to "ul.pagination > li a".
 */
const HtmlSourceSelectorsSchema = z.object({
  articleBody: z.string(),
  articleCategories: z.string().optional(),
  articleDate: z.string(),
  articleLink: z.string(),
  articles: z.string(),
  articleTitle: z.string(),
  pagination: z.string().default("ul.pagination > li a"),
});

/** Options of an `html` source: the common options plus a pagination template and selectors. */
export const HtmlSourceOptionsSchema = SourceOptionsSchema.extend({
  paginationTemplate: z.string(),
  sourceKind: z.literal("html"),
  sourceSelectors: HtmlSourceSelectorsSchema,
});
|
||||
|
||||
// Date options applied to a WordPress source that declares no `sourceDate`.
const wordPressSourceDate = SourceDateSchema.parse({ format: "yyyy-LL-dd'T'HH:mm:ss" });

/** Options of a `wordpress` source: the common options with `sourceDate` made optional. */
export const WordPressSourceOptionsSchema = SourceOptionsSchema.extend({
  sourceDate: SourceDateSchema.default(wordPressSourceDate),
  sourceKind: z.literal("wordpress"),
});
|
||||
|
||||
/**
 * Validation schema for the `crawler` configuration section.
 *
 * Sections:
 * - `backend`: endpoint URL and token.
 * - `fetch.async`: key prefix, queue names, Redis URL and TTL values.
 * - `fetch.client`: retry/backoff, redirect, timeout, user-agent and TLS options.
 * - `fetch.crawler`: per-run options (category, ranges, direction, worker count, source).
 * - `paths`: `data` and `root` paths (both required).
 * - `sources`: the declared `html` and `wordpress` sources.
 *
 * Every `fetch.*` leaf either has a default or is optional.
 */
export const CrawlerConfigurationSchema = z.object({
  backend: z.object({
    endpoint: z.url(),
    token: z.string(),
  }),
  fetch: z.object({
    async: z.object({
      prefix: z.string().default("basango:crawler:queue"),
      queues: z.object({
        details: z.string().default("details"),
        listing: z.string().default("listing"),
        processing: z.string().default("processing"),
      }),
      redisUrl: z.string().default("redis://localhost:6379/0"),
      ttl: z.object({
        default: z.number().int().positive().default(600),
        failure: z.number().int().nonnegative().default(3600),
        result: z.number().int().nonnegative().default(3600),
      }),
    }),
    client: z.object({
      backoffInitial: z.number().nonnegative().default(1),
      backoffMax: z.number().nonnegative().default(30),
      backoffMultiplier: z.number().positive().default(2),
      followRedirects: z.boolean().default(true),
      maxRetries: z.number().int().nonnegative().default(3),
      respectRetryAfter: z.boolean().default(true),
      rotate: z.boolean().default(true),
      timeout: z.number().positive().default(20),
      userAgent: z.string().default("Basango/0.1 (+https://github.com/bernard-ng/basango)"),
      verifySsl: z.boolean().default(true),
    }),
    crawler: z.object({
      category: z.string().optional(),
      dateRange: TimestampRangeSchema.optional(),
      direction: UpdateDirectionSchema.default("forward"),
      isUpdate: z.boolean().default(false),
      maxWorkers: z.number().int().positive().default(5),
      notify: z.boolean().default(false),
      pageRange: PageRangeSchema.optional(),
      // Both members carry a literal `sourceKind`, so the union is discriminated on it:
      // the matching member is selected by tag and validation errors point at that member
      // instead of reporting failures for every branch.
      source: z
        .discriminatedUnion("sourceKind", [HtmlSourceOptionsSchema, WordPressSourceOptionsSchema])
        .optional(),
      useMultiThreading: z.boolean().default(false),
    }),
  }),
  paths: z.object({
    data: z.string(),
    root: z.string(),
  }),
  sources: z.object({
    html: z.array(HtmlSourceOptionsSchema).default([]),
    wordpress: z.array(WordPressSourceOptionsSchema).default([]),
  }),
});
|
||||
|
||||
// types

/** One of the values accepted by `SourceKindSchema`. */
export type SourceKind = z.infer<typeof SourceKindSchema>;
/** Parsed date options of a source. */
export type SourceDate = z.infer<typeof SourceDateSchema>;
/** Parsed options of an `html` source. */
export type HtmlSourceOptions = z.infer<typeof HtmlSourceOptionsSchema>;
/** Parsed options of a `wordpress` source. */
export type WordPressSourceOptions = z.infer<typeof WordPressSourceOptionsSchema>;
/** Parsed options of any supported source kind. */
export type AnySourceOptions = HtmlSourceOptions | WordPressSourceOptions;

/** Parsed shape of the whole `crawler` configuration section. */
export type CrawlerConfiguration = z.infer<typeof CrawlerConfigurationSchema>;

// The `fetch` section, used to derive the three option types below.
type CrawlerFetchSection = CrawlerConfiguration["fetch"];

/** Options found under `crawler.fetch.client`. */
export type CrawlerHttpOptions = CrawlerFetchSection["client"];
/** Options found under `crawler.fetch.crawler`. */
export type CrawlerFetchingOptions = CrawlerFetchSection["crawler"];
/** Options found under `crawler.fetch.async`. */
export type CrawlerAsyncOptions = CrawlerFetchSection["async"];
/** Options found under `crawler.backend`. */
export type CrawlerBackendOptions = CrawlerConfiguration["backend"];
|
||||
@@ -0,0 +1,15 @@
|
||||
import z from "zod";
|
||||
|
||||
/**
 * Validation schema for the `database` configuration section.
 *
 * - `legacy`: connection settings of the legacy database; `host`, `name`, `password`
 *   and `user` must be non-empty strings, `port` may be omitted.
 * - `url`: non-empty connection string of the main database.
 */
export const DatabaseConfigurationSchema = z.object({
  legacy: z.object({
    host: z.string().min(1),
    name: z.string().min(1),
    password: z.string().min(1),
    // A port is a whole number in 1..65535 (same rule as the API server port);
    // fractional, zero, negative or oversized values are rejected at load time.
    port: z.number().int().min(1).max(65535).optional(),
    user: z.string().min(1),
  }),
  url: z.string().min(1),
});

// types

/** Parsed shape of the `database` configuration section. */
export type DatabaseConfiguration = z.infer<typeof DatabaseConfigurationSchema>;
|
||||
@@ -0,0 +1,18 @@
|
||||
import z from "zod";
|
||||
|
||||
import {
|
||||
DEFAULT_AUTH_TAG_LENGTH,
|
||||
DEFAULT_BCRYPT_SALT_ROUNDS,
|
||||
DEFAULT_IV_LENGTH,
|
||||
} from "../constants";
|
||||
|
||||
/**
 * Validation schema for the `encryption` configuration section.
 *
 * - `algorithm`: one of the three supported AES-GCM variants.
 * - `authTagLength`, `bcryptSaltRounds`, `ivLength`: non-negative whole numbers that
 *   fall back to the `DEFAULT_*` constants when omitted. Fractional values are rejected
 *   because lengths and round counts cannot be fractional.
 * - `key`: required string.
 */
export const EncryptionConfigurationSchema = z.object({
  algorithm: z.enum(["aes-128-gcm", "aes-192-gcm", "aes-256-gcm"]),
  authTagLength: z.number().int().nonnegative().default(DEFAULT_AUTH_TAG_LENGTH),
  bcryptSaltRounds: z.number().int().nonnegative().default(DEFAULT_BCRYPT_SALT_ROUNDS),
  ivLength: z.number().int().nonnegative().default(DEFAULT_IV_LENGTH),
  key: z.string(),
});

// types

/** Parsed shape of the `encryption` configuration section. */
export type EncryptionConfiguration = z.infer<typeof EncryptionConfigurationSchema>;
|
||||
@@ -0,0 +1,72 @@
|
||||
import path from "node:path";
|
||||
|
||||
import { defineConfig } from "@devscast/config";
|
||||
import z from "zod";
|
||||
|
||||
import { ApiConfigurationSchema } from "./api";
|
||||
import { CrawlerConfigurationSchema } from "./crawler";
|
||||
import { DatabaseConfigurationSchema } from "./database";
|
||||
import { EncryptionConfigurationSchema } from "./encryption";
|
||||
import { LoggerConfigurationSchema } from "./logger";
|
||||
import { SharedConfigurationSchema } from "./shared";
|
||||
|
||||
export * from "./api";
|
||||
export * from "./crawler";
|
||||
export * from "./database";
|
||||
export * from "./encryption";
|
||||
export * from "./logger";
|
||||
export * from "./shared";
|
||||
|
||||
// Directory four levels above this module; the `.env` file is read from here.
const root = path.resolve(__dirname, "../../../../");
// Directory holding the JSON configuration sources listed at the bottom of this call.
const domain = path.join(root, "packages", "domain", "config");

/**
 * Central configuration entry point.
 *
 * Loads the JSON sources below, validates the merged result against one schema
 * per section, and exposes the resulting `config` object together with the
 * `env` accessor returned by `defineConfig`.
 *
 * `knownKeys` lists every environment variable referenced by a `%env(...)%`
 * placeholder in the JSON sources, plus the variables previously declared here.
 */
export const { env, config } = defineConfig({
  env: {
    knownKeys: [
      "NODE_ENV",
      "BASANGO_API_HOST",
      "BASANGO_API_PORT",
      "BASANGO_API_ALLOWED_ORIGINS",
      "BASANGO_API_KEY",
      "BASANGO_API_CRAWLER_TOKEN",
      // Referenced by crawler.json (`crawler.backend.endpoint`).
      "BASANGO_API_CRAWLER_ENDPOINT",
      "BASANGO_API_JWT_SECRET",
      "BASANGO_DATABASE_URL",
      "BASANGO_DATABASE_LEGACY_HOST",
      "BASANGO_DATABASE_LEGACY_PASSWORD",
      "BASANGO_DATABASE_LEGACY_NAME",
      "BASANGO_DATABASE_LEGACY_USER",
      // Referenced by database.json (`database.legacy.port`).
      "BASANGO_DATABASE_LEGACY_PORT",
      "BASANGO_CRAWLER_ROOT_PATH",
      "BASANGO_CRAWLER_DATA_PATH",
      "BASANGO_CRAWLER_LOGS_PATH",
      "BASANGO_CRAWLER_CONFIG_PATH",
      "BASANGO_CRAWLER_UPDATE_DIRECTION",
      "BASANGO_CRAWLER_FETCH_USER_AGENT",
      "BASANGO_CRAWLER_FETCH_MAX_RETRIES",
      "BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER",
      "BASANGO_CRAWLER_ASYNC_REDIS_URL",
      "BASANGO_CRAWLER_ASYNC_TTL_RESULT",
      "BASANGO_CRAWLER_ASYNC_TTL_FAILURE",
      "BASANGO_CRAWLER_ASYNC_QUEUE_LISTING",
      "BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS",
      "BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING",
      "BASANGO_ENCRYPTION_KEY",
      // Referenced by logger.json (`logger.level`).
      "BASANGO_LOGGER_LEVEL",
    ] as const,
    path: path.join(root, ".env"),
  },
  schema: z.object({
    api: ApiConfigurationSchema,
    crawler: CrawlerConfigurationSchema,
    database: DatabaseConfigurationSchema,
    encryption: EncryptionConfigurationSchema,
    logger: LoggerConfigurationSchema,
    shared: SharedConfigurationSchema,
  }),
  sources: [
    path.join(domain, "api.json"),
    path.join(domain, "crawler.json"),
    path.join(domain, "database.json"),
    path.join(domain, "encryption.json"),
    path.join(domain, "logger.json"),
    path.join(domain, "shared.json"),
  ],
});
|
||||
@@ -0,0 +1,8 @@
|
||||
import z from "zod";
|
||||
|
||||
// Level used when the configuration does not provide one.
const FALLBACK_LOG_LEVEL = "info";

/** Validation schema for the `logger` configuration section. */
export const LoggerConfigurationSchema = z.object({
  level: z.string().default(FALLBACK_LOG_LEVEL),
});

// types

/** Parsed shape of the `logger` configuration section. */
export type LoggerConfiguration = z.infer<typeof LoggerConfigurationSchema>;
|
||||
@@ -0,0 +1,17 @@
|
||||
import z from "zod";
|
||||
|
||||
/** Creates a fresh schema accepting whole numbers greater than or equal to 1. */
const wholeNumberFromOne = () => z.number().int().min(1);

/** `shared.pagination` section: both limits are capped at 100, `page` starts at 1. */
const PaginationSchema = z.object({
  defaultLimit: wholeNumberFromOne().max(100),
  maxLimit: wholeNumberFromOne().max(100),
  page: wholeNumberFromOne(),
});

/**
 * Validation schema for the `shared` configuration section. Only
 * `categorySharesLimit` and `name` have defaults; every other entry is required.
 */
export const SharedConfigurationSchema = z.object({
  categorySharesLimit: wholeNumberFromOne().default(10),
  dateFormat: z.string(),
  dateTimeFormat: z.string(),
  name: z.string().default("Basango"),
  pagination: PaginationSchema,
  publicationGraphDays: wholeNumberFromOne(),
  timezone: z.string(),
});

/** Parsed shape of the `shared` configuration section. */
export type SharedConfiguration = z.infer<typeof SharedConfigurationSchema>;
|
||||
@@ -1,10 +1,8 @@
|
||||
// Domain-specific constants and types
|
||||
export const BIAS = ["neutral", "slightly", "partisan", "extreme"] as const;
|
||||
export const RELIABILITY = ["trusted", "reliable", "average", "low_trust", "unreliable"] as const;
|
||||
export const TRANSPARENCY = ["high", "medium", "low"] as const;
|
||||
export const SENTIMENT = ["positive", "neutral", "negative"] as const;
|
||||
|
||||
// Crawler-related constants and types
|
||||
export const UPDATE_DIRECTIONS = ["forward", "backward"] as const;
|
||||
export const SOURCE_KINDS = ["wordpress", "html"] as const;
|
||||
|
||||
@@ -32,5 +30,5 @@ export const DEFAULT_AUTH_TAG_LENGTH = 16;
|
||||
export const DEFAULT_BCRYPT_SALT_ROUNDS = 12;
|
||||
export const DEFAULT_TOKEN_AUDIENCE = "basango_dashboard";
|
||||
export const DEFAULT_TOKEN_ISSUER = "basango_api";
|
||||
export const DEFAULT_ACCESS_TOKEN_TTL = "15m";
|
||||
export const DEFAULT_ACCESS_TOKEN_TTL = "35m";
|
||||
export const DEFAULT_REFRESH_TOKEN_TTL = "7d";
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
import { z } from "zod";
|
||||
|
||||
import { SOURCE_KINDS } from "#domain/constants";
|
||||
|
||||
// schemas
|
||||
export const SourceKindSchema = z.enum(SOURCE_KINDS);
|
||||
|
||||
export const SourceDateSchema = z.object({
|
||||
format: z.string().default("yyyy-LL-dd HH:mm"),
|
||||
});
|
||||
|
||||
const SourceConfigSchema = z.object({
|
||||
categories: z.array(z.string()).default([]),
|
||||
requiresDetails: z.boolean().default(false),
|
||||
requiresRateLimit: z.boolean().default(false),
|
||||
sourceDate: SourceDateSchema,
|
||||
sourceId: z.string(),
|
||||
sourceKind: SourceKindSchema,
|
||||
sourceUrl: z.url(),
|
||||
supportsCategories: z.boolean().default(false),
|
||||
});
|
||||
|
||||
export const HtmlSourceConfigSchema = SourceConfigSchema.extend({
|
||||
paginationTemplate: z.string(),
|
||||
sourceKind: z.literal("html"),
|
||||
sourceSelectors: z.object({
|
||||
articleBody: z.string(),
|
||||
articleCategories: z.string().optional(),
|
||||
articleDate: z.string(),
|
||||
articleLink: z.string(),
|
||||
articles: z.string(),
|
||||
articleTitle: z.string(),
|
||||
pagination: z.string().default("ul.pagination > li a"),
|
||||
}),
|
||||
});
|
||||
|
||||
export const WordPressSourceConfigSchema = SourceConfigSchema.extend({
|
||||
sourceDate: SourceDateSchema.default(SourceDateSchema.parse({ format: "yyyy-LL-dd'T'HH:mm:ss" })),
|
||||
sourceKind: z.literal("wordpress"),
|
||||
});
|
||||
|
||||
// types
|
||||
export type SourceKind = z.infer<typeof SourceKindSchema>;
|
||||
export type SourceDate = z.infer<typeof SourceDateSchema>;
|
||||
export type HtmlSourceConfig = z.infer<typeof HtmlSourceConfigSchema>;
|
||||
export type WordPressSourceConfig = z.infer<typeof WordPressSourceConfigSchema>;
|
||||
export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig;
|
||||
@@ -1,2 +0,0 @@
|
||||
export * from "./config";
|
||||
export * from "./schemas";
|
||||
@@ -1,185 +1,65 @@
|
||||
import { z } from "@hono/zod-openapi";
|
||||
|
||||
import { idSchema, sentimentSchema } from "#domain/models/shared";
|
||||
import z from "zod";
|
||||
|
||||
import { idSchema, sentimentSchema } from "./shared";
|
||||
import { sourceSchema } from "./sources";
|
||||
|
||||
// schemas
|
||||
export const articleMetadataSchema = z.object({
|
||||
author: z.string().optional().openapi({
|
||||
description: "The author of the article.",
|
||||
example: "John Doe",
|
||||
}),
|
||||
description: z.string().optional().openapi({
|
||||
description: "A brief description or summary of the article.",
|
||||
example: "This article discusses the latest advancements in AI technology.",
|
||||
}),
|
||||
image: z.url().optional().openapi({
|
||||
description: "The URL of the main image associated with the article.",
|
||||
example: "https://example.com/image.jpg",
|
||||
}),
|
||||
publishedAt: z.date().optional().openapi({
|
||||
description: "The publication date of the article as a Date object.",
|
||||
example: "2023-01-01T00:00:00Z",
|
||||
}),
|
||||
title: z.string().optional().openapi({
|
||||
description: "The title of the article for metadata purposes.",
|
||||
example: "The Rise of AI",
|
||||
}),
|
||||
updatedAt: z.date().optional().openapi({
|
||||
description: "The last updated date of the article as a Date object.",
|
||||
example: "2023-01-02T12:00:00Z",
|
||||
}),
|
||||
url: z.url().optional().openapi({
|
||||
description: "The canonical URL of the article.",
|
||||
example: "https://example.com/article",
|
||||
}),
|
||||
author: z.string().optional(),
|
||||
description: z.string().optional(),
|
||||
image: z.url().optional(),
|
||||
publishedAt: z.date().optional(),
|
||||
title: z.string().optional(),
|
||||
updatedAt: z.date().optional(),
|
||||
url: z.url().optional(),
|
||||
});
|
||||
|
||||
export const tokenStatisticsSchema = z.object({
|
||||
body: z.number().optional().default(0).openapi({
|
||||
description: "The number of tokens in the article body.",
|
||||
example: 250,
|
||||
}),
|
||||
categories: z.number().optional().default(0).openapi({
|
||||
description: "The number of tokens in the article categories.",
|
||||
example: 3,
|
||||
}),
|
||||
excerpt: z.number().optional().default(0).openapi({
|
||||
description: "The number of tokens in the article excerpt.",
|
||||
example: 50,
|
||||
}),
|
||||
title: z.number().optional().default(0).openapi({
|
||||
description: "The number of tokens in the article title.",
|
||||
example: 10,
|
||||
}),
|
||||
total: z.number().optional().default(0).openapi({
|
||||
description: "The total number of tokens in the article.",
|
||||
example: 313,
|
||||
}),
|
||||
body: z.number().optional().default(0),
|
||||
categories: z.number().optional().default(0),
|
||||
excerpt: z.number().optional().default(0),
|
||||
title: z.number().optional().default(0),
|
||||
total: z.number().optional().default(0),
|
||||
});
|
||||
|
||||
export const articleSchema = z.object({
|
||||
body: z.string().min(1).openapi({
|
||||
description: "The main content of the article.",
|
||||
example: "This is the body of the article...",
|
||||
}),
|
||||
categories: z.array(z.string()).openapi({
|
||||
description: "The categories or tags associated with the article.",
|
||||
example: ["Technology", "AI"],
|
||||
}),
|
||||
createdAt: z.date().openapi({
|
||||
description: "The date and time when the article was created in the system.",
|
||||
example: "2023-01-01T12:00:00Z",
|
||||
}),
|
||||
excerpt: z.string().optional().openapi({
|
||||
description: "A brief excerpt or summary of the article.",
|
||||
example: "This article discusses the latest advancements in AI technology.",
|
||||
}),
|
||||
hash: z.string().min(1).openapi({
|
||||
description: "The unique hash of the article link.",
|
||||
example: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
}),
|
||||
body: z.string().min(1),
|
||||
categories: z.array(z.string()),
|
||||
createdAt: z.date(),
|
||||
excerpt: z.string().optional(),
|
||||
hash: z.string().min(1),
|
||||
id: idSchema,
|
||||
image: z.url().optional().openapi({
|
||||
description: "The URL of the main image associated with the article.",
|
||||
example: "https://example.com/image.jpg",
|
||||
}),
|
||||
link: z.string().url().openapi({
|
||||
description: "The URL of the article.",
|
||||
example: "https://example.com/article",
|
||||
}),
|
||||
image: z.url().optional(),
|
||||
link: z.url(),
|
||||
metadata: articleMetadataSchema.optional(),
|
||||
publishedAt: z.date().openapi({
|
||||
description: "The publication date of the article as a Date object.",
|
||||
example: "2023-01-01T00:00:00Z",
|
||||
}),
|
||||
readingTime: z.number().int().min(1).openapi({
|
||||
description: "Estimated reading time of the article in minutes.",
|
||||
example: 5,
|
||||
}),
|
||||
publishedAt: z.date(),
|
||||
readingTime: z.number().int().min(1),
|
||||
source: sourceSchema.optional(),
|
||||
sourceId: z.union([z.uuid(), z.string().min(1)]).openapi({
|
||||
description: "The unique identifier of the source from which the article was crawled.",
|
||||
example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g",
|
||||
}),
|
||||
title: z.string().min(1).openapi({
|
||||
description: "The title of the article.",
|
||||
example: "The Rise of AI",
|
||||
}),
|
||||
sourceId: z.union([z.uuid(), z.string().min(1)]),
|
||||
title: z.string().min(1),
|
||||
tokenStatistics: tokenStatisticsSchema.optional(),
|
||||
updatedAt: z.date().optional().openapi({
|
||||
description: "The date and time when the article was last updated in the system.",
|
||||
example: "2023-01-02T12:00:00Z",
|
||||
}),
|
||||
updatedAt: z.date().optional(),
|
||||
});
|
||||
|
||||
// API
|
||||
export const createArticleSchema = z
|
||||
.object({
|
||||
body: z.string().min(1).openapi({
|
||||
description: "The main content of the article.",
|
||||
example: "This is the body of the article...",
|
||||
}),
|
||||
categories: z
|
||||
.array(z.string())
|
||||
.openapi({
|
||||
description: "The categories or tags associated with the article.",
|
||||
example: ["Technology", "AI"],
|
||||
})
|
||||
.optional()
|
||||
.default([]),
|
||||
hash: z.string().min(1).openapi({
|
||||
description: "The unique hash of the article link.",
|
||||
example: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
}),
|
||||
link: z.string().url().openapi({
|
||||
description: "The URL of the article.",
|
||||
example: "https://example.com/article",
|
||||
}),
|
||||
metadata: articleMetadataSchema.optional(),
|
||||
publishedAt: z
|
||||
.string()
|
||||
.refine((value) => !Number.isNaN(Date.parse(value)), {
|
||||
message: "Invalid date format",
|
||||
})
|
||||
.transform((value) => new Date(value))
|
||||
.openapi({
|
||||
description: "The publication date of the article in ISO 8601 format.",
|
||||
example: "2023-01-01T00:00:00Z",
|
||||
}),
|
||||
sourceId: z.string().openapi({
|
||||
description: "The unique identifier of the source from which the article was crawled.",
|
||||
example: "radiookapi.net",
|
||||
}),
|
||||
title: z.string().min(1).openapi({
|
||||
description: "The title of the article.",
|
||||
example: "The Rise of AI",
|
||||
}),
|
||||
})
|
||||
.openapi("CreateArticle");
|
||||
export const createArticleSchema = z.object({
|
||||
body: z.string().min(1),
|
||||
categories: z.array(z.string()).optional().default([]),
|
||||
hash: z.string().min(1),
|
||||
link: z.url(),
|
||||
metadata: articleMetadataSchema.optional(),
|
||||
// Restrict the input to strings/Dates before coercing: a bare z.coerce.date()
// runs `new Date(input)` on anything, so `null` would silently become 1970-01-01.
publishedAt: z.union([z.string(), z.date()]).pipe(z.coerce.date()),
|
||||
sourceId: z.string(),
|
||||
title: z.string().min(1),
|
||||
});
|
||||
|
||||
export const createArticleResponseSchema = z
|
||||
.object({ id: idSchema, sourceId: idSchema })
|
||||
.openapi("CreateArticleResponse");
|
||||
export const createArticleResponseSchema = z.object({ id: idSchema, sourceId: idSchema });
|
||||
|
||||
export const getArticlesSchema = z.object({
|
||||
category: z.string().min(1).max(255).optional().openapi({
|
||||
description: "Filter articles by a specific category.",
|
||||
example: "Technology",
|
||||
}),
|
||||
cursor: z.string().nullable().optional().openapi({
|
||||
description: "Optional cursor for fetching the next page of articles.",
|
||||
}),
|
||||
limit: z.number().int().min(1).max(100).optional().openapi({
|
||||
default: 10,
|
||||
description: "Maximum number of articles to return per page.",
|
||||
example: 20,
|
||||
}),
|
||||
search: z.string().max(512).optional().openapi({
|
||||
description: "Full-text search query applied to article titles and bodies.",
|
||||
example: "gouvernement congolais",
|
||||
}),
|
||||
category: z.string().min(1).max(255).optional(),
|
||||
cursor: z.string().nullable().optional(),
|
||||
limit: z.number().int().min(1).max(100).optional(),
|
||||
search: z.string().max(512).optional(),
|
||||
sentiment: sentimentSchema.optional(),
|
||||
sourceId: idSchema.optional(),
|
||||
});
|
||||
|
||||
@@ -1,18 +1,10 @@
|
||||
import { z } from "@hono/zod-openapi";
|
||||
import z from "zod";
|
||||
|
||||
export const loginSchema = z.object({
|
||||
email: z.email().openapi({
|
||||
description: "Email address used to authenticate the user.",
|
||||
example: "user@example.com",
|
||||
}),
|
||||
password: z.string().min(8).openapi({
|
||||
description: "Account password.",
|
||||
example: "••••••••",
|
||||
}),
|
||||
email: z.email(),
|
||||
password: z.string().min(8),
|
||||
});
|
||||
|
||||
export const refreshSessionSchema = z.object({
|
||||
refreshToken: z.string().min(1).openapi({
|
||||
description: "Refresh token returned when logging in.",
|
||||
}),
|
||||
refreshToken: z.string().min(1),
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { z } from "zod";
|
||||
import z from "zod";
|
||||
|
||||
import { UPDATE_DIRECTIONS } from "#domain/constants";
|
||||
import { UPDATE_DIRECTIONS } from "../constants";
|
||||
|
||||
// schemas
|
||||
export const UpdateDirectionSchema = z.enum(UPDATE_DIRECTIONS);
|
||||
@@ -1,5 +1,6 @@
|
||||
export * from "./articles";
|
||||
export * from "./auth";
|
||||
export * from "./crawler";
|
||||
export * from "./reports";
|
||||
export * from "./shared";
|
||||
export * from "./sources";
|
||||
|
||||
@@ -1,30 +1,17 @@
|
||||
import { z } from "@hono/zod-openapi";
|
||||
import z from "zod";
|
||||
|
||||
import { deltaSchema } from "#domain/models/shared";
|
||||
import { deltaSchema } from "./shared";
|
||||
|
||||
export const overviewMetricSchema = z
|
||||
.object({
|
||||
delta: deltaSchema.openapi({
|
||||
description: "Change measured over the last 30 days compared to the previous 30-day window.",
|
||||
}),
|
||||
total: z.number().int().nonnegative().openapi({
|
||||
description: "Total count across the entire dataset.",
|
||||
example: 12584,
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Aggregated metric with total count and delta metadata.",
|
||||
});
|
||||
export const overviewMetricSchema = z.object({
|
||||
delta: deltaSchema,
|
||||
total: z.number().int().nonnegative(),
|
||||
});
|
||||
|
||||
export const dashboardOverviewSchema = z
|
||||
.object({
|
||||
articles: overviewMetricSchema,
|
||||
sources: overviewMetricSchema,
|
||||
users: overviewMetricSchema,
|
||||
})
|
||||
.openapi({
|
||||
description: "Dashboard overview metrics for key entities.",
|
||||
});
|
||||
export const dashboardOverviewSchema = z.object({
|
||||
articles: overviewMetricSchema,
|
||||
sources: overviewMetricSchema,
|
||||
users: overviewMetricSchema,
|
||||
});
|
||||
|
||||
export type OverviewMetric = z.infer<typeof overviewMetricSchema>;
|
||||
export type DashboardOverview = z.infer<typeof dashboardOverviewSchema>;
|
||||
|
||||
@@ -1,138 +1,50 @@
|
||||
import { z } from "@hono/zod-openapi";
|
||||
|
||||
import { BIAS, RELIABILITY, SENTIMENT, TRANSPARENCY } from "#domain/constants";
|
||||
import { BIAS, RELIABILITY, SENTIMENT, TRANSPARENCY } from "../constants";
|
||||
|
||||
// schemas
|
||||
export const idSchema = z.uuid().openapi({
|
||||
description: "The unique identifier of the resource.",
|
||||
example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g",
|
||||
export const idSchema = z.uuid();
|
||||
|
||||
export const dateRangeSchema = z.object({
|
||||
end: z.coerce.date(),
|
||||
start: z.coerce.date(),
|
||||
});
|
||||
|
||||
export const dateRangeSchema = z
|
||||
.object({
|
||||
end: z.date().openapi({
|
||||
description: "The end date of the range.",
|
||||
example: "2023-01-30T23:59:59Z",
|
||||
}),
|
||||
start: z.date().openapi({
|
||||
description: "The start date of the range.",
|
||||
example: "2023-01-01T00:00:00Z",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Inclusive date range for publication metrics.",
|
||||
});
|
||||
export const limitSchema = z.number().int().min(1).max(100);
|
||||
export const sentimentSchema = z.enum(SENTIMENT);
|
||||
export const biasSchema = z.enum(BIAS);
|
||||
export const reliabilitySchema = z.enum(RELIABILITY);
|
||||
export const transparencySchema = z.enum(TRANSPARENCY);
|
||||
|
||||
export const limitSchema = z.number().int().min(1).max(100).openapi({
|
||||
default: 10,
|
||||
description: "The maximum number of items to return.",
|
||||
example: 10,
|
||||
export const credibilitySchema = z.object({
|
||||
bias: biasSchema.default("neutral"),
|
||||
reliability: reliabilitySchema.default("average"),
|
||||
transparency: transparencySchema.default("medium"),
|
||||
});
|
||||
|
||||
export const sentimentSchema = z.enum(SENTIMENT).openapi({
|
||||
description: "Sentiment detected for the article.",
|
||||
example: "positive",
|
||||
export const deviceSchema = z.object({
|
||||
client: z.string().optional(),
|
||||
device: z.string().optional(),
|
||||
isBot: z.boolean(),
|
||||
operatingSystem: z.string().optional(),
|
||||
});
|
||||
|
||||
export const biasSchema = z.enum(BIAS).openapi({
|
||||
description: "The bias level of the source.",
|
||||
example: "neutral",
|
||||
export const geoLocationSchema = z.object({
|
||||
accuracyRadius: z.number().optional(),
|
||||
city: z.string().optional(),
|
||||
country: z.string().optional(),
|
||||
latitude: z.number().optional(),
|
||||
longitude: z.number().optional(),
|
||||
timeZone: z.string().optional(),
|
||||
});
|
||||
|
||||
export const reliabilitySchema = z.enum(RELIABILITY).openapi({
|
||||
description: "The reliability level of the source.",
|
||||
example: "trusted",
|
||||
export const distrubtionSchema = z.object({
|
||||
count: z.number().int(),
|
||||
id: idSchema,
|
||||
name: z.string(),
|
||||
percentage: z.number(),
|
||||
});
|
||||
|
||||
export const transparencySchema = z.enum(TRANSPARENCY).openapi({
|
||||
description: "The transparency level of the source.",
|
||||
example: "high",
|
||||
});
|
||||
|
||||
export const credibilitySchema = z
|
||||
.object({
|
||||
bias: biasSchema.default("neutral"),
|
||||
reliability: reliabilitySchema.default("average"),
|
||||
transparency: transparencySchema.default("medium"),
|
||||
})
|
||||
.openapi({
|
||||
description: "Credibility information about the resource.",
|
||||
});
|
||||
|
||||
export const deviceSchema = z
|
||||
.object({
|
||||
client: z.string().optional().openapi({
|
||||
description: "The client software of the device.",
|
||||
example: "Chrome 90",
|
||||
}),
|
||||
device: z.string().optional().openapi({
|
||||
description: "The device model.",
|
||||
example: "Dell XPS 13",
|
||||
}),
|
||||
isBot: z.boolean().openapi({
|
||||
description: "Indicates if the device is a bot.",
|
||||
example: false,
|
||||
}),
|
||||
operatingSystem: z.string().optional().openapi({
|
||||
description: "The operating system of the device.",
|
||||
example: "Windows 10",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Information about the user's device.",
|
||||
});
|
||||
|
||||
export const geoLocationSchema = z
|
||||
.object({
|
||||
accuracyRadius: z.number().optional().openapi({
|
||||
description: "The accuracy radius in kilometers.",
|
||||
example: 50,
|
||||
}),
|
||||
city: z.string().optional().openapi({
|
||||
description: "The city of the user.",
|
||||
example: "San Francisco",
|
||||
}),
|
||||
country: z.string().optional().openapi({
|
||||
description: "The country of the user.",
|
||||
example: "United States",
|
||||
}),
|
||||
latitude: z.number().optional().openapi({
|
||||
description: "The latitude of the user's location.",
|
||||
example: 37.7749,
|
||||
}),
|
||||
longitude: z.number().optional().openapi({
|
||||
description: "The longitude of the user's location.",
|
||||
example: -122.4194,
|
||||
}),
|
||||
timeZone: z.string().optional().openapi({
|
||||
description: "The time zone of the user.",
|
||||
example: "America/Los_Angeles",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Geolocation information about the user.",
|
||||
});
|
||||
|
||||
export const distrubtionSchema = z
|
||||
.object({
|
||||
count: z.number().int().openapi({
|
||||
description: "The count of items in the distribution.",
|
||||
example: 42,
|
||||
}),
|
||||
id: idSchema,
|
||||
name: z.string().openapi({
|
||||
description: "The name of the distribution.",
|
||||
example: "Technology",
|
||||
}),
|
||||
percentage: z.number().openapi({
|
||||
description: "The percentage of items in the distribution.",
|
||||
example: 12.5,
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Distribution information.",
|
||||
});
|
||||
|
||||
export const getDistributionsSchema = z.object({
|
||||
id: idSchema.optional(),
|
||||
limit: limitSchema.optional(),
|
||||
@@ -143,172 +55,60 @@ export const getPublicationsSchema = z.object({
|
||||
range: dateRangeSchema.optional(),
|
||||
});
|
||||
|
||||
export const distributionsSchema = z
|
||||
.object({
|
||||
items: z.array(distrubtionSchema).openapi({
|
||||
description: "List of distributions.",
|
||||
}),
|
||||
total: z.number().int().openapi({
|
||||
description: "Total number of distributions.",
|
||||
example: 100,
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Distributions data.",
|
||||
});
|
||||
export const distributionsSchema = z.object({
|
||||
items: z.array(distrubtionSchema),
|
||||
total: z.number().int(),
|
||||
});
|
||||
|
||||
export const publicationSchema = z
|
||||
.object({
|
||||
count: z.number().int().openapi({
|
||||
description: "The number of articles published on that date.",
|
||||
example: 42,
|
||||
}),
|
||||
date: z.string().openapi({
|
||||
description: "The date of the publication.",
|
||||
example: "2023-01-15",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Publication metrics for a specific date.",
|
||||
});
|
||||
export const publicationSchema = z.object({
|
||||
count: z.number().int(),
|
||||
date: z.string(),
|
||||
});
|
||||
|
||||
export const deltaSchema = z
|
||||
.object({
|
||||
delta: z.number().openapi({
|
||||
description: "The absolute change in value.",
|
||||
example: 10,
|
||||
}),
|
||||
percentage: z.number().openapi({
|
||||
description: "The percentage change in value.",
|
||||
example: 25.0,
|
||||
}),
|
||||
sign: z.enum(["+", "-"]).openapi({
|
||||
description: "The sign of the change.",
|
||||
example: "+",
|
||||
}),
|
||||
variant: z.enum(["increase", "decrease", "positive"]).openapi({
|
||||
description: "The variant of the change.",
|
||||
example: "increase",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Delta information representing change over time.",
|
||||
});
|
||||
export const deltaSchema = z.object({
|
||||
delta: z.number(),
|
||||
percentage: z.number(),
|
||||
sign: z.enum(["+", "-"]),
|
||||
variant: z.enum(["increase", "decrease", "positive"]),
|
||||
});
|
||||
|
||||
export const publicationMetaSchema = z
|
||||
.object({
|
||||
current: z.number().openapi({
|
||||
description: "The current total value.",
|
||||
example: 150,
|
||||
}),
|
||||
delta: deltaSchema,
|
||||
previous: z.number().openapi({
|
||||
description: "The previous total value.",
|
||||
example: 120,
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Metadata for publication metrics.",
|
||||
});
|
||||
export const publicationMetaSchema = z.object({
|
||||
current: z.number(),
|
||||
delta: deltaSchema,
|
||||
previous: z.number(),
|
||||
});
|
||||
|
||||
export const publicationsSchema = z
|
||||
.object({
|
||||
items: z.array(publicationSchema).openapi({
|
||||
description: "List of publication metrics for the source.",
|
||||
}),
|
||||
meta: publicationMetaSchema.optional(),
|
||||
})
|
||||
.openapi({
|
||||
description: "Publication metrics for the source.",
|
||||
});
|
||||
export const publicationsSchema = z.object({
|
||||
items: z.array(publicationSchema),
|
||||
meta: publicationMetaSchema.optional(),
|
||||
});
|
||||
|
||||
export const paginationCursorSchema = z
|
||||
.object({
|
||||
date: z.string().openapi({
|
||||
description: "The date associated with the last item in the current page.",
|
||||
example: "2023-01-15",
|
||||
}),
|
||||
id: z.string().openapi({
|
||||
description: "The unique identifier of the last item in the current page.",
|
||||
example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7g",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Cursor information for pagination.",
|
||||
});
|
||||
export const paginationCursorSchema = z.object({
|
||||
date: z.string(),
|
||||
id: z.string(),
|
||||
});
|
||||
|
||||
export const paginationRequestSchema = z
|
||||
.object({
|
||||
cursor: z.string().nullable().optional().openapi({
|
||||
description: "The pagination cursor for cursor-based pagination.",
|
||||
example:
|
||||
"eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==",
|
||||
}),
|
||||
limit: limitSchema.optional(),
|
||||
page: z.number().int().min(1).optional().openapi({
|
||||
description: "The page number to retrieve.",
|
||||
example: 1,
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Pagination request parameters.",
|
||||
});
|
||||
export const paginationRequestSchema = z.object({
|
||||
cursor: z.string().nullable().optional(),
|
||||
limit: limitSchema.optional(),
|
||||
// Pages are 1-based whole numbers: reject 0 and fractional values (as the
// previous definition did) while keeping the first page as the default.
page: z.number().int().min(1).default(1).optional(),
|
||||
});
|
||||
|
||||
export const paginationStateSchema = z
|
||||
.object({
|
||||
cursor: z.string().nullable().openapi({
|
||||
description: "The current pagination cursor.",
|
||||
example:
|
||||
"eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==",
|
||||
}),
|
||||
limit: z.number().int().openapi({
|
||||
description: "The number of items per page.",
|
||||
example: 10,
|
||||
}),
|
||||
offset: z.number().int().openapi({
|
||||
description: "The offset for the current page.",
|
||||
example: 0,
|
||||
}),
|
||||
page: z.number().int().openapi({
|
||||
description: "The current page number.",
|
||||
example: 1,
|
||||
}),
|
||||
payload: paginationCursorSchema.nullable().openapi({
|
||||
description: "The decoded payload from the pagination cursor.",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Internal pagination state.",
|
||||
});
|
||||
export const paginationStateSchema = z.object({
|
||||
cursor: z.string().nullable(),
|
||||
limit: z.number().int(),
|
||||
offset: z.number().int(),
|
||||
page: z.number().int(),
|
||||
payload: paginationCursorSchema.nullable(),
|
||||
});
|
||||
|
||||
export const paginationMetaSchema = z
|
||||
.object({
|
||||
current: z.number().int().openapi({
|
||||
description: "The current page number or offset.",
|
||||
example: 1,
|
||||
}),
|
||||
cursor: z.string().nullable().openapi({
|
||||
description: "The current pagination cursor.",
|
||||
example:
|
||||
"eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdifQ==",
|
||||
}),
|
||||
hasNext: z.boolean().openapi({
|
||||
description: "Indicates if there is a next page available.",
|
||||
example: true,
|
||||
}),
|
||||
limit: z.number().int().openapi({
|
||||
description: "The number of items per page.",
|
||||
example: 10,
|
||||
}),
|
||||
nextCursor: z.string().nullable().openapi({
|
||||
description: "The next pagination cursor, if available.",
|
||||
example:
|
||||
"eyJkYXRlIjoiMjAyMy0wMS0yMCIsImlkIjoiZDRmNWU2ZTAtNzY4Ny00Y2E3LTg5ZTItYjY0ZGI3Y2E3ZGIifQ==",
|
||||
}),
|
||||
})
|
||||
.openapi({
|
||||
description: "Pagination metadata.",
|
||||
});
|
||||
export const paginationMetaSchema = z.object({
|
||||
current: z.number().int(),
|
||||
cursor: z.string().nullable(),
|
||||
hasNext: z.boolean(),
|
||||
limit: z.number().int(),
|
||||
nextCursor: z.string().nullable(),
|
||||
});
|
||||
|
||||
// types
|
||||
export type PaginatedResult<T> = {
|
||||
|
||||
@@ -1,37 +1,17 @@
|
||||
import { z } from "@hono/zod-openapi";
|
||||
import z from "zod";
|
||||
|
||||
import {
|
||||
credibilitySchema,
|
||||
idSchema,
|
||||
limitSchema,
|
||||
publicationsSchema,
|
||||
} from "#domain/models/shared";
|
||||
import { credibilitySchema, idSchema, limitSchema, publicationsSchema } from "./shared";
|
||||
|
||||
// schemas
|
||||
export const sourceSchema = z.object({
|
||||
articles: z.number().int().min(0).optional().openapi({
|
||||
description: "The total number of articles from this source.",
|
||||
example: 1250,
|
||||
}),
|
||||
articles: z.number().int().min(0).optional(),
|
||||
credibility: credibilitySchema.optional(),
|
||||
description: z.string().max(1024).optional().openapi({
|
||||
description: "A brief description of the source.",
|
||||
example: "Radio Okapi is a Congolese radio station that provides news and information.",
|
||||
}),
|
||||
displayName: z.string().min(1).max(255).optional().openapi({
|
||||
description: "The display name of the source.",
|
||||
example: "Radio Okapi",
|
||||
}),
|
||||
description: z.string().max(1024).optional(),
|
||||
displayName: z.string().min(1).max(255).optional(),
|
||||
id: idSchema,
|
||||
name: z.string().min(1).max(255).openapi({
|
||||
description: "The name of the source.",
|
||||
example: "radiookapi.com",
|
||||
}),
|
||||
name: z.string().min(1).max(255),
|
||||
publications: publicationsSchema.optional(),
|
||||
url: z.url().max(255).openapi({
|
||||
description: "The URL of the source.",
|
||||
example: "https://techcrunch.com",
|
||||
}),
|
||||
url: z.url().max(255),
|
||||
});
|
||||
|
||||
export const createSourceSchema = sourceSchema.pick({
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
BASANGO_ENCRYPTION_KEY=testkey
|
||||
@@ -1,18 +1,10 @@
|
||||
import crypto from "node:crypto";
|
||||
|
||||
import {
|
||||
DEFAULT_AUTH_TAG_LENGTH,
|
||||
DEFAULT_BCRYPT_SALT_ROUNDS,
|
||||
DEFAULT_ENCRYPTION_ALGORITHM,
|
||||
DEFAULT_IV_LENGTH,
|
||||
} from "@basango/domain/constants";
|
||||
import { createEnvAccessor } from "@devscast/config";
|
||||
import { config } from "@basango/domain/config";
|
||||
import * as bcrypt from "bcrypt";
|
||||
|
||||
export const env = createEnvAccessor(["BASANGO_ENCRYPTION_KEY"] as const);
|
||||
|
||||
function getKey(): Buffer {
|
||||
const key = env("BASANGO_ENCRYPTION_KEY");
|
||||
const key = config.encryption.key;
|
||||
|
||||
if (Buffer.from(key, "hex").length !== 32) {
|
||||
throw new Error("BASANGO_ENCRYPTION_KEY must be a 64-character hex string (32 bytes).");
|
||||
@@ -20,6 +12,12 @@ function getKey(): Buffer {
|
||||
return Buffer.from(key, "hex");
|
||||
}
|
||||
|
||||
/**
 * Reads the cipher settings (algorithm, auth-tag length, IV length) from
 * `config.encryption` each time it is called.
 *
 * NOTE(review): the algorithm is only type-asserted to `crypto.CipherGCMTypes`,
 * not validated at runtime here — confirm the config layer restricts it to a GCM cipher.
 */
const getEncryptionSettings = () => ({
|
||||
algorithm: config.encryption.algorithm as crypto.CipherGCMTypes,
|
||||
authTagLength: config.encryption.authTagLength,
|
||||
ivLength: config.encryption.ivLength,
|
||||
});
|
||||
|
||||
/**
|
||||
* Encrypts a plaintext string using AES-256-GCM.
|
||||
* @param text The plaintext string to encrypt.
|
||||
@@ -27,8 +25,9 @@ function getKey(): Buffer {
|
||||
*/
|
||||
export function encrypt(text: string): string {
|
||||
const key = getKey();
|
||||
const iv = crypto.randomBytes(DEFAULT_IV_LENGTH);
|
||||
const cipher = crypto.createCipheriv(DEFAULT_ENCRYPTION_ALGORITHM, key, iv);
|
||||
const { algorithm, ivLength } = getEncryptionSettings();
|
||||
const iv = crypto.randomBytes(ivLength);
|
||||
const cipher = crypto.createCipheriv(algorithm, key, iv);
|
||||
|
||||
let encrypted = cipher.update(text, "utf8", "hex");
|
||||
encrypted += cipher.final("hex");
|
||||
@@ -50,17 +49,15 @@ export function encrypt(text: string): string {
|
||||
*/
|
||||
export function decrypt(encryptedPayload: string): string {
|
||||
const key = getKey();
|
||||
const { algorithm, authTagLength, ivLength } = getEncryptionSettings();
|
||||
const dataBuffer = Buffer.from(encryptedPayload, "base64");
|
||||
|
||||
// Extract IV, auth tag, and encrypted data
|
||||
const iv = dataBuffer.subarray(0, DEFAULT_IV_LENGTH);
|
||||
const authTag = dataBuffer.subarray(
|
||||
DEFAULT_IV_LENGTH,
|
||||
DEFAULT_IV_LENGTH + DEFAULT_AUTH_TAG_LENGTH,
|
||||
);
|
||||
const encryptedText = dataBuffer.subarray(DEFAULT_IV_LENGTH + DEFAULT_AUTH_TAG_LENGTH);
|
||||
const iv = dataBuffer.subarray(0, ivLength);
|
||||
const authTag = dataBuffer.subarray(ivLength, ivLength + authTagLength);
|
||||
const encryptedText = dataBuffer.subarray(ivLength + authTagLength);
|
||||
|
||||
const decipher = crypto.createDecipheriv(DEFAULT_ENCRYPTION_ALGORITHM, key, iv);
|
||||
const decipher = crypto.createDecipheriv(algorithm, key, iv);
|
||||
decipher.setAuthTag(authTag);
|
||||
|
||||
let decrypted = decipher.update(encryptedText.toString("hex"), "hex", "utf8");
|
||||
@@ -82,7 +79,8 @@ export function generateRandomBytes(size: number): string {
|
||||
}
|
||||
|
||||
export async function hashPassword(password: string): Promise<string> {
|
||||
return bcrypt.hash(password, DEFAULT_BCRYPT_SALT_ROUNDS);
|
||||
const rounds = config.encryption.bcryptSaltRounds;
|
||||
return bcrypt.hash(password, rounds);
|
||||
}
|
||||
|
||||
export async function verifyPassword(password: string, hashed: string): Promise<boolean> {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"@devscast/config": "catalog:",
|
||||
"@basango/domain": "workspace:*",
|
||||
"pino": "^10.1.0",
|
||||
"pino-pretty": "^13.1.2"
|
||||
},
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
import { createEnvAccessor } from "@devscast/config";
|
||||
import { config } from "@basango/domain/config";
|
||||
import pino from "pino";
|
||||
|
||||
const env = createEnvAccessor(["LOG_LEVEL", "NODE_ENV"] as const);
|
||||
|
||||
export const logger = pino({
|
||||
level: env("LOG_LEVEL", { default: "info" }),
|
||||
// Use pretty printing in development, structured JSON in production
|
||||
...(env("NODE_ENV") !== "production" && {
|
||||
level: config.logger.level,
|
||||
...(process.env.NODE_ENV !== "production" && {
|
||||
transport: {
|
||||
options: {
|
||||
colorize: true,
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"paths": {
|
||||
"#domain/*": ["../domain/src/*"],
|
||||
"#logger/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"exclude": ["node_modules"],
|
||||
"extends": "@basango/tsconfig/base.json",
|
||||
"include": ["src"]
|
||||
|
||||
Reference in New Issue
Block a user