feat(domain): centralize data definition

This commit is contained in:
2025-11-17 00:04:27 +02:00
parent e7585aa76c
commit f39635e04f
96 changed files with 3474 additions and 1167 deletions
+25
View File
@@ -0,0 +1,25 @@
// Domain-specific constants and types

/** Bias levels a source can be tagged with (literal tuple, usable with `z.enum`). */
export const BIAS = [
  "neutral",
  "slightly",
  "partisan",
  "extreme",
] as const;

/** Reliability levels a source can be tagged with. */
export const RELIABILITY = [
  "trusted",
  "reliable",
  "average",
  "low_trust",
  "unreliable",
] as const;

/** Transparency levels a source can be tagged with. */
export const TRANSPARENCY = [
  "high",
  "medium",
  "low",
] as const;

/** Sentiment labels that can be attached to an article. */
export const SENTIMENT = [
  "positive",
  "neutral",
  "negative",
] as const;
// Crawler-related constants and types

/** Directions in which a crawl update can proceed. */
export const UPDATE_DIRECTIONS = [
  "forward",
  "backward",
] as const;

/** Kinds of source the crawler distinguishes between. */
export const SOURCE_KINDS = [
  "wordpress",
  "html",
] as const;
/** Default date-only format pattern (4-digit year, 2-digit month, 2-digit day). */
export const DEFAULT_DATE_FORMAT = "yyyy-LL-dd";

/**
 * Default date-time format pattern.
 *
 * The ISO separator is quoted (`'T'`) so format libraries treat it as a literal
 * character instead of a formatting token, matching the quoted form already used
 * for the WordPress source date format (`"yyyy-LL-dd'T'HH:mm:ss"`).
 *
 * NOTE(review): the trailing `Z` is kept as-is; its meaning depends on the
 * date library consuming this pattern (offset token vs. literal) — confirm.
 */
export const DEFAULT_DATETIME_FORMAT = "yyyy-LL-dd'T'HH:mmZ";
/** Default user-agent string identifying the Basango crawler. */
export const DEFAULT_USER_AGENT =
  "Basango/0.1 (+https://github.com/bernard-ng/basango)";

/** Default user-agent string used for Open Graph requests (presumably metadata fetching — confirm with callers). */
export const DEFAULT_OPEN_GRAPH_USER_AGENT =
  "facebookexternalhit/1.1";

/** HTTP status codes treated as transient by default (rate limiting and 5xx server errors). */
export const DEFAULT_TRANSIENT_HTTP_STATUSES = [
  429,
  500,
  502,
  503,
  504,
];

/** Default header name carrying a retry delay, in lowercase. */
export const DEFAULT_RETRY_AFTER_HEADER =
  "retry-after";
// Pagination defaults

/** Default number of items per page. */
export const DEFAULT_PAGINATION_LIMIT = 12;

/** Default (first) page number. */
export const DEFAULT_PAGINATION_PAGE = 1;

/** Default upper bound for the number of items per page. */
export const DEFAULT_PAGINATION_MAX_LIMIT = 100;

// Miscellaneous defaults

/** Default base URL for source images (ends with a trailing slash). */
export const DEFAULT_SOURCE_IMAGE = "https://devscast.org/images/sources/";

/** Default number of days for the publication graph. */
export const DEFAULT_PUBLICATION_GRAPH_DAYS = 30;

/** Default limit for category shares. */
export const DEFAULT_CATEGORY_SHARES_LIMIT = 10;

/** Default IANA time zone identifier. */
export const DEFAULT_TIMEZONE = "Africa/Lubumbashi";
+47
View File
@@ -0,0 +1,47 @@
import { z } from "zod";
import { SOURCE_KINDS } from "#domain/constants";
// schemas

/** Accepts exactly the values listed in `SOURCE_KINDS`. */
export const SourceKindSchema = z.enum(SOURCE_KINDS);

/**
 * Date settings of a source.
 * `format` falls back to `"yyyy-LL-dd HH:mm"` when the key is omitted.
 */
export const SourceDateSchema = z.object({
  format: z.string().default("yyyy-LL-dd HH:mm"),
});
/**
 * Fields shared by every source configuration; extended by the kind-specific
 * schemas in this module and not exported itself.
 *
 * `sourceDate` has no default at this level, so the key is required unless an
 * extending schema overrides it. The boolean flags and `categories` default to
 * `false` / `[]` when omitted.
 */
const SourceConfigSchema = z.object({
  categories: z.array(z.string()).default([]),
  requiresDetails: z.boolean().default(false),
  requiresRateLimit: z.boolean().default(false),
  sourceDate: SourceDateSchema,
  sourceId: z.string(),
  sourceKind: SourceKindSchema,
  sourceUrl: z.url(),
  supportsCategories: z.boolean().default(false),
});
/**
 * Selector strings for an HTML source (presumably CSS selectors, as the
 * `pagination` default suggests — confirm with the crawler).
 * Only `articleCategories` is optional; `pagination` defaults to
 * `"ul.pagination > li a"`.
 */
const HtmlSourceSelectorsSchema = z.object({
  articleBody: z.string(),
  articleCategories: z.string().optional(),
  articleDate: z.string(),
  articleLink: z.string(),
  articles: z.string(),
  articleTitle: z.string(),
  pagination: z.string().default("ul.pagination > li a"),
});

/**
 * Configuration of an `"html"` source: the shared source fields plus a
 * pagination template and the selector set above. `sourceKind` is narrowed
 * to the literal `"html"`.
 */
export const HtmlSourceConfigSchema = SourceConfigSchema.extend({
  paginationTemplate: z.string(),
  sourceKind: z.literal("html"),
  sourceSelectors: HtmlSourceSelectorsSchema,
});
/** Date settings applied to a WordPress source when `sourceDate` is omitted. */
const wordPressSourceDateDefault = SourceDateSchema.parse({
  format: "yyyy-LL-dd'T'HH:mm:ss",
});

/**
 * Configuration of a `"wordpress"` source: the shared source fields with
 * `sourceKind` narrowed to the literal `"wordpress"` and `sourceDate` made
 * optional through the default above.
 */
export const WordPressSourceConfigSchema = SourceConfigSchema.extend({
  sourceDate: SourceDateSchema.default(wordPressSourceDateDefault),
  sourceKind: z.literal("wordpress"),
});
// types
/** Union of the supported source kinds, inferred from `SourceKindSchema`. */
export type SourceKind = z.infer<typeof SourceKindSchema>;
/** Parsed date settings of a source, inferred from `SourceDateSchema`. */
export type SourceDate = z.infer<typeof SourceDateSchema>;
/** Parsed configuration of an `"html"` source. */
export type HtmlSourceConfig = z.infer<typeof HtmlSourceConfigSchema>;
/** Parsed configuration of a `"wordpress"` source. */
export type WordPressSourceConfig = z.infer<typeof WordPressSourceConfigSchema>;
/** Any supported source configuration; the `sourceKind` literal tells the members apart. */
export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig;
+2
View File
@@ -0,0 +1,2 @@
export * from "./config";
export * from "./schemas";
+66
View File
@@ -0,0 +1,66 @@
import { z } from "zod";
import { UPDATE_DIRECTIONS } from "#domain/constants";
// schemas

/** Accepts exactly the values listed in `UPDATE_DIRECTIONS`. */
export const UpdateDirectionSchema = z.enum(UPDATE_DIRECTIONS);
/**
 * Integer timestamp range.
 *
 * Two independent checks run after the shape is validated, and both may
 * report an issue for the same value:
 * - neither bound may be `0`;
 * - `end` must not be smaller than `start`.
 */
export const TimestampRangeSchema = z
  .object({
    end: z.number().int(),
    start: z.number().int(),
  })
  .superRefine((range, ctx) => {
    const hasZeroBound = range.start === 0 || range.end === 0;
    if (hasZeroBound) {
      ctx.addIssue({ code: "custom", message: "Timestamp cannot be zero" });
    }

    const isReversed = range.end < range.start;
    if (isReversed) {
      ctx.addIssue({
        code: "custom",
        message: "End timestamp must be greater than or equal to start",
      });
    }
  });
/**
 * Range of non-negative integer page numbers; `end` must be greater than or
 * equal to `start`.
 */
export const PageRangeSchema = z
  .object({
    end: z.number().int().min(0),
    start: z.number().int().min(0),
  })
  .superRefine((range, ctx) => {
    const isOrdered = range.end >= range.start;
    if (isOrdered) {
      return;
    }

    ctx.addIssue({
      code: "custom",
      message: "End page must be greater than or equal to start page",
    });
  });
/**
 * Parses a `"start:end"` string of decimal digits into `{ end, start }`
 * numbers. The regex guarantees exactly two digit groups, so both parts are
 * always present when the transform runs.
 */
export const PageSpecSchema = z
  .string()
  .regex(/^[0-9]+:[0-9]+$/, "Invalid page range format. Use start:end")
  .transform((spec) => {
    const parts = spec.split(":");
    return {
      end: Number.parseInt(String(parts[1]), 10),
      start: Number.parseInt(String(parts[0]), 10),
    };
  });
/**
 * Splits a `"start:end"` string into its two raw, still unparsed halves.
 *
 * The regex is not anchored and only requires a `:` with at least one
 * character on each side. Only the first two `:`-separated segments are
 * kept, so any further segments (e.g. the time part of `a:b:c`) are dropped.
 */
export const DateSpecSchema = z
  .string()
  .regex(/.+:.+/, "Expected start:end format")
  .transform((spec) => {
    const segments = spec.split(":");
    return {
      endRaw: String(segments[1]),
      startRaw: String(segments[0]),
    };
  });
// types
/** Union of the supported update directions, inferred from `UpdateDirectionSchema`. */
export type UpdateDirection = z.infer<typeof UpdateDirectionSchema>;
/** Validated integer timestamp range (`start`, `end`). */
export type TimestampRange = z.infer<typeof TimestampRangeSchema>;
/** Output of `PageSpecSchema`: numeric `start` and `end` parsed from a `"start:end"` string. */
export type PageSpec = z.infer<typeof PageSpecSchema>;
/** Output of `DateSpecSchema`: the raw `startRaw` and `endRaw` strings. */
export type DateSpec = z.infer<typeof DateSpecSchema>;
/** Validated page range (`start`, `end`). */
export type PageRange = z.infer<typeof PageRangeSchema>;
+175
View File
@@ -0,0 +1,175 @@
import { z } from "@hono/zod-openapi";
import { idSchema, sentimentSchema } from "#domain/models/shared";
// schemas

/**
 * Optional descriptive metadata attached to an article.
 *
 * Every field is optional. When present, `image` and `url` must be valid
 * URLs and `publishedAt` / `updatedAt` must be `Date` instances (the OpenAPI
 * examples are ISO strings for documentation purposes only).
 */
export const articleMetadataSchema = z.object({
  author: z.string().optional().openapi({
    description: "The author of the article.",
    example: "John Doe",
  }),
  description: z.string().optional().openapi({
    description: "A brief description or summary of the article.",
    example: "This article discusses the latest advancements in AI technology.",
  }),
  image: z.url().optional().openapi({
    description: "The URL of the main image associated with the article.",
    example: "https://example.com/image.jpg",
  }),
  publishedAt: z.date().optional().openapi({
    description: "The publication date of the article as a Date object.",
    example: "2023-01-01T00:00:00Z",
  }),
  title: z.string().optional().openapi({
    description: "The title of the article for metadata purposes.",
    example: "The Rise of AI",
  }),
  updatedAt: z.date().optional().openapi({
    description: "The last updated date of the article as a Date object.",
    example: "2023-01-02T12:00:00Z",
  }),
  url: z.url().optional().openapi({
    description: "The canonical URL of the article.",
    example: "https://example.com/article",
  }),
});
/**
 * Builds one token-count field: an optional number that defaults to `0`,
 * annotated with the given OpenAPI description and example.
 */
const tokenCountField = (description: string, example: number) =>
  z.number().optional().default(0).openapi({ description, example });

/**
 * Token counts per article section plus the overall total.
 * Each count defaults to `0` when omitted.
 */
export const tokenStatisticsSchema = z.object({
  body: tokenCountField("The number of tokens in the article body.", 250),
  categories: tokenCountField("The number of tokens in the article categories.", 3),
  excerpt: tokenCountField("The number of tokens in the article excerpt.", 50),
  title: tokenCountField("The number of tokens in the article title.", 10),
  total: tokenCountField("The total number of tokens in the article.", 313),
});
/**
 * An article as stored in the system.
 *
 * `body`, `hash` and `title` must be non-empty strings; `link` must be a
 * valid URL; `createdAt` and `publishedAt` must be `Date` instances.
 * `metadata`, `tokenStatistics` and `updatedAt` are optional.
 */
export const articleSchema = z.object({
  body: z.string().min(1).openapi({
    description: "The main content of the article.",
    example: "This is the body of the article...",
  }),
  categories: z.array(z.string()).openapi({
    description: "The categories or tags associated with the article.",
    example: ["Technology", "AI"],
  }),
  createdAt: z.date().openapi({
    description: "The date and time when the article was created in the system.",
    example: "2023-01-01T12:00:00Z",
  }),
  hash: z.string().min(1).openapi({
    description: "The unique hash of the article link.",
    example: "d41d8cd98f00b204e9800998ecf8427e",
  }),
  id: idSchema,
  // Top-level `z.url()` matches the other URL fields in the domain schemas
  // (the chained `z.string().url()` form is deprecated in Zod 4).
  link: z.url().openapi({
    description: "The URL of the article.",
    example: "https://example.com/article",
  }),
  metadata: articleMetadataSchema.optional(),
  publishedAt: z.date().openapi({
    description: "The publication date of the article as a Date object.",
    example: "2023-01-01T00:00:00Z",
  }),
  // Accepts either a UUID or any other non-empty string identifier.
  sourceId: z.union([z.uuid(), z.string().min(1)]).openapi({
    description: "The unique identifier of the source from which the article was crawled.",
    // Example is a well-formed UUID (hexadecimal digits only).
    example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b",
  }),
  title: z.string().min(1).openapi({
    description: "The title of the article.",
    example: "The Rise of AI",
  }),
  tokenStatistics: tokenStatisticsSchema.optional(),
  updatedAt: z.date().optional().openapi({
    description: "The date and time when the article was last updated in the system.",
    example: "2023-01-02T12:00:00Z",
  }),
});
// API

/**
 * Request payload for creating an article (OpenAPI component `CreateArticle`).
 *
 * `categories` defaults to `[]` when omitted. `publishedAt` is received as a
 * string, rejected with "Invalid date format" when `Date.parse` cannot read
 * it, and converted to a `Date` in the parsed output.
 */
export const createArticleSchema = z
  .object({
    body: z.string().min(1).openapi({
      description: "The main content of the article.",
      example: "This is the body of the article...",
    }),
    categories: z
      .array(z.string())
      .openapi({
        description: "The categories or tags associated with the article.",
        example: ["Technology", "AI"],
      })
      .optional()
      .default([]),
    hash: z.string().min(1).openapi({
      description: "The unique hash of the article link.",
      example: "d41d8cd98f00b204e9800998ecf8427e",
    }),
    // Top-level `z.url()` matches the other URL fields in the domain schemas
    // (the chained `z.string().url()` form is deprecated in Zod 4).
    link: z.url().openapi({
      description: "The URL of the article.",
      example: "https://example.com/article",
    }),
    metadata: articleMetadataSchema.optional(),
    publishedAt: z
      .string()
      .refine((value) => !Number.isNaN(Date.parse(value)), {
        message: "Invalid date format",
      })
      .transform((value) => new Date(value))
      .openapi({
        description: "The publication date of the article in ISO 8601 format.",
        example: "2023-01-01T00:00:00Z",
      }),
    sourceId: z.string().openapi({
      description: "The unique identifier of the source from which the article was crawled.",
      example: "radiookapi.net",
    }),
    title: z.string().min(1).openapi({
      description: "The title of the article.",
      example: "The Rise of AI",
    }),
  })
  .openapi("CreateArticle");
/**
 * Response returned after creating an article (OpenAPI component
 * `CreateArticleResponse`): the article id and its source id, both UUIDs.
 */
export const createArticleResponseSchema = z
  .object({
    id: idSchema,
    sourceId: idSchema,
  })
  .openapi("CreateArticleResponse");
/**
 * Filters and paging options for listing articles. Every field is optional.
 *
 * `limit` accepts integers from 1 to 100; the `default: 10` below is OpenAPI
 * metadata only — the schema itself applies no default value.
 */
export const getArticlesSchema = z.object({
  category: z.string().min(1).max(255).optional().openapi({
    description: "Filter articles by a specific category.",
    example: "Technology",
  }),
  cursor: z.string().nullable().optional().openapi({
    description: "Optional cursor for fetching the next page of articles.",
  }),
  limit: z.number().int().min(1).max(100).optional().openapi({
    default: 10,
    description: "Maximum number of articles to return per page.",
    example: 20,
  }),
  search: z.string().max(512).optional().openapi({
    description: "Full-text search query applied to article titles and bodies.",
    example: "gouvernement congolais",
  }),
  sentiment: sentimentSchema.optional(),
  sourceId: idSchema.optional(),
});
// types
/** Parsed article, inferred from `articleSchema`. */
export type Article = z.infer<typeof articleSchema>;
/** Parsed article metadata, inferred from `articleMetadataSchema`. */
export type ArticleMetadata = z.infer<typeof articleMetadataSchema>;
/** Parsed token statistics, inferred from `tokenStatisticsSchema`. */
export type TokenStatistics = z.infer<typeof tokenStatisticsSchema>;
+4
View File
@@ -0,0 +1,4 @@
export * from "./articles";
export * from "./shared";
export * from "./sources";
export * from "./users";
+339
View File
@@ -0,0 +1,339 @@
import { z } from "@hono/zod-openapi";
import { BIAS, RELIABILITY, SENTIMENT, TRANSPARENCY } from "#domain/constants";
// schemas

/**
 * UUID identifier shared by the domain resources.
 * The OpenAPI example is a well-formed UUID (hexadecimal digits only), so it
 * passes this schema's own validation.
 */
export const idSchema = z.uuid().openapi({
  description: "The unique identifier of the resource.",
  example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b",
});
/**
 * Date range made of two `Date` instances, `start` and `end`.
 * The schema does not check that `end` is not before `start`.
 */
export const dateRangeSchema = z
  .object({
    end: z.date().openapi({
      description: "The end date of the range.",
      example: "2023-01-30T23:59:59Z",
    }),
    start: z.date().openapi({
      description: "The start date of the range.",
      example: "2023-01-01T00:00:00Z",
    }),
  })
  .openapi({
    description: "Inclusive date range for publication metrics.",
  });
/**
 * Page-size limit: an integer from 1 to 100.
 * The `default: 10` below is OpenAPI metadata only — the schema itself
 * applies no default value.
 */
export const limitSchema = z
  .number()
  .int()
  .min(1)
  .max(100)
  .openapi({
    default: 10,
    description: "The maximum number of items to return.",
    example: 10,
  });
/** Accepts exactly the values listed in `SENTIMENT`. */
export const sentimentSchema = z
  .enum(SENTIMENT)
  .openapi({
    description: "Sentiment detected for the article.",
    example: "positive",
  });

/** Accepts exactly the values listed in `BIAS`. */
export const biasSchema = z
  .enum(BIAS)
  .openapi({
    description: "The bias level of the source.",
    example: "neutral",
  });

/** Accepts exactly the values listed in `RELIABILITY`. */
export const reliabilitySchema = z
  .enum(RELIABILITY)
  .openapi({
    description: "The reliability level of the source.",
    example: "trusted",
  });

/** Accepts exactly the values listed in `TRANSPARENCY`. */
export const transparencySchema = z
  .enum(TRANSPARENCY)
  .openapi({
    description: "The transparency level of the source.",
    example: "high",
  });
/**
 * Credibility rating made of bias, reliability and transparency.
 * Omitted fields default to `"neutral"`, `"average"` and `"medium"`
 * respectively.
 */
export const credibilitySchema = z
  .object({
    bias: biasSchema.default("neutral"),
    reliability: reliabilitySchema.default("average"),
    transparency: transparencySchema.default("medium"),
  })
  .openapi({
    description: "Credibility information about the resource.",
  });
/**
 * Device information. Only `isBot` is required; the client, device and
 * operating-system strings are optional.
 */
export const deviceSchema = z
  .object({
    client: z.string().optional().openapi({
      description: "The client software of the device.",
      example: "Chrome 90",
    }),
    device: z.string().optional().openapi({
      description: "The device model.",
      example: "Dell XPS 13",
    }),
    isBot: z.boolean().openapi({
      description: "Indicates if the device is a bot.",
      example: false,
    }),
    operatingSystem: z.string().optional().openapi({
      description: "The operating system of the device.",
      example: "Windows 10",
    }),
  })
  .openapi({
    description: "Information about the user's device.",
  });
/**
 * Geolocation information. Every field is optional; numeric fields accept
 * any number (no range checks are applied to latitude or longitude).
 */
export const geoLocationSchema = z
  .object({
    accuracyRadius: z.number().optional().openapi({
      description: "The accuracy radius in kilometers.",
      example: 50,
    }),
    city: z.string().optional().openapi({
      description: "The city of the user.",
      example: "San Francisco",
    }),
    country: z.string().optional().openapi({
      description: "The country of the user.",
      example: "United States",
    }),
    latitude: z.number().optional().openapi({
      description: "The latitude of the user's location.",
      example: 37.7749,
    }),
    longitude: z.number().optional().openapi({
      description: "The longitude of the user's location.",
      example: -122.4194,
    }),
    timeZone: z.string().optional().openapi({
      description: "The time zone of the user.",
      example: "America/Los_Angeles",
    }),
  })
  .openapi({
    description: "Geolocation information about the user.",
  });
/**
 * One entry of a distribution: an identified, named bucket with its item
 * count and its share as a percentage.
 */
export const distributionSchema = z
  .object({
    count: z.number().int().openapi({
      description: "The count of items in the distribution.",
      example: 42,
    }),
    id: idSchema,
    name: z.string().openapi({
      description: "The name of the distribution.",
      example: "Technology",
    }),
    percentage: z.number().openapi({
      description: "The percentage of items in the distribution.",
      example: 12.5,
    }),
  })
  .openapi({
    description: "Distribution information.",
  });

/**
 * @deprecated Misspelled name kept so existing imports keep working;
 * use `distributionSchema` instead. Both names refer to the same schema instance.
 */
export const distrubtionSchema = distributionSchema;
/** Query for distributions: optional resource id and optional item limit. */
export const getDistributionsSchema = z.object({
  id: idSchema.optional(),
  limit: limitSchema.optional(),
});

/** Query for publication metrics: optional resource id and optional date range. */
export const getPublicationsSchema = z.object({
  id: idSchema.optional(),
  range: dateRangeSchema.optional(),
});
/** List of distribution entries together with an integer `total`. */
export const distributionsSchema = z
  .object({
    items: z.array(distrubtionSchema).openapi({
      description: "List of distributions.",
    }),
    total: z.number().int().openapi({
      description: "Total number of distributions.",
      example: 100,
    }),
  })
  .openapi({
    description: "Distributions data.",
  });
/**
 * Publication count for one date. `date` is a plain string (its format is
 * not validated) and `count` is an integer.
 */
export const publicationSchema = z
  .object({
    count: z.number().int().openapi({
      description: "The number of articles published on that date.",
      example: 42,
    }),
    date: z.string().openapi({
      description: "The date of the publication.",
      example: "2023-01-15",
    }),
  })
  .openapi({
    description: "Publication metrics for a specific date.",
  });
/**
 * Change between two values: absolute delta, percentage, sign and variant.
 *
 * NOTE(review): `variant` accepts "increase", "decrease" and "positive";
 * confirm that "positive" is the intended third value.
 */
export const deltaSchema = z
  .object({
    delta: z.number().openapi({
      description: "The absolute change in value.",
      example: 10,
    }),
    percentage: z.number().openapi({
      description: "The percentage change in value.",
      example: 25.0,
    }),
    sign: z.enum(["+", "-"]).openapi({
      description: "The sign of the change.",
      example: "+",
    }),
    variant: z.enum(["increase", "decrease", "positive"]).openapi({
      description: "The variant of the change.",
      example: "increase",
    }),
  })
  .openapi({
    description: "Delta information representing change over time.",
  });
/** Current and previous totals together with the delta between them. */
export const publicationMetaSchema = z
  .object({
    current: z.number().openapi({
      description: "The current total value.",
      example: 150,
    }),
    delta: deltaSchema,
    previous: z.number().openapi({
      description: "The previous total value.",
      example: 120,
    }),
  })
  .openapi({
    description: "Metadata for publication metrics.",
  });
/** Per-date publication counts with optional summary metadata. */
export const publicationsSchema = z
  .object({
    items: z.array(publicationSchema).openapi({
      description: "List of publication metrics for the source.",
    }),
    meta: publicationMetaSchema.optional(),
  })
  .openapi({
    description: "Publication metrics for the source.",
  });
/**
 * Decoded pagination cursor: the date and id of the last item of the current
 * page. Both fields are plain strings (no format validation).
 */
export const paginationCursorSchema = z
  .object({
    date: z.string().openapi({
      description: "The date associated with the last item in the current page.",
      example: "2023-01-15",
    }),
    id: z.string().openapi({
      description: "The unique identifier of the last item in the current page.",
      // Well-formed UUID; the same id is embedded in the encoded cursor
      // examples of the pagination schemas.
      example: "b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b",
    }),
  })
  .openapi({
    description: "Cursor information for pagination.",
  });
/**
 * Pagination parameters accepted from a client. All fields are optional;
 * `cursor` may also be `null`, and `page` must be an integer of at least 1.
 */
export const paginationRequestSchema = z
  .object({
    cursor: z.string().nullable().optional().openapi({
      description: "The pagination cursor for cursor-based pagination.",
      // Base64 of {"date":"2023-01-15","id":"b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b"};
      // the previous example decoded to JSON with a missing closing quote.
      example:
        "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdiIn0=",
    }),
    limit: limitSchema.optional(),
    page: z.number().int().min(1).optional().openapi({
      description: "The page number to retrieve.",
      example: 1,
    }),
  })
  .openapi({
    description: "Pagination request parameters.",
  });
/**
 * Resolved pagination state: the raw cursor, its decoded payload (both
 * nullable) and the integer limit, offset and page.
 */
export const paginationStateSchema = z
  .object({
    cursor: z.string().nullable().openapi({
      description: "The current pagination cursor.",
      // Base64 of {"date":"2023-01-15","id":"b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b"};
      // the previous example decoded to JSON with a missing closing quote.
      example:
        "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdiIn0=",
    }),
    limit: z.number().int().openapi({
      description: "The number of items per page.",
      example: 10,
    }),
    offset: z.number().int().openapi({
      description: "The offset for the current page.",
      example: 0,
    }),
    page: z.number().int().openapi({
      description: "The current page number.",
      example: 1,
    }),
    payload: paginationCursorSchema.nullable().openapi({
      description: "The decoded payload from the pagination cursor.",
    }),
  })
  .openapi({
    description: "Internal pagination state.",
  });
/**
 * Pagination metadata returned with a page of results: current position,
 * page size, whether another page exists, and the current and next cursors
 * (both nullable).
 */
export const paginationMetaSchema = z
  .object({
    current: z.number().int().openapi({
      description: "The current page number or offset.",
      example: 1,
    }),
    cursor: z.string().nullable().openapi({
      description: "The current pagination cursor.",
      // Base64 of {"date":"2023-01-15","id":"b3e1c8f4-5d6a-4c9e-8f1e-2d3c4b5a6f7b"};
      // the previous example decoded to JSON with a missing closing quote.
      example:
        "eyJkYXRlIjoiMjAyMy0wMS0xNSIsImlkIjoiYjNlMWM4ZjQtNWQ2YS00YzllLThmMWUtMmQzYzRiNWE2ZjdiIn0=",
    }),
    hasNext: z.boolean().openapi({
      description: "Indicates if there is a next page available.",
      example: true,
    }),
    limit: z.number().int().openapi({
      description: "The number of items per page.",
      example: 10,
    }),
    nextCursor: z.string().nullable().openapi({
      description: "The next pagination cursor, if available.",
      example:
        "eyJkYXRlIjoiMjAyMy0wMS0yMCIsImlkIjoiZDRmNWU2ZTAtNzY4Ny00Y2E3LTg5ZTItYjY0ZGI3Y2E3ZGIifQ==",
    }),
  })
  .openapi({
    description: "Pagination metadata.",
  });
// types
/** A page of `items` of type `T` together with its pagination metadata. */
export type PaginatedResult<T> = {
items: T[];
meta: PaginationMeta;
};
/** UUID string identifier, inferred from `idSchema`. */
export type ID = z.infer<typeof idSchema>;
/** Date range with `start` and `end` dates. */
export type DateRange = z.infer<typeof dateRangeSchema>;
/** Union of the allowed sentiment values. */
export type Sentiment = z.infer<typeof sentimentSchema>;
/** Union of the allowed bias values. */
export type Bias = z.infer<typeof biasSchema>;
/** Union of the allowed reliability values. */
export type Reliability = z.infer<typeof reliabilitySchema>;
/** Union of the allowed transparency values. */
export type Transparency = z.infer<typeof transparencySchema>;
/** Credibility rating (bias, reliability, transparency). */
export type Credibility = z.infer<typeof credibilitySchema>;
/** Device information. */
export type Device = z.infer<typeof deviceSchema>;
/** Geolocation information. */
export type GeoLocation = z.infer<typeof geoLocationSchema>;
/** One distribution entry. */
export type Distribution = z.infer<typeof distrubtionSchema>;
/** List of distribution entries with a total. */
export type Distributions = z.infer<typeof distributionsSchema>;
/** Publication count for one date. */
export type Publication = z.infer<typeof publicationSchema>;
/** Publication counts with optional summary metadata. */
export type Publications = z.infer<typeof publicationsSchema>;
/** Current/previous totals and their delta. */
export type PublicationMeta = z.infer<typeof publicationMetaSchema>;
/** Change between two values. */
export type Delta = z.infer<typeof deltaSchema>;
/** Decoded pagination cursor (`date`, `id`). */
export type PaginationCursor = z.infer<typeof paginationCursorSchema>;
/** Pagination parameters accepted from a client. */
export type PaginationRequest = z.infer<typeof paginationRequestSchema>;
/** Resolved pagination state. */
export type PaginationState = z.infer<typeof paginationStateSchema>;
/** Pagination metadata returned with a page of results. */
export type PaginationMeta = z.infer<typeof paginationMetaSchema>;
+63
View File
@@ -0,0 +1,63 @@
import { z } from "@hono/zod-openapi";
import {
credibilitySchema,
idSchema,
limitSchema,
publicationsSchema,
} from "#domain/models/shared";
// schemas

/**
 * A news source.
 *
 * `id`, `name` and `url` are required; the article count, credibility,
 * description, display name and publication metrics are optional.
 * The OpenAPI examples consistently describe one source (Radio Okapi),
 * using the same identifier as the `sourceId` example of the article
 * creation payload.
 */
export const sourceSchema = z.object({
  articles: z.number().int().min(0).optional().openapi({
    description: "The total number of articles from this source.",
    example: 1250,
  }),
  credibility: credibilitySchema.optional(),
  description: z.string().max(1024).optional().openapi({
    description: "A brief description of the source.",
    example: "Radio Okapi is a Congolese radio station that provides news and information.",
  }),
  displayName: z.string().min(1).max(255).optional().openapi({
    description: "The display name of the source.",
    example: "Radio Okapi",
  }),
  id: idSchema,
  name: z.string().min(1).max(255).openapi({
    description: "The name of the source.",
    example: "radiookapi.net",
  }),
  publications: publicationsSchema.optional(),
  url: z.url().max(255).openapi({
    description: "The URL of the source.",
    example: "https://www.radiookapi.net",
  }),
});
/**
 * Payload for creating a source: the `description`, `displayName`, `name`
 * and `url` fields of `sourceSchema`, with their original constraints.
 */
export const createSourceSchema = sourceSchema.pick({
  description: true,
  displayName: true,
  name: true,
  url: true,
});
/** Lookup of a single source by its required id. */
export const getSourceSchema = z.object({
  id: idSchema,
});

/** Query for the category shares of a source: required id, optional limit. */
export const getCategorySharesSchema = z.object({
  id: idSchema,
  limit: limitSchema.optional(),
});
/**
 * Payload for updating a source: the fields of the creation payload plus
 * `credibility` and the `id` of the source being updated, all picked from
 * `sourceSchema` with their original constraints.
 */
export const updateSourceSchema = sourceSchema.pick({
  credibility: true,
  description: true,
  displayName: true,
  id: true,
  name: true,
  url: true,
});
// types
/** Parsed source, inferred from `sourceSchema`. */
export type Source = z.infer<typeof sourceSchema>;
+2
View File
@@ -0,0 +1,2 @@
/** Role identifier a user can hold. */
export type Role =
  | "ROLE_USER"
  | "ROLE_ADMIN";

/** List of role identifiers. */
export type Roles = Array<Role>;