feat: articles clusters

This commit is contained in:
2025-12-03 15:54:38 +02:00
parent 1d062f679b
commit 78c27b8220
20 changed files with 2113 additions and 51 deletions
+1
View File
@@ -32,3 +32,4 @@ export const DEFAULT_TOKEN_AUDIENCE = "basango_dashboard";
// Default issuer value for tokens.
export const DEFAULT_TOKEN_ISSUER = "basango_api";
// Default lifetimes for access and refresh tokens.
// NOTE(review): presumably duration strings ("35m" = 35 minutes, "7d" = 7 days) — confirm against the token signing code.
export const DEFAULT_ACCESS_TOKEN_TTL = "35m";
export const DEFAULT_REFRESH_TOKEN_TTL = "7d";
// Matches the slug of the "Divers & Autres" entry in the built-in category table.
// NOTE(review): presumably the fallback used when no other category applies — confirm against the caller.
export const DEFAULT_CATEGORY = "divers-autres";
+5 -1
View File
@@ -1,5 +1,6 @@
import z from "zod";
import { categorySchema } from "./categories";
import { idSchema, sentimentSchema } from "./shared";
import { sourceSchema } from "./sources";
@@ -24,7 +25,10 @@ export const tokenStatisticsSchema = z.object({
export const articleSchema = z.object({
body: z.string().min(1),
categories: z.array(z.string()),
categories: z.array(z.string()).optional().default([]),
category: categorySchema.optional(),
categoryId: idSchema.optional(),
clustered: z.boolean().default(false),
createdAt: z.coerce.date(),
excerpt: z.string().optional(),
hash: z.string().min(1),
+296
View File
@@ -0,0 +1,296 @@
import z from "zod";
import { idSchema } from "./shared";
/**
 * Zod schema describing a single article category.
 *
 * `createdAt` and `updatedAt` go through `z.coerce.date()`, so non-Date input
 * is coerced to a `Date` during parsing.
 */
export const categorySchema = z.object({
  /**
   * List of strings associated with the category.
   * NOTE(review): in the built-in table these look like raw section labels
   * taken from crawled sources — confirm against the code that consumes them.
   */
  candidates: z.string().array(),
  /** Creation timestamp (coerced to `Date`). */
  createdAt: z.coerce.date(),
  /** Optional free-text description, at most 512 characters. */
  description: z.string().max(512).optional(),
  /** Optional numeric vector. NOTE(review): dimension and producer are not visible here. */
  embeddings: z.number().array().optional(),
  /** Identifier, validated by the shared `idSchema`. */
  id: idSchema,
  /** Display name, 1 to 255 characters. */
  name: z.string().min(1).max(255),
  /** Slug, 1 to 255 characters. */
  slug: z.string().min(1).max(255),
  /** Optional last-update timestamp (coerced to `Date`). */
  updatedAt: z.coerce.date().optional(),
  /** Integer weight. NOTE(review): its meaning (priority? ordering?) is not visible here. */
  weight: z.number().int(),
});

/** Static type of a value produced by parsing with `categorySchema`. */
export type Category = z.infer<typeof categorySchema>;
/** Hand-written fields of a built-in category; `createdAt` is added when the module is evaluated. */
type CategorySeed = Omit<Category, "createdAt">;

/**
 * Static definitions of the built-in categories, in their original order.
 *
 * NOTE(review): some candidates are listed under two categories —
 * "ukraine-conflict" (politique-gouvernement and international-regions) and
 * "beto-premium", "example-1".."example-4", "fiches"
 * (actualites-faits-divers and divers-autres). Which category wins depends on
 * lookup code that is not visible here; confirm the overlap is intended.
 */
const CATEGORY_SEEDS: readonly CategorySeed[] = [
  {
    candidates: [
      "accident",
      "actualite",
      "actualité",
      "a-la-une",
      "en bref",
      "en-clair",
      "encontinu",
      "flash",
      "faits-divers",
      "drame",
      "enquetes",
      "desintox",
      "archives",
      "insolite",
      "national",
      "featured",
      "related-featured",
      "top-featured",
      "top-trending",
      "news-dont-miss",
      "news-just-in",
      "la-rdc-a-la-une",
      "example-1",
      "example-2",
      "example-3",
      "example-4",
      "beto-premium",
      "fiches",
      "suspension",
    ],
    description: "Nouvelles de dernière minute, faits divers et informations générales.",
    id: "06930299-71a3-735e-9dcd-055c321f2ca9",
    name: "Actualités & Faits Divers",
    slug: "actualites-faits-divers",
    weight: 4,
  },
  {
    candidates: [
      "democratie",
      "dialogue entre congolais",
      "diplomatie",
      "diplomatie-et-securite",
      "election",
      "élections",
      "elections-2023",
      "legislatives",
      "presidentielle",
      "parlement",
      "politique",
      "serment",
      "si j'étais président",
      "spécial elections",
      "us-politics",
      "ukraine-conflict",
      "conférence des nations unies",
      "nations unies",
      "rebellion",
    ],
    description: "Élections, gouvernance, institutions, diplomatie et conflits politiques.",
    id: "06930299-71a3-7aa5-95a4-a7b39c421255",
    name: "Politique & Gouvernement",
    slug: "politique-gouvernement",
    weight: 10,
  },
  {
    candidates: [
      "agrobusiness",
      "banking",
      "banques-et-finances",
      "economico",
      "economie",
      "économie",
      "finances",
      "industrie",
      "investments",
      "mines",
      "pme-entrepreneuriat",
      "featured-economy",
      "featured-markets",
      "intl-markets",
      "us-business",
      "la-une-eco",
      "emploi",
    ],
    description: "Affaires, marchés financiers, entreprises, banques, emplois et entrepreneuriat.",
    id: "06930299-71a3-7c5b-98b0-d58c8308496d",
    name: "Économie & Finances",
    slug: "economie-finances",
    weight: 9,
  },
  {
    candidates: [
      "arts",
      "culture",
      "musique",
      "livre",
      "livres",
      "patrimoine-traditions",
      "identité culturelle",
      "caricature",
      "histoire",
    ],
    description: "Art, musique, patrimoine, histoire, littérature et expression culturelle.",
    id: "06930299-71a3-7d47-8df2-b201975437f4",
    name: "Culture & Arts",
    slug: "culture-arts",
    weight: 2,
  },
  {
    candidates: ["sport", "sports", "football", "boxe", "can", "okapi sports"],
    description: "Compétitions sportives nationales et internationales, analyses et résultats.",
    id: "06930299-71a3-7e65-9421-b418c8a161b7",
    name: "Sports",
    slug: "sports",
    weight: 5,
  },
  {
    candidates: [
      "famille-genre",
      "femme",
      "jeunes",
      "justice",
      "criminalite",
      "arrestation",
      "kidnapping",
      "viol",
      "vol",
      "manifestation",
      "marche",
      "salubrite",
      "denonciation",
      "evasion",
      "sante",
      "santé",
      "necrologie",
      "education",
      "éducation",
      "enseignement",
      "religion",
      "religion-spiritualite",
      "message-des-voeux",
      "style et beauté",
      "societe",
      "société",
    ],
    description: "Questions sociales, éducation, santé, justice, genre et vie quotidienne.",
    id: "06930299-71a3-7f8b-b5a3-413f512ec6d8",
    name: "Société & Vie Quotidienne",
    slug: "societe-vie-quotidienne",
    weight: 6,
  },
  {
    candidates: [
      "climat-et-environnement",
      "developpement-durable",
      "biodiversite",
      "ecologico",
      "environnement",
      "nature",
      "eau",
      "electricite",
      "energie",
      "inondation",
      "science & env.",
      "sciences",
      "technologie",
      "technologie-innovation",
      "mc geek !",
      "sur le net",
    ],
    description:
      "Recherche scientifique, innovation technologique, climat, environnement et énergie.",
    id: "06930299-71a4-7096-8a7f-d69920882d95",
    name: "Sciences, Technologies & Environnement",
    slug: "sciences-technologies-environnement",
    weight: 7,
  },
  {
    candidates: [
      "afrique",
      "congo-brazzaville",
      "congolais de l'étranger",
      "diaspora",
      "euro-zone",
      "se-asia",
      "middle-east",
      "monde",
      "world-news",
      "grands-lacs",
      "bandundu",
      "bukavu",
      "bunia",
      "ituri",
      "katanga",
      "kinshasa",
      "maniema",
      "mbujimayi",
      "provinces",
      "info kin",
      "tourisme",
      "transport",
      "route",
      "infrastructures",
      "ukraine-conflict",
    ],
    description: "Actualités internationales, régions du monde et provinces locales.",
    id: "06930299-71a4-724a-8975-ea7d21286c22",
    name: "International & Régions",
    slug: "international-regions",
    weight: 8,
  },
  {
    candidates: [
      "analyses",
      "opinion",
      "opinions",
      "tribune",
      "grand-angle",
      "grande interview",
      "le débat",
      "lettre-ouverte",
      "l'invité de la campagne",
      "l'invité du jour",
      "émissions",
      "magazine",
      "magazine un",
      "medias",
      "communication",
      "communications",
      "parole aux auditeurs",
      "parole d'enfant",
      "revue de presse",
      "tele-medias",
      "multimedia",
      "tv",
    ],
    description: "Chroniques, analyses, tribunes, programmes et contenus médiatiques.",
    id: "06930299-71a4-745b-8813-6bca9c6b3c56",
    name: "Opinions & Médias",
    slug: "opinions-medias",
    weight: 3,
  },
  {
    candidates: [
      "beto-premium",
      "example-1",
      "example-2",
      "example-3",
      "example-4",
      "fiches",
      "publicite",
      "okapi service",
      "petro-chem-example-3",
      "sans catégorie",
      "uncategorized",
      "lefonde",
      "jdc",
    ],
    description: "Rubriques expérimentales, catégories indéterminées et éléments divers.",
    id: "06930299-71a4-756a-948b-e4a244b5887e",
    name: "Divers & Autres",
    slug: "divers-autres",
    weight: 1,
  },
];

/**
 * Built-in categories.
 *
 * Each entry is a seed plus a `createdAt` set to the time this module is
 * evaluated. `createdAt` is placed right after `candidates` so the property
 * order stays: candidates, createdAt, description, id, name, slug, weight.
 */
export const Categories: Category[] = CATEGORY_SEEDS.map(({ candidates, ...details }) => ({
  candidates,
  createdAt: new Date(),
  ...details,
}));
+1
View File
@@ -1,5 +1,6 @@
export * from "./articles";
export * from "./auth";
export * from "./categories";
export * from "./crawler";
export * from "./reports";
export * from "./shared";