feat(crawler): compute source update dates

This commit is contained in:
2025-11-25 01:05:39 +02:00
parent 72dfa53f80
commit 1d062f679b
16 changed files with 186 additions and 85 deletions
+25 -1
View File
@@ -1,6 +1,6 @@
import { DEFAULT_CATEGORY_SHARES_LIMIT, DEFAULT_TIMEZONE } from "@basango/domain/constants";
import { ID, Publication, Publications } from "@basango/domain/models";
import { eq, sql } from "drizzle-orm";
import { eq, max, min, sql } from "drizzle-orm";
import * as uuid from "uuid";
import { Database } from "#db/client";
@@ -161,3 +161,27 @@ export async function getSourceCategoryShares(
return { items: data.rows, total: data.rowCount ?? 0 };
}
/**
 * Looks up the most recent `publishedAt` among the articles of the named source.
 *
 * Articles are inner-joined to their source on `articles.sourceId = sources.id`
 * and the source is selected by its `name`.
 *
 * @param db - Database handle used to run the query.
 * @param source - Source name, matched against `sources.name`.
 * @returns The maximum `articles.publishedAt` for that source, or the current
 *   date/time when the query yields no value (e.g. nothing stored for it yet).
 */
export async function getLatestPublished(db: Database, source: string): Promise<Date> {
  const belongsToSource = eq(articles.sourceId, sources.id);
  const matchesName = eq(sources.name, source);

  // Only the first row of the aggregate result is consulted.
  const [row] = await db
    .select({ publishedAt: max(articles.publishedAt) })
    .from(articles)
    .innerJoin(sources, belongsToSource)
    .where(matchesName);

  // A missing row or a nullish aggregate falls back to "now".
  return row?.publishedAt ?? new Date();
}
/**
 * Looks up the oldest `publishedAt` among the articles of the named source.
 *
 * Articles are inner-joined to their source on `articles.sourceId = sources.id`
 * and the source is selected by its `name`.
 *
 * @param db - Database handle used to run the query.
 * @param source - Source name, matched against `sources.name`.
 * @returns The minimum `articles.publishedAt` for that source, or the current
 *   date/time when the query yields no value (e.g. nothing stored for it yet).
 */
export async function getEarliestPublished(db: Database, source: string): Promise<Date> {
  const belongsToSource = eq(articles.sourceId, sources.id);
  const matchesName = eq(sources.name, source);

  // Only the first row of the aggregate result is consulted.
  const [row] = await db
    .select({ publishedAt: min(articles.publishedAt) })
    .from(articles)
    .innerJoin(sources, belongsToSource)
    .where(matchesName);

  // A missing row or a nullish aggregate falls back to "now".
  return row?.publishedAt ?? new Date();
}
+4 -4
View File
@@ -8,9 +8,9 @@ export const articleMetadataSchema = z.object({
author: z.string().optional(),
description: z.string().optional(),
image: z.url().optional(),
publishedAt: z.date().optional(),
publishedAt: z.string().optional(),
title: z.string().optional(),
updatedAt: z.date().optional(),
updatedAt: z.string().optional(),
url: z.url().optional(),
});
@@ -25,7 +25,7 @@ export const tokenStatisticsSchema = z.object({
export const articleSchema = z.object({
body: z.string().min(1),
categories: z.array(z.string()),
createdAt: z.date(),
createdAt: z.coerce.date(),
excerpt: z.string().optional(),
hash: z.string().min(1),
id: idSchema,
@@ -38,7 +38,7 @@ export const articleSchema = z.object({
sourceId: z.union([z.uuid(), z.string().min(1)]),
title: z.string().min(1),
tokenStatistics: tokenStatisticsSchema.optional(),
updatedAt: z.date().optional(),
updatedAt: z.coerce.date().optional(),
});
// API
+10
View File
@@ -39,5 +39,15 @@ export const updateSourceSchema = sourceSchema.pick({
url: true,
});
/**
 * Input schema for requesting a source's update dates.
 *
 * Accepts an object with a single `name` string of 1 to 255 characters.
 * NOTE(review): presumably this is the source name used to look the source up;
 * confirm against the handler that consumes this schema.
 */
export const getSourceUpdateDatesSchema = z.object({
name: z.string().min(1).max(255),
});
/**
 * Response schema carrying a source's update dates.
 *
 * Both `earliest` and `latest` go through `z.coerce.date()`, so the parsed
 * output holds `Date` values even when the input was not already a `Date`
 * (for example a serialized date string).
 */
export const getSourceUpdateDatesResponseSchema = z.object({
earliest: z.coerce.date(),
latest: z.coerce.date(),
});
// types
/** Type inferred from `sourceSchema`. */
export type Source = z.infer<typeof sourceSchema>;
/** Type inferred from `getSourceUpdateDatesResponseSchema`: `{ earliest: Date; latest: Date }`. */
export type SourceUpdateDates = z.infer<typeof getSourceUpdateDatesResponseSchema>;