feat(crawler): compute source update dates
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import { DEFAULT_CATEGORY_SHARES_LIMIT, DEFAULT_TIMEZONE } from "@basango/domain/constants";
|
||||
import { ID, Publication, Publications } from "@basango/domain/models";
|
||||
import { eq, sql } from "drizzle-orm";
|
||||
import { eq, max, min, sql } from "drizzle-orm";
|
||||
import * as uuid from "uuid";
|
||||
|
||||
import { Database } from "#db/client";
|
||||
@@ -161,3 +161,27 @@ export async function getSourceCategoryShares(
|
||||
|
||||
return { items: data.rows, total: data.rowCount ?? 0 };
|
||||
}
|
||||
|
||||
/**
 * Looks up the most recent `publishedAt` among the articles of a source.
 *
 * Articles are inner-joined to `sources` on `articles.sourceId = sources.id`
 * and filtered by `sources.name`.
 *
 * @param db - Database handle used to run the query.
 * @param source - Value compared against `sources.name`.
 * @returns The `max(articles.publishedAt)` value, or the current date/time
 *   when the query produces no row or a null/undefined aggregate.
 */
export async function getLatestPublished(db: Database, source: string): Promise<Date> {
  const joinOnSource = eq(articles.sourceId, sources.id);
  const matchesSourceName = eq(sources.name, source);

  const rows = await db
    .select({ publishedAt: max(articles.publishedAt) })
    .from(articles)
    .innerJoin(sources, joinOnSource)
    .where(matchesSourceName);

  const latest = rows[0]?.publishedAt;
  if (latest == null) {
    // Nothing to report for this source: fall back to "now".
    return new Date();
  }

  return latest;
}
|
||||
|
||||
/**
 * Looks up the oldest `publishedAt` among the articles of a source.
 *
 * Runs `min(articles.publishedAt)` over articles inner-joined to `sources`
 * (on `articles.sourceId = sources.id`) where `sources.name` equals `source`.
 *
 * @param db - Database handle used to run the query.
 * @param source - Value compared against `sources.name`.
 * @returns The aggregated date, or the current date/time when the query
 *   produces no row or a null/undefined aggregate.
 */
export async function getEarliestPublished(db: Database, source: string): Promise<Date> {
  const projection = { publishedAt: min(articles.publishedAt) };

  const query = db
    .select(projection)
    .from(articles)
    .innerJoin(sources, eq(articles.sourceId, sources.id))
    .where(eq(sources.name, source));

  const records = await query;
  const first = records[0];

  // Same fallback for "no row" and "null aggregate": the current date/time.
  return first?.publishedAt ?? new Date();
}
|
||||
|
||||
@@ -8,9 +8,9 @@ export const articleMetadataSchema = z.object({
|
||||
author: z.string().optional(),
|
||||
description: z.string().optional(),
|
||||
image: z.url().optional(),
|
||||
publishedAt: z.date().optional(),
|
||||
publishedAt: z.string().optional(),
|
||||
title: z.string().optional(),
|
||||
updatedAt: z.date().optional(),
|
||||
updatedAt: z.string().optional(),
|
||||
url: z.url().optional(),
|
||||
});
|
||||
|
||||
@@ -25,7 +25,7 @@ export const tokenStatisticsSchema = z.object({
|
||||
export const articleSchema = z.object({
|
||||
body: z.string().min(1),
|
||||
categories: z.array(z.string()),
|
||||
createdAt: z.date(),
|
||||
createdAt: z.coerce.date(),
|
||||
excerpt: z.string().optional(),
|
||||
hash: z.string().min(1),
|
||||
id: idSchema,
|
||||
@@ -38,7 +38,7 @@ export const articleSchema = z.object({
|
||||
sourceId: z.union([z.uuid(), z.string().min(1)]),
|
||||
title: z.string().min(1),
|
||||
tokenStatistics: tokenStatisticsSchema.optional(),
|
||||
updatedAt: z.date().optional(),
|
||||
updatedAt: z.coerce.date().optional(),
|
||||
});
|
||||
|
||||
// API
|
||||
|
||||
@@ -39,5 +39,15 @@ export const updateSourceSchema = sourceSchema.pick({
|
||||
url: true,
|
||||
});
|
||||
|
||||
/**
 * Schema for an object with a single `name` field: a string constrained by
 * `.min(1).max(255)`.
 *
 * NOTE(review): presumably the request input for the source update-dates
 * lookup, with `name` being the source name — confirm against the caller.
 */
export const getSourceUpdateDatesSchema = z.object({
|
||||
name: z.string().min(1).max(255),
|
||||
});
|
||||
|
||||
/**
 * Schema for an object with two fields, `earliest` and `latest`, each declared
 * with `z.coerce.date()`.
 *
 * NOTE(review): presumably the response shape for the source update-dates
 * lookup — confirm against the endpoint that returns it.
 */
export const getSourceUpdateDatesResponseSchema = z.object({
|
||||
earliest: z.coerce.date(),
|
||||
latest: z.coerce.date(),
|
||||
});
|
||||
|
||||
// types
|
||||
export type Source = z.infer<typeof sourceSchema>;
|
||||
export type SourceUpdateDates = z.infer<typeof getSourceUpdateDatesResponseSchema>;
|
||||
|
||||
Reference in New Issue
Block a user