feat(crawler): compute source update dates

This commit is contained in:
2025-11-25 01:05:39 +02:00
parent 72dfa53f80
commit 1d062f679b
16 changed files with 186 additions and 85 deletions
+38
View File
@@ -13,9 +13,12 @@ import {
TimestampRange,
TimestampRangeSchema,
} from "@basango/domain/models";
import logger from "@basango/logger";
import { format, fromUnixTime, getUnixTime, isMatch, parse } from "date-fns";
import type { RedisOptions } from "ioredis";
import { getSourceUpdateDates } from "./process/persistence";
/**
* Resolve a source configuration by its ID.
* @param id - The source ID
@@ -32,6 +35,41 @@ export const resolveSourceConfig = (id: string): AnySourceOptions => {
return source;
};
/**
 * Fill in a missing crawl date range from the update dates known for a source.
 *
 * Nothing happens when `settings.dateRange` is already set or when no
 * `settings.source` is provided. Otherwise the dates returned by
 * `getSourceUpdateDates` for the source are used to assign
 * `settings.dateRange` in place (Unix timestamps in seconds):
 * - `"backward"`: `start` is the current time, `end` is `dates.earliest`.
 * - `"forward"`: `start` is `dates.latest`, `end` is the current time.
 *
 * If the date required by the chosen direction is missing, `settings.dateRange`
 * is left undefined so no `NaN` timestamp is ever produced.
 *
 * @param settings - Crawl settings; `dateRange` is mutated in place.
 * @returns A promise that resolves once `settings` has been updated.
 */
export const resolveSourceUpdateDates = async (settings: {
  dateRange?: TimestampRange;
  direction: "forward" | "backward";
  source?: AnySourceOptions;
}) => {
  // An explicit range always wins, and without a source there is nothing to look up.
  if (settings.dateRange !== undefined || !settings.source) {
    return;
  }

  const { sourceId } = settings.source;
  const dates = await getSourceUpdateDates(sourceId);

  switch (settings.direction) {
    case "backward":
      // Mirror the "forward" guard: without an earliest date, getUnixTime
      // would yield NaN and the resulting range would be unusable.
      if (dates.earliest) {
        // NOTE(review): `start` (now) is later than `end` (earliest) here, and the
        // log message below says "start" while the earliest date is assigned to
        // `end` — confirm the consumer expects this ordering for backward crawls.
        settings.dateRange = {
          end: getUnixTime(dates.earliest),
          start: getUnixTime(new Date()),
        };
        logger.info(
          { dateRange: settings.dateRange, sourceId },
          "Set date range start from earliest published date",
        );
      }
      break;
    case "forward":
      if (dates.latest) {
        settings.dateRange = {
          end: getUnixTime(new Date()),
          start: getUnixTime(dates.latest),
        };
        logger.info(
          { dateRange: settings.dateRange, sourceId },
          "Set date range start from latest published date",
        );
      }
      break;
  }
};
/**
* Parse a Redis URL into RedisOptions.
* @param url - The Redis URL (e.g., "redis://:password@localhost:6379/0")