feat(domain): centralize data definition

This commit is contained in:
2025-11-17 00:04:27 +02:00
parent e7585aa76c
commit f39635e04f
96 changed files with 3474 additions and 1167 deletions
+79
View File
@@ -0,0 +1,79 @@
#! /usr/bin/env bun
import fs from "node:fs";
import path from "node:path";
import { createInterface } from "node:readline";
import { parseArgs } from "node:util";
import type { Article } from "@basango/domain/models";
import { logger } from "@basango/logger";
import { config } from "#crawler/config";
import { forward } from "#crawler/process/persistence";
const USAGE = `
Usage: bun run crawler:sync -- --sourceId <id>
`;
const parseCliArgs = (): { sourceId?: string } => {
const { values } = parseArgs({
options: {
sourceId: { type: "string" },
},
});
return values as { sourceId?: string };
};
const main = async (): Promise<void> => {
const { sourceId } = parseCliArgs();
if (!sourceId) {
console.log(USAGE);
process.exitCode = 1;
return;
}
const filePath = path.join(config.paths.data, `${sourceId}.jsonl`);
if (!fs.existsSync(filePath)) {
logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found");
process.exitCode = 1;
return;
}
const stat = fs.statSync(filePath);
if (stat.size === 0) {
logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL is empty");
process.exitCode = 1;
return;
}
logger.info({ filePath, sourceId }, "Syncing articles from JSONL to backend");
const stream = fs.createReadStream(filePath, { encoding: "utf-8" });
const rl = createInterface({ crlfDelay: Infinity, input: stream });
let count = 0;
try {
for await (const raw of rl) {
const line = raw.trim();
if (!line) continue;
try {
const article = JSON.parse(line) as Article & { publishedAt: string };
await forward({
...article,
publishedAt: new Date(article.publishedAt),
});
count += 1;
} catch (error) {
logger.error({ error, linePreview: line.slice(0, 100) }, "Invalid JSONL line");
}
}
} finally {
rl.close();
}
logger.info({ forwarded: count, sourceId }, "Sync completed");
};
void main();