feat(crawler): fix async crawling

This commit is contained in:
2025-11-09 01:01:07 +02:00
parent e8c0f0422b
commit 2b5482e9f5
58 changed files with 243 additions and 169 deletions
+1
View File
@@ -1,4 +1,5 @@
import { logger } from "@basango/logger";
import { runSyncCrawl } from "@/process/sync/tasks";
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
+1
View File
@@ -1,4 +1,5 @@
import { logger } from "@basango/logger";
import { scheduleAsyncCrawl } from "@/process/async/tasks";
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
+2 -2
View File
@@ -1,4 +1,5 @@
import { parseArgs } from "node:util";
import { CrawlingOptions } from "@/process/crawler";
interface WorkerCliOptions {
@@ -6,13 +7,12 @@ interface WorkerCliOptions {
}
export const CRAWLING_USAGE = `
-Usage: bun run crawl:[async|sync] -- --sourceId <id> [options]
+Usage: bun run crawler:[async|sync] -- --sourceId <id> [options]
Options:
--pageRange <range> Optional page range filter (e.g. 1:5)
--dateRange <range> Optional date range filter (e.g. 2024-01-01:2024-01-31)
--category <slug> Optional category to crawl
-h, --help Show this message
`;
export const parseWorkerCliArgs = (): WorkerCliOptions => {
+1 -3
View File
@@ -8,9 +8,7 @@ const main = async (): Promise<void> => {
const options = parseWorkerCliArgs();
const manager = createQueueManager();
-const queues = options.queue?.length
-? options.queue.map((name) => manager.queueName(name))
-: undefined;
+const queues = options.queue?.length ? options.queue : undefined;
const handle = startWorker({
queueManager: manager,