fix(app): remove legacy and scoped namespace alias

2025-11-14 11:56:34 +02:00
parent 085851527e
commit 4ec2a608b1
681 changed files with 655 additions and 36825 deletions
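The mechanical change repeated across the files below swaps the tsconfig-style `@/` alias for Node subpath imports under `#crawler/`. Subpath imports are resolved by Node itself from the package's `imports` field, so the alias no longer depends on bundler or tsconfig path configuration. A minimal sketch of the mapping this presupposes; the actual package.json is not part of this excerpt, so the target glob is an assumption:

```jsonc
// package.json (assumed shape, not shown in this commit)
{
  "imports": {
    // "#"-prefixed specifiers are reserved by Node for internal subpath imports
    "#crawler/*": "./src/*.ts"
  }
}
```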
+34 -16
@@ -1,19 +1,24 @@
 import { logger } from "@basango/logger";
-import { config, env } from "@/config";
-import { UnsupportedSourceKindError } from "@/errors";
-import { SyncHttpClient } from "@/http/http-client";
-import { QueueManager, createQueueManager } from "@/process/async/queue";
+import { config, env } from "#crawler/config";
+import { UnsupportedSourceKindError } from "#crawler/errors";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import { QueueManager, createQueueManager } from "#crawler/process/async/queue";
 import {
   DetailsTaskPayload,
   ListingTaskPayload,
   ProcessingTaskPayload,
-} from "@/process/async/schemas";
-import { createPersistors, resolveCrawlerConfig } from "@/process/crawler";
-import { HtmlCrawler } from "@/process/parsers/html";
-import { WordPressCrawler } from "@/process/parsers/wordpress";
-import { Article, HtmlSourceConfig, SourceKindSchema, WordPressSourceConfig } from "@/schema";
-import { createDateRange, formatDateRange, formatPageRange, resolveSourceConfig } from "@/utils";
+} from "#crawler/process/async/schemas";
+import { createPersistors, resolveCrawlerConfig } from "#crawler/process/crawler";
+import { HtmlCrawler } from "#crawler/process/parsers/html";
+import { WordPressCrawler } from "#crawler/process/parsers/wordpress";
+import { Article, HtmlSourceConfig, WordPressSourceConfig } from "#crawler/schema";
+import {
+  createDateRange,
+  formatDateRange,
+  formatPageRange,
+  resolveSourceConfig,
+} from "#crawler/utils";

 export const collectHtmlListing = async (
   payload: ListingTaskPayload,
@@ -107,7 +112,7 @@ export const collectArticle = async (
   });

   const persistors = createPersistors(source);
-  if (source.sourceKind === SourceKindSchema.enum.html) {
+  if (source.sourceKind === "html") {
     const crawler = new HtmlCrawler(settings, { persistors });
     const html = await crawler.crawl(payload.url);
@@ -118,7 +123,7 @@ export const collectArticle = async (
     } as ProcessingTaskPayload);
   }

-  if (source.sourceKind === SourceKindSchema.enum.wordpress) {
+  if (source.sourceKind === "wordpress") {
     const crawler = new WordPressCrawler(settings, { persistors });
     const article = await crawler.fetchOne(payload.data ?? {}, settings.dateRange);
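Reviewer note on the two guards above: with the `SourceKindSchema` import gone, they compare against bare string literals. Assuming the schema is a zod enum along these lines (it lives in `#crawler/schema` and is not shown in this diff), that stays type-safe, since TypeScript narrows the union and rejects misspelled literals:

```ts
import { z } from "zod";

// Assumed shape; the real SourceKindSchema is defined in #crawler/schema.
const SourceKindSchema = z.enum(["html", "wordpress"]);
type SourceKind = z.infer<typeof SourceKindSchema>; // "html" | "wordpress"

declare const sourceKind: SourceKind;
if (sourceKind === "html") {
  // narrowed to "html" here, exactly as SourceKindSchema.enum.html would narrow
}
```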
@@ -134,11 +139,24 @@ export const collectArticle = async (
 export const forwardForProcessing = async (payload: ProcessingTaskPayload): Promise<Article> => {
   logger.info({ article: payload.article.title }, "Ready for downstream processing");
-  const client = new SyncHttpClient(config.fetch.client);
-  const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT");
   try {
-    logger.info({ article: payload.article.title }, "Forwarding article to API");
-    await client.post(endpoint, { json: payload.article });
-    logger.info({ article: payload.article.title }, "Forwarded article to API");
+    const client = new SyncHttpClient(config.fetch.client);
+    const response = await client.post(env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"), {
+      headers: {
+        Authorization: `${env("BASANGO_CRAWLER_TOKEN")}`,
+      },
+      json: payload.article,
+    });
+
+    if (response.ok) {
+      const data = await response.json();
+      logger.info({ ...data }, "Article successfully forwarded to API");
+    }
   } catch (error) {
     logger.error({ error }, "Failed to forward article to API");
   }

   return payload.article;
 };
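Two behavioural notes on the rewritten `forwardForProcessing`: the Authorization header interpolates the token verbatim, so any scheme prefix such as `Bearer ` must be part of `BASANGO_CRAWLER_TOKEN` itself; and if `SyncHttpClient` resolves rather than throws on non-2xx responses (not visible in this diff), a rejected article produces no log at all, since only the `response.ok` branch and the catch block are handled while `payload.article` is returned either way. A hypothetical hardening sketch for that quiet path:

```ts
import { logger } from "@basango/logger";

// Hypothetical helper; assumes SyncHttpClient resolves on HTTP errors and that
// its response follows the fetch Response shape (ok, status, json()).
const logForwardResult = async (response: {
  ok: boolean;
  status: number;
  json(): Promise<unknown>;
}): Promise<void> => {
  if (response.ok) {
    const data = await response.json();
    logger.info({ data }, "Article successfully forwarded to API");
  } else {
    // Surface rejections instead of dropping them silently.
    logger.error({ status: response.status }, "API rejected forwarded article");
  }
};
```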
+3 -3
@@ -3,7 +3,7 @@ import { randomUUID } from "node:crypto";
 import { JobsOptions, Queue, QueueOptions } from "bullmq";
 import IORedis from "ioredis";
-import { FetchAsyncConfig, config } from "@/config";
+import { FetchAsyncConfig, config } from "#crawler/config";
 import {
   DetailsTaskPayload,
   DetailsTaskPayloadSchema,
@@ -11,8 +11,8 @@ import {
   ListingTaskPayloadSchema,
   ProcessingTaskPayload,
   ProcessingTaskPayloadSchema,
-} from "@/process/async/schemas";
-import { parseRedisUrl } from "@/utils";
+} from "#crawler/process/async/schemas";
+import { parseRedisUrl } from "#crawler/utils";

 export interface QueueBackend<T = unknown> {
   add: (name: string, data: T, opts?: JobsOptions) => Promise<{ id: string }>;
+1 -1
@@ -1,6 +1,6 @@
 import { z } from "zod";
-import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "@/schema";
+import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "#crawler/schema";

 export const ListingTaskPayloadSchema = z.object({
   category: z.string().optional(),
+4 -4
@@ -1,13 +1,13 @@
 import { logger } from "@basango/logger";
-import * as handlers from "@/process/async/handlers";
-import { createQueueManager } from "@/process/async/queue";
+import * as handlers from "#crawler/process/async/handlers";
+import { createQueueManager } from "#crawler/process/async/queue";
 import {
   DetailsTaskPayloadSchema,
   ListingTaskPayloadSchema,
   ProcessingTaskPayloadSchema,
-} from "@/process/async/schemas";
-import { CrawlingOptions } from "@/process/crawler";
+} from "#crawler/process/async/schemas";
+import { CrawlingOptions } from "#crawler/process/crawler";

 export const collectListing = async (payload: unknown): Promise<number> => {
   const data = ListingTaskPayloadSchema.parse(payload);
+2 -2
@@ -1,8 +1,8 @@
 import { QueueEvents, Worker } from "bullmq";
 import IORedis from "ioredis";
-import { QueueFactory, QueueManager } from "@/process/async/queue";
-import { collectArticle, collectListing, forwardForProcessing } from "@/process/async/tasks";
+import { QueueFactory, QueueManager } from "#crawler/process/async/queue";
+import { collectArticle, collectListing, forwardForProcessing } from "#crawler/process/async/tasks";

 export interface WorkerOptions {
   queueNames?: string[];
+4 -4
@@ -1,9 +1,9 @@
 import logger from "@basango/logger";
-import { FetchCrawlerConfig, config } from "@/config";
-import { JsonlPersistor, Persistor } from "@/process/persistence";
-import { AnySourceConfig } from "@/schema";
-import { createDateRange, createPageRange } from "@/utils";
+import { FetchCrawlerConfig, config } from "#crawler/config";
+import { JsonlPersistor, Persistor } from "#crawler/process/persistence";
+import { AnySourceConfig } from "#crawler/schema";
+import { createDateRange, createPageRange } from "#crawler/utils";

 export interface CrawlingOptions {
   sourceId: string;
+5 -5
@@ -1,10 +1,10 @@
 import { HTMLElement, parse as parseHtml } from "node-html-parser";
-import { FetchCrawlerConfig, config } from "@/config";
-import { SyncHttpClient } from "@/http/http-client";
-import { OpenGraph } from "@/http/open-graph";
-import type { Persistor } from "@/process/persistence";
-import { AnySourceConfig, Article } from "@/schema";
+import { FetchCrawlerConfig, config } from "#crawler/config";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import { OpenGraph } from "#crawler/http/open-graph";
+import type { Persistor } from "#crawler/process/persistence";
+import { AnySourceConfig, Article } from "#crawler/schema";

 export interface CrawlerOptions {
   persistors?: Persistor[];
+8 -8
@@ -3,17 +3,17 @@ import { getUnixTime, isMatch as isDateMatch, parse as parseDateFns } from "date
 import { HTMLElement } from "node-html-parser";
 import TurndownService from "turndown";
-import { FetchCrawlerConfig } from "@/config";
+import { FetchCrawlerConfig } from "#crawler/config";
 import {
   ArticleOutOfDateRangeError,
   InvalidArticleError,
   InvalidSourceSelectorsError,
   UnsupportedSourceKindError,
-} from "@/errors";
-import { BaseCrawler } from "@/process/parsers/base";
-import { Persistor, persist } from "@/process/persistence";
-import { Article, DateRange, HtmlSourceConfig } from "@/schema";
-import { createAbsoluteUrl, isTimestampInRange } from "@/utils";
+} from "#crawler/errors";
+import { BaseCrawler } from "#crawler/process/parsers/base";
+import { Persistor, persist } from "#crawler/process/persistence";
+import { Article, DateRange, HtmlSourceConfig } from "#crawler/schema";
+import { createAbsoluteUrl, isTimestampInRange } from "#crawler/utils";

 const md = new TurndownService({
   bulletListMarker: "-",
@@ -148,8 +148,8 @@ export class HtmlCrawler extends BaseCrawler {
         body,
         categories,
         link,
-        source: this.source.sourceId,
-        timestamp,
+        publishedAt: new Date(timestamp * 1000),
+        sourceId: this.source.sourceId,
         title,
       },
       link,
+8 -8
@@ -1,16 +1,16 @@
 import { logger } from "@basango/logger";
 import TurndownService from "turndown";
-import { FetchCrawlerConfig } from "@/config";
+import { FetchCrawlerConfig } from "#crawler/config";
 import {
   ArticleOutOfDateRangeError,
   InvalidArticleError,
   UnsupportedSourceKindError,
-} from "@/errors";
-import { BaseCrawler } from "@/process/parsers/base";
-import { Persistor, persist } from "@/process/persistence";
-import { Article, DateRange, PageRange, WordPressSourceConfig } from "@/schema";
-import { isTimestampInRange } from "@/utils";
+} from "#crawler/errors";
+import { BaseCrawler } from "#crawler/process/parsers/base";
+import { Persistor, persist } from "#crawler/process/persistence";
+import { Article, DateRange, PageRange, WordPressSourceConfig } from "#crawler/schema";
+import { isTimestampInRange } from "#crawler/utils";

 const md = new TurndownService({
   bulletListMarker: "-",
@@ -148,8 +148,8 @@ export class WordPressCrawler extends BaseCrawler {
         body,
         categories,
         link,
-        source: this.source.sourceId,
-        timestamp,
+        publishedAt: new Date(timestamp * 1000),
+        sourceId: this.source.sourceId,
         title,
       },
       link,
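The second hunk in each parser applies the same `Article` shape change: `source` is renamed to `sourceId`, and the raw unix `timestamp` is replaced with a `publishedAt` Date. A before/after of just the affected fields, inferred from this diff alone (the full `ArticleSchema` in `#crawler/schema` is not shown):

```ts
// Inferred from the removed lines
interface ArticleFieldsBefore {
  source: string;    // crawler source id
  timestamp: number; // unix seconds
}

// Inferred from the added lines
interface ArticleFieldsAfter {
  sourceId: string;  // renamed from `source`
  publishedAt: Date; // new Date(timestamp * 1000), since Date expects milliseconds
}
```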
+1 -8
@@ -4,8 +4,7 @@ import path from "node:path";
 import { md5 } from "@basango/encryption";
 import logger from "@basango/logger";
-import { Article } from "@/schema";
-import { countTokens } from "@/utils";
+import { Article } from "#crawler/schema";

 export interface Persistor {
   persist(record: Article): Promise<void> | void;
@@ -47,12 +46,6 @@ export const persist = async (payload: Article, persistors: Persistor[]): Promis
   const article = {
     ...data,
     hash: md5(data.link),
-    tokenStatistics: {
-      body: countTokens(data.body),
-      categories: countTokens(data.categories.join(",")),
-      excerpt: countTokens(data.body.substring(0, 200)),
-      title: countTokens(data.title),
-    },
   } as Article;

   for (const persistor of persistors) {
+4 -4
@@ -5,10 +5,10 @@ import {
   closePersistors,
   createPersistors,
   resolveCrawlerConfig,
-} from "@/process/crawler";
-import { HtmlCrawler } from "@/process/parsers/html";
-import { WordPressCrawler } from "@/process/parsers/wordpress";
-import { resolveSourceConfig } from "@/utils";
+} from "#crawler/process/crawler";
+import { HtmlCrawler } from "#crawler/process/parsers/html";
+import { WordPressCrawler } from "#crawler/process/parsers/wordpress";
+import { resolveSourceConfig } from "#crawler/utils";

 export const runSyncCrawl = async (options: CrawlingOptions): Promise<void> => {
   const source = resolveSourceConfig(options.sourceId);