[crawler]: stabilize import

This commit is contained in:
2025-11-02 21:26:07 +02:00
parent 07bb3992ad
commit c53c0b576b
51 changed files with 441 additions and 685 deletions
+2 -2
View File
@@ -21,7 +21,7 @@ jobs:
functional:
name: phpunit
runs-on: ubuntu-latest
# services:
# process:
# mysql:
# image: mariadb:10.11.11
# env:
@@ -68,4 +68,4 @@ jobs:
run: composer app:test
env:
APP_ENV: test
# DATABASE_URL: mysql://root:root@127.0.0.1:${{ job.services.mysql.ports['3306'] }}/app_test
# DATABASE_URL: mysql://root:root@127.0.0.1:${{ job.process.mysql.ports['3306'] }}/app_test
+4 -5
View File
@@ -1,20 +1,19 @@
{
"name": "@basango/crawler",
"version": "0.1.0",
"private": true,
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "tsc -b",
"test": "vitest --run",
"lint": "biome check .",
"lint:fix": "biome check --write .",
"format": "biome format --write .",
"queue": "bun run src/scripts/queue.ts",
"worker": "bun run src/scripts/worker.ts"
},
"dependencies": {
"@basango/logger": "workspace:*",
"bullmq": "^4.17.0",
"date-fns": "^3.6.0",
"date-fns": "catalog:",
"ioredis": "^5.3.2",
"tiktoken": "^1.0.14",
"zod": "catalog:"
@@ -3,9 +3,9 @@ import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { loadConfig } from "@/config";
import { loadConfig } from "./config";
import { resolveConfigPath } from "./schema";
import { resolveConfigPath } from "@/utils";
describe("loadConfig", () => {
it("parses json configuration and ensures directories", () => {
@@ -32,7 +32,7 @@ describe("loadConfig", () => {
),
);
const config = loadConfig({ configPath });
const config = loadConfig({ path: configPath });
expect(config.fetch.client.timeout).toBe(10);
expect(fs.existsSync(paths.data)).toBe(true);
@@ -74,7 +74,7 @@ describe("loadConfig", () => {
),
);
const config = loadConfig({ configPath: basePath, env: "production" });
const config = loadConfig({ path: basePath, env: "production" });
expect(config.logging.level).toBe("DEBUG");
});
@@ -1 +0,0 @@
export * from "../config";
@@ -1,6 +1,5 @@
import { describe, expect, it } from "vitest";
import { createQueueManager, createQueueSettings } from "./queue";
import { createQueueManager, createQueueSettings } from "@/process/async/queue";
class InMemoryQueue {
public jobs: Array<{ name: string; data: unknown }> = [];
@@ -1 +0,0 @@
export * from "@basango/crawler/services/async/queue";
@@ -1,14 +1,15 @@
import { describe, expect, it } from "vitest";
import {
PageRangeSchema,
PageRangeSpecSchema,
PipelineConfigSchema,
} from "@/schema";
import {
createDateRange,
formatDateRange,
isTimestampInRange,
PageRangeSpecSchema,
PageRangeSchema,
schemaToJSON,
} from "./schema";
} from "@/utils";
describe("schema helpers", () => {
it("creates date range from spec", () => {
@@ -30,6 +31,7 @@ describe("schema helpers", () => {
it("produces json schema", () => {
const json = schemaToJSON(PipelineConfigSchema);
// @ts-ignore
expect(json.type).toBe("object");
});
});
@@ -1 +0,0 @@
export * from "../schema";
@@ -1,11 +1,10 @@
import { describe, expect, it, vi } from "vitest";
import { QueueManager } from "../process/async/queue";
import {
scheduleAsyncCrawl,
registerCrawlerTaskHandlers,
collectListing,
} from "./tasks";
import { QueueManager } from "./queue";
registerCrawlerTaskHandlers,
scheduleAsyncCrawl,
} from "@/process/async/tasks";
describe("Async tasks", () => {
it("schedules crawl with provided manager", async () => {
@@ -1 +0,0 @@
export * from "@basango/crawler/services/async/tasks";
+18 -48
View File
@@ -1,27 +1,22 @@
import * as fs from "node:fs";
import * as path from "node:path";
import fs from "node:fs";
import path from "node:path";
import {logger} from "@basango/logger";
import { logger } from "@basango/logger";
import { PipelineConfig, PipelineConfigSchema } from "@/schema";
import {
ensureDirectories,
mergePipelineConfig,
PipelineConfig,
PipelineConfigSchema,
resolveConfigPath,
resolveProjectPaths,
} from "./schema";
import {ensureDirectories} from "./utils";
} from "@/utils";
import { DEFAULT_CONFIG_FILES } from "@/constants";
export interface LoadConfigOptions {
configPath?: string;
path?: string;
env?: string;
}
const DEFAULT_CONFIG_FILES = [
path.join(process.cwd(), "config", "pipeline.json"),
path.join(process.cwd(), "pipeline.json"),
];
const readJsonFile = (filePath: string): unknown => {
const contents = fs.readFileSync(filePath, "utf-8");
return contents.trim() === "" ? {} : JSON.parse(contents);
@@ -38,7 +33,7 @@ const locateConfigFile = (explicit?: string): string => {
}
}
return DEFAULT_CONFIG_FILES[0];
return DEFAULT_CONFIG_FILES[0]!;
};
const readPipelineConfig = (configPath: string): PipelineConfig => {
@@ -71,7 +66,7 @@ const applyEnvironmentOverride = (
};
export const loadConfig = (options: LoadConfigOptions = {}): PipelineConfig => {
const basePath = locateConfigFile(options.configPath);
const basePath = locateConfigFile(options.path);
const config = applyEnvironmentOverride(
readPipelineConfig(basePath),
basePath,
@@ -88,12 +83,12 @@ export const dumpConfig = (
): void => {
const destination = targetPath ?? locateConfigFile();
const normalized = PipelineConfigSchema.parse(config);
fs.mkdirSync(path.dirname(destination), {recursive: true});
fs.mkdirSync(path.dirname(destination), { recursive: true });
fs.writeFileSync(destination, JSON.stringify(normalized, null, 2));
};
export interface PipelineConfigManagerOptions {
configPath?: string;
path?: string;
env?: string;
autoLoad?: boolean;
}
@@ -106,12 +101,12 @@ export class PipelineConfigManager {
private cache?: PipelineConfig;
constructor(options: PipelineConfigManagerOptions = {}) {
this.explicitPath = options.configPath;
this.explicitPath = options.path;
this.defaultEnv = options.env ?? "development";
if (options.autoLoad !== false) {
this.cache = loadConfig({
configPath: this.explicitPath,
path: this.explicitPath,
env: this.defaultEnv,
});
}
@@ -122,14 +117,14 @@ export class PipelineConfigManager {
if (resolvedEnv !== this.defaultEnv) {
return loadConfig({
configPath: this.explicitPath,
path: this.explicitPath,
env: resolvedEnv,
});
}
if (!this.cache) {
this.cache = loadConfig({
configPath: this.explicitPath,
path: this.explicitPath,
env: resolvedEnv,
});
}
@@ -137,29 +132,9 @@ export class PipelineConfigManager {
return this.cache;
}
reload(env?: string): PipelineConfig {
const resolvedEnv = env ?? this.defaultEnv;
const config = loadConfig({
configPath: this.explicitPath,
env: resolvedEnv,
});
if (resolvedEnv === this.defaultEnv) {
this.cache = config;
}
return config;
}
ensureDirectories(config?: PipelineConfig): PipelineConfig {
const pipeline = config ?? this.get();
ensureDirectories(pipeline.paths);
return pipeline;
}
setupLogging(config?: PipelineConfig): void {
const pipeline = config ?? this.get();
this.ensureDirectories(pipeline);
ensureDirectories(pipeline.paths);
const level = pipeline.logging.level.toLowerCase();
process.env.LOG_LEVEL = level;
@@ -168,15 +143,10 @@ export class PipelineConfigManager {
if (pipeline.logging.file_logging) {
const logDir = pipeline.paths.logs;
const destination = path.join(logDir, pipeline.logging.log_file);
fs.mkdirSync(path.dirname(destination), {recursive: true});
fs.mkdirSync(path.dirname(destination), { recursive: true });
if (!fs.existsSync(destination)) {
fs.writeFileSync(destination, "");
}
}
}
resolveConfigPath(env?: string): string {
const base = locateConfigFile(this.explicitPath);
return resolveConfigPath(base, env ?? this.defaultEnv);
}
}
+7
View File
@@ -0,0 +1,7 @@
import path from "node:path";
/** Default date-fns pattern for date-range specs: four-digit year, two-digit month, two-digit day. */
export const DEFAULT_DATE_FORMAT = "yyyy-LL-dd";
/**
 * Candidate locations of the pipeline configuration file.
 *
 * Both paths are built from `process.cwd()` when this module is first
 * evaluated, so a later change of working directory is not reflected here.
 */
export const DEFAULT_CONFIG_FILES = [
path.join(process.cwd(), "config", "pipeline.json"),
path.join(process.cwd(), "pipeline.json"),
];
@@ -0,0 +1,35 @@
/**
 * Supplies User-Agent header values.
 *
 * With rotation enabled (the default) each call to `get()` draws one entry of
 * the built-in pool at random; with rotation disabled the fixed fallback
 * string is returned instead.
 */
export class UserAgents {
  /** Built-in pool of browser-like User-Agent strings used when rotating. */
  private static readonly USER_AGENTS: string[] = [
    "Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
    "Mozilla/50.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
    "Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0",
    "Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4",
    "Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7",
    "Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2",
  ];

  /** When true, `get()` picks from the pool; when false it returns `fallback`. */
  private readonly rotate: boolean;
  /** Agent string returned whenever rotation is switched off. */
  private readonly fallback: string;

  /**
   * @param rotate - Enable random selection from the built-in pool.
   * @param fallback - Agent string to use when `rotate` is false.
   */
  constructor(
    rotate: boolean = true,
    fallback: string = "Basango/0.1 (+https://github.com/bernard-ng/basango)",
  ) {
    this.fallback = fallback;
    this.rotate = rotate;
  }

  /**
   * Returns the fixed `facebookexternalhit/1.1` agent string.
   * NOTE(review): presumably used when fetching Open Graph metadata — confirm with callers.
   */
  static og(): string {
    return "facebookexternalhit/1.1";
  }

  /** Returns the agent string to send: a random pool entry, or the fallback. */
  get(): string {
    if (this.rotate) {
      const pool = UserAgents.USER_AGENTS;
      const pick = Math.floor(Math.random() * pool.length);
      // The pool is a non-empty literal, so the index is always in bounds.
      return pool[pick]!;
    }
    return this.fallback;
  }
}
-4
View File
@@ -1,4 +0,0 @@
export * from "./config";
export * from "./schema";
export * from "./utils";
export * from "./services/crawler";
@@ -11,8 +11,8 @@ import {
ListingTaskPayloadSchema,
ProcessedTaskPayload,
ProcessedTaskPayloadSchema,
} from "./schemas";
import { parseRedisUrl } from "@crawler/utils";
} from "@/process/async/schemas";
import { parseRedisUrl } from "@/utils";
const QueueSettingsSchema = z.object({
redis_url: z
@@ -1,10 +1,5 @@
import { z } from "zod";
import {
AnySourceConfig,
DateRangeSchema,
PageRangeSchema,
} from "@crawler/schema";
import { AnySourceConfig, DateRangeSchema, PageRangeSchema } from "@/schema";
export const ListingTaskPayloadSchema = z.object({
source_id: z.string(),
@@ -7,13 +7,13 @@ import {
ListingTaskPayloadSchema,
ProcessedTaskPayload,
ProcessedTaskPayloadSchema,
} from "./schemas";
} from "@/process/async/schemas";
import {
createQueueManager,
QueueManager,
QueueSettings,
QueueSettingsInput,
} from "./queue";
} from "@/process/async/queue";
export interface CrawlerTaskHandlers {
collectListing: (payload: ListingTaskPayload) => Promise<number> | number;
@@ -1,5 +1,5 @@
import IORedis from "ioredis";
import { Worker, QueueEvents } from "bullmq";
import { QueueEvents, Worker } from "bullmq";
import {
createQueueManager,
@@ -7,8 +7,12 @@ import {
QueueManager,
QueueSettings,
QueueSettingsInput,
} from "./queue";
import { collectArticle, collectListing, forwardForProcessing } from "./tasks";
} from "@/process/async/queue";
import {
collectArticle,
collectListing,
forwardForProcessing,
} from "@/process/async/tasks";
export interface WorkerOptions {
queueNames?: string[];
@@ -79,6 +83,7 @@ export const startWorker = (options: WorkerOptions = {}): WorkerHandle => {
close: async () => {
await Promise.all(workers.map((worker) => worker.close()));
await Promise.all(events.map((event) => event.close()));
if (!options.queueManager) {
await manager.close();
}
+27 -161
View File
@@ -1,20 +1,14 @@
import * as path from "node:path";
import {format as formatDate, getUnixTime, isMatch, parse} from "date-fns";
import {z} from "zod";
import {createSourcesConfig, resolveProjectPaths} from "@/utils";
export const UpdateDirectionSchema = z.enum(["forward", "backward"]);
export type UpdateDirection = z.infer<typeof UpdateDirectionSchema>;
export const SourceKindSchema = z.enum(["wordpress", "html"]);
export type SourceKind = z.infer<typeof SourceKindSchema>;
export const SourceDateSchema = z.object({
format: z.string().default("yyyy-LL-dd HH:mm"),
pattern: z.string().nullable().optional(),
replacement: z.string().nullable().optional(),
});
export type SourceDate = z.infer<typeof SourceDateSchema>;
export const SourceSelectorsSchema = z.object({
articles: z.string().optional().nullable(),
@@ -25,7 +19,6 @@ export const SourceSelectorsSchema = z.object({
article_categories: z.string().optional().nullable(),
pagination: z.string().default("ul.pagination > li a"),
});
export type SourceSelectors = z.infer<typeof SourceSelectorsSchema>;
const BaseSourceSchema = z.object({
source_id: z.string(),
@@ -53,10 +46,6 @@ export const WordPressSourceConfigSchema = BaseSourceSchema.extend({
),
});
export type HtmlSourceConfig = z.infer<typeof HtmlSourceConfigSchema>;
export type WordPressSourceConfig = z.infer<typeof WordPressSourceConfigSchema>;
export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig;
export const DateRangeSchema = z
.object({
start: z.number().int(),
@@ -77,8 +66,6 @@ export const DateRangeSchema = z
}
});
export type DateRange = z.infer<typeof DateRangeSchema>;
export const PageRangeSchema = z
.object({
start: z.number().int().min(0),
@@ -93,100 +80,31 @@ export const PageRangeSchema = z
}
});
export type PageRange = z.infer<typeof PageRangeSchema>;
export const PageRangeSpecSchema = z
.string()
.regex(/^[0-9]+:[0-9]+$/, "Invalid page range format. Use start:end")
.transform((spec) => {
const [startText, endText] = spec.split(":");
return {
start: Number.parseInt(startText, 10),
end: Number.parseInt(endText, 10),
start: Number.parseInt(String(startText), 10),
end: Number.parseInt(String(endText), 10),
};
});
const defaultDateFormat = "yyyy-LL-dd";
export const DateRangeSpecSchema = z
.string()
.regex(/.+:.+/, "Expected start:end format")
.transform((spec) => {
const [startRaw, endRaw] = spec.split(":");
return {startRaw, endRaw};
return {startRaw: String(startRaw), endRaw: String(endRaw)};
});
const parseDate = (value: string, format: string): Date => {
if (!isMatch(value, format)) {
throw new Error(`Invalid date '${value}' for format '${format}'`);
}
const parsed = parse(value, format, new Date());
if (Number.isNaN(parsed.getTime())) {
throw new Error(`Invalid date '${value}' for format '${format}'`);
}
return parsed;
};
export interface CreateDateRangeOptions {
format?: string;
separator?: string;
}
export const createDateRange = (
spec: string,
options: CreateDateRangeOptions = {},
): DateRange => {
const {format = defaultDateFormat, separator = ":"} = options;
if (!separator) {
throw new Error("Separator cannot be empty");
}
const normalized = spec.replace(separator, ":");
const parsedSpec = DateRangeSpecSchema.parse(normalized);
const startDate = parseDate(parsedSpec.startRaw, format);
const endDate = parseDate(parsedSpec.endRaw, format);
const range = {
start: getUnixTime(startDate),
end: getUnixTime(endDate),
};
return DateRangeSchema.parse(range);
};
export const formatDateRange = (
range: DateRange,
fmt = defaultDateFormat,
): string => {
const start = formatDate(new Date(range.start * 1000), fmt);
const end = formatDate(new Date(range.end * 1000), fmt);
return `${start}:${end}`;
};
export const isTimestampInRange = (
range: DateRange,
timestamp: number,
): boolean => {
return range.start <= timestamp && timestamp <= range.end;
};
export const ProjectPathsSchema = z.object({
root: z.string(),
data: z.string(),
logs: z.string(),
configs: z.string(),
});
export type ProjectPaths = z.infer<typeof ProjectPathsSchema>;
export const resolveProjectPaths = (rootDir: string): ProjectPaths => {
return ProjectPathsSchema.parse({
root: rootDir,
data: path.join(rootDir, "data", "dataset"),
logs: path.join(rootDir, "data", "logs"),
configs: path.join(rootDir, "config"),
});
};
export const LoggingConfigSchema = z.object({
level: z.string().default("INFO"),
@@ -203,7 +121,6 @@ export const LoggingConfigSchema = z.object({
.default(10 * 1024 * 1024),
backup_count: z.number().int().nonnegative().default(5),
});
export type LoggingConfig = z.infer<typeof LoggingConfigSchema>;
export const ClientConfigSchema = z.object({
timeout: z.number().positive().default(20),
@@ -234,35 +151,16 @@ export const CrawlerConfigSchema = z.object({
direction: UpdateDirectionSchema.default("forward"),
});
export type ClientConfig = z.infer<typeof ClientConfigSchema>;
export type CrawlerConfig = z.infer<typeof CrawlerConfigSchema> & {
source?: AnySourceConfig;
};
export const FetchConfigSchema = z.object({
client: ClientConfigSchema.default(ClientConfigSchema.parse({})),
crawler: CrawlerConfigSchema.default(CrawlerConfigSchema.parse({})),
});
export type FetchConfig = z.infer<typeof FetchConfigSchema>;
const SourcesConfigSchema = z.object({
export const SourcesConfigSchema = z.object({
html: z.array(HtmlSourceConfigSchema).default([]),
wordpress: z.array(WordPressSourceConfigSchema).default([]),
});
export type SourcesConfig = z.infer<typeof SourcesConfigSchema> & {
find: (sourceId: string) => AnySourceConfig | undefined;
};
export const createSourcesConfig = (input: unknown): SourcesConfig => {
const parsed = SourcesConfigSchema.parse(input);
const resolver = (sourceId: string) =>
[...parsed.html, ...parsed.wordpress].find(
(source) => source.source_id === sourceId,
);
return Object.assign({find: resolver}, parsed);
};
export const PipelineConfigSchema = z.object({
paths: ProjectPathsSchema.default(resolveProjectPaths(process.cwd())),
logging: LoggingConfigSchema.default(LoggingConfigSchema.parse({})),
@@ -271,61 +169,29 @@ export const PipelineConfigSchema = z.object({
.union([SourcesConfigSchema, z.undefined()])
.transform((value) => createSourcesConfig(value ?? {})),
});
export type PipelineConfig = z.infer<typeof PipelineConfigSchema>
export const mergePipelineConfig = (
base: PipelineConfig,
overrides: Partial<PipelineConfig>,
): PipelineConfig => {
const paths = overrides.paths ?? base.paths;
const logging = {...base.logging, ...(overrides.logging ?? {})};
const fetch = {
client: {...base.fetch.client, ...(overrides.fetch?.client ?? {})},
crawler: {...base.fetch.crawler, ...(overrides.fetch?.crawler ?? {})},
};
export type UpdateDirection = z.infer<typeof UpdateDirectionSchema>;
export type SourceKind = z.infer<typeof SourceKindSchema>;
export type SourceDate = z.infer<typeof SourceDateSchema>;
export type SourceSelectors = z.infer<typeof SourceSelectorsSchema>;
export type HtmlSourceConfig = z.infer<typeof HtmlSourceConfigSchema>;
export type WordPressSourceConfig = z.infer<typeof WordPressSourceConfigSchema>;
export type AnySourceConfig = HtmlSourceConfig | WordPressSourceConfig;
export type DateRange = z.infer<typeof DateRangeSchema>;
export type PageRange = z.infer<typeof PageRangeSchema>;
const sources = createSourcesConfig({
html: overrides.sources?.html ?? base.sources.html,
wordpress: overrides.sources?.wordpress ?? base.sources.wordpress,
});
return {
paths,
logging,
fetch,
sources,
};
export interface CreateDateRangeOptions {
format?: string;
separator?: string;
}
export type SourcesConfig = z.infer<typeof SourcesConfigSchema> & {
find: (sourceId: string) => AnySourceConfig | undefined;
};
export const resolveConfigPath = (basePath: string, env?: string): string => {
if (!env || env === "development") {
return basePath;
}
const ext = path.extname(basePath);
const withoutExt = basePath.slice(0, basePath.length - ext.length);
return `${withoutExt}.${env}${ext}`;
export type ProjectPaths = z.infer<typeof ProjectPathsSchema>;
export type LoggingConfig = z.infer<typeof LoggingConfigSchema>;
export type ClientConfig = z.infer<typeof ClientConfigSchema>;
export type CrawlerConfig = z.infer<typeof CrawlerConfigSchema> & {
source?: AnySourceConfig;
};
export const schemaToJSON = <T extends z.ZodTypeAny>(schema: T): unknown => {
const toJSONSchema = (z as any).toJSONSchema as
| ((s: z.ZodTypeAny, opts?: Record<string, unknown>) => unknown)
| undefined;
if (typeof toJSONSchema === "function") {
try {
// target can be "draft-2020-12" | "draft-7" | "draft-4" | "openapi-3.0"
return toJSONSchema(schema, {target: "draft-2020-12", unrepresentable: "any"});
} catch {
// fall through to minimal mapping
}
}
if (schema instanceof z.ZodObject) return {type: "object"};
if (schema instanceof z.ZodArray) return {type: "array"};
if (schema instanceof z.ZodString) return {type: "string"};
if (schema instanceof z.ZodNumber) return {type: "number"};
if (schema instanceof z.ZodBoolean) return {type: "boolean"};
return {type: "unknown"};
};
export type FetchConfig = z.infer<typeof FetchConfigSchema>;
export type PipelineConfig = z.infer<typeof PipelineConfigSchema>;
+38 -44
View File
@@ -1,33 +1,42 @@
import {parseArgs} from "node:util";
import { parseArgs } from "node:util";
import {logger} from "@basango/logger";
import {PipelineConfigManager} from "@crawler/config";
import {createQueueSettings} from "@crawler/services/async/queue";
import {scheduleAsyncCrawl} from "@crawler/services/async/tasks";
import { logger } from "@basango/logger";
import { PipelineConfigManager } from "@/config";
import { createQueueSettings } from "@/process/async/queue";
import { scheduleAsyncCrawl } from "@/process/async/tasks";
interface QueueCliOptions {
"source-id"?: string;
source?: string;
env: string;
"page-range"?: string;
"date-range"?: string;
page?: string;
date?: string;
category?: string;
"redis-url"?: string;
help?: boolean;
}
const usage = `Usage: bun run src/scripts/queue.ts -- --source-id <id> [options]\n\nOptions:\n --env <env> Environment to load (default: development)\n --page-range <range> Optional page range filter (e.g. 1:5)\n --date-range <range> Optional date range filter (e.g. 2024-01-01:2024-01-31)\n --category <slug> Optional category to crawl\n --redis-url <url> Override Redis connection URL\n -h, --help Show this message`;
const usage = `
Usage: bun run src/scripts/queue -- --source <id> [options]
Options:
--page <range> Optional page range filter (e.g. 1:5)
--date <range> Optional date range filter (e.g. 2024-01-01:2024-01-31)
--category <slug> Optional category to crawl
--redis-url <url> Override Redis connection URL
--env <env> Environment to load (default: development)
-h, --help Show this message
`;
const parseCliArgs = (): QueueCliOptions => {
const {values} = parseArgs({
const { values } = parseArgs({
options: {
"source-id": {type: "string"},
env: {type: "string", default: "development"},
"page-range": {type: "string"},
"date-range": {type: "string"},
category: {type: "string"},
"redis-url": {type: "string"},
help: {type: "boolean", short: "h"},
source: { type: "string" },
page: { type: "string" },
date: { type: "string" },
category: { type: "string" },
"redis-url": { type: "string" },
env: { type: "string", default: "development" },
help: { type: "boolean", short: "h" },
},
});
@@ -37,50 +46,35 @@ const parseCliArgs = (): QueueCliOptions => {
const main = async (): Promise<void> => {
const options = parseCliArgs();
if (options.help || !options["source-id"]) {
if (options.help || !options.source) {
console.log(usage);
if (!options["source-id"]) {
if (!options.source) {
process.exitCode = 1;
}
return;
}
const env = options.env ?? "development";
const manager = new PipelineConfigManager({env});
const config = manager.ensureDirectories();
manager.setupLogging(config);
const manager = new PipelineConfigManager({ env });
manager.setupLogging(manager.get(env));
const settings = options["redis-url"]
? createQueueSettings({redis_url: options["redis-url"]})
? createQueueSettings({ redis_url: options["redis-url"] })
: undefined;
try {
const jobId = await scheduleAsyncCrawl({
sourceId: options["source-id"],
const id = await scheduleAsyncCrawl({
sourceId: options.source,
env,
pageRange: options["page-range"] ?? null,
dateRange: options["date-range"] ?? null,
pageRange: options.page ?? null,
dateRange: options.date ?? null,
category: options.category ?? null,
settings,
});
logger.info(
{
jobId,
sourceId: options["source-id"],
env,
},
"Scheduled asynchronous crawl job",
);
console.log(
`Scheduled async crawl job ${jobId} for source '${options["source-id"]}' (env=${env})`,
);
logger.info({ id, ...options }, "Scheduled asynchronous crawl job");
} catch (error) {
logger.error(
error instanceof Error ? error : {error},
"Failed to schedule crawl job",
);
console.error(`Failed to schedule crawl job: ${(error as Error).message}`);
logger.error({ error }, "Failed to schedule crawl job");
process.exitCode = 1;
}
};
+27 -19
View File
@@ -1,10 +1,10 @@
import {parseArgs} from "node:util";
import { parseArgs } from "node:util";
import {logger} from "@basango/logger";
import { logger } from "@basango/logger";
import {PipelineConfigManager} from "@crawler/config";
import {createQueueManager, createQueueSettings,} from "@crawler/services/async/queue";
import {startWorker} from "@crawler/services/async/worker";
import { PipelineConfigManager } from "@/config";
import { createQueueManager, createQueueSettings } from "@/process/async/queue";
import { startWorker } from "@/process/async/worker";
interface WorkerCliOptions {
env: string;
@@ -14,16 +14,25 @@ interface WorkerCliOptions {
help?: boolean;
}
const usage = `Usage: bun run src/scripts/worker.ts [options]\n\nOptions:\n --env <env> Environment to load (default: development)\n -q, --queue <name> Queue name to listen on (repeatable)\n --concurrency <number> Number of concurrent jobs per worker\n --redis-url <url> Override Redis connection URL\n -h, --help Show this message`;
const usage = `
Usage: bun run src/scripts/worker [options]
Options:
--env <env> Environment to load (default: development)
-q, --queue <name> Queue name to listen on (repeatable)
--concurrency <number> Number of concurrent jobs per worker
--redis-url <url> Override Redis connection URL
-h, --help Show this message
`;
const parseCliArgs = (): WorkerCliOptions => {
const {values} = parseArgs({
const { values } = parseArgs({
options: {
env: {type: "string", default: "development"},
queue: {type: "string", multiple: true, short: "q"},
concurrency: {type: "string"},
"redis-url": {type: "string"},
help: {type: "boolean", short: "h"},
env: { type: "string", default: "development" },
queue: { type: "string", multiple: true, short: "q" },
concurrency: { type: "string" },
"redis-url": { type: "string" },
help: { type: "boolean", short: "h" },
},
});
@@ -52,25 +61,24 @@ const main = async (): Promise<void> => {
}
const env = options.env ?? "development";
const manager = new PipelineConfigManager({env});
const config = manager.ensureDirectories();
manager.setupLogging(config);
const manager = new PipelineConfigManager({ env });
manager.setupLogging(manager.get(env));
let concurrency: number | undefined;
try {
concurrency = parseConcurrency(options.concurrency);
} catch (error) {
logger.error(
error instanceof Error ? error : {error},
error instanceof Error ? error : { error },
"Invalid concurrency value provided",
);
process.exitCode = 1;
return;
}
const settings = options["redis-url"]
? createQueueSettings({redis_url: options["redis-url"]})
? createQueueSettings({ redis_url: options["redis-url"] })
: undefined;
const queueManager = createQueueManager({settings});
const queueManager = createQueueManager({ settings });
const queueNames = options.queue?.length
? options.queue.map((name) => queueManager.queueName(name))
@@ -83,7 +91,7 @@ const main = async (): Promise<void> => {
});
const shutdown = async (signal: NodeJS.Signals) => {
logger.info({signal}, "Received shutdown signal, draining workers");
logger.info({ signal }, "Received shutdown signal, draining workers");
try {
await handle.close();
} finally {
+147 -3
View File
@@ -1,9 +1,23 @@
import fs from "node:fs";
import path from "node:path";
import type { RedisOptions } from "ioredis";
import { get_encoding } from "tiktoken";
import { get_encoding, TiktokenEncoding } from "tiktoken";
import { format, getUnixTime, isMatch, parse } from "date-fns";
import { z } from "zod";
import type { ProjectPaths } from "@crawler/schema";
import {
CreateDateRangeOptions,
DateRange,
DateRangeSchema,
DateRangeSpecSchema,
PipelineConfig,
ProjectPaths,
ProjectPathsSchema,
SourcesConfig,
SourcesConfigSchema,
} from "@/schema";
import { DEFAULT_DATE_FORMAT } from "@/constants";
export const ensureDirectories = (paths: ProjectPaths): void => {
for (const dir of [paths.data, paths.logs, paths.configs]) {
@@ -26,7 +40,10 @@ export const parseRedisUrl = (url: string): RedisOptions => {
};
};
export const countTokens = (text: string, encoding = "cl100k_base"): number => {
export const countTokens = (
text: string,
encoding: TiktokenEncoding = "cl100k_base",
): number => {
try {
const encoder = get_encoding(encoding);
const tokens = encoder.encode(text);
@@ -36,3 +53,130 @@ export const countTokens = (text: string, encoding = "cl100k_base"): number => {
return text.length;
}
};
/**
 * Parses `value` against a date-fns format pattern, rejecting anything that
 * does not match.
 *
 * @param value - Raw date text to parse.
 * @param pattern - date-fns format pattern the text must follow.
 * @returns The parsed `Date`.
 * @throws Error when the text does not match the pattern or yields an invalid date.
 */
const parseDate = (value: string, pattern: string): Date => {
  // Both guards report the same message, so build it in one place.
  const invalid = (): Error =>
    new Error(`Invalid date '${value}' for format '${pattern}'`);

  if (!isMatch(value, pattern)) throw invalid();

  const parsed = parse(value, pattern, new Date());
  if (Number.isNaN(parsed.getTime())) throw invalid();

  return parsed;
};
/**
 * Turns a "start<separator>end" text spec into a validated range of unix
 * timestamps (seconds).
 *
 * @param spec - Range text, e.g. "2024-01-01:2024-01-31".
 * @param options - Optional date format pattern and separator overrides.
 * @returns The range after validation by `DateRangeSchema`.
 * @throws Error when the separator is empty or a date does not match the format;
 *   schema validation failures propagate from the zod `parse` calls.
 */
export const createDateRange = (
  spec: string,
  options: CreateDateRangeOptions = {},
): DateRange => {
  const { format: pattern = DEFAULT_DATE_FORMAT, separator = ":" } = options;
  if (!separator) {
    throw new Error("Separator cannot be empty");
  }

  // The spec schema expects ":" between the two dates, so a custom separator
  // is swapped for it first. A string pattern replaces the first occurrence only.
  // NOTE(review): a date format that itself contains ":" would be split in the
  // wrong place downstream — confirm whether such formats must be supported.
  const { startRaw, endRaw } = DateRangeSpecSchema.parse(
    spec.replace(separator, ":"),
  );

  return DateRangeSchema.parse({
    start: getUnixTime(parseDate(startRaw, pattern)),
    end: getUnixTime(parseDate(endRaw, pattern)),
  });
};
/**
 * Renders a range of unix timestamps (seconds) as "start:end" text.
 *
 * @param range - Range whose `start` and `end` are seconds since the epoch.
 * @param fmt - date-fns pattern applied to both bounds.
 * @returns The two formatted dates joined by ":".
 */
export const formatDateRange = (
  range: DateRange,
  fmt = DEFAULT_DATE_FORMAT,
): string => {
  // Timestamps are stored in seconds; Date expects milliseconds.
  const render = (seconds: number): string =>
    format(new Date(seconds * 1000), fmt);

  return [render(range.start), render(range.end)].join(":");
};
/**
 * Tells whether `timestamp` lies inside `range`, both bounds included.
 *
 * @param range - Range with numeric `start` and `end` bounds.
 * @param timestamp - Value to test, in the same unit as the bounds.
 * @returns `true` when `start <= timestamp <= end`.
 */
export const isTimestampInRange = (
  range: DateRange,
  timestamp: number,
): boolean => {
  const { start, end } = range;
  const atOrAfterStart = timestamp >= start;
  const atOrBeforeEnd = timestamp <= end;
  return atOrAfterStart && atOrBeforeEnd;
};
/**
 * Derives the project directory layout from a root directory.
 *
 * @param rootDir - Directory every other location is resolved against.
 * @returns The layout after validation by `ProjectPathsSchema`:
 *   `data` → `<root>/data/dataset`, `logs` → `<root>/data/logs`,
 *   `configs` → `<root>/config`.
 */
export const resolveProjectPaths = (rootDir: string): ProjectPaths => {
  const under = (...segments: string[]): string =>
    path.join(rootDir, ...segments);

  const layout = {
    root: rootDir,
    data: under("data", "dataset"),
    logs: under("data", "logs"),
    configs: under("config"),
  };

  return ProjectPathsSchema.parse(layout);
};
/**
 * Validates raw sources configuration and attaches a `find` lookup helper.
 *
 * @param input - Untrusted value to validate with `SourcesConfigSchema`.
 * @returns The parsed configuration plus `find(sourceId)`, which returns the
 *   first source with that `source_id` (html entries are checked before
 *   wordpress entries) or `undefined` when none matches.
 */
export const createSourcesConfig = (input: unknown): SourcesConfig => {
  const parsed = SourcesConfigSchema.parse(input);

  const find = (sourceId: string) => {
    const hasId = (candidate: { source_id: string }): boolean =>
      candidate.source_id === sourceId;
    // Look through html sources first, then wordpress, as one combined list would.
    return parsed.html.find(hasId) ?? parsed.wordpress.find(hasId);
  };

  return { find, ...parsed };
};
/**
 * Layers partial overrides on top of a base pipeline configuration.
 *
 * - `paths`: taken whole from the overrides when present, else from the base.
 * - `logging`, `fetch.client`, `fetch.crawler`: shallow-merged, override keys win.
 * - `sources.html` / `sources.wordpress`: an override list replaces the base
 *   list entirely; the result is rebuilt through `createSourcesConfig`.
 *
 * @param base - Complete configuration used as the starting point.
 * @param overrides - Partial configuration whose values take precedence.
 * @returns A new configuration object; neither argument is mutated.
 */
export const mergePipelineConfig = (
  base: PipelineConfig,
  overrides: Partial<PipelineConfig>,
): PipelineConfig => {
  const { fetch: fetchOverrides, sources: sourceOverrides } = overrides;

  return {
    paths: overrides.paths ?? base.paths,
    // Spreading `undefined` contributes nothing, so missing sections are harmless.
    logging: { ...base.logging, ...overrides.logging },
    fetch: {
      client: { ...base.fetch.client, ...fetchOverrides?.client },
      crawler: { ...base.fetch.crawler, ...fetchOverrides?.crawler },
    },
    sources: createSourcesConfig({
      html: sourceOverrides?.html ?? base.sources.html,
      wordpress: sourceOverrides?.wordpress ?? base.sources.wordpress,
    }),
  };
};
/**
 * Computes the configuration file path for an environment.
 *
 * @param basePath - Path of the base configuration file.
 * @param env - Environment name; empty, missing or "development" selects the base file.
 * @returns `basePath` itself, or the path with `.<env>` inserted before the
 *   file extension (e.g. `pipeline.json` → `pipeline.production.json`).
 */
export const resolveConfigPath = (basePath: string, env?: string): string => {
  if (env && env !== "development") {
    const extension = path.extname(basePath);
    // Strip the extension by length so a path without one is left intact.
    const stem = basePath.substring(0, basePath.length - extension.length);
    return stem + "." + env + extension;
  }

  return basePath;
};
/**
 * Produces a JSON-Schema-like description of a zod schema.
 *
 * If the loaded zod build exposes `toJSONSchema`, that converter is used
 * (targeting draft 2020-12). When it is missing or throws, a minimal
 * `{ type }` object is returned based on the schema's class.
 *
 * @param schema - Any zod schema.
 * @returns The converter output, or `{ type: "object" | "array" | "string" |
 *   "number" | "boolean" | "unknown" }` as a fallback.
 */
export const schemaToJSON = <T extends z.ZodTypeAny>(schema: T): unknown => {
  type Converter = (
    s: z.ZodTypeAny,
    opts?: Record<string, unknown>,
  ) => unknown;

  // Looked up dynamically because not every zod version ships this function.
  const native = (z as any).toJSONSchema as Converter | undefined;

  if (typeof native === "function") {
    try {
      // target can be "draft-2020-12" | "draft-7" | "draft-4" | "openapi-3.0"
      const conversionOptions = {
        target: "draft-2020-12",
        unrepresentable: "any",
      };
      return native(schema, conversionOptions);
    } catch {
      // fall through to minimal mapping
    }
  }

  // Minimal mapping: report only the top-level kind of the schema.
  switch (true) {
    case schema instanceof z.ZodObject:
      return { type: "object" };
    case schema instanceof z.ZodArray:
      return { type: "array" };
    case schema instanceof z.ZodString:
      return { type: "string" };
    case schema instanceof z.ZodNumber:
      return { type: "number" };
    case schema instanceof z.ZodBoolean:
      return { type: "boolean" };
    default:
      return { type: "unknown" };
  }
};
+2 -4
View File
@@ -1,11 +1,9 @@
{
"extends": "@basango/tsconfig/base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"baseUrl": ".",
"paths": {
"@crawler": ["./src/index.ts"],
"@crawler/*": ["./src/*"]
"@/*": ["./src/*"]
}
},
"include": ["src"],
+8
View File
@@ -1,3 +1,5 @@
import path from "node:path";
import { defineConfig } from "vitest/config";
export default defineConfig({
@@ -5,5 +7,11 @@ export default defineConfig({
environment: "node",
globals: true,
include: ["src/**/*.test.ts"],
setupFiles: ["./vitest.setup.ts"],
},
resolve: {
alias: {
"@": path.resolve(__dirname, "src"),
},
},
});
+1
View File
@@ -0,0 +1 @@
process.env.NODE_ENV = process.env.NODE_ENV ?? "test";
+3 -3
View File
@@ -4,6 +4,7 @@
"": {
"name": "basango",
"devDependencies": {
"@basango/tsconfig": "workspace:*",
"@biomejs/biome": "^2.3.1",
"@manypkg/cli": "^0.25.1",
"turbo": "^2.5.8",
@@ -19,12 +20,11 @@
"date-fns": "^3.6.0",
"ioredis": "^5.3.2",
"tiktoken": "^1.0.14",
"zod": "^4.0.0",
"zod": "catalog:",
},
},
"packages/db": {
"name": "@basango/db",
"version": "1.0.0",
"dependencies": {
"@basango/logger": "workspace:*",
"@date-fns/utc": "^2.1.1",
@@ -52,12 +52,12 @@
},
"packages/tsconfig": {
"name": "@basango/tsconfig",
"version": "0.0.0",
},
},
"catalog": {
"@types/bun": "^1.3.1",
"typescript": "^5.9.3",
"zod": "^4.0.0",
},
"packages": {
"@basango/crawler": ["@basango/crawler@workspace:apps/crawler"],
+4 -2
View File
@@ -15,7 +15,8 @@
"@biomejs/biome": "^2.3.1",
"@manypkg/cli": "^0.25.1",
"turbo": "^2.5.8",
"typescript": "catalog:"
"typescript": "catalog:",
"@basango/tsconfig": "workspace:*"
},
"engines": {
"node": ">=22"
@@ -28,6 +29,7 @@
"catalog": {
"typescript": "^5.9.3",
"@types/bun": "^1.3.1",
"zod": "^4.0.0"
"zod": "^4.0.0",
"date-fns": "^3.6.0"
}
}
+1 -1
View File
@@ -15,7 +15,7 @@
"snakecase-keys": "^9.0.2"
},
"devDependencies": {
"@types/bun": "^1.3.1",
"@types/bun": "catalog:",
"@types/pg": "^8.15.6",
"drizzle-kit": "^0.31.6",
"typescript": "catalog:"
+1 -1
View File
@@ -1,6 +1,6 @@
import { drizzle } from "drizzle-orm/node-postgres";
import { Pool } from "pg";
import * as schema from "@db/schema";
import * as schema from "@/schema";
const isDevelopment = process.env.NODE_ENV === "development";
+53 -31
View File
@@ -1,24 +1,24 @@
import type { AnyColumn, SQL } from "drizzle-orm";
import { and, asc, desc, eq, gt, lt, or, sql } from "drizzle-orm";
import type { Database } from "@db/client";
import type { Database } from "@/client";
import {
users,
articles,
bookmarkArticles,
bookmarks,
comments,
sources,
} from "@db/schema";
users,
} from "@/schema";
import {
buildPaginationResult,
createPageState,
decodeCursor,
type PageRequest,
type PaginationMeta,
type PageState,
type PaginationMeta,
type SortDirection,
} from "@db/utils/pagination";
} from "@/utils/pagination";
export interface ArticleFilters {
search?: string | null;
@@ -128,7 +128,14 @@ export async function* getArticlesForExport(
if (params.dateRange) {
filters.push(
sql`${articles.publishedAt} BETWEEN to_timestamp(${params.dateRange.start}) AND to_timestamp(${params.dateRange.end})`,
sql`${articles.publishedAt} BETWEEN to_timestamp(
${params.dateRange.start}
)
AND
to_timestamp
(
${params.dateRange.end}
)`,
);
}
@@ -137,9 +144,8 @@ export async function* getArticlesForExport(
articleId: articles.id,
articleTitle: articles.title,
articleLink: articles.link,
articleCategories: sql<
string | null
>`array_to_string(${articles.categories}, ',')`,
articleCategories: sql<string | null>`array_to_string
(${articles.categories}, ',')`,
articleBody: articles.body,
articleSource: sources.name,
articleHash: articles.hash,
@@ -203,7 +209,9 @@ function buildArticleFilterConditions(filters: NormalizedArticleFilters): {
let searchQuery: string | undefined;
if (filters.category) {
conditions.push(sql`${filters.category} = ANY(${articles.categories})`);
conditions.push(sql`${filters.category} = ANY(
${articles.categories}
)`);
}
if (filters.search) {
@@ -211,14 +219,23 @@ function buildArticleFilterConditions(filters: NormalizedArticleFilters): {
if (sanitized.length > 0) {
searchQuery = sanitized;
conditions.push(
sql`${articles.tsv} @@ to_tsquery('french', ${sanitized})`,
sql`${articles.tsv} @@ to_tsquery('french',
${sanitized}
)`,
);
}
}
if (filters.dateRange) {
conditions.push(
sql`${articles.publishedAt} BETWEEN to_timestamp(${filters.dateRange.start}) AND to_timestamp(${filters.dateRange.end})`,
sql`${articles.publishedAt} BETWEEN to_timestamp(
${filters.dateRange.start}
)
AND
to_timestamp
(
${filters.dateRange.end}
)`,
);
}
@@ -226,12 +243,12 @@ function buildArticleFilterConditions(filters: NormalizedArticleFilters): {
}
function buildBookmarkExistsExpression(userId: string): SQL<boolean> {
return sql`EXISTS (
SELECT 1
FROM ${bookmarkArticles} ba
INNER JOIN ${bookmarks} b ON ba.bookmark_id = b.id
WHERE ba.article_id = ${articles.id} AND b.user_id = ${userId}
)`;
return sql`EXISTS
(SELECT 1
FROM ${bookmarkArticles} ba
INNER JOIN ${bookmarks} b ON ba.bookmark_id = b.id
WHERE ba.article_id = ${articles.id}
AND b.user_id = ${userId})`;
}
async function fetchArticleOverview(
@@ -254,9 +271,8 @@ async function fetchArticleOverview(
article_id: articles.id,
article_title: articles.title,
article_link: articles.link,
article_categories: sql<
string | null
>`array_to_string(${articles.categories}, ',')`,
article_categories: sql<string | null>`array_to_string
(${articles.categories}, ',')`,
article_excerpt: articles.excerpt,
article_published_at: articles.publishedAt,
article_image: articles.image,
@@ -306,8 +322,12 @@ async function fetchArticleOverview(
if (searchQuery) {
orderings.push(
options.filters.sortDirection === "asc"
? sql`ts_rank(${articles.tsv}, to_tsquery('french', ${searchQuery})) ASC`
: sql`ts_rank(${articles.tsv}, to_tsquery('french', ${searchQuery})) DESC`,
? sql`ts_rank
(${articles.tsv}, to_tsquery('french', ${searchQuery}))
ASC`
: sql`ts_rank
(${articles.tsv}, to_tsquery('french', ${searchQuery}))
DESC`,
);
}
@@ -387,9 +407,8 @@ export async function getBookmarkedArticleList(
article_id: articles.id,
article_title: articles.title,
article_link: articles.link,
article_categories: sql<
string | null
>`array_to_string(${articles.categories}, ',')`,
article_categories: sql<string | null>`array_to_string
(${articles.categories}, ',')`,
article_excerpt: articles.excerpt,
article_published_at: articles.publishedAt,
article_image: articles.image,
@@ -441,8 +460,12 @@ export async function getBookmarkedArticleList(
if (searchQuery) {
orderings.push(
filters.sortDirection === "asc"
? sql`ts_rank(${articles.tsv}, to_tsquery('french', ${searchQuery})) ASC`
: sql`ts_rank(${articles.tsv}, to_tsquery('french', ${searchQuery})) DESC`,
? sql`ts_rank
(${articles.tsv}, to_tsquery('french', ${searchQuery}))
ASC`
: sql`ts_rank
(${articles.tsv}, to_tsquery('french', ${searchQuery}))
DESC`,
);
}
@@ -471,9 +494,8 @@ export async function getArticleDetails(
article_id: articles.id,
article_title: articles.title,
article_link: articles.link,
article_categories: sql<
string | null
>`array_to_string(${articles.categories}, ',')`,
article_categories: sql<string | null>`array_to_string
(${articles.categories}, ',')`,
article_body: articles.body,
article_hash: articles.hash,
article_published_at: articles.publishedAt,
+3 -3
View File
@@ -1,15 +1,15 @@
import type { SQL } from "drizzle-orm";
import { and, desc, eq, lt, sql } from "drizzle-orm";
import type { Database } from "@db/client";
import { bookmarkArticles, bookmarks } from "@db/schema";
import type { Database } from "@/client";
import { bookmarkArticles, bookmarks } from "@/schema";
import {
buildPaginationResult,
createPageState,
decodeCursor,
type PageRequest,
type PaginationMeta,
} from "@db/utils/pagination";
} from "@/utils/pagination";
export interface BookmarkRow {
bookmark_id: string;
+4 -4
View File
@@ -1,16 +1,16 @@
import type { SQL } from "drizzle-orm";
import { and, desc, eq, lt, or, sql } from "drizzle-orm";
import type { Database } from "@db/client";
import { articles, followedSources, sources } from "@db/schema";
import type { Database } from "@/client";
import { articles, followedSources, sources } from "@/schema";
import {
buildPaginationResult,
createPageState,
decodeCursor,
type PageRequest,
type PaginationMeta,
} from "@db/utils/pagination";
import { PUBLICATION_GRAPH_DAYS, SOURCE_IMAGE_BASE } from "@db/constant";
} from "@/utils/pagination";
import { PUBLICATION_GRAPH_DAYS, SOURCE_IMAGE_BASE } from "@/constant";
export interface SourceOverviewRow {
source_id: string;
+2 -2
View File
@@ -1,7 +1,7 @@
import { eq } from "drizzle-orm";
import type { Database } from "@db/client";
import { users } from "@db/schema";
import type { Database } from "@/client";
import { users } from "@/schema";
export interface UserProfileRow {
user_id: string;
+1 -1
View File
@@ -1,5 +1,5 @@
import { sql } from "drizzle-orm";
import { db } from "@db/client";
import { db } from "@/client";
export async function checkHealth() {
await db.execute(sql`SELECT 1`);
+1 -1
View File
@@ -5,7 +5,7 @@
"compilerOptions": {
"baseUrl": ".",
"paths": {
"@db/*": ["./src/*"]
"@/*": ["./src/*"]
}
}
}
+11 -2
View File
@@ -1,5 +1,6 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"display": "Default",
"compilerOptions": {
"declaration": true,
"declarationMap": true,
@@ -9,11 +10,19 @@
"lib": ["es2022", "DOM", "DOM.Iterable"],
"module": "NodeNext",
"moduleDetection": "force",
"moduleResolution": "NodeNext",
"moduleResolution": "node",
"forceConsistentCasingInFileNames": true,
"noUncheckedIndexedAccess": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true,
"allowImportingTsExtensions": true,
"strict": true,
"target": "ES2022"
"target": "ES2022",
"baseUrl": "."
}
}
-2
View File
@@ -1,8 +1,6 @@
{
"name": "@basango/tsconfig",
"version": "0.0.0",
"private": true,
"license": "MIT",
"publishConfig": {
"access": "public"
},
-258
View File
@@ -1,258 +0,0 @@
{
"lockfileVersion": 1,
"workspaces": {
"": {
"name": "basango-monorepo",
"devDependencies": {
"@types/node": "^20.11.30",
"typescript": "^5.4.0",
"vitest": "^1.6.0",
},
},
},
"packages": {
"@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.21.5", "", { "os": "aix", "cpu": "ppc64" }, "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ=="],
"@esbuild/android-arm": ["@esbuild/android-arm@0.21.5", "", { "os": "android", "cpu": "arm" }, "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg=="],
"@esbuild/android-arm64": ["@esbuild/android-arm64@0.21.5", "", { "os": "android", "cpu": "arm64" }, "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A=="],
"@esbuild/android-x64": ["@esbuild/android-x64@0.21.5", "", { "os": "android", "cpu": "x64" }, "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA=="],
"@esbuild/darwin-arm64": ["@esbuild/darwin-arm64@0.21.5", "", { "os": "darwin", "cpu": "arm64" }, "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ=="],
"@esbuild/darwin-x64": ["@esbuild/darwin-x64@0.21.5", "", { "os": "darwin", "cpu": "x64" }, "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw=="],
"@esbuild/freebsd-arm64": ["@esbuild/freebsd-arm64@0.21.5", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g=="],
"@esbuild/freebsd-x64": ["@esbuild/freebsd-x64@0.21.5", "", { "os": "freebsd", "cpu": "x64" }, "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ=="],
"@esbuild/linux-arm": ["@esbuild/linux-arm@0.21.5", "", { "os": "linux", "cpu": "arm" }, "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA=="],
"@esbuild/linux-arm64": ["@esbuild/linux-arm64@0.21.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q=="],
"@esbuild/linux-ia32": ["@esbuild/linux-ia32@0.21.5", "", { "os": "linux", "cpu": "ia32" }, "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg=="],
"@esbuild/linux-loong64": ["@esbuild/linux-loong64@0.21.5", "", { "os": "linux", "cpu": "none" }, "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg=="],
"@esbuild/linux-mips64el": ["@esbuild/linux-mips64el@0.21.5", "", { "os": "linux", "cpu": "none" }, "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg=="],
"@esbuild/linux-ppc64": ["@esbuild/linux-ppc64@0.21.5", "", { "os": "linux", "cpu": "ppc64" }, "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w=="],
"@esbuild/linux-riscv64": ["@esbuild/linux-riscv64@0.21.5", "", { "os": "linux", "cpu": "none" }, "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA=="],
"@esbuild/linux-s390x": ["@esbuild/linux-s390x@0.21.5", "", { "os": "linux", "cpu": "s390x" }, "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A=="],
"@esbuild/linux-x64": ["@esbuild/linux-x64@0.21.5", "", { "os": "linux", "cpu": "x64" }, "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ=="],
"@esbuild/netbsd-x64": ["@esbuild/netbsd-x64@0.21.5", "", { "os": "none", "cpu": "x64" }, "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg=="],
"@esbuild/openbsd-x64": ["@esbuild/openbsd-x64@0.21.5", "", { "os": "openbsd", "cpu": "x64" }, "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow=="],
"@esbuild/sunos-x64": ["@esbuild/sunos-x64@0.21.5", "", { "os": "sunos", "cpu": "x64" }, "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg=="],
"@esbuild/win32-arm64": ["@esbuild/win32-arm64@0.21.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A=="],
"@esbuild/win32-ia32": ["@esbuild/win32-ia32@0.21.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA=="],
"@esbuild/win32-x64": ["@esbuild/win32-x64@0.21.5", "", { "os": "win32", "cpu": "x64" }, "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw=="],
"@jest/schemas": ["@jest/schemas@29.6.3", "", { "dependencies": { "@sinclair/typebox": "^0.27.8" } }, "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA=="],
"@jridgewell/sourcemap-codec": ["@jridgewell/sourcemap-codec@1.5.5", "", {}, "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="],
"@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.52.5", "", { "os": "android", "cpu": "arm" }, "sha512-8c1vW4ocv3UOMp9K+gToY5zL2XiiVw3k7f1ksf4yO1FlDFQ1C2u72iACFnSOceJFsWskc2WZNqeRhFRPzv+wtQ=="],
"@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.52.5", "", { "os": "android", "cpu": "arm64" }, "sha512-mQGfsIEFcu21mvqkEKKu2dYmtuSZOBMmAl5CFlPGLY94Vlcm+zWApK7F/eocsNzp8tKmbeBP8yXyAbx0XHsFNA=="],
"@rollup/rollup-darwin-arm64": ["@rollup/rollup-darwin-arm64@4.52.5", "", { "os": "darwin", "cpu": "arm64" }, "sha512-takF3CR71mCAGA+v794QUZ0b6ZSrgJkArC+gUiG6LB6TQty9T0Mqh3m2ImRBOxS2IeYBo4lKWIieSvnEk2OQWA=="],
"@rollup/rollup-darwin-x64": ["@rollup/rollup-darwin-x64@4.52.5", "", { "os": "darwin", "cpu": "x64" }, "sha512-W901Pla8Ya95WpxDn//VF9K9u2JbocwV/v75TE0YIHNTbhqUTv9w4VuQ9MaWlNOkkEfFwkdNhXgcLqPSmHy0fA=="],
"@rollup/rollup-freebsd-arm64": ["@rollup/rollup-freebsd-arm64@4.52.5", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-QofO7i7JycsYOWxe0GFqhLmF6l1TqBswJMvICnRUjqCx8b47MTo46W8AoeQwiokAx3zVryVnxtBMcGcnX12LvA=="],
"@rollup/rollup-freebsd-x64": ["@rollup/rollup-freebsd-x64@4.52.5", "", { "os": "freebsd", "cpu": "x64" }, "sha512-jr21b/99ew8ujZubPo9skbrItHEIE50WdV86cdSoRkKtmWa+DDr6fu2c/xyRT0F/WazZpam6kk7IHBerSL7LDQ=="],
"@rollup/rollup-linux-arm-gnueabihf": ["@rollup/rollup-linux-arm-gnueabihf@4.52.5", "", { "os": "linux", "cpu": "arm" }, "sha512-PsNAbcyv9CcecAUagQefwX8fQn9LQ4nZkpDboBOttmyffnInRy8R8dSg6hxxl2Re5QhHBf6FYIDhIj5v982ATQ=="],
"@rollup/rollup-linux-arm-musleabihf": ["@rollup/rollup-linux-arm-musleabihf@4.52.5", "", { "os": "linux", "cpu": "arm" }, "sha512-Fw4tysRutyQc/wwkmcyoqFtJhh0u31K+Q6jYjeicsGJJ7bbEq8LwPWV/w0cnzOqR2m694/Af6hpFayLJZkG2VQ=="],
"@rollup/rollup-linux-arm64-gnu": ["@rollup/rollup-linux-arm64-gnu@4.52.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-a+3wVnAYdQClOTlyapKmyI6BLPAFYs0JM8HRpgYZQO02rMR09ZcV9LbQB+NL6sljzG38869YqThrRnfPMCDtZg=="],
"@rollup/rollup-linux-arm64-musl": ["@rollup/rollup-linux-arm64-musl@4.52.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-AvttBOMwO9Pcuuf7m9PkC1PUIKsfaAJ4AYhy944qeTJgQOqJYJ9oVl2nYgY7Rk0mkbsuOpCAYSs6wLYB2Xiw0Q=="],
"@rollup/rollup-linux-loong64-gnu": ["@rollup/rollup-linux-loong64-gnu@4.52.5", "", { "os": "linux", "cpu": "none" }, "sha512-DkDk8pmXQV2wVrF6oq5tONK6UHLz/XcEVow4JTTerdeV1uqPeHxwcg7aFsfnSm9L+OO8WJsWotKM2JJPMWrQtA=="],
"@rollup/rollup-linux-ppc64-gnu": ["@rollup/rollup-linux-ppc64-gnu@4.52.5", "", { "os": "linux", "cpu": "ppc64" }, "sha512-W/b9ZN/U9+hPQVvlGwjzi+Wy4xdoH2I8EjaCkMvzpI7wJUs8sWJ03Rq96jRnHkSrcHTpQe8h5Tg3ZzUPGauvAw=="],
"@rollup/rollup-linux-riscv64-gnu": ["@rollup/rollup-linux-riscv64-gnu@4.52.5", "", { "os": "linux", "cpu": "none" }, "sha512-sjQLr9BW7R/ZiXnQiWPkErNfLMkkWIoCz7YMn27HldKsADEKa5WYdobaa1hmN6slu9oWQbB6/jFpJ+P2IkVrmw=="],
"@rollup/rollup-linux-riscv64-musl": ["@rollup/rollup-linux-riscv64-musl@4.52.5", "", { "os": "linux", "cpu": "none" }, "sha512-hq3jU/kGyjXWTvAh2awn8oHroCbrPm8JqM7RUpKjalIRWWXE01CQOf/tUNWNHjmbMHg/hmNCwc/Pz3k1T/j/Lg=="],
"@rollup/rollup-linux-s390x-gnu": ["@rollup/rollup-linux-s390x-gnu@4.52.5", "", { "os": "linux", "cpu": "s390x" }, "sha512-gn8kHOrku8D4NGHMK1Y7NA7INQTRdVOntt1OCYypZPRt6skGbddska44K8iocdpxHTMMNui5oH4elPH4QOLrFQ=="],
"@rollup/rollup-linux-x64-gnu": ["@rollup/rollup-linux-x64-gnu@4.52.5", "", { "os": "linux", "cpu": "x64" }, "sha512-hXGLYpdhiNElzN770+H2nlx+jRog8TyynpTVzdlc6bndktjKWyZyiCsuDAlpd+j+W+WNqfcyAWz9HxxIGfZm1Q=="],
"@rollup/rollup-linux-x64-musl": ["@rollup/rollup-linux-x64-musl@4.52.5", "", { "os": "linux", "cpu": "x64" }, "sha512-arCGIcuNKjBoKAXD+y7XomR9gY6Mw7HnFBv5Rw7wQRvwYLR7gBAgV7Mb2QTyjXfTveBNFAtPt46/36vV9STLNg=="],
"@rollup/rollup-openharmony-arm64": ["@rollup/rollup-openharmony-arm64@4.52.5", "", { "os": "none", "cpu": "arm64" }, "sha512-QoFqB6+/9Rly/RiPjaomPLmR/13cgkIGfA40LHly9zcH1S0bN2HVFYk3a1eAyHQyjs3ZJYlXvIGtcCs5tko9Cw=="],
"@rollup/rollup-win32-arm64-msvc": ["@rollup/rollup-win32-arm64-msvc@4.52.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-w0cDWVR6MlTstla1cIfOGyl8+qb93FlAVutcor14Gf5Md5ap5ySfQ7R9S/NjNaMLSFdUnKGEasmVnu3lCMqB7w=="],
"@rollup/rollup-win32-ia32-msvc": ["@rollup/rollup-win32-ia32-msvc@4.52.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-Aufdpzp7DpOTULJCuvzqcItSGDH73pF3ko/f+ckJhxQyHtp67rHw3HMNxoIdDMUITJESNE6a8uh4Lo4SLouOUg=="],
"@rollup/rollup-win32-x64-gnu": ["@rollup/rollup-win32-x64-gnu@4.52.5", "", { "os": "win32", "cpu": "x64" }, "sha512-UGBUGPFp1vkj6p8wCRraqNhqwX/4kNQPS57BCFc8wYh0g94iVIW33wJtQAx3G7vrjjNtRaxiMUylM0ktp/TRSQ=="],
"@rollup/rollup-win32-x64-msvc": ["@rollup/rollup-win32-x64-msvc@4.52.5", "", { "os": "win32", "cpu": "x64" }, "sha512-TAcgQh2sSkykPRWLrdyy2AiceMckNf5loITqXxFI5VuQjS5tSuw3WlwdN8qv8vzjLAUTvYaH/mVjSFpbkFbpTg=="],
"@sinclair/typebox": ["@sinclair/typebox@0.27.8", "", {}, "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA=="],
"@types/estree": ["@types/estree@1.0.8", "", {}, "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w=="],
"@types/node": ["@types/node@20.19.24", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-FE5u0ezmi6y9OZEzlJfg37mqqf6ZDSF2V/NLjUyGrR9uTZ7Sb9F7bLNZ03S4XVUNRWGA7Ck4c1kK+YnuWjl+DA=="],
"@vitest/expect": ["@vitest/expect@1.6.1", "", { "dependencies": { "@vitest/spy": "1.6.1", "@vitest/utils": "1.6.1", "chai": "^4.3.10" } }, "sha512-jXL+9+ZNIJKruofqXuuTClf44eSpcHlgj3CiuNihUF3Ioujtmc0zIa3UJOW5RjDK1YLBJZnWBlPuqhYycLioog=="],
"@vitest/runner": ["@vitest/runner@1.6.1", "", { "dependencies": { "@vitest/utils": "1.6.1", "p-limit": "^5.0.0", "pathe": "^1.1.1" } }, "sha512-3nSnYXkVkf3mXFfE7vVyPmi3Sazhb/2cfZGGs0JRzFsPFvAMBEcrweV1V1GsrstdXeKCTXlJbvnQwGWgEIHmOA=="],
"@vitest/snapshot": ["@vitest/snapshot@1.6.1", "", { "dependencies": { "magic-string": "^0.30.5", "pathe": "^1.1.1", "pretty-format": "^29.7.0" } }, "sha512-WvidQuWAzU2p95u8GAKlRMqMyN1yOJkGHnx3M1PL9Raf7AQ1kwLKg04ADlCa3+OXUZE7BceOhVZiuWAbzCKcUQ=="],
"@vitest/spy": ["@vitest/spy@1.6.1", "", { "dependencies": { "tinyspy": "^2.2.0" } }, "sha512-MGcMmpGkZebsMZhbQKkAf9CX5zGvjkBTqf8Zx3ApYWXr3wG+QvEu2eXWfnIIWYSJExIp4V9FCKDEeygzkYrXMw=="],
"@vitest/utils": ["@vitest/utils@1.6.1", "", { "dependencies": { "diff-sequences": "^29.6.3", "estree-walker": "^3.0.3", "loupe": "^2.3.7", "pretty-format": "^29.7.0" } }, "sha512-jOrrUvXM4Av9ZWiG1EajNto0u96kWAhJ1LmPmJhXXQx/32MecEKd10pOLYgS2BQx1TgkGhloPU1ArDW2vvaY6g=="],
"acorn": ["acorn@8.15.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg=="],
"acorn-walk": ["acorn-walk@8.3.4", "", { "dependencies": { "acorn": "^8.11.0" } }, "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g=="],
"ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="],
"assertion-error": ["assertion-error@1.1.0", "", {}, "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw=="],
"cac": ["cac@6.7.14", "", {}, "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ=="],
"chai": ["chai@4.5.0", "", { "dependencies": { "assertion-error": "^1.1.0", "check-error": "^1.0.3", "deep-eql": "^4.1.3", "get-func-name": "^2.0.2", "loupe": "^2.3.6", "pathval": "^1.1.1", "type-detect": "^4.1.0" } }, "sha512-RITGBfijLkBddZvnn8jdqoTypxvqbOLYQkGGxXzeFjVHvudaPw0HNFD9x928/eUwYWd2dPCugVqspGALTZZQKw=="],
"check-error": ["check-error@1.0.3", "", { "dependencies": { "get-func-name": "^2.0.2" } }, "sha512-iKEoDYaRmd1mxM90a2OEfWhjsjPpYPuQ+lMYsoxB126+t8fw7ySEO48nmDg5COTjxDI65/Y2OWpeEHk3ZOe8zg=="],
"confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="],
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
"debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
"deep-eql": ["deep-eql@4.1.4", "", { "dependencies": { "type-detect": "^4.0.0" } }, "sha512-SUwdGfqdKOwxCPeVYjwSyRpJ7Z+fhpwIAtmCUdZIWZ/YP5R9WAsyuSgpLVDi9bjWoN2LXHNss/dk3urXtdQxGg=="],
"diff-sequences": ["diff-sequences@29.6.3", "", {}, "sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q=="],
"esbuild": ["esbuild@0.21.5", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.21.5", "@esbuild/android-arm": "0.21.5", "@esbuild/android-arm64": "0.21.5", "@esbuild/android-x64": "0.21.5", "@esbuild/darwin-arm64": "0.21.5", "@esbuild/darwin-x64": "0.21.5", "@esbuild/freebsd-arm64": "0.21.5", "@esbuild/freebsd-x64": "0.21.5", "@esbuild/linux-arm": "0.21.5", "@esbuild/linux-arm64": "0.21.5", "@esbuild/linux-ia32": "0.21.5", "@esbuild/linux-loong64": "0.21.5", "@esbuild/linux-mips64el": "0.21.5", "@esbuild/linux-ppc64": "0.21.5", "@esbuild/linux-riscv64": "0.21.5", "@esbuild/linux-s390x": "0.21.5", "@esbuild/linux-x64": "0.21.5", "@esbuild/netbsd-x64": "0.21.5", "@esbuild/openbsd-x64": "0.21.5", "@esbuild/sunos-x64": "0.21.5", "@esbuild/win32-arm64": "0.21.5", "@esbuild/win32-ia32": "0.21.5", "@esbuild/win32-x64": "0.21.5" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw=="],
"estree-walker": ["estree-walker@3.0.3", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="],
"execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, "sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="],
"fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="],
"get-func-name": ["get-func-name@2.0.2", "", {}, "sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ=="],
"get-stream": ["get-stream@8.0.1", "", {}, "sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA=="],
"human-signals": ["human-signals@5.0.0", "", {}, "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ=="],
"is-stream": ["is-stream@3.0.0", "", {}, "sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA=="],
"isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="],
"js-tokens": ["js-tokens@9.0.1", "", {}, "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ=="],
"local-pkg": ["local-pkg@0.5.1", "", { "dependencies": { "mlly": "^1.7.3", "pkg-types": "^1.2.1" } }, "sha512-9rrA30MRRP3gBD3HTGnC6cDFpaE1kVDWxWgqWJUN0RvDNAo+Nz/9GxB+nHOH0ifbVFy0hSA1V6vFDvnx54lTEQ=="],
"loupe": ["loupe@2.3.7", "", { "dependencies": { "get-func-name": "^2.0.1" } }, "sha512-zSMINGVYkdpYSOBmLi0D1Uo7JU9nVdQKrHxC8eYlV+9YKK9WePqAlL7lSlorG/U2Fw1w0hTBmaa/jrQ3UbPHtA=="],
"magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="],
"merge-stream": ["merge-stream@2.0.0", "", {}, "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="],
"mimic-fn": ["mimic-fn@4.0.0", "", {}, "sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw=="],
"mlly": ["mlly@1.8.0", "", { "dependencies": { "acorn": "^8.15.0", "pathe": "^2.0.3", "pkg-types": "^1.3.1", "ufo": "^1.6.1" } }, "sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g=="],
"ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
"npm-run-path": ["npm-run-path@5.3.0", "", { "dependencies": { "path-key": "^4.0.0" } }, "sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ=="],
"onetime": ["onetime@6.0.0", "", { "dependencies": { "mimic-fn": "^4.0.0" } }, "sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ=="],
"p-limit": ["p-limit@5.0.0", "", { "dependencies": { "yocto-queue": "^1.0.0" } }, "sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ=="],
"path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="],
"pathe": ["pathe@1.1.2", "", {}, "sha512-whLdWMYL2TwI08hn8/ZqAbrVemu0LNaNNJZX73O6qaIdCTfXutsLhMkjdENX0qhsQ9uIimo4/aQOmXkoon2nDQ=="],
"pathval": ["pathval@1.1.1", "", {}, "sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ=="],
"picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="],
"pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", "pathe": "^2.0.1" } }, "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="],
"postcss": ["postcss@8.5.6", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg=="],
"pretty-format": ["pretty-format@29.7.0", "", { "dependencies": { "@jest/schemas": "^29.6.3", "ansi-styles": "^5.0.0", "react-is": "^18.0.0" } }, "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ=="],
"react-is": ["react-is@18.3.1", "", {}, "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg=="],
"rollup": ["rollup@4.52.5", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.52.5", "@rollup/rollup-android-arm64": "4.52.5", "@rollup/rollup-darwin-arm64": "4.52.5", "@rollup/rollup-darwin-x64": "4.52.5", "@rollup/rollup-freebsd-arm64": "4.52.5", "@rollup/rollup-freebsd-x64": "4.52.5", "@rollup/rollup-linux-arm-gnueabihf": "4.52.5", "@rollup/rollup-linux-arm-musleabihf": "4.52.5", "@rollup/rollup-linux-arm64-gnu": "4.52.5", "@rollup/rollup-linux-arm64-musl": "4.52.5", "@rollup/rollup-linux-loong64-gnu": "4.52.5", "@rollup/rollup-linux-ppc64-gnu": "4.52.5", "@rollup/rollup-linux-riscv64-gnu": "4.52.5", "@rollup/rollup-linux-riscv64-musl": "4.52.5", "@rollup/rollup-linux-s390x-gnu": "4.52.5", "@rollup/rollup-linux-x64-gnu": "4.52.5", "@rollup/rollup-linux-x64-musl": "4.52.5", "@rollup/rollup-openharmony-arm64": "4.52.5", "@rollup/rollup-win32-arm64-msvc": "4.52.5", "@rollup/rollup-win32-ia32-msvc": "4.52.5", "@rollup/rollup-win32-x64-gnu": "4.52.5", "@rollup/rollup-win32-x64-msvc": "4.52.5", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-3GuObel8h7Kqdjt0gxkEzaifHTqLVW56Y/bjN7PSQtkKr0w3V/QYSdt6QWYtd7A1xUtYQigtdUfgj1RvWVtorw=="],
"shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="],
"shebang-regex": ["shebang-regex@3.0.0", "", {}, "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="],
"siginfo": ["siginfo@2.0.0", "", {}, "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g=="],
"signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="],
"stackback": ["stackback@0.0.2", "", {}, "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw=="],
"std-env": ["std-env@3.10.0", "", {}, "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg=="],
"strip-final-newline": ["strip-final-newline@3.0.0", "", {}, "sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw=="],
"strip-literal": ["strip-literal@2.1.1", "", { "dependencies": { "js-tokens": "^9.0.1" } }, "sha512-631UJ6O00eNGfMiWG78ck80dfBab8X6IVFB51jZK5Icd7XAs60Z5y7QdSd/wGIklnWvRbUNloVzhOKKmutxQ6Q=="],
"tinybench": ["tinybench@2.9.0", "", {}, "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg=="],
"tinypool": ["tinypool@0.8.4", "", {}, "sha512-i11VH5gS6IFeLY3gMBQ00/MmLncVP7JLXOw1vlgkytLmJK7QnEr7NXf0LBdxfmNPAeyetukOk0bOYrJrFGjYJQ=="],
"tinyspy": ["tinyspy@2.2.1", "", {}, "sha512-KYad6Vy5VDWV4GH3fjpseMQ/XU2BhIYP7Vzd0LG44qRWm/Yt2WCOTicFdvmgo6gWaqooMQCawTtILVQJupKu7A=="],
"type-detect": ["type-detect@4.1.0", "", {}, "sha512-Acylog8/luQ8L7il+geoSxhEkazvkslg7PSNKOX59mbB9cOveP5aq9h74Y7YU8yDpJwetzQQrfIwtf4Wp4LKcw=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
"ufo": ["ufo@1.6.1", "", {}, "sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA=="],
"undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
"vite": ["vite@5.4.21", "", { "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", "rollup": "^4.20.0" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || >=20.0.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.4.0" }, "optionalPeers": ["@types/node", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser"], "bin": { "vite": "bin/vite.js" } }, "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw=="],
"vite-node": ["vite-node@1.6.1", "", { "dependencies": { "cac": "^6.7.14", "debug": "^4.3.4", "pathe": "^1.1.1", "picocolors": "^1.0.0", "vite": "^5.0.0" }, "bin": { "vite-node": "vite-node.mjs" } }, "sha512-YAXkfvGtuTzwWbDSACdJSg4A4DZiAqckWe90Zapc/sEX3XvHcw1NdurM/6od8J207tSDqNbSsgdCacBgvJKFuA=="],
"vitest": ["vitest@1.6.1", "", { "dependencies": { "@vitest/expect": "1.6.1", "@vitest/runner": "1.6.1", "@vitest/snapshot": "1.6.1", "@vitest/spy": "1.6.1", "@vitest/utils": "1.6.1", "acorn-walk": "^8.3.2", "chai": "^4.3.10", "debug": "^4.3.4", "execa": "^8.0.1", "local-pkg": "^0.5.0", "magic-string": "^0.30.5", "pathe": "^1.1.1", "picocolors": "^1.0.0", "std-env": "^3.5.0", "strip-literal": "^2.0.0", "tinybench": "^2.5.1", "tinypool": "^0.8.3", "vite": "^5.0.0", "vite-node": "1.6.1", "why-is-node-running": "^2.2.2" }, "peerDependencies": { "@edge-runtime/vm": "*", "@types/node": "^18.0.0 || >=20.0.0", "@vitest/browser": "1.6.1", "@vitest/ui": "1.6.1", "happy-dom": "*", "jsdom": "*" }, "optionalPeers": ["@edge-runtime/vm", "@types/node", "@vitest/browser", "@vitest/ui", "happy-dom", "jsdom"], "bin": { "vitest": "vitest.mjs" } }, "sha512-Ljb1cnSJSivGN0LqXd/zmDbWEM0RNNg2t1QW/XUhYl/qPqyu7CsqeWtqQXHVaJsecLPuDoak2oJcZN2QoRIOag=="],
"which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
"why-is-node-running": ["why-is-node-running@2.3.0", "", { "dependencies": { "siginfo": "^2.0.0", "stackback": "0.0.2" }, "bin": { "why-is-node-running": "cli.js" } }, "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w=="],
"yocto-queue": ["yocto-queue@1.2.1", "", {}, "sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg=="],
"mlly/pathe": ["pathe@2.0.3", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="],
"npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="],
"pkg-types/pathe": ["pathe@2.0.3", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="],
}
}
-17
View File
@@ -1,17 +0,0 @@
{
"name": "basango-monorepo",
"private": true,
"version": "0.1.0",
"workspaces": [
"apps/*"
],
"scripts": {
"build": "tsc -b",
"test": "vitest --run"
},
"devDependencies": {
"typescript": "^5.4.0",
"vitest": "^1.6.0",
"@types/node": "^20.11.30"
}
}
+5 -5
View File
@@ -1,4 +1,4 @@
# This file is the entry point to configure your own services.
# This file is the entry point to configure your own process.
# Files in the packages/ subdirectory configure your dependencies.
# Put parameters here that don't need to change on each machine where the app is deployed
@@ -7,10 +7,10 @@ parameters:
basango_notification_email: '%env(BASANGO_NOTIFICATION_EMAIL)%'
services:
# default configuration for services in *this* file
# default configuration for process in *this* file
_defaults:
autowire: true # Automatically injects dependencies in your services.
autoconfigure: true # Automatically registers your services as commands, event subscribers, etc.
autowire: true # Automatically injects dependencies in your process.
autoconfigure: true # Automatically registers your process as commands, event subscribers, etc.
bind:
$projectDir: '%kernel.project_dir%'
$crawlingNotificationEmail: '%basango_notification_email%'
@@ -42,7 +42,7 @@ services:
tags:
- { name: monolog.formatter }
# makes classes in src/ available to be used as services
# makes classes in src/ available to be used as process
# this creates a service per class whose id is the fully-qualified class name
Basango\:
resource: '../src/'
@@ -2,7 +2,7 @@ from .date_parser import DateParser
from .http_client import BaseHttpClient, SyncHttpClient, AsyncHttpClient
from .open_graph import OpenGraphProvider
from .persistence import BasePersistor, CsvPersistor, JsonPersistor
from .user_agents import UserAgentProvider
from .user_agents import UserAgents
from .tokenizer import Tokenizer
HttpClient = SyncHttpClient
@@ -14,7 +14,7 @@ __all__ = [
"AsyncHttpClient",
"HttpClient",
"OpenGraphProvider",
"UserAgentProvider",
"UserAgents",
"BasePersistor",
"CsvPersistor",
"JsonPersistor",
@@ -8,7 +8,7 @@ from typing import Any, Optional, TypeAlias
import httpx
from basango.core.config import ClientConfig
from basango.services.user_agents import UserAgentProvider
from basango.services.user_agents import UserAgents
HttpHeaders: TypeAlias = dict[str, str] | None
HttpParams: TypeAlias = dict[str, Any] | None
@@ -20,13 +20,13 @@ TRANSIENT_STATUSES = (429, 500, 502, 503, 504)
@dataclass
class BaseHttpClient(ABC):
client_config: ClientConfig
user_agent_provider: UserAgentProvider | None = None
user_agent_provider: UserAgents | None = None
default_headers: HttpHeaders = None
_user_agent: str = field(init=False, repr=False)
_headers: dict[str, str] = field(init=False, repr=False)
def __post_init__(self) -> None:
provider = self.user_agent_provider or UserAgentProvider(
provider = self.user_agent_provider or UserAgents(
rotate=self.client_config.rotate,
fallback=self.client_config.user_agent,
)
@@ -6,7 +6,7 @@ import trafilatura
from basango.core.config import ClientConfig
from basango.services.http_client import SyncHttpClient
from basango.services.user_agents import UserAgentProvider
from basango.services.user_agents import UserAgents
@dataclass
@@ -19,7 +19,7 @@ class OpenGraphObject:
class OpenGraphProvider:
def __init__(
self, user_agent_provider: UserAgentProvider = UserAgentProvider(rotate=False)
self, user_agent_provider: UserAgents = UserAgents(rotate=False)
) -> None:
self._user_agent = user_agent_provider.og()
self._http_client = SyncHttpClient(
@@ -3,7 +3,7 @@ from dataclasses import dataclass
@dataclass
class UserAgentProvider:
class UserAgents:
USER_AGENTS = [
"Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
"Mozilla/5.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
-15
View File
@@ -1,15 +0,0 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "ESNext",
"moduleResolution": "Node",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"skipLibCheck": true,
"types": ["node"],
"resolveJsonModule": true,
"declaration": true,
"outDir": "dist"
}
}
-7
View File
@@ -1,7 +0,0 @@
{
"extends": "./tsconfig.base.json",
"files": [],
"references": [
{ "path": "./apps/crawler" }
]
}