refactor: centralize crawler schemas and json config

This commit is contained in:
Bernard Ngandu
2025-10-28 06:42:45 +02:00
parent 799cda6e06
commit eae1ede15f
19 changed files with 1052 additions and 0 deletions
+38
View File
@@ -0,0 +1,38 @@
import fs from "node:fs";
import type { RedisOptions } from "ioredis";
import { get_encoding } from "tiktoken";
import type { ProjectPaths } from "./schema";
/**
 * Makes sure the project's data, logs and configs directories exist.
 *
 * Each path is checked in turn and, when nothing exists at that location,
 * created together with any missing parent directories.
 *
 * @param paths - Project paths; only `data`, `logs` and `configs` are read.
 */
export const ensureDirectories = (paths: ProjectPaths): void => {
  const requiredDirs = [paths.data, paths.logs, paths.configs];

  requiredDirs.forEach((target) => {
    // Anything already present at this path is left untouched.
    if (fs.existsSync(target)) {
      return;
    }
    fs.mkdirSync(target, { recursive: true });
  });
};
/**
 * Converts a `redis://` or `rediss://` connection URL into ioredis options.
 *
 * - Any other scheme yields an empty options object.
 * - `rediss://` additionally sets `tls: {}` so the connection is encrypted.
 * - Username and password are percent-decoded before being returned.
 * - The port defaults to 6379 when the URL has none.
 * - The database index comes from the URL path and defaults to 0 when the
 *   path is empty or not a plain non-negative integer.
 *
 * @param url - Connection string, e.g. `redis://user:secret@host:6380/2`.
 * @returns Options derived from the URL, or `{}` for unsupported schemes.
 * @throws TypeError when the string has a Redis scheme but is not a valid URL
 *   (raised by the `URL` constructor).
 */
export const parseRedisUrl = (url: string): RedisOptions => {
  const isTls = url.startsWith("rediss://");
  if (!isTls && !url.startsWith("redis://")) {
    return {};
  }

  // `URL` exposes credentials in percent-encoded form; decode them so that
  // characters such as "@" or ":" in a password reach Redis unescaped.
  // A malformed escape sequence falls back to the raw value instead of throwing.
  const decode = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  const parsed = new URL(url);
  const dbText = parsed.pathname.replace(/^\//, "");

  const options: RedisOptions = {
    // IPv6 literals are serialized with surrounding brackets, which are not
    // part of the address a socket can connect to.
    host: parsed.hostname.replace(/^\[|\]$/g, ""),
    port: Number(parsed.port || 6379),
    password: parsed.password ? decode(parsed.password) : undefined,
    // Guard against NaN for paths like "/abc" or "/1/extra".
    db: /^\d+$/.test(dbText) ? Number(dbText) : 0,
  };

  if (parsed.username) {
    options.username = decode(parsed.username);
  }
  if (isTls) {
    options.tls = {};
  }
  return options;
};
/**
 * Counts the tokens in `text` using the given tiktoken encoding.
 *
 * This is best-effort: if the encoder cannot be created or encoding fails,
 * the character length of `text` is returned instead of throwing.
 *
 * @param text - Text to tokenize.
 * @param encoding - Name of the tiktoken encoding; defaults to `cl100k_base`.
 * @returns The token count, or `text.length` when tokenization fails.
 */
export const countTokens = (text: string, encoding = "cl100k_base"): number => {
  try {
    const encoder = get_encoding(encoding);
    try {
      return encoder.encode(text).length;
    } finally {
      // Always release the encoder, even when encode() throws; otherwise the
      // instance is never freed on the failure path.
      encoder.free();
    }
  } catch {
    return text.length;
  }
};