fix(app): replace legacy "@/" namespace alias with "#crawler" scoped subpath imports
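The `#crawler/*` specifiers introduced below are Node-style subpath imports: they resolve through the package's own `imports` map rather than the bundler/tsconfig `@/` path alias being removed. A minimal sketch of the kind of `package.json` entry this relies on; the `#crawler/*` key is taken from the diff, while the `./src/*` target is an assumption, not something shown in this commit:

    // package.json (sketch): the target glob is assumed; adjust to the real source layout
    {
      "imports": {
        "#crawler/*": "./src/*"
      }
    }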
@@ -9,7 +9,7 @@ import {
   PageRangeSchema,
   UpdateDirectionSchema,
   WordPressSourceConfigSchema,
-} from "@/schema";
+} from "#crawler/schema";
 
 export const PROJECT_DIR = path.resolve(__dirname, "../");

@@ -1,12 +1,12 @@
 import { setTimeout as delay } from "node:timers/promises";
 
-import { FetchClientConfig } from "@/config";
+import { FetchClientConfig } from "#crawler/config";
 import {
   DEFAULT_RETRY_AFTER_HEADER,
   DEFAULT_USER_AGENT,
   TRANSIENT_HTTP_STATUSES,
-} from "@/constants";
-import { UserAgents } from "@/http/user-agent";
+} from "#crawler/constants";
+import { UserAgents } from "#crawler/http/user-agent";
 
 export type HttpHeaders = Record<string, string>;
 export type HttpParams = Record<string, string | number | boolean | null | undefined>;

@@ -1,10 +1,10 @@
 import { parse } from "node-html-parser";
 
-import { config } from "@/config";
-import { OPEN_GRAPH_USER_AGENT } from "@/constants";
-import { SyncHttpClient } from "@/http/http-client";
-import { UserAgents } from "@/http/user-agent";
-import { ArticleMetadata } from "@/schema";
+import { config } from "#crawler/config";
+import { OPEN_GRAPH_USER_AGENT } from "#crawler/constants";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import { UserAgents } from "#crawler/http/user-agent";
+import { ArticleMetadata } from "#crawler/schema";
 
 /**
  * Picks the first non-empty value from the provided array.

@@ -1,4 +1,4 @@
-import { DEFAULT_USER_AGENT, OPEN_GRAPH_USER_AGENT } from "@/constants";
+import { DEFAULT_USER_AGENT, OPEN_GRAPH_USER_AGENT } from "#crawler/constants";
 
 /**
  * User agent provider with optional rotation.

@@ -1,19 +1,24 @@
 import { logger } from "@basango/logger";
 
-import { config, env } from "@/config";
-import { UnsupportedSourceKindError } from "@/errors";
-import { SyncHttpClient } from "@/http/http-client";
-import { QueueManager, createQueueManager } from "@/process/async/queue";
+import { config, env } from "#crawler/config";
+import { UnsupportedSourceKindError } from "#crawler/errors";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import { QueueManager, createQueueManager } from "#crawler/process/async/queue";
 import {
   DetailsTaskPayload,
   ListingTaskPayload,
   ProcessingTaskPayload,
-} from "@/process/async/schemas";
-import { createPersistors, resolveCrawlerConfig } from "@/process/crawler";
-import { HtmlCrawler } from "@/process/parsers/html";
-import { WordPressCrawler } from "@/process/parsers/wordpress";
-import { Article, HtmlSourceConfig, SourceKindSchema, WordPressSourceConfig } from "@/schema";
-import { createDateRange, formatDateRange, formatPageRange, resolveSourceConfig } from "@/utils";
+} from "#crawler/process/async/schemas";
+import { createPersistors, resolveCrawlerConfig } from "#crawler/process/crawler";
+import { HtmlCrawler } from "#crawler/process/parsers/html";
+import { WordPressCrawler } from "#crawler/process/parsers/wordpress";
+import { Article, HtmlSourceConfig, WordPressSourceConfig } from "#crawler/schema";
+import {
+  createDateRange,
+  formatDateRange,
+  formatPageRange,
+  resolveSourceConfig,
+} from "#crawler/utils";
 
 export const collectHtmlListing = async (
   payload: ListingTaskPayload,

@@ -107,7 +112,7 @@ export const collectArticle = async (
   });
   const persistors = createPersistors(source);
 
-  if (source.sourceKind === SourceKindSchema.enum.html) {
+  if (source.sourceKind === "html") {
     const crawler = new HtmlCrawler(settings, { persistors });
     const html = await crawler.crawl(payload.url);

@@ -118,7 +123,7 @@ export const collectArticle = async (
     } as ProcessingTaskPayload);
   }
 
-  if (source.sourceKind === SourceKindSchema.enum.wordpress) {
+  if (source.sourceKind === "wordpress") {
     const crawler = new WordPressCrawler(settings, { persistors });
 
     const article = await crawler.fetchOne(payload.data ?? {}, settings.dateRange);

@@ -134,11 +139,24 @@ export const collectArticle = async (
 export const forwardForProcessing = async (payload: ProcessingTaskPayload): Promise<Article> => {
   logger.info({ article: payload.article.title }, "Ready for downstream processing");
 
-  const client = new SyncHttpClient(config.fetch.client);
-  const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT");
   try {
     logger.info({ article: payload.article.title }, "Forwarding article to API");
 
-    await client.post(endpoint, { json: payload.article });
-    logger.info({ article: payload.article.title }, "Forwarded article to API");
+    const client = new SyncHttpClient(config.fetch.client);
+    const response = await client.post(env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT"), {
+      headers: {
+        Authorization: `${env("BASANGO_CRAWLER_TOKEN")}`,
+      },
+      json: payload.article,
+    });
+
+    if (response.ok) {
+      const data = await response.json();
+      logger.info({ ...data }, "Article successfully forwarded to API");
+    }
   } catch (error) {
     logger.error({ error }, "Failed to forward article to API");
   }
 
   return payload.article;
 };

@@ -3,7 +3,7 @@ import { randomUUID } from "node:crypto";
 import { JobsOptions, Queue, QueueOptions } from "bullmq";
 import IORedis from "ioredis";
 
-import { FetchAsyncConfig, config } from "@/config";
+import { FetchAsyncConfig, config } from "#crawler/config";
 import {
   DetailsTaskPayload,
   DetailsTaskPayloadSchema,

@@ -11,8 +11,8 @@ import {
   ListingTaskPayloadSchema,
   ProcessingTaskPayload,
   ProcessingTaskPayloadSchema,
-} from "@/process/async/schemas";
-import { parseRedisUrl } from "@/utils";
+} from "#crawler/process/async/schemas";
+import { parseRedisUrl } from "#crawler/utils";
 
 export interface QueueBackend<T = unknown> {
   add: (name: string, data: T, opts?: JobsOptions) => Promise<{ id: string }>;

@@ -1,6 +1,6 @@
 import { z } from "zod";
 
-import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "@/schema";
+import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "#crawler/schema";
 
 export const ListingTaskPayloadSchema = z.object({
   category: z.string().optional(),

@@ -1,13 +1,13 @@
 import { logger } from "@basango/logger";
 
-import * as handlers from "@/process/async/handlers";
-import { createQueueManager } from "@/process/async/queue";
+import * as handlers from "#crawler/process/async/handlers";
+import { createQueueManager } from "#crawler/process/async/queue";
 import {
   DetailsTaskPayloadSchema,
   ListingTaskPayloadSchema,
   ProcessingTaskPayloadSchema,
-} from "@/process/async/schemas";
-import { CrawlingOptions } from "@/process/crawler";
+} from "#crawler/process/async/schemas";
+import { CrawlingOptions } from "#crawler/process/crawler";
 
 export const collectListing = async (payload: unknown): Promise<number> => {
   const data = ListingTaskPayloadSchema.parse(payload);

@@ -1,8 +1,8 @@
 import { QueueEvents, Worker } from "bullmq";
 import IORedis from "ioredis";
 
-import { QueueFactory, QueueManager } from "@/process/async/queue";
-import { collectArticle, collectListing, forwardForProcessing } from "@/process/async/tasks";
+import { QueueFactory, QueueManager } from "#crawler/process/async/queue";
+import { collectArticle, collectListing, forwardForProcessing } from "#crawler/process/async/tasks";
 
 export interface WorkerOptions {
   queueNames?: string[];

@@ -1,9 +1,9 @@
 import logger from "@basango/logger";
 
-import { FetchCrawlerConfig, config } from "@/config";
-import { JsonlPersistor, Persistor } from "@/process/persistence";
-import { AnySourceConfig } from "@/schema";
-import { createDateRange, createPageRange } from "@/utils";
+import { FetchCrawlerConfig, config } from "#crawler/config";
+import { JsonlPersistor, Persistor } from "#crawler/process/persistence";
+import { AnySourceConfig } from "#crawler/schema";
+import { createDateRange, createPageRange } from "#crawler/utils";
 
 export interface CrawlingOptions {
   sourceId: string;

@@ -1,10 +1,10 @@
 import { HTMLElement, parse as parseHtml } from "node-html-parser";
 
-import { FetchCrawlerConfig, config } from "@/config";
-import { SyncHttpClient } from "@/http/http-client";
-import { OpenGraph } from "@/http/open-graph";
-import type { Persistor } from "@/process/persistence";
-import { AnySourceConfig, Article } from "@/schema";
+import { FetchCrawlerConfig, config } from "#crawler/config";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import { OpenGraph } from "#crawler/http/open-graph";
+import type { Persistor } from "#crawler/process/persistence";
+import { AnySourceConfig, Article } from "#crawler/schema";
 
 export interface CrawlerOptions {
   persistors?: Persistor[];

@@ -3,17 +3,17 @@ import { getUnixTime, isMatch as isDateMatch, parse as parseDateFns } from "date
 import { HTMLElement } from "node-html-parser";
 import TurndownService from "turndown";
 
-import { FetchCrawlerConfig } from "@/config";
+import { FetchCrawlerConfig } from "#crawler/config";
 import {
   ArticleOutOfDateRangeError,
   InvalidArticleError,
   InvalidSourceSelectorsError,
   UnsupportedSourceKindError,
-} from "@/errors";
-import { BaseCrawler } from "@/process/parsers/base";
-import { Persistor, persist } from "@/process/persistence";
-import { Article, DateRange, HtmlSourceConfig } from "@/schema";
-import { createAbsoluteUrl, isTimestampInRange } from "@/utils";
+} from "#crawler/errors";
+import { BaseCrawler } from "#crawler/process/parsers/base";
+import { Persistor, persist } from "#crawler/process/persistence";
+import { Article, DateRange, HtmlSourceConfig } from "#crawler/schema";
+import { createAbsoluteUrl, isTimestampInRange } from "#crawler/utils";
 
 const md = new TurndownService({
   bulletListMarker: "-",

@@ -148,8 +148,8 @@ export class HtmlCrawler extends BaseCrawler {
       body,
       categories,
       link,
-      source: this.source.sourceId,
-      timestamp,
+      publishedAt: new Date(timestamp * 1000),
+      sourceId: this.source.sourceId,
       title,
     },
     link,

@@ -1,16 +1,16 @@
 import { logger } from "@basango/logger";
 import TurndownService from "turndown";
 
-import { FetchCrawlerConfig } from "@/config";
+import { FetchCrawlerConfig } from "#crawler/config";
 import {
   ArticleOutOfDateRangeError,
   InvalidArticleError,
   UnsupportedSourceKindError,
-} from "@/errors";
-import { BaseCrawler } from "@/process/parsers/base";
-import { Persistor, persist } from "@/process/persistence";
-import { Article, DateRange, PageRange, WordPressSourceConfig } from "@/schema";
-import { isTimestampInRange } from "@/utils";
+} from "#crawler/errors";
+import { BaseCrawler } from "#crawler/process/parsers/base";
+import { Persistor, persist } from "#crawler/process/persistence";
+import { Article, DateRange, PageRange, WordPressSourceConfig } from "#crawler/schema";
+import { isTimestampInRange } from "#crawler/utils";
 
 const md = new TurndownService({
   bulletListMarker: "-",

@@ -148,8 +148,8 @@ export class WordPressCrawler extends BaseCrawler {
       body,
       categories,
       link,
-      source: this.source.sourceId,
-      timestamp,
+      publishedAt: new Date(timestamp * 1000),
+      sourceId: this.source.sourceId,
       title,
     },
     link,

@@ -4,8 +4,7 @@ import path from "node:path";
 import { md5 } from "@basango/encryption";
 import logger from "@basango/logger";
 
-import { Article } from "@/schema";
-import { countTokens } from "@/utils";
+import { Article } from "#crawler/schema";
 
 export interface Persistor {
   persist(record: Article): Promise<void> | void;

@@ -47,12 +46,6 @@ export const persist = async (payload: Article, persistors: Persistor[]): Promis
   const article = {
     ...data,
     hash: md5(data.link),
-    tokenStatistics: {
-      body: countTokens(data.body),
-      categories: countTokens(data.categories.join(",")),
-      excerpt: countTokens(data.body.substring(0, 200)),
-      title: countTokens(data.title),
-    },
   } as Article;
 
   for (const persistor of persistors) {

@@ -5,10 +5,10 @@ import {
   closePersistors,
   createPersistors,
   resolveCrawlerConfig,
-} from "@/process/crawler";
-import { HtmlCrawler } from "@/process/parsers/html";
-import { WordPressCrawler } from "@/process/parsers/wordpress";
-import { resolveSourceConfig } from "@/utils";
+} from "#crawler/process/crawler";
+import { HtmlCrawler } from "#crawler/process/parsers/html";
+import { WordPressCrawler } from "#crawler/process/parsers/wordpress";
+import { resolveSourceConfig } from "#crawler/utils";
 
 export const runSyncCrawl = async (options: CrawlingOptions): Promise<void> => {
   const source = resolveSourceConfig(options.sourceId);

@@ -112,8 +112,8 @@ export const ArticleSchema = z.object({
   hash: z.string().optional(),
   link: z.url(),
   metadata: ArticleMetadataSchema.optional(),
-  source: z.string(),
-  timestamp: z.number().int(),
+  publishedAt: z.date(),
+  sourceId: z.string(),
   title: z.string(),
   tokenStatistics: ArticleTokenStatisticsSchema.optional(),
 });

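This hunk also renames the article identity and time fields: the Unix-seconds `timestamp` becomes a `publishedAt` Date (the parser hunks above construct it via `new Date(timestamp * 1000)`), and `source` becomes `sourceId`. A sketch of the resulting shape, inferred only from the schema lines in this hunk; the remaining `ArticleSchema` fields are elided:

    // Inferred from the ArticleSchema hunk above; a sketch, not the full generated type
    type Article = {
      hash?: string;
      link: string;        // z.url()
      publishedAt: Date;   // replaces timestamp: number (Unix seconds)
      sourceId: string;    // replaces source: string
      title: string;
    };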
@@ -1,7 +1,7 @@
 import { logger } from "@basango/logger";
 
-import { runSyncCrawl } from "@/process/sync/tasks";
-import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
+import { runSyncCrawl } from "#crawler/process/sync/tasks";
+import { CRAWLING_USAGE, parseCrawlingCliArgs } from "#crawler/scripts/utils";
 
 const main = async (): Promise<void> => {
   const options = parseCrawlingCliArgs();

@@ -1,7 +1,7 @@
 import { logger } from "@basango/logger";
 
-import { scheduleAsyncCrawl } from "@/process/async/tasks";
-import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
+import { scheduleAsyncCrawl } from "#crawler/process/async/tasks";
+import { CRAWLING_USAGE, parseCrawlingCliArgs } from "#crawler/scripts/utils";
 
 const main = async (): Promise<void> => {
   const options = parseCrawlingCliArgs();

@@ -0,0 +1,103 @@
+import fs from "node:fs";
+import path from "node:path";
+import { createInterface } from "node:readline";
+import { parseArgs } from "node:util";
+
+import { logger } from "@basango/logger";
+
+import { config, env } from "#crawler/config";
+import { SyncHttpClient } from "#crawler/http/http-client";
+import type { Article } from "#crawler/schema";
+
+const USAGE = `
+Usage: bun run crawler:sync -- --sourceId <id>
+`;
+
+const parseCliArgs = (): { sourceId?: string } => {
+  const { values } = parseArgs({
+    options: {
+      sourceId: { type: "string" },
+    },
+  });
+  return values as { sourceId?: string };
+};
+
+const forwardArticle = async (article: Article): Promise<void> => {
+  const client = new SyncHttpClient(config.fetch.client);
+  const endpoint = env("BASANGO_CRAWLER_BACKEND_API_ENDPOINT");
+  const token = env("BASANGO_CRAWLER_TOKEN");
+
+  try {
+    const response = await client.post(endpoint, {
+      headers: {
+        Authorization: `${token}`,
+      },
+      json: article,
+    });
+
+    if (response.ok) {
+      const data = await response.json();
+      logger.info({ ...data }, "Article forwarded");
+      return;
+    }
+
+    logger.error({ link: article.link, status: response.status }, "Forwarding failed");
+  } catch (error) {
+    logger.error({ error, link: article.link }, "Failed to forward article");
+  }
+};
+
+const main = async (): Promise<void> => {
+  const { sourceId } = parseCliArgs();
+  if (!sourceId) {
+    console.log(USAGE);
+    process.exitCode = 1;
+    return;
+  }
+
+  const filePath = path.join(config.paths.data, `${sourceId}.jsonl`);
+
+  if (!fs.existsSync(filePath)) {
+    logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL not found");
+    process.exitCode = 1;
+    return;
+  }
+
+  const stat = fs.statSync(filePath);
+  if (stat.size === 0) {
+    logger.error({ filePath, sourceId }, "Source must be crawled first; JSONL is empty");
+    process.exitCode = 1;
+    return;
+  }
+
+  logger.info({ filePath, sourceId }, "Syncing articles from JSONL to backend");
+
+  const stream = fs.createReadStream(filePath, { encoding: "utf-8" });
+  const rl = createInterface({ crlfDelay: Infinity, input: stream });
+
+  let count = 0;
+  try {
+    for await (const raw of rl) {
+      const line = raw.trim();
+      if (!line) continue;
+
+      try {
+        const article = JSON.parse(line) as Article & { publishedAt: string };
+        await forwardArticle({
+          ...article,
+          publishedAt: new Date(article.publishedAt),
+        });
+
+        count += 1;
+      } catch (error) {
+        logger.error({ error, linePreview: line.slice(0, 100) }, "Invalid JSONL line");
+      }
+    }
+  } finally {
+    rl.close();
+  }
+
+  logger.info({ forwarded: count, sourceId }, "Sync completed");
+};
+
+void main();

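The new script replays an already-crawled JSONL file line by line, reviving `publishedAt` from its JSON string form before forwarding each record. A usage sketch; the source ID and record values are illustrative, not taken from this commit:

    # Requires <data>/<sourceId>.jsonl to exist and be non-empty
    bun run crawler:sync -- --sourceId my-source

    # Illustrative shape of one JSONL input line
    {"title":"Hello","link":"https://example.com/hello","sourceId":"my-source","publishedAt":"2024-01-01T00:00:00.000Z","body":"..."}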
@@ -1,6 +1,6 @@
 import { parseArgs } from "node:util";
 
-import { CrawlingOptions } from "@/process/crawler";
+import { CrawlingOptions } from "#crawler/process/crawler";
 
 interface WorkerCliOptions {
   queue?: string[];

@@ -1,8 +1,8 @@
 import { logger } from "@basango/logger";
 
-import { createQueueManager } from "@/process/async/queue";
-import { startWorker } from "@/process/async/worker";
-import { parseWorkerCliArgs } from "@/scripts/utils";
+import { createQueueManager } from "#crawler/process/async/queue";
+import { startWorker } from "#crawler/process/async/worker";
+import { parseWorkerCliArgs } from "#crawler/scripts/utils";
 
 const main = async (): Promise<void> => {
   const options = parseWorkerCliArgs();

@@ -1,9 +1,8 @@
 import { format, getUnixTime, isMatch, parse } from "date-fns";
 import type { RedisOptions } from "ioredis";
-import { TiktokenEncoding, get_encoding } from "tiktoken";
 
-import { config } from "@/config";
-import { DEFAULT_DATE_FORMAT } from "@/constants";
+import { config } from "#crawler/config";
+import { DEFAULT_DATE_FORMAT } from "#crawler/constants";
 import {
   AnySourceConfig,
   CreateDateRangeOptions,

@@ -15,7 +14,7 @@ import {
   PageRangeSchema,
   PageRangeSpecSchema,
   WordPressSourceConfig,
-} from "@/schema";
+} from "#crawler/schema";
 
 /**
  * Resolve a source configuration by its ID.

@@ -66,22 +65,6 @@ const parseDate = (value: string, format: string): Date => {
   return parsed;
 };
 
-/**
- * Count the number of tokens in the given text using the specified encoding.
- * @param text - The input text
- * @param encoding - The token encoding (default: "cl100k_base")
- */
-export const countTokens = (text: string, encoding: TiktokenEncoding = "cl100k_base"): number => {
-  try {
-    const encoder = get_encoding(encoding);
-    const tokens = encoder.encode(text);
-    encoder.free();
-    return tokens.length;
-  } catch {
-    return text.length;
-  }
-};
-
 /**
  * Create a page range from a string specification.
  * @param spec - The page range specification (e.g., "1:10")