feat(crawler): fix async crawling
This commit is contained in:
Vendored
+2
-2
@@ -9,8 +9,8 @@
|
|||||||
"editor.defaultFormatter": "biomejs.biome"
|
"editor.defaultFormatter": "biomejs.biome"
|
||||||
},
|
},
|
||||||
"editor.codeActionsOnSave": {
|
"editor.codeActionsOnSave": {
|
||||||
"source.organizeImports.biome": "explicit",
|
"source.fixAll.biome": "explicit",
|
||||||
"source.fixAll.biome": "explicit"
|
"source.organizeImports.biome": "explicit"
|
||||||
},
|
},
|
||||||
"editor.defaultFormatter": "biomejs.biome",
|
"editor.defaultFormatter": "biomejs.biome",
|
||||||
"editor.formatOnSave": true,
|
"editor.formatOnSave": true,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import path from "node:path";
|
|||||||
|
|
||||||
import { loadConfig as defineConfig } from "@devscast/config";
|
import { loadConfig as defineConfig } from "@devscast/config";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
DateRangeSchema,
|
DateRangeSchema,
|
||||||
HtmlSourceConfigSchema,
|
HtmlSourceConfigSchema,
|
||||||
|
|||||||
@@ -1,6 +1,29 @@
|
|||||||
|
/**
|
||||||
|
* Default date format used for parsing and formatting dates.
|
||||||
|
* Follows the "yyyy-LL-dd" pattern (e.g., "2024-06-15").
|
||||||
|
*/
|
||||||
export const DEFAULT_DATE_FORMAT = "yyyy-LL-dd";
|
export const DEFAULT_DATE_FORMAT = "yyyy-LL-dd";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default User-Agent string for HTTP requests made by the crawler.
|
||||||
|
* Some websites may block requests with missing or generic User-Agent headers.
|
||||||
|
*/
|
||||||
export const DEFAULT_USER_AGENT = "Basango/0.1 (+https://github.com/bernard-ng/basango)";
|
export const DEFAULT_USER_AGENT = "Basango/0.1 (+https://github.com/bernard-ng/basango)";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User-Agent string used for Open Graph requests.
|
||||||
|
* Some services require a specific User-Agent to return Open Graph data.
|
||||||
|
*/
|
||||||
export const OPEN_GRAPH_USER_AGENT = "facebookexternalhit/1.1";
|
export const OPEN_GRAPH_USER_AGENT = "facebookexternalhit/1.1";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HTTP status codes considered transient errors.
|
||||||
|
* Used for retry logic in HTTP clients.
|
||||||
|
*/
|
||||||
export const TRANSIENT_HTTP_STATUSES = [429, 500, 502, 503, 504];
|
export const TRANSIENT_HTTP_STATUSES = [429, 500, 502, 503, 504];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default header name for Retry-After responses.
|
||||||
|
* Used when handling rate limiting.
|
||||||
|
*/
|
||||||
export const DEFAULT_RETRY_AFTER_HEADER = "retry-after";
|
export const DEFAULT_RETRY_AFTER_HEADER = "retry-after";
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
/**
 * Raised for an article that is invalid or cannot be processed
 * (for example, a payload of the wrong type or a missing link).
 */
export class InvalidArticleError extends Error {
  /** Error name reported in logs and stack traces. */
  override name = "InvalidArticleError";

  /**
   * @param message - Human-readable description of what is wrong with the article.
   */
  constructor(message: string) {
    super(message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Raised when the crawler is asked to handle a source kind it does not support.
 */
export class UnsupportedSourceKindError extends Error {
  /** Error name reported in logs and stack traces. */
  override name = "UnsupportedSourceKindError";

  /**
   * @param message - Human-readable description of the unsupported source kind.
   */
  constructor(message: string) {
    super(message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Raised when the selectors configured for a source are invalid or missing.
 */
export class InvalidSourceSelectorsError extends Error {
  /** Error name reported in logs and stack traces. */
  override name = "InvalidSourceSelectorsError";

  /**
   * @param message - Human-readable description of the selector problem.
   */
  constructor(message: string) {
    super(message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Error thrown when an article's publication date is outside the specified date range.
 *
 * Throwers pass contextual details about the rejected article (the crawlers in
 * this change pass fields such as `date`, `link`, `timestamp` and `title`).
 * Those details are kept on {@link ArticleOutOfDateRangeError.meta} so that the
 * code catching the error can log or inspect them instead of losing them.
 */
export class ArticleOutOfDateRangeError extends Error {
  /** Context describing the rejected article, exactly as supplied by the thrower. */
  readonly meta: Record<string, unknown>;

  /**
   * @param message - Human-readable description of the rejection.
   * @param meta - Arbitrary context about the article; defaults to an empty object.
   */
  constructor(message: string, meta: Record<string, unknown> = {}) {
    super(message);
    this.name = "ArticleOutOfDateRangeError";
    // Previously this argument was accepted and silently discarded.
    this.meta = meta;
  }
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import { setTimeout as delay } from "node:timers/promises";
|
import { setTimeout as delay } from "node:timers/promises";
|
||||||
|
|
||||||
import { FetchClientConfig } from "@/config";
|
import { FetchClientConfig } from "@/config";
|
||||||
import {
|
import {
|
||||||
DEFAULT_RETRY_AFTER_HEADER,
|
DEFAULT_RETRY_AFTER_HEADER,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { parse } from "node-html-parser";
|
import { parse } from "node-html-parser";
|
||||||
|
|
||||||
import { config } from "@/config";
|
import { config } from "@/config";
|
||||||
import { OPEN_GRAPH_USER_AGENT } from "@/constants";
|
import { OPEN_GRAPH_USER_AGENT } from "@/constants";
|
||||||
import { SyncHttpClient } from "@/http/http-client";
|
import { SyncHttpClient } from "@/http/http-client";
|
||||||
|
|||||||
@@ -1,17 +1,17 @@
|
|||||||
import { logger } from "@basango/logger";
|
import { logger } from "@basango/logger";
|
||||||
|
|
||||||
import { config, env } from "@/config";
|
import { config, env } from "@/config";
|
||||||
|
import { UnsupportedSourceKindError } from "@/errors";
|
||||||
import { SyncHttpClient } from "@/http/http-client";
|
import { SyncHttpClient } from "@/http/http-client";
|
||||||
import { createQueueManager, QueueManager } from "@/process/async/queue";
|
import { QueueManager, createQueueManager } from "@/process/async/queue";
|
||||||
import {
|
import {
|
||||||
DetailsTaskPayload,
|
DetailsTaskPayload,
|
||||||
ListingTaskPayload,
|
ListingTaskPayload,
|
||||||
ProcessingTaskPayload,
|
ProcessingTaskPayload,
|
||||||
} from "@/process/async/schemas";
|
} from "@/process/async/schemas";
|
||||||
import { resolveCrawlerConfig } from "@/process/crawler";
|
import { createPersistors, resolveCrawlerConfig } from "@/process/crawler";
|
||||||
import { HtmlCrawler } from "@/process/parsers/html";
|
import { HtmlCrawler } from "@/process/parsers/html";
|
||||||
import { WordPressCrawler } from "@/process/parsers/wordpress";
|
import { WordPressCrawler } from "@/process/parsers/wordpress";
|
||||||
import { JsonlPersistor } from "@/process/persistence";
|
|
||||||
import { Article, HtmlSourceConfig, SourceKindSchema, WordPressSourceConfig } from "@/schema";
|
import { Article, HtmlSourceConfig, SourceKindSchema, WordPressSourceConfig } from "@/schema";
|
||||||
import { createDateRange, formatDateRange, formatPageRange, resolveSourceConfig } from "@/utils";
|
import { createDateRange, formatDateRange, formatPageRange, resolveSourceConfig } from "@/utils";
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ export const collectHtmlListing = async (
|
|||||||
|
|
||||||
let queued = 0;
|
let queued = 0;
|
||||||
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
||||||
const target = crawler.buildPageUrl(page) ?? `${source.sourceUrl}`;
|
const target = crawler.buildEndpointUrl(page) ?? `${source.sourceUrl}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const items = await crawler.fetchLinks(target, source.sourceSelectors.articles);
|
const items = await crawler.fetchLinks(target, source.sourceSelectors.articles);
|
||||||
@@ -69,7 +69,7 @@ export const collectWordPressListing = async (
|
|||||||
|
|
||||||
let queued = 0;
|
let queued = 0;
|
||||||
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
||||||
const url = crawler.postsEndpoint(page);
|
const url = crawler.buildEndpointUrl(page);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const entries = await crawler.fetchLinks(url);
|
const entries = await crawler.fetchLinks(url);
|
||||||
@@ -94,7 +94,10 @@ export const collectWordPressListing = async (
|
|||||||
return queued;
|
return queued;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const collectArticle = async (payload: DetailsTaskPayload): Promise<unknown> => {
|
export const collectArticle = async (
|
||||||
|
payload: DetailsTaskPayload,
|
||||||
|
manager: QueueManager = createQueueManager(),
|
||||||
|
): Promise<unknown> => {
|
||||||
const source = resolveSourceConfig(payload.sourceId);
|
const source = resolveSourceConfig(payload.sourceId);
|
||||||
const settings = resolveCrawlerConfig(source, {
|
const settings = resolveCrawlerConfig(source, {
|
||||||
category: payload.category,
|
category: payload.category,
|
||||||
@@ -102,26 +105,30 @@ export const collectArticle = async (payload: DetailsTaskPayload): Promise<unkno
|
|||||||
pageRange: payload.pageRange ? formatPageRange(payload.pageRange) : undefined,
|
pageRange: payload.pageRange ? formatPageRange(payload.pageRange) : undefined,
|
||||||
sourceId: payload.sourceId,
|
sourceId: payload.sourceId,
|
||||||
});
|
});
|
||||||
const persistors = [
|
const persistors = createPersistors(source);
|
||||||
new JsonlPersistor({
|
|
||||||
directory: config.paths.data,
|
|
||||||
sourceId: String(source.sourceId),
|
|
||||||
}),
|
|
||||||
];
|
|
||||||
|
|
||||||
if (source.sourceKind === SourceKindSchema.enum.html) {
|
if (source.sourceKind === SourceKindSchema.enum.html) {
|
||||||
if (!payload.url) throw new Error("Missing article url");
|
|
||||||
const crawler = new HtmlCrawler(settings, { persistors });
|
const crawler = new HtmlCrawler(settings, { persistors });
|
||||||
const html = await crawler.crawl(payload.url);
|
const html = await crawler.crawl(payload.url);
|
||||||
return await crawler.fetchOne(html, settings.dateRange);
|
|
||||||
|
const article = await crawler.fetchOne(html, settings.dateRange);
|
||||||
|
await manager.enqueueProcessed({
|
||||||
|
article,
|
||||||
|
sourceId: payload.sourceId,
|
||||||
|
} as ProcessingTaskPayload);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (source.sourceKind === SourceKindSchema.enum.wordpress) {
|
if (source.sourceKind === SourceKindSchema.enum.wordpress) {
|
||||||
const crawler = new WordPressCrawler(settings, { persistors });
|
const crawler = new WordPressCrawler(settings, { persistors });
|
||||||
return await crawler.fetchOne(payload.data ?? {}, settings.dateRange);
|
|
||||||
|
const article = await crawler.fetchOne(payload.data ?? {}, settings.dateRange);
|
||||||
|
await manager.enqueueProcessed({
|
||||||
|
article,
|
||||||
|
sourceId: payload.sourceId,
|
||||||
|
} as ProcessingTaskPayload);
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(`Unsupported source kind`);
|
throw new UnsupportedSourceKindError(`Unsupported source kind`);
|
||||||
};
|
};
|
||||||
|
|
||||||
export const forwardForProcessing = async (payload: ProcessingTaskPayload): Promise<Article> => {
|
export const forwardForProcessing = async (payload: ProcessingTaskPayload): Promise<Article> => {
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
import { randomUUID } from "node:crypto";
|
import { randomUUID } from "node:crypto";
|
||||||
|
|
||||||
import { JobsOptions, Queue, QueueOptions } from "bullmq";
|
import { JobsOptions, Queue, QueueOptions } from "bullmq";
|
||||||
import IORedis from "ioredis";
|
import IORedis from "ioredis";
|
||||||
import { config, FetchAsyncConfig } from "@/config";
|
|
||||||
|
import { FetchAsyncConfig, config } from "@/config";
|
||||||
import {
|
import {
|
||||||
DetailsTaskPayload,
|
DetailsTaskPayload,
|
||||||
DetailsTaskPayloadSchema,
|
DetailsTaskPayloadSchema,
|
||||||
@@ -97,9 +99,9 @@ export const createQueueManager = (options: CreateQueueManagerOptions = {}): Que
|
|||||||
return queue.add("forward_for_processing", data);
|
return queue.add("forward_for_processing", data);
|
||||||
},
|
},
|
||||||
iterQueueNames: () => [
|
iterQueueNames: () => [
|
||||||
`${settings.prefix}:${settings.queues.listing}`,
|
settings.queues.listing,
|
||||||
`${settings.prefix}:${settings.queues.details}`,
|
settings.queues.details,
|
||||||
`${settings.prefix}:${settings.queues.processing}`,
|
settings.queues.processing,
|
||||||
],
|
],
|
||||||
queueName: (suffix: string) => `${settings.prefix}:${suffix}`,
|
queueName: (suffix: string) => `${settings.prefix}:${suffix}`,
|
||||||
settings,
|
settings,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "@/schema";
|
import { ArticleSchema, DateRangeSchema, PageRangeSchema } from "@/schema";
|
||||||
|
|
||||||
export const ListingTaskPayloadSchema = z.object({
|
export const ListingTaskPayloadSchema = z.object({
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { logger } from "@basango/logger";
|
import { logger } from "@basango/logger";
|
||||||
|
|
||||||
import * as handlers from "@/process/async/handlers";
|
import * as handlers from "@/process/async/handlers";
|
||||||
import { createQueueManager } from "@/process/async/queue";
|
import { createQueueManager } from "@/process/async/queue";
|
||||||
import {
|
import {
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => {
|
|||||||
{
|
{
|
||||||
concurrency: options.concurrency ?? 5,
|
concurrency: options.concurrency ?? 5,
|
||||||
connection,
|
connection,
|
||||||
|
prefix: manager.settings.prefix,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -53,7 +54,10 @@ export const startWorker = (options: WorkerOptions): WorkerHandle => {
|
|||||||
worker.on("error", (err) => options.onError?.(err as Error));
|
worker.on("error", (err) => options.onError?.(err as Error));
|
||||||
}
|
}
|
||||||
|
|
||||||
const queueEvents = new QueueEvents(queueName, { connection });
|
const queueEvents = new QueueEvents(queueName, {
|
||||||
|
connection,
|
||||||
|
prefix: manager.settings.prefix,
|
||||||
|
});
|
||||||
|
|
||||||
workers.push(worker);
|
workers.push(worker);
|
||||||
events.push(queueEvents);
|
events.push(queueEvents);
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logger from "@basango/logger";
|
import logger from "@basango/logger";
|
||||||
import { config, FetchCrawlerConfig } from "@/config";
|
|
||||||
|
import { FetchCrawlerConfig, config } from "@/config";
|
||||||
import { JsonlPersistor, Persistor } from "@/process/persistence";
|
import { JsonlPersistor, Persistor } from "@/process/persistence";
|
||||||
import { AnySourceConfig } from "@/schema";
|
import { AnySourceConfig } from "@/schema";
|
||||||
import { createDateRange, createPageRange } from "@/utils";
|
import { createDateRange, createPageRange } from "@/utils";
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { HTMLElement, parse as parseHtml } from "node-html-parser";
|
import { HTMLElement, parse as parseHtml } from "node-html-parser";
|
||||||
import { config, FetchCrawlerConfig } from "@/config";
|
|
||||||
|
import { FetchCrawlerConfig, config } from "@/config";
|
||||||
import { SyncHttpClient } from "@/http/http-client";
|
import { SyncHttpClient } from "@/http/http-client";
|
||||||
import { OpenGraph } from "@/http/open-graph";
|
import { OpenGraph } from "@/http/open-graph";
|
||||||
import type { Persistor } from "@/process/persistence";
|
import type { Persistor } from "@/process/persistence";
|
||||||
|
|||||||
@@ -2,10 +2,17 @@ import { logger } from "@basango/logger";
|
|||||||
import { getUnixTime, isMatch as isDateMatch, parse as parseDateFns } from "date-fns";
|
import { getUnixTime, isMatch as isDateMatch, parse as parseDateFns } from "date-fns";
|
||||||
import { HTMLElement } from "node-html-parser";
|
import { HTMLElement } from "node-html-parser";
|
||||||
import TurndownService from "turndown";
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
import { FetchCrawlerConfig } from "@/config";
|
import { FetchCrawlerConfig } from "@/config";
|
||||||
|
import {
|
||||||
|
ArticleOutOfDateRangeError,
|
||||||
|
InvalidArticleError,
|
||||||
|
InvalidSourceSelectorsError,
|
||||||
|
UnsupportedSourceKindError,
|
||||||
|
} from "@/errors";
|
||||||
import { BaseCrawler } from "@/process/parsers/base";
|
import { BaseCrawler } from "@/process/parsers/base";
|
||||||
import { Persistor, persist } from "@/process/persistence";
|
import { Persistor, persist } from "@/process/persistence";
|
||||||
import { DateRange, HtmlSourceConfig } from "@/schema";
|
import { Article, DateRange, HtmlSourceConfig } from "@/schema";
|
||||||
import { createAbsoluteUrl, isTimestampInRange } from "@/utils";
|
import { createAbsoluteUrl, isTimestampInRange } from "@/utils";
|
||||||
|
|
||||||
const md = new TurndownService({
|
const md = new TurndownService({
|
||||||
@@ -32,13 +39,13 @@ const safeRegExp = (pattern?: string | null): RegExp | null => {
|
|||||||
*/
|
*/
|
||||||
export class HtmlCrawler extends BaseCrawler {
|
export class HtmlCrawler extends BaseCrawler {
|
||||||
readonly source: HtmlSourceConfig;
|
readonly source: HtmlSourceConfig;
|
||||||
private currentArticleUrl: string | null = null;
|
private currentNode: string | null = null;
|
||||||
|
|
||||||
constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) {
|
constructor(settings: FetchCrawlerConfig, options: { persistors?: Persistor[] } = {}) {
|
||||||
super(settings, options);
|
super(settings, options);
|
||||||
|
|
||||||
if (!settings.source || settings.source.sourceKind !== "html") {
|
if (!settings.source || settings.source.sourceKind !== "html") {
|
||||||
throw new Error("HtmlCrawler requires a source of kind 'html'");
|
throw new UnsupportedSourceKindError("HtmlCrawler requires a source of kind 'html'");
|
||||||
}
|
}
|
||||||
this.source = this.settings.source as HtmlSourceConfig;
|
this.source = this.settings.source as HtmlSourceConfig;
|
||||||
}
|
}
|
||||||
@@ -46,69 +53,64 @@ export class HtmlCrawler extends BaseCrawler {
|
|||||||
async fetch(): Promise<void> {
|
async fetch(): Promise<void> {
|
||||||
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
||||||
const dateRange = this.settings.dateRange;
|
const dateRange = this.settings.dateRange;
|
||||||
|
const selectors = this.source.sourceSelectors;
|
||||||
|
|
||||||
const articleSelector = this.source.sourceSelectors.articles;
|
if (!selectors.articles) {
|
||||||
if (!articleSelector) {
|
throw new InvalidSourceSelectorsError("No article selector configured for HTML source");
|
||||||
logger.error(
|
|
||||||
{ source: this.source.sourceId },
|
|
||||||
"No article selector configured for HTML source",
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let stop = false;
|
|
||||||
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
||||||
const pageUrl = this.buildPageUrl(page);
|
const endpoint = this.buildEndpointUrl(page);
|
||||||
let html: string;
|
let html: string;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
html = await this.crawl(pageUrl);
|
html = await this.crawl(endpoint);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error({ error, page, pageUrl }, "> page %s => [failed]", page);
|
logger.error({ endpoint, error, page }, `Failed to crawl page ${page}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const root = this.parseHtml(html);
|
const root = this.parseHtml(html);
|
||||||
const articles = this.extractAll(root, articleSelector);
|
const articles = this.extractAll(root, selectors.articles);
|
||||||
if (!articles.length) {
|
if (!articles.length) {
|
||||||
logger.info({ page }, "No articles found on page");
|
logger.error({ page }, "No articles found on page");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const node of articles) {
|
for (const node of articles) {
|
||||||
try {
|
try {
|
||||||
this.currentArticleUrl = this.extractLink(node);
|
this.currentNode = this.extractLink(node);
|
||||||
let targetHtml = node.toString();
|
let nodeHtml = node.toString();
|
||||||
|
|
||||||
if (this.source.requiresDetails) {
|
if (this.source.requiresDetails) {
|
||||||
if (!this.currentArticleUrl) {
|
if (!this.currentNode) {
|
||||||
logger.debug({ page }, "Skipping article without link for details");
|
logger.error({ page }, "Skipping article without link for details");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
targetHtml = await this.crawl(this.currentArticleUrl);
|
nodeHtml = await this.crawl(this.currentNode);
|
||||||
} catch (err) {
|
} catch (error) {
|
||||||
logger.error(
|
logger.error({ error, url: this.currentNode }, "Failed to fetch detail page");
|
||||||
{ error: err, url: this.currentArticleUrl },
|
|
||||||
"Failed to fetch detail page",
|
|
||||||
);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const saved = await this.fetchOne(targetHtml, dateRange);
|
await this.fetchOne(nodeHtml, dateRange);
|
||||||
// stop early on first out-of-range if pages are sorted by date desc
|
} catch (error: unknown) {
|
||||||
if (saved === null) {
|
if (error instanceof ArticleOutOfDateRangeError) {
|
||||||
stop = true;
|
logger.info(
|
||||||
|
{ url: this.currentNode },
|
||||||
|
"Article out of date range, stopping further processing",
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
logger.error({ error, pageUrl }, "Failed to process article on page");
|
logger.error({ error, url: this.currentNode }, "Failed to process HTML article");
|
||||||
} finally {
|
} finally {
|
||||||
this.currentArticleUrl = null;
|
this.currentNode = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stop) break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,43 +119,43 @@ export class HtmlCrawler extends BaseCrawler {
|
|||||||
* @param html - The HTML content of the article
|
* @param html - The HTML content of the article
|
||||||
* @param dateRange - Optional date range for filtering
|
* @param dateRange - Optional date range for filtering
|
||||||
*/
|
*/
|
||||||
async fetchOne(html: string, dateRange?: DateRange | null) {
|
async fetchOne(html: string, dateRange?: DateRange | null): Promise<Article> {
|
||||||
const root = this.parseHtml(html);
|
const root = this.parseHtml(html);
|
||||||
const sel = this.source.sourceSelectors;
|
const selectors = this.source.sourceSelectors;
|
||||||
|
|
||||||
const titleText = this.extractText(root, sel.articleTitle) ?? "Untitled";
|
const title = this.extractText(root, selectors.articleTitle) ?? "Untitled";
|
||||||
const link = this.currentArticleUrl ?? this.extractLink(root);
|
const link = this.currentNode ?? this.extractLink(root);
|
||||||
if (!link) {
|
if (!link) {
|
||||||
logger.warn({ title: titleText }, "Skipping article without link");
|
throw new InvalidArticleError("Missing article link");
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = this.extractBody(root, sel.articleBody);
|
const body = this.extractBody(root, selectors.articleBody);
|
||||||
const categories = this.extractCategories(root, sel.articleCategories);
|
const categories = this.extractCategories(root, selectors.articleCategories);
|
||||||
const rawDate = this.extractText(root, sel.articleDate);
|
const date = this.extractText(root, selectors.articleDate);
|
||||||
const timestamp = this.computeTimestamp(rawDate);
|
const timestamp = this.computeTimestamp(date);
|
||||||
|
|
||||||
if (dateRange && !isTimestampInRange(dateRange, timestamp)) {
|
if (dateRange && !isTimestampInRange(dateRange, timestamp)) {
|
||||||
logger.info(
|
throw new ArticleOutOfDateRangeError("Article outside date range", {
|
||||||
{ date: rawDate, link, timestamp, title: titleText },
|
date,
|
||||||
"Skipping article outside date range",
|
link,
|
||||||
);
|
timestamp,
|
||||||
return null;
|
title,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const enriched = await this.enrichWithOpenGraph(
|
const data = await this.enrichWithOpenGraph(
|
||||||
{
|
{
|
||||||
body,
|
body,
|
||||||
categories,
|
categories,
|
||||||
link,
|
link,
|
||||||
source: this.source.sourceId,
|
source: this.source.sourceId,
|
||||||
timestamp,
|
timestamp,
|
||||||
title: titleText,
|
title,
|
||||||
},
|
},
|
||||||
link,
|
link,
|
||||||
);
|
);
|
||||||
|
|
||||||
return await persist(enriched, this.persistors);
|
return await persist(data, this.persistors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -211,7 +213,7 @@ export class HtmlCrawler extends BaseCrawler {
|
|||||||
* Build the URL for a given page number.
|
* Build the URL for a given page number.
|
||||||
* @param page - The page number
|
* @param page - The page number
|
||||||
*/
|
*/
|
||||||
buildPageUrl(page: number): string {
|
buildEndpointUrl(page: number): string {
|
||||||
let template = this.applyCategory(this.source.paginationTemplate);
|
let template = this.applyCategory(this.source.paginationTemplate);
|
||||||
if (template.includes("{page}")) {
|
if (template.includes("{page}")) {
|
||||||
template = template.replace("{page}", String(page));
|
template = template.replace("{page}", String(page));
|
||||||
|
|||||||
@@ -1,9 +1,16 @@
|
|||||||
import { logger } from "@basango/logger";
|
import { logger } from "@basango/logger";
|
||||||
import TurndownService from "turndown";
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
import { FetchCrawlerConfig } from "@/config";
|
import { FetchCrawlerConfig } from "@/config";
|
||||||
|
import {
|
||||||
|
ArticleOutOfDateRangeError,
|
||||||
|
InvalidArticleError,
|
||||||
|
UnsupportedSourceKindError,
|
||||||
|
} from "@/errors";
|
||||||
import { BaseCrawler } from "@/process/parsers/base";
|
import { BaseCrawler } from "@/process/parsers/base";
|
||||||
import { Persistor, persist } from "@/process/persistence";
|
import { Persistor, persist } from "@/process/persistence";
|
||||||
import { DateRange, PageRange, WordPressSourceConfig } from "@/schema";
|
import { Article, DateRange, PageRange, WordPressSourceConfig } from "@/schema";
|
||||||
|
import { isTimestampInRange } from "@/utils";
|
||||||
|
|
||||||
const md = new TurndownService({
|
const md = new TurndownService({
|
||||||
bulletListMarker: "-",
|
bulletListMarker: "-",
|
||||||
@@ -38,7 +45,9 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
super(settings, options);
|
super(settings, options);
|
||||||
|
|
||||||
if (!settings.source || settings.source.sourceKind !== "wordpress") {
|
if (!settings.source || settings.source.sourceKind !== "wordpress") {
|
||||||
throw new Error("HtmlCrawler requires a source of kind 'wordpress'");
|
throw new UnsupportedSourceKindError(
|
||||||
|
"WordPressCrawler requires a source of kind 'wordpress'",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
this.source = this.settings.source as WordPressSourceConfig;
|
this.source = this.settings.source as WordPressSourceConfig;
|
||||||
}
|
}
|
||||||
@@ -50,29 +59,31 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
const pageRange = this.settings.pageRange ?? (await this.getPagination());
|
||||||
const dateRange = this.settings.dateRange;
|
const dateRange = this.settings.dateRange;
|
||||||
|
|
||||||
let stop = false;
|
|
||||||
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
for (let page = pageRange.start; page <= pageRange.end; page += 1) {
|
||||||
const endpoint = this.postsEndpoint(page);
|
const endpoint = this.buildEndpointUrl(page);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await this.http.get(endpoint);
|
const response = await this.http.get(endpoint);
|
||||||
const data = (await response.json()) as unknown;
|
const articles = (await response.json()) as WordPressPost[];
|
||||||
const articles = Array.isArray(data) ? (data as WordPressPost[]) : [];
|
|
||||||
if (!Array.isArray(data)) {
|
|
||||||
logger.warn({ page, type: typeof data }, "Unexpected WordPress payload type");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const entry of articles) {
|
for (const node of articles) {
|
||||||
const saved = await this.fetchOne(entry, dateRange);
|
try {
|
||||||
if (saved === null) {
|
await this.fetchOne(node, dateRange);
|
||||||
stop = true;
|
} catch (error: unknown) {
|
||||||
break;
|
if (error instanceof ArticleOutOfDateRangeError) {
|
||||||
|
logger.info(
|
||||||
|
{ url: node.link },
|
||||||
|
"Article out of date range, stopping further processing",
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.error({ error, url: node.link }, "Failed to process WordPress article");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error({ error, page }, "> page %s => [failed]", page);
|
logger.error({ error, page }, `Failed to fetch WordPress page ${page}`);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
if (stop) break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,7 +106,7 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
* @param input - Decoded JSON object or raw JSON string
|
* @param input - Decoded JSON object or raw JSON string
|
||||||
* @param dateRange - Optional date range for filtering
|
* @param dateRange - Optional date range for filtering
|
||||||
*/
|
*/
|
||||||
async fetchOne(input: unknown, dateRange?: DateRange | null) {
|
async fetchOne(input: unknown, dateRange?: DateRange | null): Promise<Article> {
|
||||||
// input can be the decoded JSON object or a raw JSON string
|
// input can be the decoded JSON object or a raw JSON string
|
||||||
let data: WordPressPost | null = null;
|
let data: WordPressPost | null = null;
|
||||||
try {
|
try {
|
||||||
@@ -110,35 +121,29 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!data || typeof data !== "object") {
|
if (!data || typeof data !== "object") {
|
||||||
throw new Error("Unexpected WordPress payload type");
|
throw new InvalidArticleError("Unexpected WordPress payload type");
|
||||||
}
|
}
|
||||||
|
|
||||||
const link = data.link;
|
const link = data.link;
|
||||||
if (!link) {
|
if (!link) {
|
||||||
logger.error("Skipping WordPress article without link");
|
throw new InvalidArticleError("Missing article link");
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const titleHtml = data.title?.rendered ?? "";
|
const title =
|
||||||
const bodyHtml = data.content?.rendered ?? "";
|
this.textContent(this.parseHtml(data.title?.rendered ?? "")) ?? data.slug ?? "Untitled";
|
||||||
const title = this.textContent(this.parseHtml(titleHtml)) ?? data.slug ?? "Untitled";
|
const body = md.turndown(data.content?.rendered ?? "");
|
||||||
const body = md.turndown(bodyHtml);
|
|
||||||
const timestamp = this.computeTimestamp(data.date);
|
const timestamp = this.computeTimestamp(data.date);
|
||||||
const categories = await this.mapCategories(data.categories ?? []);
|
const categories = await this.mapCategories(data.categories ?? []);
|
||||||
|
|
||||||
// date range skip as in HTML crawler
|
if (dateRange && !isTimestampInRange(dateRange, timestamp)) {
|
||||||
if (dateRange) {
|
throw new ArticleOutOfDateRangeError("Article outside date range", {
|
||||||
const { isTimestampInRange } = await import("@/utils");
|
link,
|
||||||
if (!isTimestampInRange(dateRange, timestamp)) {
|
timestamp,
|
||||||
logger.info(
|
title,
|
||||||
{ date: data.date, link, timestamp, title },
|
});
|
||||||
"Skipping article outside date range",
|
|
||||||
);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const enriched = await this.enrichWithOpenGraph(
|
const article = await this.enrichWithOpenGraph(
|
||||||
{
|
{
|
||||||
body,
|
body,
|
||||||
categories,
|
categories,
|
||||||
@@ -150,7 +155,7 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
link,
|
link,
|
||||||
);
|
);
|
||||||
|
|
||||||
return await persist(enriched, this.persistors);
|
return await persist(article, this.persistors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -188,7 +193,7 @@ export class WordPressCrawler extends BaseCrawler {
|
|||||||
* Construct posts endpoint URL for a given page.
|
* Construct posts endpoint URL for a given page.
|
||||||
* @param page - Page number
|
* @param page - Page number
|
||||||
*/
|
*/
|
||||||
postsEndpoint(page: number): string {
|
buildEndpointUrl(page: number): string {
|
||||||
return `${this.baseUrl()}wp-json/wp/v2/posts?${WordPressCrawler.POST_QUERY}&page=${page}&per_page=100`;
|
return `${this.baseUrl()}wp-json/wp/v2/posts?${WordPressCrawler.POST_QUERY}&page=${page}&per_page=100`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import fs from "node:fs";
|
import fs from "node:fs";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
|
||||||
import logger from "@basango/logger";
|
import logger from "@basango/logger";
|
||||||
|
|
||||||
import { Article } from "@/schema";
|
import { Article } from "@/schema";
|
||||||
import { countTokens } from "@/utils";
|
import { countTokens } from "@/utils";
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import logger from "@basango/logger";
|
import logger from "@basango/logger";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
CrawlingOptions,
|
CrawlingOptions,
|
||||||
closePersistors,
|
closePersistors,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { logger } from "@basango/logger";
|
import { logger } from "@basango/logger";
|
||||||
|
|
||||||
import { runSyncCrawl } from "@/process/sync/tasks";
|
import { runSyncCrawl } from "@/process/sync/tasks";
|
||||||
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
|
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { logger } from "@basango/logger";
|
import { logger } from "@basango/logger";
|
||||||
|
|
||||||
import { scheduleAsyncCrawl } from "@/process/async/tasks";
|
import { scheduleAsyncCrawl } from "@/process/async/tasks";
|
||||||
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
|
import { CRAWLING_USAGE, parseCrawlingCliArgs } from "@/scripts/utils";
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { parseArgs } from "node:util";
|
import { parseArgs } from "node:util";
|
||||||
|
|
||||||
import { CrawlingOptions } from "@/process/crawler";
|
import { CrawlingOptions } from "@/process/crawler";
|
||||||
|
|
||||||
interface WorkerCliOptions {
|
interface WorkerCliOptions {
|
||||||
@@ -6,13 +7,12 @@ interface WorkerCliOptions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const CRAWLING_USAGE = `
|
export const CRAWLING_USAGE = `
|
||||||
Usage: bun run crawl:[async|sync] -- --sourceId <id> [options]
|
Usage: bun run crawler:[async|sync] -- --sourceId <id> [options]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--pageRange <range> Optional page range filter (e.g. 1:5)
|
--pageRange <range> Optional page range filter (e.g. 1:5)
|
||||||
--dateRange <range> Optional date range filter (e.g. 2024-01-01:2024-01-31)
|
--dateRange <range> Optional date range filter (e.g. 2024-01-01:2024-01-31)
|
||||||
--category <slug> Optional category to crawl
|
--category <slug> Optional category to crawl
|
||||||
-h, --help Show this message
|
|
||||||
`;
|
`;
|
||||||
|
|
||||||
export const parseWorkerCliArgs = (): WorkerCliOptions => {
|
export const parseWorkerCliArgs = (): WorkerCliOptions => {
|
||||||
|
|||||||
@@ -8,9 +8,7 @@ const main = async (): Promise<void> => {
|
|||||||
const options = parseWorkerCliArgs();
|
const options = parseWorkerCliArgs();
|
||||||
|
|
||||||
const manager = createQueueManager();
|
const manager = createQueueManager();
|
||||||
const queues = options.queue?.length
|
const queues = options.queue?.length ? options.queue : undefined;
|
||||||
? options.queue.map((name) => manager.queueName(name))
|
|
||||||
: undefined;
|
|
||||||
|
|
||||||
const handle = startWorker({
|
const handle = startWorker({
|
||||||
queueManager: manager,
|
queueManager: manager,
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { format, getUnixTime, isMatch, parse } from "date-fns";
|
import { format, getUnixTime, isMatch, parse } from "date-fns";
|
||||||
import type { RedisOptions } from "ioredis";
|
import type { RedisOptions } from "ioredis";
|
||||||
import { get_encoding, TiktokenEncoding } from "tiktoken";
|
import { TiktokenEncoding, get_encoding } from "tiktoken";
|
||||||
|
|
||||||
import { config } from "@/config";
|
import { config } from "@/config";
|
||||||
import { DEFAULT_DATE_FORMAT } from "@/constants";
|
import { DEFAULT_DATE_FORMAT } from "@/constants";
|
||||||
import {
|
import {
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
@@ -1,8 +1,6 @@
|
|||||||
{
|
{
|
||||||
"commitlint": {
|
"commitlint": {
|
||||||
"extends": [
|
"extends": ["@commitlint/config-conventional"]
|
||||||
"@commitlint/config-conventional"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"config": {
|
"config": {
|
||||||
"commitizen": {
|
"commitizen": {
|
||||||
@@ -85,14 +83,8 @@
|
|||||||
"preset": "jest-expo"
|
"preset": "jest-expo"
|
||||||
},
|
},
|
||||||
"lint-staged": {
|
"lint-staged": {
|
||||||
"*.ts": [
|
"*.ts": ["prettier --write", "eslint --fix"],
|
||||||
"prettier --write",
|
"*.tsx": ["prettier --write", "eslint --fix"]
|
||||||
"eslint --fix"
|
|
||||||
],
|
|
||||||
"*.tsx": [
|
|
||||||
"prettier --write",
|
|
||||||
"eslint --fix"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"main": "expo-router/entry",
|
"main": "expo-router/entry",
|
||||||
"name": "drc-news",
|
"name": "drc-news",
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
import { endpoint } from "@/api/endpoint";
|
import { endpoint } from "@/api/endpoint";
|
||||||
import {
|
import {
|
||||||
Bookmark,
|
Bookmark,
|
||||||
BookmarkedArticle,
|
|
||||||
BookmarkPayload,
|
BookmarkPayload,
|
||||||
|
BookmarkedArticle,
|
||||||
} from "@/api/schema/feed-management/bookmark";
|
} from "@/api/schema/feed-management/bookmark";
|
||||||
import {
|
import {
|
||||||
ArticleFilters,
|
ArticleFilters,
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { formatDistanceToNowStrict, Locale } from "date-fns";
|
import { Locale, formatDistanceToNowStrict } from "date-fns";
|
||||||
import { fr } from "date-fns/locale";
|
import { fr } from "date-fns/locale";
|
||||||
import { useEffect, useState } from "react";
|
import { useEffect, useState } from "react";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { GestureHandlerRootView } from "react-native-gesture-handler";
|
import { GestureHandlerRootView } from "react-native-gesture-handler";
|
||||||
import { SafeAreaProvider } from "react-native-safe-area-context";
|
import { SafeAreaProvider } from "react-native-safe-area-context";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { TamaguiProvider } from "tamagui";
|
import { TamaguiProvider } from "tamagui";
|
||||||
|
|
||||||
import { config } from "~/tamagui.config";
|
import { config } from "~/tamagui.config";
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import React, { useCallback } from "react";
|
import React, { useCallback } from "react";
|
||||||
|
|
||||||
import { ActivityIndicator, Dimensions, FlatList, FlatListProps } from "react-native";
|
import { ActivityIndicator, Dimensions, FlatList, FlatListProps } from "react-native";
|
||||||
import { View, XStack, YStack } from "tamagui";
|
import { View, XStack, YStack } from "tamagui";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import { useCallback } from "react";
|
import { useCallback } from "react";
|
||||||
|
|
||||||
import ContentLoader, { Circle, Rect } from "react-content-loader/native";
|
import ContentLoader, { Circle, Rect } from "react-content-loader/native";
|
||||||
import { Dimensions, FlatList } from "react-native";
|
import { Dimensions, FlatList } from "react-native";
|
||||||
import { View } from "tamagui";
|
import { View } from "tamagui";
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import React, { useCallback } from "react";
|
import React, { useCallback } from "react";
|
||||||
|
|
||||||
import { ActivityIndicator, FlatList, FlatListProps } from "react-native";
|
import { ActivityIndicator, FlatList, FlatListProps } from "react-native";
|
||||||
import { YStack } from "tamagui";
|
import { YStack } from "tamagui";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import { useCallback, useState } from "react";
|
import { useCallback, useState } from "react";
|
||||||
|
|
||||||
import { ActivityIndicator, Alert } from "react-native";
|
import { ActivityIndicator, Alert } from "react-native";
|
||||||
import { Button, GetProps } from "tamagui";
|
import { Button, GetProps } from "tamagui";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import React, { useCallback } from "react";
|
import React, { useCallback } from "react";
|
||||||
|
|
||||||
import { FlatList, FlatListProps } from "react-native";
|
import { FlatList, FlatListProps } from "react-native";
|
||||||
import { Paragraph, XStack, YStack } from "tamagui";
|
import { Paragraph, XStack, YStack } from "tamagui";
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { Link } from "expo-router";
|
import { Link } from "expo-router";
|
||||||
import { GetProps, styled, XStack, YStack } from "tamagui";
|
import { GetProps, XStack, YStack, styled } from "tamagui";
|
||||||
|
|
||||||
import { SourceOverview } from "@/api/schema/feed-management/source";
|
import { SourceOverview } from "@/api/schema/feed-management/source";
|
||||||
import { SourceFollowButton } from "@/ui/components/content/source/SourceFollowButton";
|
import { SourceFollowButton } from "@/ui/components/content/source/SourceFollowButton";
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import { useCallback } from "react";
|
import { useCallback } from "react";
|
||||||
|
|
||||||
import ContentLoader, { Circle, Rect } from "react-content-loader/native";
|
import ContentLoader, { Circle, Rect } from "react-content-loader/native";
|
||||||
import { FlatList } from "react-native";
|
import { FlatList } from "react-native";
|
||||||
import { YStack } from "tamagui";
|
import { YStack } from "tamagui";
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ import {
|
|||||||
GetProps,
|
GetProps,
|
||||||
Label,
|
Label,
|
||||||
SizeTokens,
|
SizeTokens,
|
||||||
styled,
|
|
||||||
Input as TamaguiInput,
|
Input as TamaguiInput,
|
||||||
XStack,
|
XStack,
|
||||||
YStack,
|
YStack,
|
||||||
|
styled,
|
||||||
} from "tamagui";
|
} from "tamagui";
|
||||||
|
|
||||||
import { Caption } from "@/ui/components/typography";
|
import { Caption } from "@/ui/components/typography";
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { GetProps, Label, styled, TextArea as TamaguiTextArea, XStack, YStack } from "tamagui";
|
import { GetProps, Label, TextArea as TamaguiTextArea, XStack, YStack, styled } from "tamagui";
|
||||||
|
|
||||||
import { withController } from "@/ui/components/controls/forms/withController";
|
import { withController } from "@/ui/components/controls/forms/withController";
|
||||||
import { Caption } from "@/ui/components/typography";
|
import { Caption } from "@/ui/components/typography";
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import React from "react";
|
import React from "react";
|
||||||
|
|
||||||
import { Controller, ControllerProps } from "react-hook-form";
|
import { Controller, ControllerProps } from "react-hook-form";
|
||||||
|
|
||||||
type WithControllerProps = {
|
type WithControllerProps = {
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import React from "react";
|
import React from "react";
|
||||||
|
import { View, XStack, styled } from "tamagui";
|
||||||
import { styled, View, XStack } from "tamagui";
|
|
||||||
|
|
||||||
import { Text } from "@/ui/components/typography";
|
import { Text } from "@/ui/components/typography";
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { ArrowRight } from "@tamagui/lucide-icons";
|
import { ArrowRight } from "@tamagui/lucide-icons";
|
||||||
import { Href, Link } from "expo-router";
|
import { Href, Link } from "expo-router";
|
||||||
import { GetProps, Paragraph, styled, XStack } from "tamagui";
|
import { GetProps, Paragraph, XStack, styled } from "tamagui";
|
||||||
|
|
||||||
import { Text } from "@/ui/components/typography";
|
import { Text } from "@/ui/components/typography";
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { StatusBar } from "expo-status-bar";
|
import { StatusBar } from "expo-status-bar";
|
||||||
import React from "react";
|
import React from "react";
|
||||||
import { useSafeAreaInsets } from "react-native-safe-area-context";
|
import { useSafeAreaInsets } from "react-native-safe-area-context";
|
||||||
import { styled, YStack } from "tamagui";
|
import { YStack, styled } from "tamagui";
|
||||||
|
|
||||||
import { ScreenHeading } from "@/ui/components/layout/ScreenHeading";
|
import { ScreenHeading } from "@/ui/components/layout/ScreenHeading";
|
||||||
import { ScreenSection } from "@/ui/components/layout/ScreenSection";
|
import { ScreenSection } from "@/ui/components/layout/ScreenSection";
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { Paragraph, ParagraphProps } from "tamagui";
|
import { Paragraph, ParagraphProps } from "tamagui";
|
||||||
|
|
||||||
export const Caption = (props: React.PropsWithChildren<ParagraphProps>) => {
|
export const Caption = (props: React.PropsWithChildren<ParagraphProps>) => {
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { H2, ParagraphProps } from "tamagui";
|
import { H2, ParagraphProps } from "tamagui";
|
||||||
|
|
||||||
export const Display = (props: React.PropsWithChildren<ParagraphProps>) => {
|
export const Display = (props: React.PropsWithChildren<ParagraphProps>) => {
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { H4, ParagraphProps } from "tamagui";
|
import { H4, ParagraphProps } from "tamagui";
|
||||||
|
|
||||||
export const Heading = (props: React.PropsWithChildren<ParagraphProps>) => {
|
export const Heading = (props: React.PropsWithChildren<ParagraphProps>) => {
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import type React from "react";
|
import type React from "react";
|
||||||
|
|
||||||
import { Paragraph, ParagraphProps } from "tamagui";
|
import { Paragraph, ParagraphProps } from "tamagui";
|
||||||
|
|
||||||
export const Text = (props: React.PropsWithChildren<ParagraphProps>) => {
|
export const Text = (props: React.PropsWithChildren<ParagraphProps>) => {
|
||||||
|
|||||||
+1
-1
@@ -39,7 +39,7 @@
|
|||||||
},
|
},
|
||||||
"files": {
|
"files": {
|
||||||
"ignoreUnknown": true,
|
"ignoreUnknown": true,
|
||||||
"includes": ["**/apps", "**/packages", "!/apps/api-legacy", "!/apps/mobile-legacy"]
|
"includes": ["**", "!apps/mobile-legacy", "!apps/api-legacy"]
|
||||||
},
|
},
|
||||||
"formatter": {
|
"formatter": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { drizzle } from "drizzle-orm/node-postgres";
|
import { drizzle } from "drizzle-orm/node-postgres";
|
||||||
import { Pool } from "pg";
|
import { Pool } from "pg";
|
||||||
|
|
||||||
import * as schema from "@/schema";
|
import * as schema from "@/schema";
|
||||||
|
|
||||||
const isDevelopment = process.env.NODE_ENV === "development";
|
const isDevelopment = process.env.NODE_ENV === "development";
|
||||||
@@ -21,7 +22,7 @@ const pool = new Pool({
|
|||||||
export const getConnectionPoolStats = () => {
|
export const getConnectionPoolStats = () => {
|
||||||
const stats = {
|
const stats = {
|
||||||
active: Math.max(0, (pool.totalCount ?? 0) - (pool.idleCount ?? 0)),
|
active: Math.max(0, (pool.totalCount ?? 0) - (pool.idleCount ?? 0)),
|
||||||
ended: (pool as any).ended ?? false,
|
ended: pool.ended ?? false,
|
||||||
idle: pool.idleCount ?? 0,
|
idle: pool.idleCount ?? 0,
|
||||||
name: "primary",
|
name: "primary",
|
||||||
total: pool.options.max ?? 0,
|
total: pool.options.max ?? 0,
|
||||||
|
|||||||
@@ -4,13 +4,13 @@ import { and, asc, desc, eq, gt, lt, or, sql } from "drizzle-orm";
|
|||||||
import type { Database } from "@/client";
|
import type { Database } from "@/client";
|
||||||
import { articles, bookmarkArticles, bookmarks, comments, sources, users } from "@/schema";
|
import { articles, bookmarkArticles, bookmarks, comments, sources, users } from "@/schema";
|
||||||
import {
|
import {
|
||||||
buildPaginationResult,
|
|
||||||
createPageState,
|
|
||||||
decodeCursor,
|
|
||||||
type PageRequest,
|
type PageRequest,
|
||||||
type PageState,
|
type PageState,
|
||||||
type PaginationMeta,
|
type PaginationMeta,
|
||||||
type SortDirection,
|
type SortDirection,
|
||||||
|
buildPaginationResult,
|
||||||
|
createPageState,
|
||||||
|
decodeCursor,
|
||||||
} from "@/utils/pagination";
|
} from "@/utils/pagination";
|
||||||
|
|
||||||
export interface ArticleFilters {
|
export interface ArticleFilters {
|
||||||
|
|||||||
@@ -4,11 +4,11 @@ import { and, desc, eq, lt, sql } from "drizzle-orm";
|
|||||||
import type { Database } from "@/client";
|
import type { Database } from "@/client";
|
||||||
import { bookmarkArticles, bookmarks } from "@/schema";
|
import { bookmarkArticles, bookmarks } from "@/schema";
|
||||||
import {
|
import {
|
||||||
|
type PageRequest,
|
||||||
|
type PaginationMeta,
|
||||||
buildPaginationResult,
|
buildPaginationResult,
|
||||||
createPageState,
|
createPageState,
|
||||||
decodeCursor,
|
decodeCursor,
|
||||||
type PageRequest,
|
|
||||||
type PaginationMeta,
|
|
||||||
} from "@/utils/pagination";
|
} from "@/utils/pagination";
|
||||||
|
|
||||||
export interface BookmarkRow {
|
export interface BookmarkRow {
|
||||||
|
|||||||
@@ -5,11 +5,11 @@ import type { Database } from "@/client";
|
|||||||
import { PUBLICATION_GRAPH_DAYS, SOURCE_IMAGE_BASE } from "@/constant";
|
import { PUBLICATION_GRAPH_DAYS, SOURCE_IMAGE_BASE } from "@/constant";
|
||||||
import { articles, followedSources, sources } from "@/schema";
|
import { articles, followedSources, sources } from "@/schema";
|
||||||
import {
|
import {
|
||||||
|
type PageRequest,
|
||||||
|
type PaginationMeta,
|
||||||
buildPaginationResult,
|
buildPaginationResult,
|
||||||
createPageState,
|
createPageState,
|
||||||
decodeCursor,
|
decodeCursor,
|
||||||
type PageRequest,
|
|
||||||
type PaginationMeta,
|
|
||||||
} from "@/utils/pagination";
|
} from "@/utils/pagination";
|
||||||
|
|
||||||
export interface SourceOverviewRow {
|
export interface SourceOverviewRow {
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { sql } from "drizzle-orm";
|
import { sql } from "drizzle-orm";
|
||||||
|
|
||||||
import { db } from "@/client";
|
import { db } from "@/client";
|
||||||
|
|
||||||
export async function checkHealth() {
|
export async function checkHealth() {
|
||||||
|
|||||||
@@ -7,14 +7,15 @@
|
|||||||
"incremental": false,
|
"incremental": false,
|
||||||
"isolatedModules": true,
|
"isolatedModules": true,
|
||||||
"lib": ["es2022", "DOM", "DOM.Iterable"],
|
"lib": ["es2022", "DOM", "DOM.Iterable"],
|
||||||
"module": "NodeNext",
|
"module": "ESNext",
|
||||||
"moduleDetection": "force",
|
"moduleDetection": "force",
|
||||||
"moduleResolution": "NodeNext",
|
"moduleResolution": "Bundler",
|
||||||
"noUncheckedIndexedAccess": true,
|
"noUncheckedIndexedAccess": true,
|
||||||
"resolveJsonModule": true,
|
"resolveJsonModule": true,
|
||||||
"skipLibCheck": true,
|
"skipLibCheck": true,
|
||||||
"strict": true,
|
"strict": true,
|
||||||
"target": "ES2022"
|
"target": "ES2022",
|
||||||
|
"verbatimModuleSyntax": false
|
||||||
},
|
},
|
||||||
"display": "Default"
|
"display": "Default"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { cn } from "@basango/ui/lib/utils";
|
import { cn } from "@basango/ui/lib/utils";
|
||||||
import { Slot } from "@radix-ui/react-slot";
|
import { Slot } from "@radix-ui/react-slot";
|
||||||
import { cva, type VariantProps } from "class-variance-authority";
|
import { type VariantProps, cva } from "class-variance-authority";
|
||||||
import * as React from "react";
|
import * as React from "react";
|
||||||
|
|
||||||
const buttonVariants = cva(
|
const buttonVariants = cva(
|
||||||
|
|||||||
Reference in New Issue
Block a user