feat(monorepo): migrate to typescript monorepo

This commit is contained in:
2025-11-07 17:09:29 +02:00
committed by BernardNganduDev
parent 3e09956f05
commit 075a388ccb
745 changed files with 2341 additions and 5082 deletions
+241
View File
@@ -0,0 +1,241 @@
import { setTimeout as delay } from "node:timers/promises";
import { FetchClientConfig } from "@/config";
import {
DEFAULT_RETRY_AFTER_HEADER,
DEFAULT_USER_AGENT,
TRANSIENT_HTTP_STATUSES,
} from "@/constants";
import { UserAgents } from "@/http/user-agent";
/** HTTP header names mapped to their string values. */
export type HttpHeaders = Record<string, string>;
/**
 * Query-string parameters keyed by name.
 * `buildUrl` skips `null` and `undefined` values and converts the rest with `String()`.
 */
export type HttpParams = Record<string, string | number | boolean | null | undefined>;
/** Request payload of any shape; it is used for both the `data` and `json` request options. */
export type HttpData = unknown;
/**
 * Optional collaborators and defaults accepted by the HTTP client constructor.
 * Every field may be omitted; the constructor substitutes a default for each one.
 */
export interface HttpClientOptions {
/** Source of the `User-Agent` value; when omitted, a `UserAgents` instance is built from the client configuration. */
userAgentProvider?: UserAgents;
/** Headers sent with every request; merged over the generated `User-Agent` header, so they may replace it. */
defaultHeaders?: HttpHeaders;
/** `fetch`-compatible function used to perform requests; defaults to the global `fetch`. */
fetchImpl?: typeof fetch;
/** Function used to wait between retries; defaults to a timer-based sleep. */
sleep?: (ms: number) => Promise<void>;
}
/**
 * Per-request options accepted by `SyncHttpClient.request` and its shortcuts.
 */
export interface HttpRequestOptions {
/** Extra headers for this request; merged over the client's default headers. */
headers?: HttpHeaders;
/** Query parameters appended to the request URL. */
params?: HttpParams;
/** Raw request body, handed to `fetch` as-is. */
data?: HttpData;
/** Value serialized with `JSON.stringify` and sent as the body; when defined it takes precedence over `data`. */
json?: HttpData;
/** Name of the response header consulted for the retry delay; defaults to `DEFAULT_RETRY_AFTER_HEADER`. */
retryAfterHeader?: string;
}
/**
 * Error carrying the HTTP response that caused a request to fail.
 *
 * `name` is set to `"HttpError"` so that logs, stack traces and `String(error)`
 * identify the failure type instead of showing a generic `Error`.
 */
export class HttpError extends Error {
  /** Status code copied from the failing response. */
  readonly status: number;
  /** The response itself, kept so callers can inspect headers or read the body. */
  readonly response: Response;

  /**
   * @param message - Human-readable description of the failure
   * @param response - The response that triggered the error
   */
  constructor(message: string, response: Response) {
    super(message);
    this.name = "HttpError";
    this.status = response.status;
    this.response = response;
  }
}
/**
 * Default sleep implementation, backed by the promise-based timer from `node:timers/promises`.
 * @param ms - Number of milliseconds to wait before the returned promise resolves
 */
const defaultSleep = async (ms: number): Promise<void> => {
  await delay(ms);
};
/**
 * Appends query parameters to a URL.
 *
 * The URL is returned untouched when there are no parameters at all. Otherwise it is
 * parsed with `URL` (which throws for strings that are not absolute URLs), each
 * non-null parameter is set on it (replacing any existing value with the same name),
 * and the serialized result is returned.
 *
 * @param url - The base URL
 * @param params - Query parameters; `null` and `undefined` values are skipped
 */
const buildUrl = (url: string, params?: HttpParams): string => {
  const entries = Object.entries(params ?? {});
  if (entries.length === 0) {
    return url;
  }

  const target = new URL(url);
  entries
    .filter(([, value]) => value !== undefined && value !== null)
    .forEach(([name, value]) => {
      target.searchParams.set(name, String(value));
    });
  return target.toString();
};
/**
 * Computes how long to wait before the next retry, in milliseconds.
 *
 * The base delay grows exponentially with the attempt number and is capped at
 * `config.backoffMax`; a random jitter of up to 25% of that base is then added.
 * The sum is multiplied by 1000 to produce milliseconds.
 *
 * @param config - Fetch client configuration (`backoffInitial`, `backoffMultiplier`, `backoffMax`)
 * @param attempt - Current attempt number
 */
const computeBackoff = (config: FetchClientConfig, attempt: number): number => {
  const { backoffInitial, backoffMultiplier, backoffMax } = config;
  const uncapped = backoffInitial * backoffMultiplier ** attempt;
  const base = Math.min(uncapped, backoffMax);
  // The jitter is added after capping, so the result may exceed backoffMax by up to 25%.
  const jitter = Math.random() * base * 0.25;
  return (base + jitter) * 1000;
};
/**
 * Converts a retry-delay header value into a wait time in milliseconds.
 *
 * A value that starts with an integer is read as a number of seconds. Anything else
 * is handed to `Date.parse` and interpreted as a point in time, in which case the
 * remaining time until then is returned. Negative delays, dates that are not in the
 * future and unparseable values all yield 0.
 *
 * @param header - Raw header value
 */
const parseRetryAfter = (header: string): number => {
  const seconds = Number.parseInt(header, 10);
  if (Number.isNaN(seconds)) {
    // An unparseable date gives NaN here, which fails the comparison and falls back to 0.
    const remaining = Date.parse(header) - Date.now();
    return remaining > 0 ? remaining : 0;
  }
  return Math.max(0, seconds * 1000);
};
/**
 * Shared foundation for HTTP clients: resolves the default headers, the fetch
 * implementation and the sleep function, and implements the wait between retries.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class BaseHttpClient {
  protected readonly config: FetchClientConfig;
  protected readonly fetchImpl: typeof fetch;
  protected readonly sleep: (ms: number) => Promise<void>;
  protected readonly headers: HttpHeaders;

  /**
   * @param config - Fetch client configuration
   * @param options - Optional overrides for the user agent provider, default headers, fetch and sleep
   */
  constructor(config: FetchClientConfig, options: HttpClientOptions = {}) {
    const { userAgentProvider, defaultHeaders, fetchImpl, sleep } = options;
    this.config = config;

    // The user agent is resolved once here and reused for every request of this client.
    const agents =
      userAgentProvider ?? new UserAgents(config.rotate, config.userAgent ?? DEFAULT_USER_AGENT);
    const userAgent = agents.get() ?? config.userAgent ?? DEFAULT_USER_AGENT;

    // Caller-supplied defaults are applied last, so they win over the generated User-Agent.
    const merged: HttpHeaders = { "User-Agent": userAgent };
    Object.assign(merged, defaultHeaders ?? {});
    this.headers = merged;

    this.fetchImpl = fetchImpl ?? fetch;
    this.sleep = sleep ?? defaultSleep;
  }

  /**
   * Combines the client's default headers with per-request headers.
   * @param headers - Per-request headers; they take precedence over the defaults
   */
  protected buildHeaders(headers?: HttpHeaders): HeadersInit {
    return { ...this.headers, ...headers };
  }

  /**
   * Waits before the next attempt.
   *
   * When a response is given, carries the named header and `config.respectRetryAfter`
   * is enabled, the header's delay is used. If that yields no positive delay, the
   * exponential backoff for `attempt` is used instead.
   *
   * @param attempt - Current attempt number
   * @param response - Response that triggered the retry, if any
   * @param retryAfterHeader - Name of the header holding the server-provided delay
   */
  protected async maybeDelay(
    attempt: number,
    response?: Response,
    retryAfterHeader: string = DEFAULT_RETRY_AFTER_HEADER,
  ): Promise<void> {
    const hint = response ? response.headers.get(retryAfterHeader) : null;
    const fromHeader = hint && this.config.respectRetryAfter ? parseRetryAfter(hint) : 0;
    const waitMs = fromHeader === 0 ? computeBackoff(this.config, attempt) : fromHeader;
    if (waitMs > 0) {
      await this.sleep(waitMs);
    }
  }
}
/**
 * HTTP client with a per-attempt timeout and automatic retries.
 *
 * Despite its name, every method is asynchronous and returns a promise of the
 * `fetch` response.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class SyncHttpClient extends BaseHttpClient {
  /**
   * Sends a request, retrying transient HTTP statuses and failed `fetch` calls.
   *
   * Up to `config.maxRetries` retries are made after the first attempt. Each attempt
   * is aborted if `fetch` has not settled within `config.timeout` seconds. The timer
   * is cleared before a response is returned, so reading the body is not subject to it.
   *
   * @param method - HTTP method
   * @param url - Target URL; `options.params` are appended as query parameters
   * @param options - Headers, query parameters, body and retry header name
   * @returns The first response whose status is OK
   * @throws HttpError when the response is not OK and is not (or can no longer be) retried
   * @throws The error raised by `fetch` (including aborts) once retries are exhausted
   */
  async request(method: string, url: string, options: HttpRequestOptions = {}): Promise<Response> {
    const retryAfterHeader = options.retryAfterHeader ?? DEFAULT_RETRY_AFTER_HEADER;
    const target = buildUrl(url, options.params);
    const maxAttempts = this.config.maxRetries + 1;
    let attempt = 0;
    let lastError: unknown;

    while (attempt < maxAttempts) {
      const controller = new AbortController();
      let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
      try {
        timeoutHandle = setTimeout(() => controller.abort(), this.config.timeout * 1000);
        const headers = this.buildHeaders(options.headers) as Record<string, string>;
        const init: RequestInit = {
          body: options.data as BodyInit | undefined,
          headers,
          method,
          redirect: this.config.followRedirects ? "follow" : "manual",
          signal: controller.signal,
        };
        if (options.json !== undefined) {
          init.body = JSON.stringify(options.json);
          // Header names are case-insensitive: only add the default when the caller
          // supplied no Content-Type under any spelling, otherwise both values are sent.
          const hasContentType = Object.keys(headers).some(
            (name) => name.toLowerCase() === "content-type",
          );
          if (!hasContentType) {
            headers["Content-Type"] = "application/json";
          }
        }

        const response = await this.fetchImpl(target, init);
        const retryable =
          TRANSIENT_HTTP_STATUSES.includes(response.status) && attempt < this.config.maxRetries;
        if (retryable) {
          // The response is being thrown away: release its body so the underlying
          // connection is not held until garbage collection. Best-effort only.
          try {
            await response.body?.cancel();
          } catch {
            // A body that cannot be cancelled must not prevent the retry.
          }
          await this.maybeDelay(attempt, response, retryAfterHeader);
          attempt += 1;
          continue;
        }
        if (!response.ok) {
          throw new HttpError(`HTTP ${response.status} ${response.statusText}`, response);
        }
        return response;
      } catch (error) {
        lastError = error;
        // HTTP errors are final; any other failure (network error, abort) is retried
        // until the retry budget is spent.
        if (error instanceof HttpError || attempt >= this.config.maxRetries) {
          throw error;
        }
        await this.maybeDelay(attempt);
        attempt += 1;
      } finally {
        if (timeoutHandle) {
          clearTimeout(timeoutHandle);
        }
      }
    }
    throw lastError instanceof Error ? lastError : new Error("HTTP request failed after retries");
  }

  /**
   * Sends a GET request.
   * @param url - Target URL
   * @param options - Request options without a body
   */
  get(url: string, options?: Omit<HttpRequestOptions, "data" | "json">): Promise<Response> {
    return this.request("GET", url, options);
  }

  /**
   * Sends a POST request.
   * @param url - Target URL
   * @param options - Request options, including the body as `data` or `json`
   */
  post(url: string, options: HttpRequestOptions = {}): Promise<Response> {
    return this.request("POST", url, options);
  }
}
/** Alias of `SyncHttpClient`. */
export type HttpClient = SyncHttpClient;
+102
View File
@@ -0,0 +1,102 @@
import { parse } from "node-html-parser";
import { config } from "@/config";
import { OPEN_GRAPH_USER_AGENT } from "@/constants";
import { SyncHttpClient } from "@/http/http-client";
import { UserAgents } from "@/http/user-agent";
import { ArticleMetadata } from "@/schema";
/**
 * Returns the first candidate that still has content after trimming whitespace.
 * @param values - Candidate strings in priority order; `null`, `undefined` and blank entries are skipped
 * @returns The trimmed value, or `undefined` when no candidate qualifies
 */
const pick = (values: Array<string | null | undefined>): string | undefined =>
  values.map((candidate) => candidate?.trim()).find((trimmed) => !!trimmed);
/**
 * Reads the `content` attribute of the first meta tag whose `property` or `name`
 * attribute equals the given key.
 * @param root - The parsed HTML document
 * @param property - The `property` or `name` value to look for
 * @returns The attribute value, or `null` when the tag or its `content` attribute is missing
 */
const extract = (root: ReturnType<typeof parse>, property: string): string | null => {
  const match = root.querySelector(`meta[property='${property}'], meta[name='${property}']`);
  return match?.getAttribute("content") ?? null;
};
/**
 * Extracts Open Graph metadata (title, description, image, URL) from HTML pages,
 * falling back to regular page elements when the Open Graph tags are missing.
 * Pages are downloaded with an HTTP client that sends the Open Graph user agent.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class OpenGraph {
  private readonly client: Pick<SyncHttpClient, "get">;

  constructor() {
    const settings = config.fetch.client;
    const agents = new UserAgents(true, OPEN_GRAPH_USER_AGENT);
    // The explicit default header pins the User-Agent to the Open Graph one,
    // regardless of what the provider's rotation would pick.
    const headers = { "User-Agent": agents.og() };
    this.client = new SyncHttpClient(settings, {
      defaultHeaders: headers,
      userAgentProvider: agents,
    });
  }

  /**
   * Downloads a page and extracts its Open Graph metadata.
   * @param url - The URL to fetch and parse
   * @returns The metadata, or `undefined` when fetching or parsing fails or nothing was found
   */
  async consumeUrl(url: string): Promise<ArticleMetadata | undefined> {
    try {
      const page = await this.client.get(url);
      const markup = await page.text();
      return OpenGraph.consumeHtml(markup, url);
    } catch {
      // Best-effort lookup: any failure is reported as "no metadata".
      return undefined;
    }
  }

  /**
   * Extracts Open Graph metadata from an HTML string.
   *
   * Fallbacks: the `<title>` element for the title, the `description` meta tag for the
   * description, the first `<img>` source for the image, and the canonical link or the
   * given `url` for the URL.
   *
   * @param html - HTML content as a string
   * @param url - Optional URL of the page, used as the last fallback for the metadata URL
   * @returns The metadata, or `undefined` when the HTML is empty or no field could be resolved
   */
  static consumeHtml(html: string, url?: string): ArticleMetadata | undefined {
    if (!html) {
      return undefined;
    }

    const document = parse(html);
    const firstImage = document.querySelector("img");
    const canonicalLink = document.querySelector("link[rel='canonical']");

    const title = pick([extract(document, "og:title"), document.querySelector("title")?.text]);
    const description = pick([
      extract(document, "og:description"),
      extract(document, "description"),
    ]);
    const image = pick([extract(document, "og:image"), firstImage?.getAttribute("src")]);
    const canonical = pick([
      extract(document, "og:url"),
      canonicalLink?.getAttribute("href"),
      url,
    ]);

    const found = [title, description, image, canonical].some((field) => Boolean(field));
    if (!found) {
      return undefined;
    }
    return {
      description,
      image,
      title,
      url: canonical,
    };
  }
}
+41
View File
@@ -0,0 +1,41 @@
import { DEFAULT_USER_AGENT, OPEN_GRAPH_USER_AGENT } from "@/constants";
/**
 * Supplies `User-Agent` strings.
 * With rotation enabled, each call picks a random entry from a built-in list;
 * with rotation disabled, the configured fallback is always returned.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class UserAgents {
  private static readonly USER_AGENTS: string[] = [
    "Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
    "Mozilla/50.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
    "Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0",
    "Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4",
    "Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7",
    "Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2",
  ];

  private readonly rotate: boolean;
  private readonly fallback: string;

  /**
   * @param rotate - Whether `get()` returns a random entry from the built-in list
   * @param fallback - User agent returned by `get()` when rotation is disabled
   */
  constructor(rotate: boolean = true, fallback: string = DEFAULT_USER_AGENT) {
    this.rotate = rotate;
    this.fallback = fallback;
  }

  /** Returns the user agent used for Open Graph requests. */
  og(): string {
    return OPEN_GRAPH_USER_AGENT;
  }

  /** Returns a random built-in user agent when rotating, otherwise the fallback. */
  get(): string {
    if (this.rotate) {
      const pool = UserAgents.USER_AGENTS;
      const position = Math.floor(Math.random() * pool.length);
      return pool[position]!;
    }
    return this.fallback;
  }
}