feat(monorepo): migrate to typescript monorepo
This commit is contained in:
@@ -0,0 +1,241 @@
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
import { FetchClientConfig } from "@/config";
|
||||
import {
|
||||
DEFAULT_RETRY_AFTER_HEADER,
|
||||
DEFAULT_USER_AGENT,
|
||||
TRANSIENT_HTTP_STATUSES,
|
||||
} from "@/constants";
|
||||
import { UserAgents } from "@/http/user-agent";
|
||||
|
||||
/** Header name to header value map sent with a request. */
export type HttpHeaders = { [name: string]: string };

/** Query-string parameters keyed by name; values may be primitives, `null` or `undefined`. */
export type HttpParams = { [name: string]: string | number | boolean | null | undefined };

/** Request payload; typed as `unknown`, so this module makes no assumption about its shape. */
export type HttpData = unknown;
|
||||
|
||||
/**
 * Optional collaborators and defaults accepted by the HTTP client constructors.
 * Every member may be omitted; the client then falls back to its own default.
 */
export interface HttpClientOptions {
  /** Replacement for the global `fetch` (handy for tests). */
  fetchImpl?: typeof fetch;
  /** Replacement for the timer-based pause used between retries; receives milliseconds. */
  sleep?: (ms: number) => Promise<void>;
  /** Headers merged over the client's base headers, so they can override `User-Agent`. */
  defaultHeaders?: HttpHeaders;
  /** Source of the `User-Agent` value; when omitted the client builds one from its config. */
  userAgentProvider?: UserAgents;
}
|
||||
|
||||
/**
 * Per-request settings understood by the HTTP client's `request` method.
 */
export interface HttpRequestOptions {
  /** Query parameters appended to the URL; `null` and `undefined` values are skipped. */
  params?: HttpParams;
  /** Extra headers for this request; they win over the client's default headers. */
  headers?: HttpHeaders;
  /** Value serialised with `JSON.stringify` and sent as the body; takes precedence over `data`. */
  json?: HttpData;
  /** Raw request body, passed to `fetch` as-is. */
  data?: HttpData;
  /** Name of the response header consulted for a server-suggested retry delay. */
  retryAfterHeader?: string;
}
|
||||
|
||||
/**
 * Error carrying the HTTP response that caused it.
 *
 * The client throws it when a request completes with a non-success status, so
 * callers can inspect `status` or read the body through `response`.
 */
export class HttpError extends Error {
  /** Numeric status code copied from the response. */
  readonly status: number;
  /** The response that triggered the error. */
  readonly response: Response;

  /**
   * @param message - Human-readable description of the failure
   * @param response - The response that triggered the error
   */
  constructor(message: string, response: Response) {
    super(message);
    // Without this the inherited name is "Error", which makes logs and
    // `String(err)` indistinguishable from any other failure.
    this.name = "HttpError";
    this.status = response.status;
    this.response = response;
  }
}
|
||||
|
||||
/**
 * Promise-based pause used when no custom `sleep` is injected.
 * @param ms - Milliseconds to wait before resolving
 */
const defaultSleep = async (ms: number): Promise<void> => {
  // The timer's resolved value is discarded so callers always get `undefined`.
  await delay(ms);
};
|
||||
|
||||
/**
 * Appends query parameters to a URL.
 *
 * When there are no parameters the input string is returned untouched;
 * otherwise the URL is parsed, so it must be absolute.
 * @param url - The base URL
 * @param params - Query parameters; `null`/`undefined` values are skipped and
 *   a parameter already present in the URL is replaced
 */
const buildUrl = (url: string, params?: HttpParams): string => {
  const entries = params ? Object.entries(params) : [];
  if (entries.length === 0) {
    return url;
  }

  const target = new URL(url);
  entries
    .filter(([, value]) => value !== undefined && value !== null)
    .forEach(([key, value]) => {
      target.searchParams.set(key, String(value));
    });

  return target.href;
};
|
||||
|
||||
/**
 * Computes how long to wait before the next attempt, in milliseconds.
 *
 * The delay grows exponentially with the attempt number, is capped at
 * `backoffMax`, and then receives up to 25% of random jitter on top of the cap.
 * The result is multiplied by 1000, so the config values are presumably
 * expressed in seconds (confirm against the config definition).
 * @param config - Fetch client configuration
 * @param attempt - Current attempt number, used as the exponent
 */
const computeBackoff = (config: FetchClientConfig, attempt: number): number => {
  const { backoffInitial, backoffMultiplier, backoffMax } = config;
  const uncapped = backoffInitial * backoffMultiplier ** attempt;
  const capped = Math.min(uncapped, backoffMax);
  const jitter = Math.random() * capped * 0.25;

  return (capped + jitter) * 1000;
};
|
||||
|
||||
/**
 * Converts a `Retry-After` style header value into a wait time in milliseconds.
 *
 * Two forms are understood: a number of seconds, or a date string that
 * `Date.parse` accepts. The value is treated as seconds only when the WHOLE
 * string is numeric; otherwise a date-like value that merely starts with
 * digits (e.g. `2030-01-01T00:00:00Z`) would be misread as a seconds count.
 * @param header - Raw header value
 * @returns Milliseconds to wait; `0` when the value is unparseable, negative
 *   or refers to a moment in the past
 */
const parseRetryAfter = (header: string): number => {
  const value = header.trim();

  if (/^[+-]?\d+(?:\.\d+)?$/.test(value)) {
    // Fractions are truncated to whole seconds; negatives are clamped to 0.
    return Math.max(0, Number.parseInt(value, 10) * 1000);
  }

  const timestamp = Date.parse(value);
  if (Number.isNaN(timestamp)) {
    return 0;
  }

  return Math.max(0, timestamp - Date.now());
};
|
||||
|
||||
/**
 * Shared foundation for HTTP clients: holds the configuration, the default
 * headers and the injectable `fetch`/`sleep` implementations, and knows how
 * long to wait between attempts.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class BaseHttpClient {
  protected readonly config: FetchClientConfig;
  protected readonly fetchImpl: typeof fetch;
  protected readonly sleep: (ms: number) => Promise<void>;
  protected readonly headers: HttpHeaders;

  /**
   * @param config - Fetch client configuration
   * @param options - Optional overrides (user agent provider, headers, fetch, sleep)
   */
  constructor(config: FetchClientConfig, options: HttpClientOptions = {}) {
    this.config = config;

    const agents =
      options.userAgentProvider ??
      new UserAgents(config.rotate, config.userAgent ?? DEFAULT_USER_AGENT);
    const userAgent = agents.get() ?? config.userAgent ?? DEFAULT_USER_AGENT;

    // Caller-supplied defaults are layered on top, so they may replace the User-Agent.
    this.headers = Object.assign({ "User-Agent": userAgent }, options.defaultHeaders);
    this.fetchImpl = options.fetchImpl ?? fetch;
    this.sleep = options.sleep ?? defaultSleep;
  }

  /**
   * Merges per-request headers over the client's default headers.
   * @param headers - Headers for a single request; they win on conflicts
   */
  protected buildHeaders(headers?: HttpHeaders): HeadersInit {
    return { ...this.headers, ...headers };
  }

  /**
   * Waits before the next attempt.
   *
   * A server-provided delay is used when the response carries the given header
   * and the configuration asks to respect it; if that yields no delay, the
   * computed backoff for this attempt is used instead.
   * @param attempt - Current attempt number
   * @param response - Response of the failed attempt, if there was one
   * @param retryAfterHeader - Name of the header holding the server-suggested delay
   */
  protected async maybeDelay(
    attempt: number,
    response?: Response,
    retryAfterHeader: string = DEFAULT_RETRY_AFTER_HEADER,
  ): Promise<void> {
    const advertised = response?.headers.get(retryAfterHeader);
    const hinted =
      advertised && this.config.respectRetryAfter ? parseRetryAfter(advertised) : 0;
    const waitMs = hinted === 0 ? computeBackoff(this.config, attempt) : hinted;

    if (waitMs > 0) {
      await this.sleep(waitMs);
    }
  }
}
|
||||
|
||||
/**
 * Best-effort release of a response that will not be handed to the caller.
 *
 * An unread body may keep the underlying connection from being reused, so it
 * is cancelled explicitly. Failures (e.g. a body that is already locked) are
 * ignored: the response is being discarded anyway.
 * @param response - The response to drop
 */
const discardBody = async (response: Response): Promise<void> => {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful can be done here; the retry proceeds regardless.
  }
};

/**
 * HTTP client with per-attempt timeouts and retries.
 *
 * Despite the "Sync" name, every method is asynchronous and returns a promise.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class SyncHttpClient extends BaseHttpClient {
  /**
   * Sends a request, retrying on transient statuses and on thrown errors
   * (network failures, timeouts) up to `config.maxRetries` additional times.
   * @param method - HTTP method
   * @param url - Target URL; `options.params` are appended to it
   * @param options - Headers, query parameters, body and retry header name
   * @returns The first response with a success status
   * @throws HttpError when the final response has a non-success status
   * @throws The last fetch/abort error when every attempt failed before a response arrived
   */
  async request(method: string, url: string, options: HttpRequestOptions = {}): Promise<Response> {
    const retryAfterHeader = options.retryAfterHeader ?? DEFAULT_RETRY_AFTER_HEADER;
    const target = buildUrl(url, options.params);

    const maxAttempts = this.config.maxRetries + 1;
    let attempt = 0;
    let lastError: unknown;

    while (attempt < maxAttempts) {
      // Each attempt gets its own controller so a timeout only aborts that attempt.
      const controller = new AbortController();
      const timeoutHandle = setTimeout(() => controller.abort(), this.config.timeout * 1000);

      try {
        const headers = this.buildHeaders(options.headers);
        const init: RequestInit = {
          body: options.data as BodyInit | undefined,
          headers,
          method,
          redirect: this.config.followRedirects ? "follow" : "manual",
          signal: controller.signal,
        };

        // A JSON payload replaces `data` and gets a content type unless one was provided.
        if (options.json !== undefined) {
          init.body = JSON.stringify(options.json);
          (init.headers as Record<string, string>)["Content-Type"] ??= "application/json";
        }

        const response = await this.fetchImpl(target, init);

        const retryable =
          TRANSIENT_HTTP_STATUSES.includes(response.status as number) &&
          attempt < this.config.maxRetries;
        if (retryable) {
          // This response is thrown away: release its body before waiting and retrying.
          await discardBody(response);
          await this.maybeDelay(attempt, response, retryAfterHeader);
          attempt += 1;
          continue;
        }

        if (!response.ok) {
          throw new HttpError(`HTTP ${response.status} ${response.statusText}`, response);
        }

        return response;
      } catch (error) {
        lastError = error;

        // HTTP errors are final (the retryable ones were handled above); any
        // other failure, aborts included, is retried until attempts run out.
        if (error instanceof HttpError || attempt >= this.config.maxRetries) {
          throw error;
        }

        await this.maybeDelay(attempt);
        attempt += 1;
      } finally {
        clearTimeout(timeoutHandle);
      }
    }

    throw lastError instanceof Error ? lastError : new Error("HTTP request failed after retries");
  }

  /**
   * Sends a GET request.
   * @param url - Target URL
   * @param options - Request options without a body
   */
  get(url: string, options?: Omit<HttpRequestOptions, "data" | "json">): Promise<Response> {
    return this.request("GET", url, options);
  }

  /**
   * Sends a POST request.
   * @param url - Target URL
   * @param options - Request options, typically with `data` or `json`
   */
  post(url: string, options: HttpRequestOptions = {}): Promise<Response> {
    return this.request("POST", url, options);
  }
}

export type HttpClient = SyncHttpClient;
|
||||
@@ -0,0 +1,102 @@
|
||||
import { parse } from "node-html-parser";
|
||||
import { config } from "@/config";
|
||||
import { OPEN_GRAPH_USER_AGENT } from "@/constants";
|
||||
import { SyncHttpClient } from "@/http/http-client";
|
||||
import { UserAgents } from "@/http/user-agent";
|
||||
import { ArticleMetadata } from "@/schema";
|
||||
|
||||
/**
 * Returns the first candidate that still has content after trimming.
 * @param values - Candidates in priority order; `null`, `undefined` and blank strings are skipped
 * @returns The trimmed winner, or `undefined` when every candidate is empty
 */
const pick = (values: Array<string | null | undefined>): string | undefined => {
  return values
    .map((candidate) => candidate?.trim())
    .find((text) => text !== undefined && text.length > 0);
};
|
||||
|
||||
/**
 * Reads the `content` attribute of the first `<meta>` tag whose `property` or
 * `name` attribute equals the given key.
 * @param root - The parsed HTML document
 * @param property - Value to look for in the `property` or `name` attribute
 * @returns The attribute value, or `null` when the tag or the attribute is missing
 */
const extract = (root: ReturnType<typeof parse>, property: string): string | null => {
  const byProperty = `meta[property='${property}']`;
  const byName = `meta[name='${property}']`;
  const match = root.querySelector(`${byProperty}, ${byName}`);

  return match?.getAttribute("content") ?? null;
};
|
||||
|
||||
/**
 * Extracts Open Graph metadata (title, description, image, canonical URL)
 * from HTML pages, fetching them through the project's HTTP client when
 * given a URL.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class OpenGraph {
  private readonly client: Pick<SyncHttpClient, "get">;

  constructor() {
    const settings = config.fetch.client;
    const agents = new UserAgents(true, OPEN_GRAPH_USER_AGENT);

    // The explicit default header pins the Open Graph user agent for every request.
    this.client = new SyncHttpClient(settings, {
      defaultHeaders: { "User-Agent": agents.og() },
      userAgentProvider: agents,
    });
  }

  /**
   * Fetches a page and extracts its Open Graph metadata.
   * @param url - The URL to fetch and parse
   * @returns The metadata, or `undefined` when fetching or parsing fails for any reason
   */
  async consumeUrl(url: string): Promise<ArticleMetadata | undefined> {
    try {
      const page = await this.client.get(url);
      return OpenGraph.consumeHtml(await page.text(), url);
    } catch {
      // Deliberately best-effort: any failure simply means "no metadata".
      return undefined;
    }
  }

  /**
   * Extracts Open Graph metadata from an HTML string, falling back to plain
   * HTML equivalents (`<title>`, description meta tag, first `<img>`,
   * canonical link, then the provided URL) when an `og:*` tag is missing.
   * @param html - HTML content as a string
   * @param url - Optional URL of the page, used as the last fallback for the canonical URL
   * @returns The metadata, or `undefined` when the HTML is empty or nothing could be extracted
   */
  static consumeHtml(html: string, url?: string): ArticleMetadata | undefined {
    if (!html) {
      return undefined;
    }

    const root = parse(html);
    const attribute = (selector: string, name: string): string | null =>
      root.querySelector(selector)?.getAttribute(name) ?? null;

    const title = pick([extract(root, "og:title"), root.querySelector("title")?.text]);
    const description = pick([extract(root, "og:description"), extract(root, "description")]);
    const image = pick([extract(root, "og:image"), attribute("img", "src")]);
    const canonical = pick([
      extract(root, "og:url"),
      attribute("link[rel='canonical']", "href"),
      url ?? null,
    ]);

    if (title || description || image || canonical) {
      return { description, image, title, url: canonical };
    }

    return undefined;
  }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
import { DEFAULT_USER_AGENT, OPEN_GRAPH_USER_AGENT } from "@/constants";
|
||||
|
||||
/**
 * Supplies `User-Agent` strings.
 *
 * With rotation enabled a random entry of the built-in list is returned on
 * every call; with rotation disabled the fallback value is always returned.
 *
 * @author Bernard Ngandu <bernard@devscast.tech>
 */
export class UserAgents {
  private static readonly USER_AGENTS: string[] = [
    "Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
    "Mozilla/50.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
    "Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0",
    "Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4",
    "Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7",
    "Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2",
  ];

  /**
   * @param rotate - Whether `get()` picks a random entry from the built-in list
   * @param fallback - Value returned by `get()` when rotation is disabled
   */
  constructor(
    private readonly rotate: boolean = true,
    private readonly fallback: string = DEFAULT_USER_AGENT,
  ) {}

  /** Returns the dedicated Open Graph user agent. */
  og(): string {
    return OPEN_GRAPH_USER_AGENT;
  }

  /** Returns a random built-in user agent, or the fallback when rotation is off. */
  get(): string {
    if (this.rotate) {
      const pool = UserAgents.USER_AGENTS;
      // The index is always within bounds because Math.random() is < 1.
      return pool[Math.floor(Math.random() * pool.length)]!;
    }

    return this.fallback;
  }
}
|
||||
Reference in New Issue
Block a user