import { setTimeout as delay } from "node:timers/promises";

import type { CrawlerHttpOptions } from "@basango/domain/config";
import {
  DEFAULT_RETRY_AFTER_HEADER,
  DEFAULT_TRANSIENT_HTTP_STATUSES,
  DEFAULT_USER_AGENT,
} from "@basango/domain/constants";

import { UserAgents } from "#crawler/http/user-agent";

/** Header name → header value. */
export type HttpHeaders = Record<string, string>;

/** Query-string parameters; `null`/`undefined` entries are skipped by `buildUrl`. */
export type HttpParams = Record<string, string | number | boolean | null | undefined>;

/** Request payload; passed to `fetch` as-is (`data`) or JSON-serialised (`json`). */
export type HttpData = unknown;

/** Injection points for the client, mainly useful for tests. */
export interface HttpClientOptions {
  /** Source of the `User-Agent` value; a `UserAgents` instance is created when omitted. */
  userAgentProvider?: UserAgents;
  /** Headers merged over the generated `User-Agent` header for every request. */
  defaultHeaders?: HttpHeaders;
  /** Replacement for the global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Replacement for the timer-based sleep used between retries. */
  sleep?: (ms: number) => Promise<void>;
}

/** Per-request options. */
export interface HttpRequestOptions {
  /** Headers merged over the client-level headers. */
  headers?: HttpHeaders;
  /** Query parameters appended to the URL. */
  params?: HttpParams;
  /** Raw body handed to `fetch`; ignored when `json` is provided. */
  data?: HttpData;
  /** Value serialised with `JSON.stringify` and sent as the body. */
  json?: HttpData;
  /** Name of the response header holding the retry delay. */
  retryAfterHeader?: string;
}

/**
 * Error thrown for a response whose status is not OK and that will not be retried.
 * Carries the originating response so callers can inspect status, headers or body.
 */
export class HttpError extends Error {
  readonly status: number;
  readonly response: Response;

  constructor(message: string, response: Response) {
    super(message);
    // Without this, instances report themselves as plain "Error" in logs.
    this.name = "HttpError";
    this.status = response.status;
    this.response = response;
  }
}

/**
 * Default sleep function backed by `node:timers/promises`.
 * @param ms - Milliseconds to sleep
 */
const defaultSleep = async (ms: number): Promise<void> => {
  await delay(ms);
};

/**
 * Builds a URL with query parameters.
 *
 * The URL is returned untouched when there are no parameters. Otherwise it is
 * parsed with `new URL`, which throws for strings that are not absolute URLs.
 * Existing parameters with the same key are overwritten.
 *
 * @param url - The base URL
 * @param params - The query parameters to append; `null`/`undefined` values are skipped
 * @returns The URL including the serialised parameters
 */
const buildUrl = (url: string, params?: HttpParams): string => {
  if (!params || Object.keys(params).length === 0) {
    return url;
  }

  const target = new URL(url);
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined || value === null) continue;
    target.searchParams.set(key, String(value));
  }

  return target.toString();
};

/**
 * Computes the backoff time in milliseconds based on the configuration and attempt number.
 *
 * The base delay is `backoffInitial * backoffMultiplier ** attempt`, capped at
 * `backoffMax`; up to 25% random jitter is added on top (so the result can
 * exceed `backoffMax`). The configuration values are multiplied by 1000 to
 * obtain milliseconds.
 *
 * @param config - Fetch client configuration
 * @param attempt - Current attempt number (0 for the first attempt)
 * @returns Delay in milliseconds
 */
const computeBackoff = (config: CrawlerHttpOptions, attempt: number): number => {
  const base = Math.min(
    config.backoffInitial * config.backoffMultiplier ** attempt,
    config.backoffMax,
  );
  const jitter = Math.random() * base * 0.25;

  return (base + jitter) * 1000;
};

/** Matches a header value that is entirely a (possibly signed, possibly decimal) number. */
const NUMERIC_RETRY_AFTER = /^[+-]?\d+(?:\.\d+)?$/;

/**
 * Converts a retry-delay header value into a wait time in milliseconds.
 *
 * A value that is entirely numeric is treated as a number of seconds. Anything
 * else is parsed as a date and converted to the time remaining until it.
 * Unparseable values, negative delays and dates in the past yield 0.
 *
 * @param header - Raw header value
 * @returns Non-negative delay in milliseconds
 */
const parseRetryAfter = (header: string): number => {
  const value = header.trim();

  // Require the WHOLE value to be numeric. `Number.parseInt` would accept the
  // numeric prefix of a date such as "2030-01-01T00:00:00Z" and turn it into a
  // 2030-second delay instead of parsing it as a date.
  if (NUMERIC_RETRY_AFTER.test(value)) {
    return Math.max(0, Number(value) * 1000);
  }

  const parsed = Date.parse(value);
  if (Number.isNaN(parsed)) {
    return 0;
  }

  return Math.max(0, parsed - Date.now());
};

/**
 * Releases the body of a response that is about to be discarded.
 * Failures are ignored: this is best-effort cleanup and must never mask the
 * retry that follows.
 */
const discardBody = async (response: Response): Promise<void> => {
  try {
    await response.body?.cancel();
  } catch {
    // Body already consumed/locked, or a stand-in response without a stream.
  }
};

/**
 * Base HTTP client providing common functionality.
 *
 * @author Bernard Ngandu
 */
export class BaseHttpClient {
  protected readonly options: CrawlerHttpOptions;
  protected readonly fetchImpl: typeof fetch;
  protected readonly sleep: (ms: number) => Promise<void>;
  protected readonly headers: HttpHeaders;

  /**
   * @param options - Crawler HTTP configuration (retries, timeouts, backoff, user agent)
   * @param clientOptions - Optional overrides for the user-agent provider,
   *   default headers, `fetch` implementation and sleep function
   */
  constructor(options: CrawlerHttpOptions, clientOptions: HttpClientOptions = {}) {
    this.options = options;

    const provider =
      clientOptions.userAgentProvider ??
      new UserAgents(options.rotate, options.userAgent ?? DEFAULT_USER_AGENT);
    const userAgent = provider.get() ?? options.userAgent ?? DEFAULT_USER_AGENT;

    // Caller-supplied defaults are applied last so they can replace the User-Agent.
    const baseHeaders: HttpHeaders = { "User-Agent": userAgent };
    if (clientOptions.defaultHeaders) {
      Object.assign(baseHeaders, clientOptions.defaultHeaders);
    }

    this.headers = baseHeaders;
    this.fetchImpl = clientOptions.fetchImpl ?? fetch;
    this.sleep = clientOptions.sleep ?? defaultSleep;
  }

  /**
   * Merges per-request headers over the client-level headers.
   * @param headers - Per-request headers; they win on key collisions
   * @returns A new plain object with the merged headers
   */
  protected buildHeaders(headers?: HttpHeaders): HeadersInit {
    return { ...this.headers, ...(headers ?? {}) };
  }

  /**
   * Sleeps before the next attempt.
   *
   * When a response is given, `respectRetryAfter` is enabled and the response
   * carries the retry header, the delay from that header is used. If that
   * yields 0 (or does not apply), the computed backoff is used instead.
   *
   * @param attempt - Attempt number that just failed (0 for the first attempt)
   * @param response - Response that triggered the retry, if any
   * @param retryAfterHeader - Name of the header holding the retry delay
   */
  protected async maybeDelay(
    attempt: number,
    response?: Response,
    retryAfterHeader: string = DEFAULT_RETRY_AFTER_HEADER,
  ): Promise<void> {
    let waitMs = 0;

    if (response) {
      const retryAfter = response.headers.get(retryAfterHeader);
      if (retryAfter && this.options.respectRetryAfter) {
        waitMs = parseRetryAfter(retryAfter);
      }
    }

    if (waitMs === 0) {
      waitMs = computeBackoff(this.options, attempt);
    }

    if (waitMs > 0) {
      await this.sleep(waitMs);
    }
  }
}

/**
 * HTTP client with retry and timeout capabilities.
 *
 * Despite the name, every request is asynchronous and returns a promise.
 *
 * @author Bernard Ngandu
 */
export class SyncHttpClient extends BaseHttpClient {
  /**
   * Performs a request, retrying on transient statuses and on thrown errors.
   *
   * Up to `maxRetries + 1` attempts are made. A response whose status is in
   * `DEFAULT_TRANSIENT_HTTP_STATUSES` is retried while retries remain; any
   * other non-OK response throws {@link HttpError} immediately.
   *
   * @param method - HTTP method
   * @param url - Request URL
   * @param options - Headers, query parameters and body for this request
   * @returns The first OK response
   * @throws HttpError when the final response is not OK
   * @throws The last error raised by `fetch` (including the abort error raised
   *   on timeout) once retries are exhausted
   */
  async request(
    method: string,
    url: string,
    options: HttpRequestOptions = {},
  ): Promise<Response> {
    const retryAfterHeader = options.retryAfterHeader ?? DEFAULT_RETRY_AFTER_HEADER;
    const target = buildUrl(url, options.params);
    const maxAttempts = this.options.maxRetries + 1;

    // Built once: nothing here changes between attempts, and a serialisation
    // failure is deterministic, so it should surface immediately rather than
    // be retried with backoff.
    const init = this.buildInit(method, options);

    let attempt = 0;
    let lastError: unknown;

    while (attempt < maxAttempts) {
      let response: Response;
      try {
        response = await this.fetchWithTimeout(target, init);
      } catch (error) {
        // Network failures and timeouts are handled the same way.
        lastError = error;
        if (attempt >= this.options.maxRetries) {
          throw error;
        }
        await this.maybeDelay(attempt);
        attempt += 1;
        continue;
      }

      if (
        DEFAULT_TRANSIENT_HTTP_STATUSES.includes(response.status as number) &&
        attempt < this.options.maxRetries
      ) {
        // The response is being thrown away: release its body so the
        // underlying connection is not held while we wait and retry.
        await discardBody(response);
        await this.maybeDelay(attempt, response, retryAfterHeader);
        attempt += 1;
        continue;
      }

      if (!response.ok) {
        throw new HttpError(`HTTP ${response.status} ${response.statusText}`, response);
      }

      return response;
    }

    // Only reachable when no attempt was made or every attempt fell through.
    throw lastError instanceof Error ? lastError : new Error("HTTP request failed after retries");
  }

  /**
   * Performs a GET request.
   * @param url - Request URL
   * @param options - Request options without a body
   */
  get(url: string, options?: Omit<HttpRequestOptions, "data" | "json">): Promise<Response> {
    return this.request("GET", url, options);
  }

  /**
   * Performs a POST request.
   * @param url - Request URL
   * @param options - Request options, including `data` or `json` for the body
   */
  post(url: string, options: HttpRequestOptions = {}): Promise<Response> {
    return this.request("POST", url, options);
  }

  /** Assembles the attempt-independent part of the `fetch` init (everything but the signal). */
  private buildInit(method: string, options: HttpRequestOptions): RequestInit {
    // `buildHeaders` returns a fresh plain object, so mutating it is safe.
    const headers = this.buildHeaders(options.headers) as Record<string, string>;
    const init: RequestInit = {
      body: options.data as BodyInit | undefined,
      headers,
      method,
      redirect: this.options.followRedirects ? "follow" : "manual",
    };

    if (options.json !== undefined) {
      init.body = JSON.stringify(options.json);

      // Header names are case-insensitive: adding "Content-Type" next to a
      // caller-supplied "content-type" would send both values combined.
      const hasContentType = Object.entries(headers).some(
        ([name, value]) => name.toLowerCase() === "content-type" && value != null,
      );
      if (!hasContentType) {
        headers["Content-Type"] = "application/json";
      }
    }

    return init;
  }

  /**
   * Runs a single `fetch` attempt that is aborted after `options.timeout` seconds.
   * The timer is cleared as soon as the attempt settles, so it never outlives
   * the attempt (e.g. during the backoff sleep that may follow).
   */
  private async fetchWithTimeout(target: string, init: RequestInit): Promise<Response> {
    const controller = new AbortController();
    const timeoutHandle: ReturnType<typeof setTimeout> = setTimeout(
      () => controller.abort(),
      this.options.timeout * 1000,
    );

    try {
      return await this.fetchImpl(target, { ...init, signal: controller.signal });
    } finally {
      clearTimeout(timeoutHandle);
    }
  }
}

export type HttpClient = SyncHttpClient;