[crawler] remove python implementation

commit 0f6d7f3697 (parent 1c478ae443)
committed by BernardNganduDev on 2025-11-07 13:30:19 +02:00
59 changed files with 0 additions and 4971 deletions
-23
@@ -1,23 +0,0 @@
# Ignore Python cache files
__pycache__/
*.pyc
# Ignore virtual environments
.venv/
# Ignore local environment files
.env.local
.env.*.local
# Ignore logs
*.log
# Ignore Docker-related files
Dockerfile
docker-compose.yml
# Ignore other unnecessary files
*.swp
.idea/
.vscode/
.DS_Store
-7
@@ -1,7 +0,0 @@
BASANGO_CRAWLER_TOKEN=some-token
BASANGO_API_ENDPOINT=http://localhost:8000/api/aggregator/articles?token=dev
BASANGO_REDIS_URL=redis://localhost:6379/0
BASANGO_QUEUE_PREFIX=basango
BASANGO_QUEUE_TIMEOUT=30
BASANGO_QUEUE_RESULT_TTL=3600
BASANGO_QUEUE_FAILURE_TTL=86400
-22
@@ -1,22 +0,0 @@
.idea/
.vscode/
.ipynb_checkpoints/
*.pyc
.env.local
.env.*.local
var/
.DS_Store
# Python-generated files
__pycache__/
.pytest_cache/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv
data/
-6
@@ -1,6 +0,0 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.12
hooks:
- id: ruff-check
- id: ruff-format
-1
@@ -1 +0,0 @@
3.13
-34
@@ -1,34 +0,0 @@
# Use the official Python image as a base
FROM python:3.13-slim
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install the project into `/app`
WORKDIR /app
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy
# Ensure installed tools can be executed out of the box
ENV UV_TOOL_BIN_DIR=/usr/local/bin
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --locked --no-install-project --no-dev
# Then, add the rest of the project source code and install it
# Installing separately from its dependencies allows optimal layer caching
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --no-dev
# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"
# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []
-46
@@ -1,46 +0,0 @@
# Crawler
[![crawler audit](https://github.com/bernard-ng/basango/actions/workflows/crawler_audit.yml/badge.svg)](https://github.com/bernard-ng/basango/actions/workflows/crawler_audit.yml)
[![crawler quality](https://github.com/bernard-ng/basango/actions/workflows/crawler_quality.yml/badge.svg)](https://github.com/bernard-ng/basango/actions/workflows/crawler_quality.yml)
[![crawler tests](https://github.com/bernard-ng/basango/actions/workflows/crawler_tests.yml/badge.svg)](https://github.com/bernard-ng/basango/actions/workflows/crawler_tests.yml)
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
[![security: bandit](https://img.shields.io/badge/security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)
---
### Usage
- Install the project in your virtualenv so the `basango` CLI is available:
- With uv: `uv run --with . basango --help`
- Or install locally: `uv sync` then `basango --help`
#### Sync crawl (in-process)
- Crawl a configured source by id and write to CSV/JSON:
- `basango crawl --source-id my-source`
- Limit by page range: `basango crawl --source-id my-source -p 1:3`
- Limit by date range: `basango crawl --source-id my-source -d 2024-10-01:2024-10-31`
- Category, when supported: `basango crawl --source-id my-source -g tech`
#### Async crawl (Redis + RQ)
- Enqueue a crawl job and return immediately:
- `basango crawl --source-id my-source --async`
- Start one or more workers to process queues:
- Article-only (default): `basango worker`
- Multiple queues: `basango worker -q listing -q articles -q processed`
- macOS friendly (no forking): `basango worker --simple`
- One-shot draining for CI: `basango worker --burst`
#### Environment
- `BASANGO_REDIS_URL` (default `redis://localhost:6379/0`)
- `BASANGO_QUEUE_PREFIX` (default `crawler`)
- `BASANGO_QUEUE_TIMEOUT` (default `600` seconds)
- `BASANGO_QUEUE_RESULT_TTL` (default `3600` seconds)
- `BASANGO_QUEUE_FAILURE_TTL` (default `3600` seconds)
#### Configuration
- See `config/pipeline.*.yaml` for source definitions and HTTP client settings.
- Use `-c/--env` to select which pipeline to load (default `development`).
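A minimal end-to-end sketch of the async path in Python (assumes a reachable Redis at `BASANGO_REDIS_URL` and that `my-source` is defined in the pipeline config; the id is a placeholder):

from basango.services.crawler.async_api import (
    QueueSettings,
    schedule_async_crawl,
    start_worker,
)

# Enqueue a listing job; RQ workers do the actual crawling.
job_id = schedule_async_crawl(source_id="my-source", env="development")
print(f"scheduled listing job {job_id}")

# Drain all three queues once and exit (burst), without forking (simple).
start_worker(
    queue_names=["listing", "articles", "processed"],
    settings=QueueSettings(),
    burst=True,
    simple=True,
)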
-38
@@ -1,38 +0,0 @@
services:
basango:
build: .
container_name: basango-app
restart: unless-stopped
networks:
- basango-network
redis:
image: redis:7-alpine
container_name: basango-redis
restart: unless-stopped
ports:
- "6379:6379"
volumes:
- redis_data:/data  # the official redis image persists to /data
command: redis-server --appendonly yes
networks:
- basango-network
redis-commander:
image: rediscommander/redis-commander:latest
container_name: basango-redis-commander
restart: unless-stopped
ports:
- "8081:8081"
environment:
- REDIS_HOSTS=local:redis:6379
depends_on:
- redis
networks:
- basango-network
networks:
basango-network:
volumes:
redis_data:
-97
@@ -1,97 +0,0 @@
# Fetching and crawling configuration
fetch:
client:
timeout: 20
user_agent: Basango/0.1 (+https://github.com/bernard-ng/basango)
follow_redirects: true
verify_ssl: true
rotate_user_agent: true
max_retries: 3
backoff_initial: 1.0
backoff_multiplier: 2.0
backoff_max: 30.0
respect_retry_after: true
crawler:
notify: false
use_multi_threading: false
max_workers: 5
# Source configurations
sources:
html:
- source_id: radiookapi.net
source_url: https://www.radiookapi.net
source_date:
pattern: "/(\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
source_selectors:
articles: ".view-content > .views-row.content-row"
article_title: ".views-field-title a"
article_link: ".views-field-title a"
article_body: ".field-name-body"
article_date: ".views-field-created"
article_categories: ".views-field-field-cat-gorie a"
pagination: "ul.pagination > li a(:last-child)"
pagination_template: "/actualite?page={page}"
supports_categories: false
requires_details: false
requires_rate_limit: false
- source_id: 7sur7.cd
source_url: https://7sur7.cd
source_date:
pattern: "/\\w{3} (\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
categories: [ "politique", "economie", "culture", "sport", "societe" ]
source_selectors:
articles: ".view-content > .row.views-row"
article_title: ".views-field-title a"
article_link: ".views-field-title a"
article_body: ".field.field--name-body"
article_date: ".views-field-created"
pagination: "ul.pagination > li a(:last-child)"
pagination_template: "/index.php/category/{category}?page={page}"
supports_categories: true
requires_details: false
requires_rate_limit: false
- source_id: mediacongo.net
source_url: https://mediacongo.net
source_date:
format: "%d.%m.%Y %H:%M"
source_selectors:
articles: ".for_aitems > .article_other_item"
article_title: "img"
article_link: "a(:first-child)"
article_categories: "a.color_link"
article_body: ".article_ttext"
article_date: ".article_other_about"
pagination: ".nav > a(:last-child)"
pagination_template: "/articles.html?page={page}"
supports_categories: false
requires_details: true
requires_rate_limit: false
- source_id: actualite.cd
source_url: https://actualite.cd
source_date:
pattern: "/(\\d{1}) (\\d{1,2}) (\\d{2}) (\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$4-$3-$2 $5"
source_selectors:
articles: "#views-bootstrap-taxonomy-term-page-2 > div > div"
article_title: "#actu-titre a"
article_link: "#actu-titre a"
article_categories: "#actu-cat a"
article_body: ".views-field.views-field-body"
article_date: "#p-date"
pagination_template: "/actualite?page={page}"
supports_categories: false
requires_details: true
requires_rate_limit: false
wordpress:
- source_id: beto.cd
source_url: https://beto.cd
requires_rate_limit: true
- source_id: newscd.net
source_url: https://newscd.net
-160
@@ -1,160 +0,0 @@
# Fetching and crawling configuration
fetch:
client:
timeout: 20
user_agent: Basango/0.1 (+https://github.com/bernard-ng/basango)
follow_redirects: true
verify_ssl: true
rotate_user_agent: true
max_retries: 3
backoff_initial: 1.0
backoff_multiplier: 2.0
backoff_max: 30.0
respect_retry_after: true
crawler:
notify: false
use_multi_threading: false
max_workers: 5
# Logging configuration
# Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
logging:
level: "ERROR"
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file_logging: true # Enable logging to file
console_logging: true # Enable logging to console
log_file: "pipeline.log" # Log file name
max_log_size: 10485760 # Maximum size of log file before rotation (10MB)
backup_count: 5 # Number of backup log files to keep
# Source configurations
sources:
html:
- source_id: radiookapi.net
source_url: https://www.radiookapi.net
source_date:
pattern: "/(\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
source_selectors:
articles: ".view-content > .views-row.content-row"
article_title: ".views-field-title a"
article_link: ".views-field-title a"
article_body: ".field-name-body"
article_date: ".views-field-created"
article_categories: ".views-field-field-cat-gorie a"
pagination: "ul.pagination > li a(:last-child)"
pagination_template: "/actualite?page={page}"
supports_categories: false
requires_details: false
requires_rate_limit: false
- source_id: 7sur7.cd
source_url: https://7sur7.cd
source_date:
pattern: "/\\w{3} (\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
categories: [ "politique", "economie", "culture", "sport", "societe" ]
source_selectors:
articles: ".view-content > .row.views-row"
article_title: ".views-field-title a"
article_link: ".views-field-title a"
article_body: ".field.field--name-body"
article_date: ".views-field-created"
pagination: "ul.pagination > li a(:last-child)"
pagination_template: "/index.php/category/{category}?page={page}"
supports_categories: true
requires_details: false
requires_rate_limit: false
- source_id: mediacongo.net
source_url: https://mediacongo.net
source_date:
format: "%d.%m.%Y %H:%M"
source_selectors:
articles: ".for_aitems > .article_other_item"
article_title: "img"
article_link: "a(:first-child)"
article_categories: "a.color_link"
article_body: ".article_ttext"
article_date: ".article_other_about"
pagination: ".nav > a(:last-child)"
pagination_template: "/articles.html?page={page}"
supports_categories: false
requires_details: true
requires_rate_limit: false
- source_id: actualite.cd
source_url: https://actualite.cd
source_date:
pattern: "/(\\d{1}) (\\d{1,2}) (\\d{2}) (\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$4-$3-$2 $5"
source_selectors:
articles: "#views-bootstrap-taxonomy-term-page-2 > div > div"
article_title: "#actu-titre a"
article_link: "#actu-titre a"
article_categories: "#actu-cat a"
article_body: ".views-field.views-field-body"
article_date: "#p-date"
pagination_template: "/actualite?page={page}"
supports_categories: false
requires_details: true
requires_rate_limit: false
wordpress:
- source_id: beto.cd
source_url: https://beto.cd
requires_rate_limit: true
- source_id: newscd.net
source_url: https://newscd.net
- source_id: africanewsrdc.net
source_url: https://www.africanewsrdc.net
- source_id: angazainstitute.ac.cd
source_url: https://angazainstitute.ac.cd
- source_id: b-onetv.cd
source_url: https://b-onetv.cd
- source_id: bukavufm.com
source_url: https://bukavufm.com
- source_id: changement7.net
source_url: https://changement7.net
- source_id: congoactu.net
source_url: https://congoactu.net
- source_id: congoindependant.com
source_url: https://www.congoindependant.com
- source_id: congoquotidien.com
source_url: https://www.congoquotidien.com
- source_id: cumulard.cd
source_url: https://www.cumulard.cd
- source_id: environews-rdc.net
source_url: https://environews-rdc.net
- source_id: freemediardc.info
source_url: https://www.freemediardc.info
- source_id: geopolismagazine.org
source_url: https://geopolismagazine.org
- source_id: habarirdc.net
source_url: https://habarirdc.net
- source_id: infordc.com
source_url: https://infordc.com
- source_id: kilalopress.net
source_url: https://kilalopress.net
- source_id: laprosperiteonline.net
source_url: https://laprosperiteonline.net
- source_id: laprunellerdc.cd
source_url: https://laprunellerdc.cd
- source_id: lesmedias.net
source_url: https://lesmedias.net
- source_id: lesvolcansnews.net
source_url: https://lesvolcansnews.net
- source_id: netic-news.net
source_url: https://www.netic-news.net
- source_id: objectif-infos.cd
source_url: https://objectif-infos.cd
- source_id: scooprdc.net
source_url: https://scooprdc.net
- source_id: journaldekinshasa.com
source_url: https://www.journaldekinshasa.com
- source_id: lepotentiel.cd
source_url: https://lepotentiel.cd
- source_id: acturdc.com
source_url: https://acturdc.com
- source_id: matininfos.net
source_url: https://matininfos.net
-160
@@ -1,160 +0,0 @@
# Fetching and crawling configuration
fetch:
client:
timeout: 20
user_agent: Basango/0.1 (+https://github.com/bernard-ng/basango)
follow_redirects: true
verify_ssl: true
rotate: true
max_retries: 3
backoff_initial: 1.0
backoff_multiplier: 2.0
backoff_max: 30.0
respect_retry_after: true
crawler:
notify: false
use_multi_threading: false
max_workers: 5
# Logging configuration
# Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
logging:
level: "INFO"
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file_logging: true # Enable logging to file
console_logging: true # Enable logging to console
log_file: "pipeline.log" # Log file name
max_log_size: 10485760 # Maximum size of log file before rotation (10MB)
backup_count: 5 # Number of backup log files to keep
# Source configurations
sources:
html:
- source_id: radiookapi.net
source_url: https://www.radiookapi.net
source_date:
pattern: "/(\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
source_selectors:
articles: ".view-content > .views-row.content-row"
article_title: "h1.page-header"
article_link: ".views-field-title a"
article_body: ".field-name-body"
article_date: ".views-field-created"
article_categories: ".views-field-field-cat-gorie a"
pagination: "ul.pagination > li.pager-last > a"
pagination_template: "actualite"
supports_categories: false
requires_details: true
requires_rate_limit: false
- source_id: 7sur7.cd
source_url: https://7sur7.cd
source_date:
pattern: "/\\w{3} (\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$3-$2-$1 $4"
categories: [ "politique", "economie", "culture", "sport", "societe" ]
source_selectors:
articles: ".view-content > .row.views-row"
article_title: ".views-field-title a"
article_link: ".views-field-title a"
article_body: ".field.field--name-body"
article_date: ".views-field-created"
pagination: "ul.pagination > li.pager__item.pager__item--last > a"
pagination_template: "index.php/category/{category}"
supports_categories: true
requires_details: false
requires_rate_limit: false
- source_id: mediacongo.net
source_url: https://www.mediacongo.net
source_date:
format: "%d.%m.%Y %H:%M"
source_selectors:
articles: ".for_aitems > .article_other_item"
article_title: "img"
article_link: "a:first-child"
article_categories: "a.color_link"
article_body: ".article_ttext"
article_date: ".article_other_about"
pagination: "div.pagination > div > a:last-child"
pagination_template: "articles.html"
supports_categories: false
requires_details: true
requires_rate_limit: false
- source_id: actualite.cd
source_url: https://actualite.cd
source_date:
pattern: "/(\\d{1}) (\\d{1,2}) (\\d{2}) (\\d{4}) - (\\d{2}:\\d{2})/"
replacement: "$4-$3-$2 $5"
source_selectors:
articles: "#views-bootstrap-taxonomy-term-page-2 > div > div"
article_title: "#actu-titre a"
article_link: "#actu-titre a"
article_categories: "#actu-cat a"
article_body: ".views-field.views-field-body"
article_date: "#p-date"
pagination_template: "actualite"
supports_categories: false
requires_details: true
requires_rate_limit: false
wordpress:
- source_id: beto.cd
source_url: https://beto.cd
requires_rate_limit: true
- source_id: newscd.net
source_url: https://newscd.net
- source_id: africanewsrdc.net
source_url: https://www.africanewsrdc.net
- source_id: angazainstitute.ac.cd
source_url: https://angazainstitute.ac.cd
- source_id: b-onetv.cd
source_url: https://b-onetv.cd
- source_id: bukavufm.com
source_url: https://bukavufm.com
- source_id: changement7.net
source_url: https://changement7.net
- source_id: congoactu.net
source_url: https://congoactu.net
- source_id: congoindependant.com
source_url: https://www.congoindependant.com
- source_id: congoquotidien.com
source_url: https://www.congoquotidien.com
- source_id: cumulard.cd
source_url: https://www.cumulard.cd
- source_id: environews-rdc.net
source_url: https://environews-rdc.net
- source_id: freemediardc.info
source_url: https://www.freemediardc.info
- source_id: geopolismagazine.org
source_url: https://geopolismagazine.org
- source_id: habarirdc.net
source_url: https://habarirdc.net
- source_id: infordc.com
source_url: https://infordc.com
- source_id: kilalopress.net
source_url: https://kilalopress.net
- source_id: laprosperiteonline.net
source_url: https://laprosperiteonline.net
- source_id: laprunellerdc.cd
source_url: https://laprunellerdc.cd
- source_id: lesmedias.net
source_url: https://lesmedias.net
- source_id: lesvolcansnews.net
source_url: https://lesvolcansnews.net
- source_id: netic-news.net
source_url: https://www.netic-news.net
- source_id: objectif-infos.cd
source_url: https://objectif-infos.cd
- source_id: scooprdc.net
source_url: https://scooprdc.net
- source_id: journaldekinshasa.com
source_url: https://www.journaldekinshasa.com
- source_id: lepotentiel.cd
source_url: https://lepotentiel.cd
- source_id: acturdc.com
source_url: https://acturdc.com
- source_id: matininfos.net
source_url: https://matininfos.net
-39
@@ -1,39 +0,0 @@
[project]
name = "basango"
version = "0.1.0"
description = "Basango : Web Scrapper for DRC News"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"pydantic>=2.11.7",
"pydantic-settings>=2.10.1",
"rq>=2.5.0",
"typer>=0.16.1",
"uv-build>=0.8.12,<0.9.0",
"pyyaml>=6.0.2",
"httpx>=0.27.2",
"trafilatura>=1.7.0",
"selectolax>=0.3.20",
"markdownify>=0.13.1",
"readability-lxml>=0.8.1",
"beautifulsoup4>=4.13.5",
"tiktoken>=0.12.0",
]
[dependency-groups]
dev = [
"bandit>=1.8.6",
"pyright>=1.1.404",
"pytest>=8.4.1",
"ruff>=0.12.9",
]
[project.scripts]
basango = "basango:main"
[build-system]
requires = ["uv_build>=0.8.12,<0.9.0"]
build-backend = "uv_build"
[tool.pytest.ini_options]
testpaths = ["tests"]
-9
@@ -1,9 +0,0 @@
def main() -> None:
# Lazy import to avoid importing CLI deps during package import
from basango.cli import app
app()
if __name__ == "__main__": # pragma: no cover
main()
-186
@@ -1,186 +0,0 @@
"""
CLI entry points for crawling and worker management.
Sync vs async usage
- Synchronous crawl: runs the selected crawler in-process and writes results
via configured persistors (CSV/JSON). Suitable for local development or
small runs.
- Asynchronous crawl: enqueues a listing job in Redis (RQ) and returns
immediately. One or more RQ workers must be running to process jobs.
Examples
- Sync: `basango crawl --source-id my-source --page 1:3`
- Async: `basango crawl --source-id my-source --async`
- Worker (macOS friendly): `basango worker --simple -q articles`
Environment
- `BASANGO_REDIS_URL` points the worker/queues to Redis.
- `BASANGO_QUEUE_PREFIX` namespaces queues (default: `crawler`).
"""
from typing import List, Optional
from enum import Enum
import typer
from basango.core.config import CrawlerConfig
from basango.core.config_manager import ConfigManager
from basango.domain import DateRange, PageRange, UpdateDirection
from basango.services import JsonPersistor
from basango.services.crawler.async_api import (
QueueSettings,
schedule_async_crawl,
start_worker,
)
from basango.services.crawler.html_crawler import HtmlCrawler
from basango.services.crawler.wordpress_crawler import WordpressCrawler
app = typer.Typer(no_args_is_help=True, add_completion=False)
class QueueName(str, Enum):
listing = "listing"
articles = "articles"
processed = "processed"
@app.command("crawl")
def crawl_cmd(
source_id: str = typer.Option(
..., help="Source id to crawl (as defined in config)"
),
page: str = typer.Option(None, "--page", "-p", help="Page range e.g. '1:10'"),
date: str = typer.Option(
None, "--date", "-d", help="Date range e.g. '2024-10-01:2024-10-31'"
),
category: str = typer.Option(None, "--category", "-g", help="Optional category"),
notify: bool = typer.Option(False, "--notify", "-n", help="Enable notifications"),
env: str = typer.Option("development", "--env", "-c", help="Environment"),
async_mode: bool = typer.Option(
False,
"--async/--no-async",
help="Schedule crawl through Redis queues instead of running synchronously.",
),
) -> None:
"""Crawl a single source, either synchronously or via the async queue.
Technical notes
- When `--async` is set, we only enqueue a job (no crawling happens here).
This keeps the CLI responsive and leaves fault-tolerance to RQ workers.
- Persistors (CSV/JSON) are instantiated only for the sync path; the async
path assigns them inside worker tasks to avoid importing heavy deps in the
CLI process and to better isolate failures.
"""
manager = ConfigManager()
pipeline = manager.get(env)
manager.ensure_directories(pipeline)
manager.setup_logging(pipeline)
source = pipeline.sources.find(source_id)
if source is None:
raise typer.BadParameter(f"Source '{source_id}' not found in config")
if async_mode:
job_id = schedule_async_crawl(
source_id=source_id,
env=env,
page_range=page,
date_range=date,
category=category,
)
typer.echo(
f"Scheduled async crawl job {job_id} for source '{source_id}' on queue"
)
return
crawler_config = CrawlerConfig(
source=source,
page_range=PageRange.create(page) if page else None,
date_range=DateRange.create(date) if date else None,
category=category,
notify=notify,
direction=UpdateDirection.FORWARD,
)
crawlers = [
HtmlCrawler,
WordpressCrawler,
]
source_identifier = getattr(source, "source_id", source_id) or source_id
persistors = [
JsonPersistor(
data_dir=pipeline.paths.data,
source_id=str(source_identifier),
),
]
for crawler_cls in crawlers:
    if crawler_cls.supports() == source.source_kind:
        crawler = crawler_cls(
crawler_config,
pipeline.fetch.client,
persistors=persistors,
)
crawler.fetch()
break
@app.command("worker")
def worker_cmd(
queue: Optional[List[QueueName]] = typer.Option(
None,
"--queue",
"-q",
help=(
"Queue name(s) (without prefix). Choices: listing, articles, processed. "
"Provide multiple times to listen to more than one queue."
),
),
simple: bool = typer.Option(
False,
"--simple/--no-simple",
help=(
"Run jobs in-process using RQ SimpleWorker (no forking). "
"Recommended on macOS to avoid fork-related crashes."
),
),
burst: bool = typer.Option(
False,
"--burst",
help="Process available jobs and exit instead of running continuously.",
),
redis_url: str = typer.Option(
None,
"--redis-url",
help="Redis connection URL. Defaults to BASANGO_REDIS_URL.",
),
env: str = typer.Option(
"development",
"--env",
"-c",
help="Environment used to configure logging before starting the worker.",
),
) -> None:
"""Run an RQ worker that consumes crawler queues.
Notes
- By default the worker listens to the `articles` queue (detail jobs). Use
`-q listing -q articles -q processed` to listen to multiple.
- `--simple` uses RQ's SimpleWorker (no forking). On macOS this avoids
fork-related crashes when libraries aren't fork-safe.
- Use `--burst` to drain the queue and exit, useful for CI or one-off runs.
"""
manager = ConfigManager()
pipeline = manager.get(env)
manager.ensure_directories(pipeline)
manager.setup_logging(pipeline)
settings = QueueSettings(redis_url=redis_url) if redis_url else QueueSettings()
queue_names = [q.value for q in queue] if queue else None
start_worker(
queue_names=queue_names,
settings=settings,
burst=burst,
simple=simple,
)
@@ -1,19 +0,0 @@
from .fetch_config import ClientConfig, FetchConfig, CrawlerConfig
from .logging_config import LoggingConfig
from .pipeline_config import PipelineConfig
from .source_config import (
WordPressSourceConfig,
HtmlSourceConfig,
SourcesConfig,
)
__all__ = [
"ClientConfig",
"FetchConfig",
"CrawlerConfig",
"LoggingConfig",
"PipelineConfig",
"WordPressSourceConfig",
"HtmlSourceConfig",
"SourcesConfig",
]
@@ -1,71 +0,0 @@
from typing import Optional, Union
from pydantic import BaseModel, Field
from basango.domain import PageRange, DateRange, UpdateDirection
from basango.core.config.source_config import HtmlSourceConfig, WordPressSourceConfig
class ClientConfig(BaseModel):
timeout: float = Field(default=20.0, description="Request timeout in seconds")
user_agent: str = Field(
default="Basango/0.1 (+https://github.com/bernard-ng/basango)"
)
follow_redirects: bool = Field(default=True, description="Follow HTTP redirects")
verify_ssl: bool = Field(default=True, description="Verify SSL certificates")
rotate: bool = Field(default=True, description="Rotate User-Agent header")
max_retries: int = Field(
default=3, description="Maximum number of retries on failure"
)
backoff_initial: float = Field(
default=1.0, description="Initial backoff delay in seconds"
)
backoff_multiplier: float = Field(default=2.0, description="Backoff multiplier")
backoff_max: float = Field(
default=30.0, description="Maximum backoff delay in seconds"
)
respect_retry_after: bool = Field(
default=True, description="Respect Retry-After header if present"
)
class CrawlerConfig(BaseModel):
source: Optional[Union[HtmlSourceConfig, WordPressSourceConfig]] = Field(
default=None, description="Source configuration to crawl"
)
page_range: Optional[PageRange] = Field(
default=None, description="Page range to crawl, e.g: 1:10"
)
date_range: Optional[DateRange] = Field(
default=None,
description="Date range to filter articles, e.g: 2024-10-01:2024-10-31",
)
category: Optional[str] = Field(
default=None, description="Optional category to filter articles"
)
notify: bool = Field(
default=False, description="Enable notifications after crawling"
)
is_update: bool = Field(
default=False,
description="Whether this crawl is an update (True) or a full crawl (False)",
)
use_multi_threading: bool = Field(
default=False, description="Enable multiprocessing for concurrent crawling"
)
max_workers: int = Field(
default=5, description="Maximum number of concurrent crawling workers"
)
direction: UpdateDirection = Field(
default=UpdateDirection.FORWARD, description="Crawling direction"
)
class FetchConfig(BaseModel):
client: ClientConfig = Field(
default_factory=ClientConfig, description="Http client configuration"
)
crawler: CrawlerConfig = Field(
default_factory=CrawlerConfig, description="Crawler configuration"
)
@@ -1,11 +0,0 @@
from pydantic import BaseModel
class LoggingConfig(BaseModel):
level: str = "INFO"
format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file_logging: bool = True
console_logging: bool = True
log_file: str = "pipeline.log"
max_log_size: int = 10 * 1024 * 1024 # 10MB
backup_count: int = 5
@@ -1,25 +0,0 @@
from pathlib import Path
from pydantic import Field, BaseModel
from basango.core.config.fetch_config import FetchConfig
from basango.core.config.logging_config import LoggingConfig
from basango.core.config.source_config import SourcesConfig
from basango.core.project_paths import ProjectPaths
def _default_project_paths() -> ProjectPaths:
"""Create default project paths relative to the project root."""
root = Path.cwd()
return ProjectPaths(
root=root,
configs=root / "config",
data=root / "data" / "dataset",
logs=root / "data" / "logs",
)
class PipelineConfig(BaseModel):
paths: ProjectPaths = Field(default_factory=_default_project_paths, alias="paths")
logging: LoggingConfig = Field(default_factory=LoggingConfig)
fetch: FetchConfig = Field(default_factory=FetchConfig)
sources: SourcesConfig = Field(default_factory=SourcesConfig)
@@ -1,66 +0,0 @@
from typing import Optional, Union
from pydantic import BaseModel, Field, HttpUrl
from basango.domain import SourceDate, SourceKind, SourceSelectors
class SourceConfigBase(BaseModel):
source_id: str = Field(..., description="Unique identifier for the source")
source_url: HttpUrl = Field(..., description="URL of the source")
source_date: SourceDate = Field(
default_factory=SourceDate, description="Date extraction schema"
)
source_kind: SourceKind = Field(
..., description="Type of the source, e.g., 'wordpress' or 'html'"
)
categories: list[str] = Field(
default_factory=list, description="List of categories to filter articles"
)
supports_categories: bool = Field(
default=False, description="the source supports categories"
)
requires_details: bool = Field(
default=False, description="detailed article is required to compute date range"
)
requires_rate_limit: bool = Field(
default=False, description="requires rate limit to avoid being blocked"
)
class WordPressSourceConfig(SourceConfigBase):
source_kind: SourceKind = Field(
default=SourceKind.WORDPRESS, description="Type of the source"
)
source_date: SourceDate = SourceDate(
format="%Y-%m-%dT%H:%M:%S", pattern=None, replacement=None
)
class HtmlSourceConfig(SourceConfigBase):
source_kind: SourceKind = Field(
default=SourceKind.HTML, description="Type of the source"
)
source_selectors: SourceSelectors = Field(
default_factory=lambda: SourceSelectors(),
description="CSS selectors for extracting articles",
)
pagination_template: str = Field(
..., description="Template URL for pagination, e.g., '/actualite?page={page}'"
)
class SourcesConfig(BaseModel):
html: list[HtmlSourceConfig] = Field(
default_factory=list, description="List of source configurations"
)
wordpress: list[WordPressSourceConfig] = Field(
default_factory=list, description="List of source configurations"
)
def find(
    self, source_id: str
) -> Optional[Union[HtmlSourceConfig, WordPressSourceConfig]]:
    for source in self.html + self.wordpress:
        if source.source_id == source_id:
            return source
    # Return None (rather than raising) so the CLI and task callers can
    # handle the missing source with their own error reporting.
    return None
@@ -1,149 +0,0 @@
import logging
import sys
from pathlib import Path
from typing import Optional, Union, Dict
import yaml
from basango.core.config import PipelineConfig
from basango.core.project_paths import ProjectPaths
def _ensure_utf8_stream(stream):
try:
if hasattr(stream, "reconfigure"):
stream.reconfigure(encoding="utf-8", errors="replace")
except (AttributeError, ValueError):
return stream
return stream
class ConfigManager:
def __init__(self, config_path: Optional[Union[str, Path]] = None):
self.config_path = Path(config_path) if config_path else self._find_config()
self._config: Optional[PipelineConfig] = None
self._setup_paths()
def get(self, env: Optional[str] = None) -> PipelineConfig:
if env:
path = self.config_path.parent / f"pipeline.{env}.yaml"
if path.exists():
base = self.load().model_dump()
self._override(base, self.load(path).model_dump())
return PipelineConfig(**base)
if self._config is None:
self._config = self.load()
return self._config
def load(self, config_path: Optional[Path] = None) -> PipelineConfig:
"""Load configuration from file"""
self.config_path = config_path if config_path else self._find_config()
if not self.config_path.exists():
logging.warning(
f"Config file not found: {self.config_path}. Using defaults."
)
return self._create_default()
try:
with open(self.config_path, "r") as f:
config_data = yaml.safe_load(f) or {}  # an empty YAML file yields None
if "paths" not in config_data:
config_data["paths"] = self.default_paths.model_dump()
self._config = PipelineConfig(**config_data)
return self._config
except Exception as e:
logging.error(f"Failed to load config from {self.config_path}: {e}")
return self._create_default()
@classmethod
def ensure_directories(cls, cfg: PipelineConfig) -> None:
directories = [cfg.paths.data, cfg.paths.logs, cfg.paths.configs]
for directory in directories:
Path(directory).mkdir(parents=True, exist_ok=True)
logging.info("Ensured all required directories exist")
@classmethod
def setup_logging(cls, cfg: PipelineConfig):
logs_path = cfg.paths.logs
logs_path.mkdir(parents=True, exist_ok=True)
# Setup logging configuration
log_level = getattr(logging, cfg.logging.level.upper(), logging.INFO)
# Create formatter
formatter = logging.Formatter(cfg.logging.format)
# Setup root logger
root_logger = logging.getLogger()
root_logger.setLevel(log_level)
# Clear existing handlers
root_logger.handlers.clear()
_ensure_utf8_stream(sys.stdout)
_ensure_utf8_stream(sys.stderr)
# Console handler
if cfg.logging.console_logging:
console_handler = logging.StreamHandler(
stream=_ensure_utf8_stream(sys.stderr)
)
console_handler.setFormatter(formatter)
root_logger.addHandler(console_handler)
# File handler
if cfg.logging.file_logging:
from logging.handlers import RotatingFileHandler
log_file_path = logs_path / cfg.logging.log_file
file_handler = RotatingFileHandler(
log_file_path,
maxBytes=cfg.logging.max_log_size,
backupCount=cfg.logging.backup_count,
encoding="utf-8",
)
file_handler.setFormatter(formatter)
root_logger.addHandler(file_handler)
@classmethod
def _find_config(cls) -> Path:
possible_paths = [
Path.cwd() / "config" / "pipeline.yaml",
Path.cwd() / "config" / "pipeline.yml",
Path.cwd() / "pipeline.yaml",
Path(__file__).parent.parent.parent.parent / "config" / "pipeline.yaml",
]
for path in possible_paths:
if path.exists():
return path
raise FileNotFoundError(
"No configuration file found in the expected locations."
)
def _setup_paths(self) -> None:
root = Path(__file__).parent.parent.parent.parent
self.default_paths = ProjectPaths(
root=root,
configs=root / "config",
data=root / "data" / "dataset",
logs=root / "data" / "logs",
)
def _create_default(self) -> PipelineConfig:
return PipelineConfig(paths=self.default_paths)
def _override(self, base: Dict, update: Dict):
for key, value in update.items():
if key in base and isinstance(base[key], dict) and isinstance(value, dict):
self._override(base[key], value)
else:
base[key] = value
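A sketch of the `_override` merge used by `get()` (values are illustrative; the path passed to `ConfigManager` need not exist for this demonstration):

manager = ConfigManager(config_path="config/pipeline.yaml")
base = {"fetch": {"client": {"timeout": 20, "verify_ssl": True}}}
update = {"fetch": {"client": {"timeout": 5}}}  # e.g. from pipeline.development.yaml
manager._override(base, update)
# Nested keys from the env overlay win; unrelated keys survive.
assert base == {"fetch": {"client": {"timeout": 5, "verify_ssl": True}}}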
@@ -1,26 +0,0 @@
from pathlib import Path
from pydantic import BaseModel, field_validator, ConfigDict
class ProjectPaths(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
root: Path
data: Path
logs: Path
configs: Path
@field_validator("*", mode="before")
@classmethod
def convert_to_path(cls, v):
return Path(v) if not isinstance(v, Path) else v
def get_data_path(self, filename: str) -> Path:
return self.data / filename
def get_logs_path(self, filename: str) -> Path:
return self.logs / filename
def get_config_path(self, filename: str) -> Path:
return self.configs / filename
@@ -1,15 +0,0 @@
from .article import Article
from .date_range import DateRange
from .page_range import PageRange
from .source import SourceKind, SourceDate, SourceSelectors
from .update_direction import UpdateDirection
__all__ = [
"Article",
"DateRange",
"PageRange",
"SourceKind",
"SourceDate",
"SourceSelectors",
"UpdateDirection",
]
@@ -1,30 +0,0 @@
from datetime import datetime
from typing import Any, Optional
from pydantic import BaseModel, HttpUrl
from .token_statistics import TokenStatistics
class Article(BaseModel):
title: str
link: HttpUrl
body: str
categories: list[str]
source: str
timestamp: datetime
metadata: Optional[dict[str, Any]] = None
token_statistics: Optional["TokenStatistics"] = None
def to_dict(self) -> dict[str, Any]:
return {
"title": self.title,
"link": str(self.link),
"body": self.body,
"categories": self.categories,
"source": self.source,
"timestamp": int(self.timestamp.timestamp()),
"metadata": self.metadata,
"tokenStatistics": self.token_statistics.to_dict()
if self.token_statistics
else "",
}
@@ -1,64 +0,0 @@
from dataclasses import dataclass
from datetime import datetime, timezone, timedelta
from typing import Optional
def _ensure_utc(dt: datetime) -> datetime:
if dt.tzinfo is None:
return dt.replace(tzinfo=timezone.utc)
return dt
@dataclass(frozen=True)
class DateRange:
start: int # Unix timestamp
end: int # Unix timestamp
def __post_init__(self) -> None:
assert self.start != 0, "[DateRange] Start timestamp cannot be 0"
assert self.end != 0, "[DateRange] End timestamp cannot be 0"
assert self.end >= self.start, (
"[DateRange] End must be greater than or equal to start"
)
def __str__(self) -> str:
return f"{self.start}:{self.end}"
def in_range(self, ts: int) -> bool:
return self.start <= ts <= self.end
def out_range(self, ts: int) -> bool:
return ts < self.start or ts > self.end
def format(self, fmt: str = "%Y-%m-%d") -> str:
start = datetime.fromtimestamp(self.start, tz=timezone.utc).strftime(fmt)
end = datetime.fromtimestamp(self.end, tz=timezone.utc).strftime(fmt)
return f"{start}:{end}"
@classmethod
def create(
cls, spec: str, fmt: str = "%Y-%m-%d", separator: str = ":"
) -> "DateRange":
assert separator != "", "[DateRange] Separator cannot be empty"
assert separator in spec, f"[DateRange] {separator} must be in {spec}"
parts = spec.split(separator)
assert len(parts) == 2, f"[DateRange] Invalid date interval: {spec}"
start = _ensure_utc(datetime.strptime(parts[0], fmt))
end = _ensure_utc(datetime.strptime(parts[1], fmt))
return cls(int(start.timestamp()), int(end.timestamp()))
@classmethod
def backward(cls, date: Optional[datetime] = None, days: int = 30) -> "DateRange":
base = _ensure_utc(date or datetime.now(timezone.utc))
start = base - timedelta(days=days)
end = base + timedelta(days=1) # in future to avoid timezone issues
return cls(int(start.timestamp()), int(end.timestamp()))
@classmethod
def forward(cls, date: datetime) -> "DateRange":
start = _ensure_utc(date)
end = datetime.now(timezone.utc) + timedelta(days=1)
return cls(int(start.timestamp()), int(end.timestamp()))
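Usage sketch for `DateRange` (dates are arbitrary; all timestamps are UTC):

from datetime import datetime, timezone

r = DateRange.create("2024-10-01:2024-10-31")
mid_october = int(datetime(2024, 10, 15, tzinfo=timezone.utc).timestamp())
assert r.in_range(mid_october)
assert r.format() == "2024-10-01:2024-10-31"

# Last 7 days; the end is padded one day into the future, as noted above.
recent = DateRange.backward(days=7)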
@@ -1,18 +0,0 @@
from basango.domain import DateRange
class ArticleNotFoundError(Exception):
pass
class ArticleOutOfRange(Exception):
def __init__(self, timestamp: str, date_range: DateRange):
self.timestamp = timestamp
self.date_range = date_range
super().__init__(
f"Article with timestamp {timestamp} is out of range {date_range}"
)
@classmethod
def create(cls, timestamp: str, date_range: DateRange) -> "ArticleOutOfRange":
return cls(timestamp, date_range)
@@ -1,20 +0,0 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class PageRange:
start: int
end: int
@staticmethod
def create(spec: str) -> "PageRange":
parts = spec.split(":")
assert len(parts) == 2, f"[PageRange] Invalid page range: {spec}"
start, end = int(parts[0]), int(parts[1])
assert start >= 0, f"[PageRange] Invalid page range: {spec}"
assert end >= start, f"[PageRange] Invalid page range: {spec}"
return PageRange(start=start, end=end)
def __str__(self):
return f"{self.start}:{self.end}"
@@ -1,41 +0,0 @@
from enum import StrEnum
from typing import Optional
from pydantic import BaseModel, Field
class SourceKind(StrEnum):
WORDPRESS = "wordpress"
HTML = "html"
class SourceDate(BaseModel):
format: str = "%Y-%m-%d %H:%M"
pattern: Optional[str] = None
replacement: Optional[str] = None
class SourceSelectors(BaseModel):
articles: Optional[str] = Field(
default=None, description="CSS selector for the list of articles within a page"
)
article_title: Optional[str] = Field(
default=None, description="CSS selector for the article title"
)
article_link: Optional[str] = Field(
default=None, description="CSS selector for the article link"
)
article_body: Optional[str] = Field(
default=None, description="CSS selector for the article body/content"
)
article_date: Optional[str] = Field(
default=None, description="CSS selector for the article date"
)
article_categories: Optional[str] = Field(
default=None, description="CSS selector for the article categories"
)
pagination: str = Field(
default="ul.pagination > li a",
description="CSS selector for the pagination links",
)
@@ -1,19 +0,0 @@
from dataclasses import dataclass
@dataclass
class TokenStatistics:
"""Counts of tokens for different article sections."""
title: int
body: int
excerpt: int
categories: int
def to_dict(self) -> dict[str, int]:
return {
"title": self.title,
"body": self.body,
"excerpt": self.excerpt,
"categories": self.categories,
}
@@ -1,6 +0,0 @@
from enum import StrEnum
class UpdateDirection(StrEnum):
FORWARD = "forward"
BACKWARD = "backward"
@@ -1,22 +0,0 @@
from .date_parser import DateParser
from .http_client import BaseHttpClient, SyncHttpClient, AsyncHttpClient
from .open_graph import OpenGraphProvider
from .persistence import BasePersistor, CsvPersistor, JsonPersistor
from .user_agents import UserAgents
from .tokenizer import Tokenizer
HttpClient = SyncHttpClient
__all__ = [
"DateParser",
"BaseHttpClient",
"SyncHttpClient",
"AsyncHttpClient",
"HttpClient",
"OpenGraphProvider",
"UserAgents",
"BasePersistor",
"CsvPersistor",
"JsonPersistor",
"Tokenizer",
]
@@ -1,22 +0,0 @@
from .queue import QueueManager, QueueSettings
from .schemas import ListingTaskPayload, ArticleTaskPayload, ProcessedTaskPayload
from .tasks import (
schedule_async_crawl,
collect_listing,
collect_article,
forward_for_processing,
)
from .worker import start_worker
__all__ = [
"QueueManager",
"QueueSettings",
"ListingTaskPayload",
"ArticleTaskPayload",
"ProcessedTaskPayload",
"schedule_async_crawl",
"collect_listing",
"collect_article",
"forward_for_processing",
"start_worker",
]
@@ -1,92 +0,0 @@
"""
RQ queue configuration and helpers.
Design choices
- Queue names are prefixed (e.g. `crawler:articles`) so multiple environments
can share the same Redis. Configure via `BASANGO_QUEUE_PREFIX`.
- Job default timeouts and TTLs are centrally configured to avoid per-enqueue
tuning. Environment variables allow ops to adjust at runtime.
- Task callables are referenced by dotted string path when enqueuing to ensure
RQ workers can import them without importing this module and creating cycles.
"""
import os
from dataclasses import dataclass, field
from typing import Iterable
from redis import Redis
from rq import Queue
from .schemas import (
ArticleTaskPayload,
ListingTaskPayload,
ProcessedTaskPayload,
)
@dataclass(slots=True)
class QueueSettings:
redis_url: str = field(
default_factory=lambda: os.getenv( # type: ignore[arg-type]
"BASANGO_REDIS_URL", "redis://localhost:6379/0"
)
)
prefix: str = field(
default_factory=lambda: os.getenv("BASANGO_QUEUE_PREFIX", "crawler")
)
default_timeout: int = field(
default_factory=lambda: int(os.getenv("BASANGO_QUEUE_TIMEOUT", "600"))
)
result_ttl: int = field(
default_factory=lambda: int(os.getenv("BASANGO_QUEUE_RESULT_TTL", "3600"))
)
failure_ttl: int = field(
default_factory=lambda: int(os.getenv("BASANGO_QUEUE_FAILURE_TTL", "3600"))
)
listing_queue: str = "listing"
article_queue: str = "articles"
processed_queue: str = "processed"
class QueueManager:
def __init__(self, settings: QueueSettings | None = None) -> None:
self.settings = settings or QueueSettings()
self.connection = Redis.from_url(self.settings.redis_url)
self.listing_queue = self._build_queue(self.settings.listing_queue)
self.article_queue = self._build_queue(self.settings.article_queue)
self.processed_queue = self._build_queue(self.settings.processed_queue)
def _build_queue(self, suffix: str) -> Queue:
return Queue(
self.queue_name(suffix),
connection=self.connection,
default_timeout=self.settings.default_timeout,
result_ttl=self.settings.result_ttl,
failure_ttl=self.settings.failure_ttl,
)
def queue_name(self, suffix: str) -> str:
return f"{self.settings.prefix}:{suffix}"
def enqueue_listing(self, payload: ListingTaskPayload):
return self.listing_queue.enqueue(
"basango.services.crawler.async.tasks.collect_listing",
payload.to_dict(),
)
def enqueue_article(self, payload: ArticleTaskPayload):
return self.article_queue.enqueue(
"basango.services.crawler.async.tasks.collect_article",
payload.to_dict(),
)
def enqueue_processed(self, payload: ProcessedTaskPayload):
return self.processed_queue.enqueue(
"basango.services.crawler.async.tasks.forward_for_processing",
payload.to_dict(),
)
def iter_queue_names(self) -> Iterable[str]:
yield self.queue_name(self.settings.listing_queue)
yield self.queue_name(self.settings.article_queue)
yield self.queue_name(self.settings.processed_queue)
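A usage sketch for the queue layer, reusing the module's imports (requires a reachable Redis; the source id comes from the shipped config):

settings = QueueSettings(prefix="crawler")  # queues become crawler:listing, ...
manager = QueueManager(settings=settings)
job = manager.enqueue_listing(
    ListingTaskPayload(source_id="radiookapi.net", env="development")
)
print(job.id, list(manager.iter_queue_names()))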
@@ -1,64 +0,0 @@
"""
Lightweight task payload schemas.
Notes
- Use dataclasses with `slots=True` for low overhead and predictable fields.
- `_coerce_kwargs` filters unknown keys so payloads are resilient to schema
changes when workers and producers are not updated in lockstep.
"""
from dataclasses import asdict, dataclass, fields
from typing import Any, Mapping
from basango.domain.article import Article
def _coerce_kwargs(cls, data: Mapping[str, Any]) -> dict[str, Any]:
# Keep only known keys; absent keys fall back to the dataclass defaults
# instead of being clobbered with None.
return {field.name: data[field.name] for field in fields(cls) if field.name in data}
@dataclass(slots=True)
class ListingTaskPayload:
source_id: str
env: str = "development"
page_range: str | None = None
date_range: str | None = None
category: str | None = None
def to_dict(self) -> dict[str, Any]:
return asdict(self)
@classmethod
def from_dict(cls, data: Mapping[str, Any]) -> "ListingTaskPayload":
return cls(**_coerce_kwargs(cls, data))
@dataclass(slots=True)
class ArticleTaskPayload:
source_id: str
env: str = "development"
url: str | None = None
data: Any | None = None
date_range: str | None = None
category: str | None = None
def to_dict(self) -> dict[str, Any]:
return asdict(self)
@classmethod
def from_dict(cls, data: Mapping[str, Any]) -> "ArticleTaskPayload":
return cls(**_coerce_kwargs(cls, data))
@dataclass(slots=True)
class ProcessedTaskPayload:
source_id: str
article: Article
env: str = "development"
def to_dict(self) -> dict[str, Any]:
return asdict(self)
@classmethod
def from_dict(cls, data: Mapping[str, Any]) -> "ProcessedTaskPayload":
return cls(**_coerce_kwargs(cls, data))
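A round-trip sketch for the payload coercion (the `added_later` key is hypothetical, standing in for a newer producer's schema):

payload = ListingTaskPayload.from_dict(
    {"source_id": "beto.cd", "category": "politique", "added_later": "x"}
)
assert payload.env == "development"          # absent keys keep their defaults
assert not hasattr(payload, "added_later")   # unknown keys are filtered out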
@@ -1,305 +0,0 @@
"""
RQ task functions for the asynchronous crawl pipeline.
Pipeline
- schedule_async_crawl: seeds a listing job for a source
- collect_listing: enumerates listing pages and enqueues detail jobs
- collect_article: extracts and persists article data, then forwards
- forward_for_processing: hands the record to downstream system (HTTP API)
Rationale
- Split listing vs article work to keep jobs small and retryable.
- Use ConfigManager to reconstruct the same pipeline/env in workers.
- Persist locally (CSV/JSON) before forwarding to decouple pipelines.
"""
import os
import logging
from typing import Any
from basango.domain.article import Article
from basango.services import SyncHttpClient
from basango.core.config import CrawlerConfig
from basango.core.config_manager import ConfigManager
from basango.domain import DateRange, PageRange, SourceKind, UpdateDirection
from basango.services import JsonPersistor
from basango.services.crawler.html_crawler import HtmlCrawler
from basango.services.crawler.wordpress_crawler import WordpressCrawler
from .queue import QueueManager, QueueSettings
from .schemas import (
ArticleTaskPayload,
ListingTaskPayload,
ProcessedTaskPayload,
)
logger = logging.getLogger(__name__)
def schedule_async_crawl(
*,
source_id: str,
env: str = "development",
page_range: str | None = None,
date_range: str | None = None,
category: str | None = None,
settings: QueueSettings | None = None,
):
# Keep payload serialisable and minimal; workers reconstruct config objects.
payload = ListingTaskPayload(
source_id=source_id,
env=env,
page_range=page_range,
date_range=date_range,
category=category,
)
manager = QueueManager(settings=settings)
job = manager.enqueue_listing(payload)
logger.info("Scheduled listing collection job %s for source %s", job.id, source_id)
return job.id
def collect_listing(payload: dict[str, Any]) -> int:
data = ListingTaskPayload.from_dict(payload)
manager = ConfigManager()
pipeline = manager.get(data.env)
source = pipeline.sources.find(data.source_id)
if source is None:
logger.error("Unknown source id %s", data.source_id)
return 0
crawler_config = CrawlerConfig(
source=source,
page_range=PageRange.create(data.page_range) if data.page_range else None,
date_range=DateRange.create(data.date_range) if data.date_range else None,
category=data.category,
notify=False,
direction=UpdateDirection.FORWARD,
)
client_config = pipeline.fetch.client
queue_manager = QueueManager()
# Branch by source kind to reuse the same high-level flow with different
# extraction strategies.
if source.source_kind == SourceKind.HTML:
crawler = HtmlCrawler(crawler_config, client_config)
queued = _collect_html_listing(crawler, data, queue_manager)
elif source.source_kind == SourceKind.WORDPRESS:
crawler = WordpressCrawler(crawler_config, client_config)
queued = _collect_wordpress_listing(crawler, data, queue_manager)
else:
logger.warning(
"Async crawling not supported for source kind %s", source.source_kind
)
queued = 0
logger.info("Queued %s article detail jobs for source %s", queued, data.source_id)
return queued
def collect_article(payload: dict[str, Any]) -> Article | None:
data = ArticleTaskPayload.from_dict(payload)
manager = ConfigManager()
pipeline = manager.get(data.env)
source = pipeline.sources.find(data.source_id)
if source is None:
logger.error("Unknown source id %s", data.source_id)
return None
crawler_config = CrawlerConfig(
source=source,
date_range=DateRange.create(data.date_range) if data.date_range else None,
category=data.category,
notify=False,
direction=UpdateDirection.FORWARD,
)
# Persist locally first to keep an auditable trail and enable
# replay/recovery independent of downstream availability.
persistors = [
JsonPersistor(
data_dir=pipeline.paths.data,
source_id=str(source.source_id),
),
]
try:
if source.source_kind == SourceKind.HTML:
article = _collect_html_article(
HtmlCrawler(
crawler_config, pipeline.fetch.client, persistors=persistors
),
data,
)
else:
article = _collect_wordpress_article(
WordpressCrawler(
crawler_config, pipeline.fetch.client, persistors=persistors
),
data,
)
queue_manager = QueueManager()
queue_manager.enqueue_processed(
ProcessedTaskPayload(
source_id=data.source_id,
env=data.env,
article=article,
)
)
logger.info(
"Persisted article %s and forwarded to processed queue", article.link
)
return article
except Exception as exc: # noqa: BLE001
logger.error(
"Failed to collect article for source %s url %s: %s",
data.source_id,
data.url,
exc,
)
return None
def forward_for_processing(payload: dict[str, Any]) -> Article | None:
data = ProcessedTaskPayload.from_dict(payload)
manager = ConfigManager()
pipeline = manager.get(data.env)
article = data.article
logger.info(
"Ready for downstream processing: source=%s link=%s",
data.source_id,
article.link,
)
try:
client = SyncHttpClient(client_config=pipeline.fetch.client)
client.post(
os.getenv(
"BASANGO_API_ENDPOINT",
"http://localhost:8000/api/aggregator/articles?token=dev",
),
json=article.to_dict(),
)
logger.info("Forwarded article %s to API", article.link)
return article
except Exception as exc: # noqa: BLE001
logger.error(
"Failed to forward article %s to API: %s",
article.link,
exc,
)
return None
def _collect_html_listing(
crawler: HtmlCrawler,
payload: ListingTaskPayload,
queue_manager: QueueManager,
) -> int:
source = crawler.source
selector = source.source_selectors.articles
if not selector:
logger.warning(
"No article selector configured for HTML source %s",
source.source_id,
)
return 0
page_range = crawler.config.page_range or crawler.get_pagination()
queued = 0
for page in range(page_range.start, page_range.end + 1):
page_url = crawler._build_page_url(page)
try:
soup = crawler.crawl(page_url, page)
except Exception as exc: # noqa: BLE001
logger.exception("Failed to crawl page %s: %s", page_url, exc)
continue
for node in soup.select(selector):
link = crawler._extract_link(node)
if not link:
continue
queue_manager.enqueue_article(
ArticleTaskPayload(
source_id=payload.source_id,
env=payload.env,
url=link,
date_range=payload.date_range,
category=payload.category,
)
)
queued += 1
return queued
def _collect_wordpress_listing(
crawler: WordpressCrawler,
payload: ListingTaskPayload,
queue_manager: QueueManager,
) -> int:
page_range = crawler.config.page_range or crawler.get_pagination()
queued = 0
for page in range(page_range.start, page_range.end + 1):
endpoint = crawler._posts_endpoint(page)
try:
response = crawler.client.get(endpoint)
articles = response.json()
except Exception as exc: # noqa: BLE001
logger.exception("Failed to fetch WordPress page %s: %s", endpoint, exc)
continue
if not isinstance(articles, list):
logger.warning("Unexpected WordPress payload type: %s", type(articles))
continue
for entry in articles:
queue_manager.enqueue_article(
ArticleTaskPayload(
source_id=payload.source_id,
env=payload.env,
url=entry.get("link"),
data=entry,
date_range=payload.date_range,
category=payload.category,
)
)
queued += 1
return queued
def _collect_html_article(
crawler: HtmlCrawler,
payload: ArticleTaskPayload,
) -> Article:
if not payload.url:
logger.warning("Missing article url for HTML source %s", payload.source_id)
raise ValueError("Missing article url")
crawler._current_article_url = payload.url # type: ignore[attr-defined]
try:
soup = crawler.crawl(payload.url)
except Exception as exc: # noqa: BLE001
logger.exception("Failed to crawl article %s: %s", payload.url, exc)
raise exc
return crawler.fetch_one(str(soup), crawler.config.date_range)
def _collect_wordpress_article(
crawler: WordpressCrawler,
payload: ArticleTaskPayload,
) -> Article:
if payload.data is None:
logger.warning("Missing WordPress payload for source %s", payload.source_id)
raise ValueError("Missing WordPress payload")
return crawler.fetch_one(payload.data, crawler.config.date_range)
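A sketch running one listing stage in-process, using the module's names (requires Redis and an existing pipeline config; the page range is illustrative):

payload = ListingTaskPayload(
    source_id="7sur7.cd",
    env="development",
    page_range="1:2",
    category="politique",
)
queued = collect_listing(payload.to_dict())
print(f"{queued} article detail jobs enqueued")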
@@ -1,41 +0,0 @@
"""
Worker bootstrap for RQ queues.
Defaults to the `articles` queue to prioritise article detail processing.
`SimpleWorker` is exposed for environments where forking is unstable (e.g.,
some macOS setups). Use `burst=True` for CI or one-shot consumption.
"""
import logging
from typing import Sequence
from rq import Queue, Worker, SimpleWorker
from .queue import QueueManager, QueueSettings
logger = logging.getLogger(__name__)
def start_worker(
queue_names: Sequence[str] | None = None,
*,
settings: QueueSettings | None = None,
burst: bool = False,
simple: bool = False,
) -> None:
manager = QueueManager(settings=settings)
if queue_names is None or not list(queue_names):
queue_names = [manager.settings.article_queue]
resolved = [manager.queue_name(name) for name in queue_names]
queues = [Queue(name, connection=manager.connection) for name in resolved]
worker_cls = SimpleWorker if simple else Worker
logger.info(
"Starting RQ %s for queues %s",
worker_cls.__name__,
", ".join(resolved),
)
worker = worker_cls(queues, connection=manager.connection)
worker.work(burst=burst)
@@ -1,39 +0,0 @@
"""
Thin indirection layer around async components (queues, tasks, worker).
We import symbols dynamically to avoid importing optional runtime dependencies
like RQ and Redis at module import time. This keeps regular (sync) crawling
usable even if async deps aren't installed, and avoids circular imports when
RQ workers import task callables by string path.
"""
from importlib import import_module
_async_queue = import_module("basango.services.crawler.async.queue")
_async_tasks = import_module("basango.services.crawler.async.tasks")
_async_worker = import_module("basango.services.crawler.async.worker")
_async_schemas = import_module("basango.services.crawler.async.schemas")
QueueManager = getattr(_async_queue, "QueueManager")
QueueSettings = getattr(_async_queue, "QueueSettings")
ListingTaskPayload = getattr(_async_schemas, "ListingTaskPayload")
ArticleTaskPayload = getattr(_async_schemas, "ArticleTaskPayload")
ProcessedTaskPayload = getattr(_async_schemas, "ProcessedTaskPayload")
schedule_async_crawl = getattr(_async_tasks, "schedule_async_crawl")
collect_listing = getattr(_async_tasks, "collect_listing")
collect_article = getattr(_async_tasks, "collect_article")
forward_for_processing = getattr(_async_tasks, "forward_for_processing")
start_worker = getattr(_async_worker, "start_worker")
__all__ = [
"QueueManager",
"QueueSettings",
"ListingTaskPayload",
"ArticleTaskPayload",
"ProcessedTaskPayload",
"schedule_async_crawl",
"collect_listing",
"collect_article",
"forward_for_processing",
"start_worker",
]
@@ -1,161 +0,0 @@
import logging
from abc import ABC, abstractmethod
from dataclasses import asdict, is_dataclass
from datetime import datetime, timezone
from typing import Optional, Any, Dict, List, Sequence
from basango.domain.article import Article
from bs4 import BeautifulSoup
from pydantic import HttpUrl
from basango.core.config import CrawlerConfig, ClientConfig
from basango.domain import DateRange, SourceKind, PageRange
from basango.domain.exception import ArticleOutOfRange
from basango.services import (
HttpClient,
DateParser,
OpenGraphProvider,
BasePersistor,
Tokenizer,
)
class BaseCrawler(ABC):
"""
Base building blocks shared by concrete crawlers.
Notable conventions
- `skip`: raises `ArticleOutOfRange` when an item falls outside the desired
date range. Callers catch it to stop pagination early.
- `save_article`: normalises metadata (including dataclasses) before
handing off to persistors.
"""
def __init__(
self,
crawler_config: CrawlerConfig,
client_config: ClientConfig,
persistors: Sequence[BasePersistor] | None = None,
) -> None:
self.config = crawler_config
self.source = crawler_config.source
self.client = HttpClient(client_config=client_config)
self.persistors: list[BasePersistor] = list(persistors) if persistors else []
self.date_parser = DateParser()
self.open_graph = OpenGraphProvider()
self.tokenizer = Tokenizer()
@abstractmethod
def fetch(self) -> None:
pass
def crawl(self, url: str, page: Optional[int] = None) -> BeautifulSoup:
if page is not None:
logging.info(f"> Page {page}")
response = self.client.get(url).text
return BeautifulSoup(response, "html.parser")
def save_article(
self,
*,
title: str,
link: str,
body: str,
categories: List[str],
timestamp: int,
metadata: Any = None,
) -> Article:
if metadata is None:
metadata_value = None
elif is_dataclass(metadata) and not isinstance(metadata, type):
metadata_value = asdict(metadata)
elif isinstance(metadata, dict):
metadata_value = metadata
else:
metadata_value = None
# Get source_id and ensure it's a string
source_id = getattr(self.source, "source_id", None)
if source_id is None:
source_id = "unknown"
article = Article(
title=title,
link=HttpUrl(link), # Convert str to HttpUrl
body=body,
categories=categories,
source=source_id, # Ensure it's a string, not None
timestamp=datetime.fromtimestamp(
timestamp
), # Convert int timestamp to datetime
metadata=metadata_value,
)
article.token_statistics = self.tokenizer.count_tokens(
article.title, article.body, article.categories
)
self._persist(article.to_dict())
logging.info("> %s [saved]", article.title)
return article
@abstractmethod
def fetch_one(
self, html: str, date_range: Optional[DateRange] = None
) -> Article | None:
pass
@abstractmethod
def get_pagination(self) -> PageRange:
pass
def get_last_page(self) -> int:
return 1
@staticmethod
@abstractmethod
def supports() -> SourceKind:
pass
@classmethod
def initialize(cls) -> None:
logging.info("Initializing Crawler")
def completed(self, notify: bool = False) -> None:
logging.info("Crawling completed")
if notify:
logging.info("Sending notification about completion")
# TODO: Implement notification logic here
self._shutdown_persistors()
@classmethod
def skip(cls, date_range: DateRange, timestamp: str, title: str, date: str) -> None:
if date_range.out_range(int(timestamp)):
# Use an exception to unwind to the crawl loop and stop as soon as
# we detect items beyond the configured range.
raise ArticleOutOfRange.create(timestamp, date_range)
logging.warning(f"> {title} [Skipped {date}]")
def _persist(self, article: Dict[str, Any]) -> None:
for persistor in self.persistors:
try:
persistor.persist(article)
except Exception as exc: # noqa: BLE001
logging.exception(
"Failed to persist article via %s: %s",
persistor.__class__.__name__,
exc,
)
def _shutdown_persistors(self) -> None:
for persistor in self.persistors:
try:
persistor.close()
except Exception as exc: # noqa: BLE001
logging.exception(
"Failed to close persistor %s: %s",
persistor.__class__.__name__,
exc,
)
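# Sketch of the early-exit convention from the class docstring (the selector
# and page list are illustrative; only the exception flow comes from above):
def _drain_pages(crawler: BaseCrawler, page_urls: list[str]) -> None:
    for url in page_urls:
        soup = crawler.crawl(url)
        try:
            for item in soup.select("article"):
                crawler.fetch_one(str(item), crawler.config.date_range)
        except ArticleOutOfRange:
            break  # nothing on later pages can still be inside the range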
@@ -1,322 +0,0 @@
import logging
import re
from datetime import datetime, timezone
from typing import Optional, cast, override, Sequence
from urllib.parse import parse_qs, urljoin, urlparse
from basango.domain.article import Article
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify
from basango.core.config import CrawlerConfig, ClientConfig
from basango.core.config.source_config import HtmlSourceConfig
from basango.domain import DateRange, PageRange, SourceKind
from basango.domain.exception import ArticleOutOfRange
from basango.services.crawler.base_crawler import BaseCrawler
from basango.services import BasePersistor
class HtmlCrawler(BaseCrawler):
"""
Generic HTML crawler driven by CSS selectors.
Strategy
- Listing pages are iterated to extract per-article links or blocks.
- When `requires_details` is set, a second request fetches the article page
to extract full content; otherwise the article block is parsed inline.
- Pagination is inferred from a template and last-page discovery heuristics
(regex or query string `page` fallback).
"""
def __init__(
self,
crawler_config: CrawlerConfig,
client_config: ClientConfig,
persistors: Sequence[BasePersistor] | None = None,
) -> None:
super().__init__(crawler_config, client_config, persistors=persistors)
if not self.source or self.source.source_kind != SourceKind.HTML:
raise ValueError("HtmlCrawler requires a source of kind HTML")
self.source = cast(HtmlSourceConfig, self.source)
self._current_article_url: Optional[str] = None
@override
def fetch(self) -> None:
self.initialize()
page_range = self.config.page_range or self.get_pagination()
date_range = self.config.date_range
article_selector = self.source.source_selectors.articles
if not article_selector:
logging.error(
"No article selector configured for HTML source %s",
self.source.source_id,
)
return
stop = False
for page_number in range(page_range.start, page_range.end + 1):
page_url = self._build_page_url(page_number)
try:
soup = self.crawl(page_url, page_number)
except Exception as exc: # noqa: BLE001
logging.error(
"> page %s => %s [failed]",
page_number,
exc,
)
continue
articles = soup.select(article_selector)
if not articles:
logging.info("No articles found on page %s", page_number)
continue
for article in articles:
try:
self._current_article_url = self._extract_link(article)
target_html = str(article)
if self.source.requires_details:
if not self._current_article_url:
logging.debug(
"Skipping article without link for details on page %s",
page_number,
)
continue
try:
detail_soup = self.crawl(self._current_article_url)
target_html = str(detail_soup)
except Exception as detail_exc: # noqa: BLE001
logging.error(
"Failed to fetch detail page %s: %s",
self._current_article_url,
detail_exc,
)
continue
self.fetch_one(target_html, date_range)
except ArticleOutOfRange:
# Using an exception to short-circuit nested loops keeps the
# happy path tidy (no extra flags at each extraction site).
logging.info("No more articles to fetch in this range.")
stop = True
break
except Exception as exc: # noqa: BLE001
logging.error(
"Failed to process article on %s: %s",
page_url,
exc,
)
finally:
self._current_article_url = None
if stop:
break
self.completed(self.config.notify)
@override
def fetch_one(self, html: str, date_range: Optional[DateRange] = None) -> Article:
soup = BeautifulSoup(html, "html.parser")
selectors = self.source.source_selectors
title = self._extract_text(soup, selectors.article_title) or "Untitled"
link = self._current_article_url or self._extract_link(soup)
if not link:
logging.warning("Skipping article '%s' without link", title)
raise ValueError("Missing article link")
body = self._extract_body(soup, selectors.article_body)
categories = self._extract_categories(soup, selectors.article_categories)
if not categories and self.config.category:
categories = [self.config.category]
raw_date = self._extract_text(soup, selectors.article_date)
timestamp = self._compute_timestamp(raw_date)
if date_range and not date_range.in_range(timestamp):
self.skip(date_range, str(timestamp), title, raw_date or "")
metadata = self.open_graph.consume_html(html)
return self.save_article(
title=title,
link=link,
body=body,
categories=categories,
timestamp=timestamp,
metadata=metadata,
)
@override
def get_pagination(self) -> PageRange:
return PageRange.create(f"0:{self.get_last_page()}")
@override
def get_last_page(self) -> int:
if not self.source:
return 1
if self.source.supports_categories and self.config.category:
path = self.source.pagination_template.replace(
"{category}", self.config.category
)
else:
path = self.source.pagination_template
links = self.crawl(f"{self.source.source_url}{path}").select(
self.source.source_selectors.pagination
)
if not links:
return 1
href = links[-1].get("href")
if not href or not isinstance(href, str):
return 1
# Heuristic: last pagination link either contains the page number
# directly or as a `page` query param. Prefer regex first to support
# path-style pagination (e.g., /page/4/).
match = re.search(r"(\d+)", href)
if match:
return int(match.group(1))
queries = parse_qs(urlparse(href).query)
if "page" in queries and queries["page"]:
try:
return int(queries["page"][0])
except ValueError:
return 1
return 1
@staticmethod
@override
def supports() -> SourceKind:
return SourceKind.HTML
def _build_page_url(self, page: int) -> str:
template = self._apply_category(self.source.pagination_template)
if "{page}" in template:
template = template.format(page=page)
elif page > 0:
separator = "&" if "?" in template else "?"
template = f"{template}{separator}page={page}"
base = str(self.source.source_url)
if not base.endswith("/"):
base = f"{base}/"
return urljoin(base, template.lstrip("/"))
def _apply_category(self, template: str) -> str:
if "{category}" in template:
replacement = self.config.category or ""
return template.replace("{category}", replacement)
return template
def _extract_link(self, node: BeautifulSoup | Tag) -> Optional[str]:
selector = self.source.source_selectors.article_link
if not selector:
return None
target = node.select_one(selector)
if not target:
return None
# Support a few common attributes for link-like elements (href,
# data-href, src) to tolerate variations in markup without custom code.
raw_href = target.get("href") or target.get("data-href") or target.get("src")
href: Optional[str]
if isinstance(raw_href, str):
href = raw_href.strip() or None
elif isinstance(raw_href, list):
href = next(
(
item.strip()
for item in raw_href
if isinstance(item, str) and item.strip()
),
None,
)
else:
href = None
if not href:
return None
return self._to_absolute_url(href)
def _to_absolute_url(self, href: str) -> str:
base = str(self.source.source_url)
if not base.endswith("/"):
base = f"{base}/"
return urljoin(base, href)
@staticmethod
def _extract_text(
node: BeautifulSoup | Tag, selector: Optional[str]
) -> Optional[str]:
if not selector:
return None
target = node.select_one(selector)
if not target:
return None
if target.name == "img":
for attr in ("alt", "title"):
value = target.get(attr)
if isinstance(value, str):
stripped = value.strip()
if stripped:
return stripped
elif isinstance(value, list):
for item in value:
if isinstance(item, str):
stripped = item.strip()
if stripped:
return stripped
text = target.get_text(" ", strip=True)
return text or None
@staticmethod
def _extract_body(node: BeautifulSoup | Tag, selector: Optional[str]) -> str:
if selector:
matches = node.select(selector)
if matches:
parts = [
markdownify(item.get_text(" ", strip=False), heading_style="ATX")
for item in matches
if item.get_text(strip=True)
]
if parts:
# Join paragraphs with bare newlines: callers can post-process
# if needed, but this preserves maximum fidelity.
return "\n".join(parts)
return markdownify(node.get_text(" ", strip=False), heading_style="ATX")
@staticmethod
def _extract_categories(
node: BeautifulSoup | Tag, selector: Optional[str]
) -> list[str]:
if not selector:
return []
values: list[str] = []
for item in node.select(selector):
text = item.get_text(" ", strip=True)
if text:
lower = text.lower()
if lower not in values:
values.append(lower)
return values
def _compute_timestamp(self, raw_date: Optional[str]) -> int:
if not raw_date:
return int(datetime.now(timezone.utc).timestamp())
return self.date_parser.create_timestamp(
raw_date.strip(),
fmt=self.source.source_date.format,
pattern=self.source.source_date.pattern,
replacement=self.source.source_date.replacement,
)
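# Configuration sketch (values illustrative; the selector field names mirror
# the attribute accesses above, and the constructor kwargs are assumptions):
from pydantic import HttpUrl
from basango.core.config.source_config import SourceSelectors

source = HtmlSourceConfig(
    source_id="example-news",
    source_url=HttpUrl("https://news.example.com"),
    pagination_template="news?page={page}",
    source_selectors=SourceSelectors(
        articles="article.post",
        article_title="h1.entry-title",
        article_link="a.permalink",
        article_body="div.entry-content",
        article_date="time.published",
        pagination="ul.pagination > li a",
    ),
)
HtmlCrawler(CrawlerConfig(source=source), ClientConfig()).fetch()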
@@ -1,187 +0,0 @@
import json
import logging
from datetime import datetime, timezone
from typing import Optional, override, cast, Final, Any, Sequence
from markdownify import markdownify
from basango.domain.article import Article
from bs4 import BeautifulSoup
from basango.core.config import WordPressSourceConfig, CrawlerConfig, ClientConfig
from basango.domain import SourceKind, PageRange, DateRange
from basango.domain.exception import ArticleOutOfRange
from basango.services.crawler.base_crawler import BaseCrawler
from basango.services import BasePersistor
class WordpressCrawler(BaseCrawler):
"""
WordPress REST API crawler.
It uses the `/wp-json/wp/v2/posts` endpoints and limits fields to reduce
payload size. Pagination is driven by WordPress headers `x-wp-totalpages`
and `x-wp-total`. Category IDs are mapped to slugs via a secondary endpoint
and cached per run.
"""
def __init__(
self,
crawler_config: CrawlerConfig,
client_config: ClientConfig,
persistors: Sequence[BasePersistor] | None = None,
) -> None:
super().__init__(crawler_config, client_config, persistors=persistors)
if not self.source or self.source.source_kind != SourceKind.WORDPRESS:
raise ValueError("WordpressCrawler requires a source of kind WORDPRESS")
self.source = cast(WordPressSourceConfig, self.source)
self.category_map: dict[int, str] = {}
POST_QUERY: Final = "_fields=date,slug,link,title.rendered,content.rendered,categories&orderby=date&order=desc"
CATEGORY_QUERY: Final = (
"_fields=id,slug,count&orderby=count&order=desc&per_page=100"
)
TOTAL_PAGES_HEADER: Final = "x-wp-totalpages"
TOTAL_POSTS_HEADER: Final = "x-wp-total"
@override
def fetch(self) -> None:
self.initialize()
page_range = self.config.page_range or self.get_pagination()
date_range = self.config.date_range
stop = False
for page_number in range(page_range.start, page_range.end + 1):
endpoint = self._posts_endpoint(page_number)
try:
response = self.client.get(endpoint)
payload = response.text
articles = json.loads(payload)
except Exception as exc: # noqa: BLE001
logging.error(
"> page %s => %s [failed]",
page_number,
exc,
)
continue
for article in articles:
try:
self.fetch_one(article, date_range)
except ArticleOutOfRange:
# Same early-exit semantic as HtmlCrawler
logging.info("No more articles to fetch in this range.")
stop = True
break
except Exception as exc: # noqa: BLE001
logging.error(
"Failed to process WordPress article on page %s: %s",
page_number,
exc,
)
if stop:
break
self.completed(self.config.notify)
@override
def fetch_one(self, html: Any, date_range: Optional[DateRange] = None) -> Article:
try:
data = json.loads(html) if isinstance(html, str) else html
except json.JSONDecodeError as exc:
logging.error("Failed to decode WordPress payload: %s", exc)
raise exc
if not isinstance(data, dict):
logging.error("Skipping unexpected WordPress payload: %s", type(data))
raise ValueError("Unexpected WordPress payload type")
link = data.get("link")
if not link:
logging.error("Skipping WordPress article without link")
raise ValueError("WordPress article without link")
title_html = data.get("title", {}).get("rendered", "")
body_html = data.get("content", {}).get("rendered", "")
title = BeautifulSoup(title_html, "html.parser").get_text(" ", strip=True)
body = markdownify(
BeautifulSoup(body_html, "html.parser").get_text(" ", strip=False),
heading_style="ATX",
)
timestamp = self._compute_timestamp(data.get("date"))
categories_value = self._map_categories(data.get("categories", []))
categories = [item for item in categories_value.split(",") if item]
if date_range and not date_range.in_range(timestamp):
self.skip(date_range, str(timestamp), title, data.get("date", ""))
metadata = self.open_graph.consume_url(link)
return self.save_article(
title=title or data.get("slug", "Untitled"),
link=link,
body=body,
categories=categories,
timestamp=timestamp,
metadata=metadata,
)
@override
def get_pagination(self) -> PageRange:
response = self.client.get(
f"{self.source.source_url}wp-json/wp/v2/posts?_fields=id&per_page=100"
)
pages = int(response.headers.get(self.TOTAL_PAGES_HEADER, "1"))
posts = int(response.headers.get(self.TOTAL_POSTS_HEADER, "0"))
logging.info("WordPress Pagination %s posts in %s pages", posts, pages)
return PageRange.create(f"1:{pages}")
def _fetch_categories(self) -> None:
response = self.client.get(
f"{self.source.source_url}wp-json/wp/v2/categories?{self.CATEGORY_QUERY}"
)
for category in response.json():
self.category_map[int(category["id"])] = category["slug"]
def _map_categories(self, categories: list[int]) -> str:
if not self.category_map:
self._fetch_categories()
return ",".join(
self.category_map[category]
for category in sorted(categories)
if category in self.category_map
)
def _posts_endpoint(self, page: int) -> str:
base = str(self.source.source_url)
if not base.endswith("/"):
base = f"{base}/"
return f"{base}wp-json/wp/v2/posts?{self.POST_QUERY}&page={page}&per_page=100"
@staticmethod
def _compute_timestamp(raw: Optional[str]) -> int:
if not raw:
return int(datetime.now(timezone.utc).timestamp())
cleaned = raw.replace("Z", "+00:00")
try:
dt = datetime.fromisoformat(cleaned)
except ValueError:
return int(datetime.now(timezone.utc).timestamp())
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return int(dt.timestamp())
@override
def get_last_page(self) -> int:
return 1
@staticmethod
@override
def supports() -> SourceKind:
return SourceKind.WORDPRESS
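# Usage sketch (URLs illustrative; the config shape mirrors the test suite):
from pydantic import HttpUrl

source = WordPressSourceConfig(
    source_id="example-blog",
    source_url=HttpUrl("https://blog.example.com/"),
)
WordpressCrawler(CrawlerConfig(source=source), ClientConfig()).fetch()
# fetch() walks /wp-json/wp/v2/posts 100 posts at a time, bounded by the
# x-wp-totalpages header reported by get_pagination().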
@@ -1,82 +0,0 @@
import logging
import re
from datetime import datetime, timezone
from typing import Optional
class DateParser:
MONTHS = {
"janvier": "01",
"février": "02",
"mars": "03",
"avril": "04",
"mai": "05",
"juin": "06",
"juillet": "07",
"août": "08",
"septembre": "09",
"octobre": "10",
"novembre": "11",
"décembre": "12",
}
DAYS = {
"dimanche": "0",
"lundi": "1",
"mardi": "2",
"mercredi": "3",
"jeudi": "4",
"vendredi": "5",
"samedi": "6",
}
DEFAULT_DATE_FORMAT = "%Y-%m-%d %H:%M"
@classmethod
def _apply_substitution(
cls, date: str, pattern: Optional[str], replacement: Optional[str]
) -> str:
if not pattern or replacement is None:
return date
# Accept PHP-like patterns with leading/trailing slashes
if len(pattern) >= 2 and pattern[0] == "/" and pattern.rfind("/") > 0:
pattern = pattern[1 : pattern.rfind("/")]
# Convert $1 to \1 for Python
replacement = re.sub(r"\$(\d+)", r"\\\1", replacement)
try:
return re.sub(pattern, replacement, date)
except re.error:
logging.error(f"[DateParser] Could not convert {pattern} to {replacement}")
return date
def create_timestamp(
self,
date: str,
fmt: Optional[str] = None,
pattern: Optional[str] = None,
replacement: Optional[str] = None,
) -> int:
# Normalize and translate French day/month words
date = date.lower()
for k, v in self.DAYS.items():
date = date.replace(k, v)
for k, v in self.MONTHS.items():
date = date.replace(k, v)
# Optional regex transform
date = self._apply_substitution(date, pattern, replacement)
fmt = fmt or self.DEFAULT_DATE_FORMAT
try:
dt = datetime.strptime(date, fmt).replace(tzinfo=timezone.utc)
return int(dt.timestamp())
except Exception as e:
logging.error(
f"[DateParser] Could not parse date '{date}' with format '{fmt}': {e}"
)
dt = datetime.now(timezone.utc).replace(
hour=0, minute=0, second=0, microsecond=0
)
return int(dt.timestamp())
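# Worked example (taken from the test suite below): French words become
# numbers first, then the PHP-style pattern reorders the pieces to fit `fmt`.
parser = DateParser()
ts = parser.create_timestamp(
    "Mardi 8 octobre 2024 - 00:00",  # lowercased, then -> "2 8 10 2024 - 00:00"
    fmt="%Y-%m-%d %H:%M",
    pattern=r"/(\d{1}) (\d{1,2}) (\d{2}) (\d{4}) - (\d{2}:\d{2})/",
    replacement=r"$4-$3-$2 $5",  # -> "2024-10-8 00:00"
)
assert ts == 1728345600  # 2024-10-08 00:00:00 UTC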
@@ -1,9 +0,0 @@
from .base_http_client import BaseHttpClient
from .sync_http_client import SyncHttpClient
from .async_http_client import AsyncHttpClient
__all__ = [
"BaseHttpClient",
"SyncHttpClient",
"AsyncHttpClient",
]
@@ -1,121 +0,0 @@
import asyncio
from dataclasses import dataclass, field
import httpx
from .base_http_client import (
BaseHttpClient,
HttpData,
HttpHeaders,
HttpParams,
TRANSIENT_STATUSES,
)
@dataclass
class AsyncHttpClient(BaseHttpClient):
_client: httpx.AsyncClient = field(init=False, repr=False)
def __post_init__(self) -> None:
super().__post_init__()
self._client = httpx.AsyncClient(
follow_redirects=self.client_config.follow_redirects,
max_redirects=5,
verify=self.client_config.verify_ssl,
timeout=self.client_config.timeout,
headers=dict(self._headers),
)
async def __aenter__(self) -> "AsyncHttpClient":
return self
async def __aexit__(self, exc_type, exc, tb) -> None:
await self.aclose()
def close(self) -> None:
if self._client.is_closed:
return
try:
loop = asyncio.get_running_loop()
except RuntimeError: # no running loop
asyncio.run(self.aclose())
else:
# Keep a reference so the fire-and-forget close task is not
# garbage-collected before it runs.
self._close_task = loop.create_task(self.aclose())
async def aclose(self) -> None:
try:
await self._client.aclose()
except Exception: # noqa: BLE001
pass
async def _request(
self,
method: str,
url: str,
*,
headers: HttpHeaders = None,
params: HttpParams = None,
data: HttpData = None,
json: HttpData = None,
) -> httpx.Response:
attempt = 0
while True:
try:
response = await self._client.request(
method,
url,
headers=self._build_headers(headers),
params=params,
data=data,
json=json,
)
if (
response.status_code in TRANSIENT_STATUSES
) and attempt < self.client_config.max_retries:
await asyncio.sleep(self._retry_delay(attempt, response))
attempt += 1
continue
response.raise_for_status()
return response
except httpx.HTTPStatusError as exc:
status = exc.response.status_code if exc.response else 0
if (
status in TRANSIENT_STATUSES
) and attempt < self.client_config.max_retries:
await asyncio.sleep(self._retry_delay(attempt, exc.response))
attempt += 1
continue
raise
except httpx.RequestError:
if attempt < self.client_config.max_retries:
await asyncio.sleep(self._compute_backoff(attempt))
attempt += 1
continue
raise
async def get(
self,
url: str,
*,
headers: HttpHeaders = None,
params: HttpParams = None,
) -> httpx.Response:
return await self._request("GET", url, headers=headers, params=params)
async def post(
self,
url: str,
*,
headers: HttpHeaders = None,
params: HttpParams = None,
data: HttpData = None,
json: HttpData = None,
) -> httpx.Response:
return await self._request(
"POST",
url,
headers=headers,
params=params,
data=data,
json=json,
)
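# Usage sketch (URL illustrative): the async context manager guarantees the
# underlying httpx.AsyncClient is closed.
from basango.core.config import ClientConfig

async def _demo() -> None:
    async with AsyncHttpClient(client_config=ClientConfig()) as client:
        response = await client.get("https://example.com/")
        print(response.status_code)
# run with: asyncio.run(_demo())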
@@ -1,87 +0,0 @@
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from typing import Any, Optional, TypeAlias
import httpx
from basango.core.config import ClientConfig
from basango.services.user_agents import UserAgents
HttpHeaders: TypeAlias = dict[str, str] | None
HttpParams: TypeAlias = dict[str, Any] | None
HttpData: TypeAlias = Any | None
TRANSIENT_STATUSES = (429, 500, 502, 503, 504)
@dataclass
class BaseHttpClient(ABC):
client_config: ClientConfig
user_agent_provider: UserAgents | None = None
default_headers: HttpHeaders = None
_user_agent: str = field(init=False, repr=False)
_headers: dict[str, str] = field(init=False, repr=False)
def __post_init__(self) -> None:
provider = self.user_agent_provider or UserAgents(
rotate=self.client_config.rotate,
fallback=self.client_config.user_agent,
)
user_agent = provider.get()
self._user_agent = user_agent if user_agent else self.client_config.user_agent
headers = {"User-Agent": self._user_agent}
if self.default_headers:
headers.update(self.default_headers)
self._headers = headers
def _compute_backoff(self, attempt: int) -> float:
base = min(
self.client_config.backoff_initial
* (self.client_config.backoff_multiplier**attempt),
self.client_config.backoff_max,
)
jitter = random.uniform(0, base * 0.25)
return base + jitter
def _retry_delay(
self, attempt: int, response: Optional[httpx.Response] = None
) -> float:
delay = 0.0
if response is not None and self.client_config.respect_retry_after:
retry_after = (
response.headers.get("Retry-After") if response.headers else None
)
if retry_after:
delay = self._parse_retry_after(retry_after)
if delay == 0.0:
delay = self._compute_backoff(attempt)
return delay
@staticmethod
def _parse_retry_after(header_value: str) -> float:
try:
return max(0.0, float(int(header_value)))
except (TypeError, ValueError):
try:
dt = parsedate_to_datetime(header_value)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
now = datetime.now(timezone.utc)
return max(0.0, (dt - now).total_seconds())
except Exception: # noqa: BLE001
return 0.0
def _build_headers(self, headers: HttpHeaders = None) -> dict[str, str]:
merged = dict(self._headers)
if headers:
merged.update(headers)
return merged
@abstractmethod
def close(self) -> None: # pragma: no cover - enforced by subclasses
"""Close the underlying HTTPX client."""
@@ -1,107 +0,0 @@
import time
from dataclasses import dataclass, field
import httpx
from .base_http_client import (
BaseHttpClient,
HttpData,
HttpHeaders,
HttpParams,
TRANSIENT_STATUSES,
)
@dataclass
class SyncHttpClient(BaseHttpClient):
_client: httpx.Client = field(init=False, repr=False)
def __post_init__(self) -> None:
super().__post_init__()
self._client = httpx.Client(
follow_redirects=self.client_config.follow_redirects,
max_redirects=5,
verify=self.client_config.verify_ssl,
timeout=self.client_config.timeout,
headers=dict(self._headers),
)
def __enter__(self) -> "SyncHttpClient":
return self
def __exit__(self, exc_type, exc, tb) -> None:
self.close()
def close(self) -> None:
try:
self._client.close()
except Exception: # noqa: BLE001
pass
def _request(
self,
method: str,
url: str,
*,
headers: HttpHeaders = None,
params: HttpParams = None,
data: HttpData = None,
json: HttpData = None,
) -> httpx.Response:
attempt = 0
while True:
try:
response = self._client.request(
method,
url,
headers=self._build_headers(headers),
params=params,
data=data,
json=json,
)
if (
response.status_code in TRANSIENT_STATUSES
) and attempt < self.client_config.max_retries:
time.sleep(self._retry_delay(attempt, response))
attempt += 1
continue
response.raise_for_status()
return response
except httpx.HTTPStatusError as exc:
status = exc.response.status_code if exc.response else 0
if (
status in TRANSIENT_STATUSES
) and attempt < self.client_config.max_retries:
time.sleep(self._retry_delay(attempt, exc.response))
attempt += 1
continue
raise
except httpx.RequestError:
if attempt < self.client_config.max_retries:
time.sleep(self._compute_backoff(attempt))
attempt += 1
continue
raise
def get(
self, url: str, *, headers: HttpHeaders = None, params: HttpParams = None
) -> httpx.Response:
return self._request("GET", url, headers=headers, params=params)
def post(
self,
url: str,
*,
headers: HttpHeaders = None,
params: HttpParams = None,
data: HttpData = None,
json: HttpData = None,
) -> httpx.Response:
return self._request(
"POST",
url,
headers=headers,
params=params,
data=data,
json=json,
)
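# Usage sketch (URL illustrative): transient statuses (429/5xx) are retried
# with backoff inside _request before the response is returned or raised.
from basango.core.config import ClientConfig

with SyncHttpClient(client_config=ClientConfig()) as client:
    response = client.get("https://example.com/")
    print(response.status_code)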
@@ -1,55 +0,0 @@
import logging
from dataclasses import dataclass
from typing import Optional
import trafilatura
from basango.core.config import ClientConfig
from basango.services.http_client import SyncHttpClient
from basango.services.user_agents import UserAgents
@dataclass
class OpenGraphObject:
title: Optional[str] = None
description: Optional[str] = None
image: Optional[str] = None
url: Optional[str] = None
class OpenGraphProvider:
def __init__(
self, user_agent_provider: Optional[UserAgents] = None
) -> None:
# Avoid a shared default instance; build a fresh provider per object.
self._user_agent = (user_agent_provider or UserAgents(rotate=False)).og()
self._http_client = SyncHttpClient(
client_config=ClientConfig(),
default_headers={"User-Agent": self._user_agent},
)
def consume_url(self, url: str) -> OpenGraphObject | None:
try:
logging.info(f"[OpenGraphProvider] Consuming url: {url}")
html = self._http_client.get(url).text
return self.consume_html(html, url)
except Exception as e:
logging.exception(f"[OpenGraphProvider] Failed to consume url: {e}")
return None
@classmethod
def consume_html(
cls, html: str, url: Optional[str] = None
) -> OpenGraphObject | None:
try:
meta = trafilatura.extract_metadata(html, default_url=url)
if not meta:
return None
return OpenGraphObject(
title=meta.title or None,
description=meta.description or None,
image=meta.image or None,
url=url,
)
except Exception as e:
logging.error(f"[OpenGraphProvider] Failed to extract metadata: {e}")
return None
@@ -1,9 +0,0 @@
from .base_persistor import BasePersistor
from .csv_persistor import CsvPersistor
from .json_persistor import JsonPersistor
__all__ = [
"BasePersistor",
"CsvPersistor",
"JsonPersistor",
]
@@ -1,14 +0,0 @@
from abc import ABC, abstractmethod
from typing import Mapping, Any
class BasePersistor(ABC):
"""Abstract interface for article persistence backends."""
@abstractmethod
def persist(self, article: Mapping[str, Any]) -> None:
"""Persist a single article payload."""
def close(self) -> None: # pragma: no cover - optional override
"""Hook for subclasses that need explicit shutdown."""
return None
@@ -1,79 +0,0 @@
import csv
import json
from dataclasses import dataclass, field
from pathlib import Path
from threading import Lock
from typing import Any, Mapping, Sequence
from .base_persistor import BasePersistor
DEFAULT_FIELDS = (
"title",
"link",
"body",
"categories",
"source",
"timestamp",
"metadata",
)
@dataclass
class CsvPersistor(BasePersistor):
data_dir: Path
source_id: str
fieldnames: Sequence[str] = DEFAULT_FIELDS
encoding: str = "utf-8"
_file_path: Path = field(init=False, repr=False)
_lock: Lock = field(default_factory=Lock, init=False, repr=False)
_header_written: bool = field(default=False, init=False, repr=False)
def __post_init__(self) -> None:
# Pre-create output directory and detect existing header to avoid
# re-writing it across process restarts.
self.data_dir.mkdir(parents=True, exist_ok=True)
self._file_path = self.data_dir / f"{self.source_id}.csv"
if self._file_path.exists() and self._file_path.stat().st_size > 0:
self._header_written = True
def persist(self, article: Mapping[str, Any]) -> None:
record = self._serialise(article)
# File writes are guarded by a process-local lock to tolerate threads
# sharing the same persistor instance.
with self._lock:
needs_header = not self._header_written or not self._file_path.exists()
with self._file_path.open(
"a", newline="", encoding=self.encoding
) as handle:
writer = csv.DictWriter(
handle,
fieldnames=self.fieldnames,
quoting=csv.QUOTE_ALL,
lineterminator="\n",
)
if needs_header:
writer.writeheader()
self._header_written = True
writer.writerow(record)
def _serialise(self, article: Mapping[str, Any]) -> dict[str, Any]:
categories = article.get("categories")
if isinstance(categories, (list, tuple)):
serialised_categories = ";".join(str(item) for item in categories)
else:
serialised_categories = categories
metadata = article.get("metadata")
if metadata is None or isinstance(metadata, str):
serialised_metadata = metadata
else:
# JSON-encode metadata to a compact, CSV-safe string; csv will quote it.
serialised_metadata = json.dumps(
metadata, ensure_ascii=True, separators=(",", ":"), sort_keys=True
)
record = {name: article.get(name) for name in self.fieldnames}
record["categories"] = serialised_categories
record["metadata"] = serialised_metadata
return record
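# Usage sketch (payload values illustrative): categories collapse to a
# ";"-joined string and metadata to compact JSON, as _serialise shows above.
persistor = CsvPersistor(data_dir=Path("data"), source_id="example-news")
persistor.persist(
    {
        "title": "Hello",
        "link": "https://example.com/hello",
        "body": "Body text",
        "categories": ["news", "tech"],  # written as "news;tech"
        "source": "example-news",
        "timestamp": 1728345600,
        "metadata": {"image": "https://example.com/og.png"},
    }
)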
@@ -1,28 +0,0 @@
import json
from dataclasses import dataclass, field
from pathlib import Path
from threading import Lock
from typing import Any, Mapping
from .base_persistor import BasePersistor
@dataclass
class JsonPersistor(BasePersistor):
data_dir: Path
source_id: str
suffix: str = ".jsonl"
encoding: str = "utf-8"
_file_path: Path = field(init=False, repr=False)
_lock: Lock = field(default_factory=Lock, init=False, repr=False)
def __post_init__(self) -> None:
self.data_dir.mkdir(parents=True, exist_ok=True)
self._file_path = self.data_dir / f"{self.source_id}{self.suffix}"
def persist(self, article: Mapping[str, Any]) -> None:
payload = json.dumps(article, ensure_ascii=False)
with self._lock:
with self._file_path.open("a", encoding=self.encoding) as handle:
handle.write(payload)
handle.write("\n")
@@ -1,56 +0,0 @@
"""
Tokenizer utilities for counting and encoding article text.
This module wraps the `tiktoken` encoder to provide simple helpers for:
- encoding/decoding text to token ids
- counting tokens for different parts of an Article
The `Tokenizer` can be constructed with either a specific `model` (preferred)
or an `encoding` name fallback.
"""
import logging
import tiktoken
from typing import Optional
from basango.domain.token_statistics import TokenStatistics
class Tokenizer:
"""Thin wrapper around tiktoken encoder for token operations."""
def __init__(
self, encoding: str = "cl100k_base", model: Optional[str] = None
) -> None:
self.encoding = encoding
# Prefer model-based encoding lookup if a model is provided.
self.tokenizer = (
tiktoken.encoding_for_model(model)
if model
else tiktoken.get_encoding(encoding)
)
def encode(self, text: str) -> list[int]:
"""Encode text into a list of token ids."""
return self.tokenizer.encode(text)
def decode(self, tokens: list[int]) -> str:
"""Decode a list of token ids back into a string."""
return self.tokenizer.decode(tokens)
def count_tokens(
self, title: str, body: str, categories: list[str]
) -> TokenStatistics:
"""Return token counts for the provided Article.
The excerpt count is computed on the first 200 characters of the body
to give a quick estimate of a short preview's token length.
"""
logging.info(f"[Tokenizer] tokenizing {title}...")
return TokenStatistics(
title=len(self.encode(title)),
body=len(self.encode(body)),
excerpt=len(self.encode(body[:200])),
categories=len(self.encode(", ".join(categories))),
)
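# Usage sketch (strings illustrative): the returned TokenStatistics carries
# separate counts for title, body, a 200-character excerpt, and categories.
tokenizer = Tokenizer()  # cl100k_base unless a model name is supplied
stats = tokenizer.count_tokens(
    "Example headline",
    "Body text of the article...",
    ["news", "tech"],
)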
@@ -1,28 +0,0 @@
import random
from dataclasses import dataclass
@dataclass
class UserAgents:
USER_AGENTS = [
"Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
"Mozilla/5.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
"Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0",
"Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533",
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)",
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4",
"Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601",
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9",
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7",
"Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2",
]
rotate: bool = True
fallback: str = "Basango/0.1 (+https://github.com/bernard-ng/basango)"
def get(self) -> str:
return random.choice(self.USER_AGENTS) if self.rotate else self.fallback
@classmethod
def og(cls) -> str:
return "facebookexternalhit/1.1"
@@ -1,57 +0,0 @@
from datetime import datetime, timezone
import pytest
from basango.domain import DateRange
def ts(y: int, m: int, d: int, hh: int = 0, mm: int = 0, ss: int = 0) -> int:
return int(datetime(y, m, d, hh, mm, ss, tzinfo=timezone.utc).timestamp())
def test_from_parses_two_dates_with_default_format() -> None:
dr = DateRange.create("2024-10-01:2024-10-08")
assert dr.start == ts(2024, 10, 1)
assert dr.end == ts(2024, 10, 8)
def test_str_and_format_roundtrip() -> None:
dr = DateRange.create("2024-10-01:2024-10-02")
assert str(dr) == f"{ts(2024, 10, 1)}:{ts(2024, 10, 2)}"
assert dr.format("%Y-%m-%d") == "2024-10-01:2024-10-02"
def test_in_range_out_range_inclusive_boundaries() -> None:
dr = DateRange.create("2024-10-01:2024-10-02")
start = ts(2024, 10, 1)
end = ts(2024, 10, 2)
before = start - 1
after = end + 1
midday_end = ts(2024, 10, 2, 12, 0, 0)
assert dr.in_range(start) is True
assert dr.in_range(end) is True
assert dr.out_range(before) is True
# End is at 00:00 of end day; times later that day are outside
assert dr.out_range(midday_end) is True
assert dr.out_range(after) is True
def test_backward_uses_days_and_next_day_end() -> None:
base = datetime(2024, 10, 31, tzinfo=timezone.utc)
dr = DateRange.backward(date=base, days=10)
assert dr.start == ts(2024, 10, 21)
assert dr.end == ts(2024, 11, 1)
def test_from_raises_on_invalid_separator_or_spec() -> None:
with pytest.raises(AssertionError):
DateRange.create("2024-10-01:2024-10-08", separator="")
with pytest.raises(AssertionError):
DateRange.create("2024-10-01", separator=":")
def test_from_accepts_python_format_string() -> None:
dr = DateRange.create("2024/10/01|2024/10/02", fmt="%Y/%m/%d", separator="|")
assert dr.start == ts(2024, 10, 1)
assert dr.end == ts(2024, 10, 2)
@@ -1,19 +0,0 @@
import pytest
from basango.domain import PageRange
def test_it_should_create_page_range():
pr = PageRange.create("1:10")
assert pr.start == 1
assert pr.end == 10
def test_end_page_should_be_greater_than_start_page():
with pytest.raises(AssertionError):
PageRange.create("10:1")
def test_non_negative_pages():
with pytest.raises(AssertionError):
PageRange.create("-1:-10")
@@ -1,291 +0,0 @@
from unittest.mock import patch
import pytest
from bs4 import BeautifulSoup
from pydantic import HttpUrl
from basango.core.config import WordPressSourceConfig
from basango.core.config.fetch_config import CrawlerConfig, ClientConfig
from basango.core.config.source_config import HtmlSourceConfig, SourceSelectors
from basango.domain import SourceKind, PageRange
from basango.services.crawler.html_crawler import HtmlCrawler
class TestHtmlCrawler:
"""Test suite for HtmlCrawler."""
@pytest.fixture
def mock_client_config(self):
return ClientConfig()
@pytest.fixture
def mock_html_source_config(self):
return HtmlSourceConfig(
source_id="test_source",
source_url=HttpUrl("https://example.com"),
pagination_template="news",
source_selectors=SourceSelectors(pagination="ul.pagination > li a"),
supports_categories=True,
)
@pytest.fixture
def mock_crawler_config(self, mock_html_source_config):
return CrawlerConfig(source=mock_html_source_config, category="tech")
@pytest.fixture
def html_crawler(self, mock_crawler_config, mock_client_config):
return HtmlCrawler(mock_crawler_config, mock_client_config)
def test_with_valid_html_source(self, html_crawler):
"""Test __init__ with valid HTML source config."""
assert html_crawler.source.source_kind == SourceKind.HTML
assert isinstance(html_crawler.source, HtmlSourceConfig)
def test_with_invalid_source_kind_raises_error(self, mock_client_config):
"""Test __init__ raises ValueError when source kind is not HTML."""
wordpress_source = WordPressSourceConfig(
source_id="test_wordpress",
source_url=HttpUrl("https://example.com"),
)
config = CrawlerConfig(source=wordpress_source)
with pytest.raises(
ValueError, match="HtmlCrawler requires a source of kind HTML"
):
HtmlCrawler(config, mock_client_config)
def test_with_no_source_raises_error(self, mock_client_config):
"""Test __init__ raises ValueError when no source is provided."""
config = CrawlerConfig(source=None)
with pytest.raises(
ValueError, match="HtmlCrawler requires a source of kind HTML"
):
HtmlCrawler(config, mock_client_config)
def test_get_pagination_returns_valid_page_range(self, html_crawler):
"""Test that get_pagination returns a valid PageRange."""
with patch.object(html_crawler, "get_last_page", return_value=5):
result = html_crawler.get_pagination()
assert isinstance(result, PageRange)
assert result.start == 0
assert result.end == 5
assert str(result) == "0:5"
def test_get_last_page_with_valid_pagination_links(self, html_crawler):
"""Test get_last_page extracts page number from pagination links."""
# Mock HTML with pagination links
mock_html = """
<ul class="pagination">
<li><a href="/news?page=1">1</a></li>
<li><a href="/news?page=2">2</a></li>
<li><a href="/news?page=3">3</a></li>
<li><a href="/news?page=10">10</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 10
def test_get_last_page_with_no_pagination_links(self, html_crawler):
"""Test get_last_page returns 1 when no pagination links found."""
mock_html = "<div>No pagination here</div>"
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 1
def test_get_last_page_with_empty_href(self, html_crawler):
"""Test get_last_page returns 1 when href is empty or None."""
mock_html = """
<ul class="pagination">
<li><a>No href</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 1
def test_get_last_page_with_regex_extraction(self, html_crawler):
"""Test get_last_page extracts page number using regex."""
mock_html = """
<ul class="pagination">
<li><a href="/articles/page/25/">Page 25</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 25
def test_get_last_page_with_query_parameters(self, html_crawler):
"""Test get_last_page extracts page number from query parameters."""
mock_html = """
<ul class="pagination">
<li><a href="/news?category=tech&page=15&sort=date">Last</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 15
def test_get_last_page_with_invalid_page_parameter(self, html_crawler):
"""Test get_last_page returns 1 when page parameter is invalid."""
mock_html = """
<ul class="pagination">
<li><a href="/news?page=invalid">Last</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 1
def test_get_last_page_with_category_support(self, html_crawler):
"""Test get_last_page uses category in URL when supported."""
mock_html = """
<ul class="pagination">
<li><a href="/news?category=tech&page=8">8</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl") as mock_crawl:
mock_crawl.return_value = mock_soup
html_crawler.get_last_page()
# The URL construction concatenates source_url with the path
# Since the template doesn't contain {category}, it should remain unchanged
expected_url = "https://example.com/news"
mock_crawl.assert_called_once_with(expected_url)
def test_get_last_page_with_category_template(self, mock_client_config):
"""Test get_last_page uses category replacement when template contains {category}."""
source_config = HtmlSourceConfig(
source_id="test_source",
source_url=HttpUrl("https://example.com"),
pagination_template="news/{category}",
source_selectors=SourceSelectors(pagination="ul.pagination > li a"),
supports_categories=True,
)
crawler_config = CrawlerConfig(source=source_config, category="tech")
crawler = HtmlCrawler(crawler_config, mock_client_config)
mock_html = """
<ul class="pagination">
<li><a href="/news/tech?page=5">5</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(crawler, "crawl") as mock_crawl:
mock_crawl.return_value = mock_soup
crawler.get_last_page()
expected_url = "https://example.com/news/tech"
mock_crawl.assert_called_once_with(expected_url)
def test_get_last_page_without_category_support(self, html_crawler):
"""Test get_last_page uses default template when categories not supported."""
# Modify source to not support categories
html_crawler.source.supports_categories = False
mock_html = """
<ul class="pagination">
<li><a href="/news?page=5">5</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl") as mock_crawl:
mock_crawl.return_value = mock_soup
html_crawler.get_last_page()
# Verify the URL was constructed without category replacement
expected_url = "https://example.com/news"
mock_crawl.assert_called_once_with(expected_url)
def test_get_last_page_without_category_in_config(
self, mock_client_config, mock_html_source_config
):
"""Test get_last_page uses default template when no category in config."""
config = CrawlerConfig(source=mock_html_source_config, category=None)
crawler = HtmlCrawler(config, mock_client_config)
mock_html = """
<ul class="pagination">
<li><a href="/news?page=3">3</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(crawler, "crawl") as mock_crawl:
mock_crawl.return_value = mock_soup
crawler.get_last_page()
# Verify the URL was constructed without category replacement
expected_url = "https://example.com/news"
mock_crawl.assert_called_once_with(expected_url)
def test_get_last_page_with_multiple_numbers_in_href(self, html_crawler):
"""Test get_last_page extracts first number when multiple numbers present."""
mock_html = """
<ul class="pagination">
<li><a href="/news/2024/page/42/comments/100">Last</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
# Should extract the first number found (2024)
assert result == 2024
def test_supports_html_source_kind(self):
"""Test that supports method returns True for HTML source kind."""
assert HtmlCrawler.supports() is SourceKind.HTML
def test_get_pagination_integration(self, html_crawler):
"""Integration test for get_pagination calling get_last_page."""
mock_html = """
<ul class="pagination">
<li><a href="/news?page=7">7</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_pagination()
assert isinstance(result, PageRange)
assert result.start == 0
assert result.end == 7
def test_get_last_page_with_non_string_href(self, html_crawler):
"""Test get_last_page handles non-string href attributes."""
# Create a mock element with href as a list (AttributeValueList)
mock_html = """
<ul class="pagination">
<li><a href="/news?page=5">5</a></li>
</ul>
"""
mock_soup = BeautifulSoup(mock_html, "html.parser")
# Remove the href entirely to simulate a missing/non-string attribute
pagination_link = mock_soup.select("ul.pagination > li a")[-1]
del pagination_link.attrs["href"]
with patch.object(html_crawler, "crawl", return_value=mock_soup):
result = html_crawler.get_last_page()
assert result == 1
@@ -1,239 +0,0 @@
from unittest.mock import Mock, patch
import pytest
from pydantic import HttpUrl
from basango.core.config.fetch_config import CrawlerConfig, ClientConfig
from basango.core.config.source_config import (
WordPressSourceConfig,
HtmlSourceConfig,
SourceSelectors,
)
from basango.domain import SourceKind, PageRange
from basango.services.crawler.wordpress_crawler import WordpressCrawler
class TestWordPressCrawler:
"""Test suite for WordPressCrawler."""
@pytest.fixture
def mock_client_config(self):
return ClientConfig()
@pytest.fixture
def mock_wordpress_source_config(self):
return WordPressSourceConfig(
source_id="test_wordpress_source",
source_url=HttpUrl("https://example.com/"),
supports_categories=True,
categories=["tech", "news"],
)
@pytest.fixture
def mock_crawler_config(self, mock_wordpress_source_config):
return CrawlerConfig(source=mock_wordpress_source_config, category="tech")
@pytest.fixture
def wordpress_crawler(self, mock_crawler_config, mock_client_config):
return WordpressCrawler(mock_crawler_config, mock_client_config)
@pytest.fixture
def mock_response_with_headers(self):
response = Mock()
response.headers = {
WordpressCrawler.TOTAL_PAGES_HEADER: "5",
WordpressCrawler.TOTAL_POSTS_HEADER: "47",
}
return response
def test_with_valid_wordpress_source(self, wordpress_crawler):
"""Test __init__ with valid WordPress source config."""
assert wordpress_crawler.source.source_kind == SourceKind.WORDPRESS
assert isinstance(wordpress_crawler.source, WordPressSourceConfig)
def test_with_invalid_source_kind_raises_error(self, mock_client_config):
"""Test __init__ raises ValueError when source kind is not WORDPRESS."""
html_source = HtmlSourceConfig(
source_id="test_html",
source_url=HttpUrl("https://example.com"),
pagination_template="news",
source_selectors=SourceSelectors(),
)
config = CrawlerConfig(source=html_source)
with pytest.raises(
ValueError, match="WordpressCrawler requires a source of kind WORDPRESS"
):
WordpressCrawler(config, mock_client_config)
def test_with_no_source_raises_error(self, mock_client_config):
"""Test __init__ raises ValueError when source is None."""
config = CrawlerConfig(source=None)
with pytest.raises(
ValueError, match="WordpressCrawler requires a source of kind WORDPRESS"
):
WordpressCrawler(config, mock_client_config)
def test_get_pagination_returns_valid_page_range(
self, wordpress_crawler, mock_response_with_headers
):
"""Test get_pagination returns correct PageRange from WordPress API headers."""
with patch.object(
wordpress_crawler.client, "get", return_value=mock_response_with_headers
):
result = wordpress_crawler.get_pagination()
assert isinstance(result, PageRange)
assert result.start == 1
assert result.end == 5
assert str(result) == "1:5"
def test_get_pagination_with_default_headers(self, wordpress_crawler):
"""Test get_pagination with default headers when WordPress headers are missing."""
mock_response = Mock()
mock_response.headers = {} # No WordPress headers
with patch.object(wordpress_crawler.client, "get", return_value=mock_response):
result = wordpress_crawler.get_pagination()
assert isinstance(result, PageRange)
assert result.start == 1
assert result.end == 1 # Default when no headers
def test_get_pagination_makes_correct_api_call(self, wordpress_crawler):
"""Test get_pagination makes the correct WordPress API call."""
mock_response = Mock()
mock_response.headers = {
WordpressCrawler.TOTAL_PAGES_HEADER: "3",
WordpressCrawler.TOTAL_POSTS_HEADER: "25",
}
with patch.object(
wordpress_crawler.client, "get", return_value=mock_response
) as mock_get:
wordpress_crawler.get_pagination()
expected_url = f"{wordpress_crawler.source.source_url}wp-json/wp/v2/posts?_fields=id&per_page=100"
mock_get.assert_called_once_with(expected_url)
def test_fetch_categories_populates_category_map(self, wordpress_crawler):
"""Test _fetch_categories populates the category_map correctly."""
mock_categories_response = Mock()
mock_categories_response.json.return_value = [
{"id": 1, "slug": "technology", "count": 15},
{"id": 2, "slug": "business", "count": 10},
{"id": 3, "slug": "sports", "count": 8},
]
with patch.object(
wordpress_crawler.client, "get", return_value=mock_categories_response
):
wordpress_crawler._fetch_categories()
assert len(wordpress_crawler.category_map) == 3
assert wordpress_crawler.category_map[1] == "technology"
assert wordpress_crawler.category_map[2] == "business"
assert wordpress_crawler.category_map[3] == "sports"
def test_fetch_categories_makes_correct_api_call(self, wordpress_crawler):
"""Test _fetch_categories makes the correct WordPress API call."""
mock_response = Mock()
mock_response.json.return_value = []
with patch.object(
wordpress_crawler.client, "get", return_value=mock_response
) as mock_get:
wordpress_crawler._fetch_categories()
expected_url = f"{wordpress_crawler.source.source_url}wp-json/wp/v2/categories?{WordpressCrawler.CATEGORY_QUERY}"
mock_get.assert_called_once_with(expected_url)
def test_map_categories_with_populated_category_map(self, wordpress_crawler):
"""Test _map_categories returns correct comma-separated string."""
# Pre-populate category map
wordpress_crawler.category_map = {
1: "technology",
2: "business",
3: "sports",
4: "lifestyle",
}
result = wordpress_crawler._map_categories([2, 1, 4])
# Should be sorted by category ID
assert result == "technology,business,lifestyle"
def test_map_categories_with_empty_category_map_fetches_categories(
self, wordpress_crawler
):
"""Test _map_categories fetches categories when category_map is empty."""
mock_categories_response = Mock()
mock_categories_response.json.return_value = [
{"id": 1, "slug": "tech", "count": 15},
{"id": 2, "slug": "news", "count": 10},
]
wordpress_crawler.category_map = {}
with patch.object(
wordpress_crawler.client, "get", return_value=mock_categories_response
):
result = wordpress_crawler._map_categories([1, 2])
assert result == "tech,news"
assert len(wordpress_crawler.category_map) == 2
def test_map_categories_filters_unknown_category_ids(self, wordpress_crawler):
"""Test _map_categories filters out unknown category IDs."""
wordpress_crawler.category_map = {1: "technology", 2: "business"}
result = wordpress_crawler._map_categories([1, 99, 2, 100])
# Should only include known categories
assert result == "technology,business"
def test_map_categories_with_empty_category_list(self, wordpress_crawler):
"""Test _map_categories returns empty string for empty category list."""
wordpress_crawler.category_map = {1: "tech", 2: "news"}
result = wordpress_crawler._map_categories([])
assert result == ""
def test_map_categories_sorts_by_category_id(self, wordpress_crawler):
"""Test _map_categories sorts categories by ID."""
wordpress_crawler.category_map = {3: "charlie", 1: "alpha", 2: "beta"}
result = wordpress_crawler._map_categories([3, 1, 2])
# Should be sorted by ID: 1, 2, 3
assert result == "alpha,beta,charlie"
def test_supports_wordpress_source_kind(self):
"""Test supports method returns True for WordPress source kind."""
assert WordpressCrawler.supports() is SourceKind.WORDPRESS
@pytest.mark.parametrize(
"pages,posts,expected_start,expected_end",
[
("1", "10", 1, 1),
("5", "47", 1, 5),
("10", "100", 1, 10),
],
)
def test_get_pagination_with_various_header_values(
self, wordpress_crawler, pages, posts, expected_start, expected_end
):
"""Test get_pagination with various header values."""
mock_response = Mock()
mock_response.headers = {
WordpressCrawler.TOTAL_PAGES_HEADER: pages,
WordpressCrawler.TOTAL_POSTS_HEADER: posts,
}
with patch.object(wordpress_crawler.client, "get", return_value=mock_response):
result = wordpress_crawler.get_pagination()
assert result.start == expected_start
assert result.end == expected_end
@@ -1,70 +0,0 @@
from datetime import datetime, timezone
import pytest
from basango.services.date_parser import DateParser
@pytest.mark.parametrize(
"date_str, fmt, pattern, replacement, expected",
[
(
"2004-02-12T15:19:21",
"%Y-%m-%dT%H:%M:%S",
None,
None,
1076599161, # 2004-02-12 15:19:21 UTC
),
(
"08/10/2024 - 00:00",
"%Y-%m-%d %H:%M",
r"/(\d{2})\/(\d{2})\/(\d{4}) - (\d{2}:\d{2})/",
r"$3-$2-$1 $4",
1728345600, # 2024-10-08 00:00:00 UTC
),
(
"mar 08/10/2024 - 00:00",
"%Y-%m-%d %H:%M",
r"/\w{3} (\d{2})\/(\d{2})\/(\d{4}) - (\d{2}:\d{2})/",
r"$3-$2-$1 $4",
1728345600, # 2024-10-08 00:00:00 UTC
),
(
"Mardi 8 octobre 2024 - 00:00",
"%Y-%m-%d %H:%M",
r"/(\d{1}) (\d{1,2}) (\d{2}) (\d{4}) - (\d{2}:\d{2})/",
r"$4-$3-$2 $5",
1728345600, # 2024-10-08 00:00:00 UTC
),
(
"8.10.2024 00:00",
"%d.%m.%Y %H:%M",
None,
None,
1728345600, # 2024-10-08 00:00:00 UTC
),
],
)
def test_create_timestamp_with_valid_dates(
date_str: str,
fmt: str | None,
pattern: str | None,
replacement: str | None,
expected: int,
) -> None:
dr = DateParser()
result = dr.create_timestamp(date_str, fmt, pattern, replacement)
assert result == expected
def test_create_timestamp_with_invalid_date_falls_back_to_midnight_today() -> None:
dr = DateParser()
# Compute expected midnight (UTC) before invoking the parser to avoid edge cases.
now = datetime.now(timezone.utc)
expected_midnight = int(
now.replace(hour=0, minute=0, second=0, microsecond=0).timestamp()
)
result = dr.create_timestamp("invalid date string", None, None, None)
assert result == expected_midnight
-9
View File
@@ -1,9 +0,0 @@
import os
import sys
# Ensure 'src' is on sys.path so `import basango...` works in tests
ROOT = os.path.dirname(os.path.dirname(__file__))
SRC = os.path.join(ROOT, "src")
if SRC not in sys.path:
sys.path.insert(0, SRC)
-948
View File
@@ -1,948 +0,0 @@
version = 1
revision = 3
requires-python = ">=3.13"
[[package]]
name = "annotated-types"
version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
]
[[package]]
name = "anyio"
version = "4.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
{ name = "sniffio" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f1/b4/636b3b65173d3ce9a38ef5f0522789614e590dab6a8d505340a4efe4c567/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6", size = 213252, upload-time = "2025-08-04T08:54:26.451Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6f/12/e5e0282d673bb9746bacfb6e2dba8719989d3660cdb2ea79aee9a9651afb/anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1", size = 107213, upload-time = "2025-08-04T08:54:24.882Z" },
]
[[package]]
name = "babel"
version = "2.17.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" },
]
[[package]]
name = "bandit"
version = "1.8.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "pyyaml" },
{ name = "rich" },
{ name = "stevedore" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fb/b5/7eb834e213d6f73aace21938e5e90425c92e5f42abafaf8a6d5d21beed51/bandit-1.8.6.tar.gz", hash = "sha256:dbfe9c25fc6961c2078593de55fd19f2559f9e45b99f1272341f5b95dea4e56b", size = 4240271, upload-time = "2025-07-06T03:10:50.9Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/48/ca/ba5f909b40ea12ec542d5d7bdd13ee31c4d65f3beed20211ef81c18fa1f3/bandit-1.8.6-py3-none-any.whl", hash = "sha256:3348e934d736fcdb68b6aa4030487097e23a501adf3e7827b63658df464dddd0", size = 133808, upload-time = "2025-07-06T03:10:49.134Z" },
]
[[package]]
name = "basango"
version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "httpx" },
{ name = "markdownify" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyyaml" },
{ name = "readability-lxml" },
{ name = "rq" },
{ name = "selectolax" },
{ name = "tiktoken" },
{ name = "trafilatura" },
{ name = "typer" },
{ name = "uv-build" },
]
[package.dev-dependencies]
dev = [
{ name = "bandit" },
{ name = "pyright" },
{ name = "pytest" },
{ name = "ruff" },
]
[package.metadata]
requires-dist = [
{ name = "beautifulsoup4", specifier = ">=4.13.5" },
{ name = "httpx", specifier = ">=0.27.2" },
{ name = "markdownify", specifier = ">=0.13.1" },
{ name = "pydantic", specifier = ">=2.11.7" },
{ name = "pydantic-settings", specifier = ">=2.10.1" },
{ name = "pyyaml", specifier = ">=6.0.2" },
{ name = "readability-lxml", specifier = ">=0.8.1" },
{ name = "rq", specifier = ">=2.5.0" },
{ name = "selectolax", specifier = ">=0.3.20" },
{ name = "tiktoken", specifier = ">=0.12.0" },
{ name = "trafilatura", specifier = ">=1.7.0" },
{ name = "typer", specifier = ">=0.16.1" },
{ name = "uv-build", specifier = ">=0.8.12,<0.9.0" },
]
[package.metadata.requires-dev]
dev = [
{ name = "bandit", specifier = ">=1.8.6" },
{ name = "pyright", specifier = ">=1.1.404" },
{ name = "pytest", specifier = ">=8.4.1" },
{ name = "ruff", specifier = ">=0.12.9" },
]
[[package]]
name = "beautifulsoup4"
version = "4.13.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "soupsieve" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/85/2e/3e5079847e653b1f6dc647aa24549d68c6addb4c595cc0d902d1b19308ad/beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695", size = 622954, upload-time = "2025-08-24T14:06:13.168Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113, upload-time = "2025-08-24T14:06:14.884Z" },
]
[[package]]
name = "certifi"
version = "2025.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "chardet"
version = "5.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" },
{ url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" },
{ url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" },
{ url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" },
{ url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" },
{ url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" },
{ url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" },
{ url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" },
{ url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" },
{ url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" },
{ url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" },
{ url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" },
{ url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" },
{ url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" },
{ url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" },
{ url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" },
{ url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" },
{ url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" },
{ url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" },
{ url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" },
{ url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" },
{ url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" },
{ url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" },
]
[[package]]
name = "click"
version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "courlan"
version = "1.3.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "babel" },
{ name = "tld" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382, upload-time = "2024-10-29T16:40:20.994Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" },
]
[[package]]
name = "croniter"
version = "6.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "python-dateutil" },
{ name = "pytz" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ad/2f/44d1ae153a0e27be56be43465e5cb39b9650c781e001e7864389deb25090/croniter-6.0.0.tar.gz", hash = "sha256:37c504b313956114a983ece2c2b07790b1f1094fe9d81cc94739214748255577", size = 64481, upload-time = "2024-12-17T17:17:47.32Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/4b/290b4c3efd6417a8b0c284896de19b1d5855e6dbdb97d2a35e68fa42de85/croniter-6.0.0-py2.py3-none-any.whl", hash = "sha256:2f878c3856f17896979b2a4379ba1f09c83e374931ea15cc835c5dd2eee9b368", size = 25468, upload-time = "2024-12-17T17:17:45.359Z" },
]
[[package]]
name = "cssselect"
version = "1.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/72/0a/c3ea9573b1dc2e151abfe88c7fe0c26d1892fe6ed02d0cdb30f0d57029d5/cssselect-1.3.0.tar.gz", hash = "sha256:57f8a99424cfab289a1b6a816a43075a4b00948c86b4dcf3ef4ee7e15f7ab0c7", size = 42870, upload-time = "2025-03-10T09:30:29.638Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786, upload-time = "2025-03-10T09:30:28.048Z" },
]
[[package]]
name = "dateparser"
version = "1.2.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "python-dateutil" },
{ name = "pytz" },
{ name = "regex" },
{ name = "tzlocal" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/30/064144f0df1749e7bb5faaa7f52b007d7c2d08ec08fed8411aba87207f68/dateparser-1.2.2.tar.gz", hash = "sha256:986316f17cb8cdc23ea8ce563027c5ef12fc725b6fb1d137c14ca08777c5ecf7", size = 329840, upload-time = "2025-06-26T09:29:23.211Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" },
]
[[package]]
name = "h11"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "htmldate"
version = "1.9.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "charset-normalizer" },
{ name = "dateparser" },
{ name = "lxml" },
{ name = "python-dateutil" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a5/26/aaae4cab984f0b7dd0f5f1b823fa2ed2fd4a2bb50acd5bd2f0d217562678/htmldate-1.9.3.tar.gz", hash = "sha256:ac0caf4628c3ded4042011e2d60dc68dfb314c77b106587dd307a80d77e708e9", size = 44913, upload-time = "2024-12-30T12:52:35.206Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/05/49/8872130016209c20436ce0c1067de1cf630755d0443d068a5bc17fa95015/htmldate-1.9.3-py3-none-any.whl", hash = "sha256:3fadc422cf3c10a5cdb5e1b914daf37ec7270400a80a1b37e2673ff84faaaff8", size = 31565, upload-time = "2024-12-30T12:52:32.145Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
[[package]]
name = "httpx"
version = "0.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "certifi" },
{ name = "httpcore" },
{ name = "idna" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "justext"
version = "3.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lxml", extra = ["html-clean"] },
]
sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521, upload-time = "2025-02-25T20:21:49.934Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940, upload-time = "2025-02-25T20:21:44.179Z" },
]
[[package]]
name = "lxml"
version = "5.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479, upload-time = "2025-04-23T01:50:29.322Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086, upload-time = "2025-04-23T01:46:52.218Z" },
{ url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613, upload-time = "2025-04-23T01:46:55.281Z" },
{ url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008, upload-time = "2025-04-23T01:46:57.817Z" },
{ url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915, upload-time = "2025-04-23T01:47:00.745Z" },
{ url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890, upload-time = "2025-04-23T01:47:04.702Z" },
{ url = "https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644, upload-time = "2025-04-23T01:47:07.833Z" },
{ url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817, upload-time = "2025-04-23T01:47:10.317Z" },
{ url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916, upload-time = "2025-04-23T01:47:12.823Z" },
{ url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274, upload-time = "2025-04-23T01:47:15.916Z" },
{ url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757, upload-time = "2025-04-23T01:47:19.793Z" },
{ url = "https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028, upload-time = "2025-04-23T01:47:22.401Z" },
{ url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487, upload-time = "2025-04-23T01:47:25.513Z" },
{ url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688, upload-time = "2025-04-23T01:47:28.454Z" },
{ url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043, upload-time = "2025-04-23T01:47:31.208Z" },
{ url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569, upload-time = "2025-04-23T01:47:33.805Z" },
{ url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270, upload-time = "2025-04-23T01:47:36.133Z" },
{ url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606, upload-time = "2025-04-23T01:47:39.028Z" },
]
[package.optional-dependencies]
html-clean = [
{ name = "lxml-html-clean" },
]
[[package]]
name = "lxml-html-clean"
version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lxml" },
]
sdist = { url = "https://files.pythonhosted.org/packages/79/b6/466e71db127950fb8d172026a8f0a9f0dc6f64c8e78e2ca79f252e5790b8/lxml_html_clean-0.4.2.tar.gz", hash = "sha256:91291e7b5db95430abf461bc53440964d58e06cc468950f9e47db64976cebcb3", size = 21622, upload-time = "2025-04-09T11:33:59.432Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/0b/942cb7278d6caad79343ad2ddd636ed204a47909b969d19114a3097f5aa3/lxml_html_clean-0.4.2-py3-none-any.whl", hash = "sha256:74ccfba277adcfea87a1e9294f47dd86b05d65b4da7c5b07966e3d5f3be8a505", size = 14184, upload-time = "2025-04-09T11:33:57.988Z" },
]
[[package]]
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
name = "markdownify"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/83/1b/6f2697b51eaca81f08852fd2734745af15718fea10222a1d40f8a239c4ea/markdownify-1.2.0.tar.gz", hash = "sha256:f6c367c54eb24ee953921804dfe6d6575c5e5b42c643955e7242034435de634c", size = 18771, upload-time = "2025-08-09T17:44:15.302Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/e2/7af643acb4cae0741dffffaa7f3f7c9e7ab4046724543ba1777c401d821c/markdownify-1.2.0-py3-none-any.whl", hash = "sha256:48e150a1c4993d4d50f282f725c0111bd9eb25645d41fa2f543708fd44161351", size = 15561, upload-time = "2025-08-09T17:44:14.074Z" },
]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "nodeenv"
version = "1.9.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pbr"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/80/88/baf6b45d064271f19fefac7def6a030a893f912f430de0024dd595ced61f/pbr-7.0.0.tar.gz", hash = "sha256:cf4127298723dafbce3afd13775ccf3885be5d3c8435751b867f9a6a10b71a39", size = 129146, upload-time = "2025-08-13T09:16:41.654Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d6/98/120c3e21bf3fc0ef397a3906465ee9f5c76996c52811e65455eadc12d68a/pbr-7.0.0-py2.py3-none-any.whl", hash = "sha256:b447e63a2bc04fd975fc0480b8d5ebf979179e2c0ae203bf1eff9ea20073bc38", size = 125109, upload-time = "2025-08-13T09:16:40.269Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pydantic"
version = "2.11.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-types" },
{ name = "pydantic-core" },
{ name = "typing-extensions" },
{ name = "typing-inspection" },
]
sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" },
]
[[package]]
name = "pydantic-core"
version = "2.33.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" },
{ url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" },
{ url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" },
{ url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" },
{ url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" },
{ url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" },
{ url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" },
{ url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" },
{ url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" },
{ url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" },
{ url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" },
{ url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" },
{ url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" },
{ url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" },
{ url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" },
{ url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" },
{ url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" },
]
[[package]]
name = "pydantic-settings"
version = "2.10.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic" },
{ name = "python-dotenv" },
{ name = "typing-inspection" },
]
sdist = { url = "https://files.pythonhosted.org/packages/68/85/1ea668bbab3c50071ca613c6ab30047fb36ab0da1b92fa8f17bbc38fd36c/pydantic_settings-2.10.1.tar.gz", hash = "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee", size = 172583, upload-time = "2025-06-24T13:26:46.841Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pyright"
version = "1.1.404"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nodeenv" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e2/6e/026be64c43af681d5632722acd100b06d3d39f383ec382ff50a71a6d5bce/pyright-1.1.404.tar.gz", hash = "sha256:455e881a558ca6be9ecca0b30ce08aa78343ecc031d37a198ffa9a7a1abeb63e", size = 4065679, upload-time = "2025-08-20T18:46:14.029Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/84/30/89aa7f7d7a875bbb9a577d4b1dc5a3e404e3d2ae2657354808e905e358e0/pyright-1.1.404-py3-none-any.whl", hash = "sha256:c7b7ff1fdb7219c643079e4c3e7d4125f0dafcc19d253b47e898d130ea426419", size = 5902951, upload-time = "2025-08-20T18:46:12.096Z" },
]
[[package]]
name = "pytest"
version = "8.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
]
[[package]]
name = "python-dotenv"
version = "1.1.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f6/b0/4bc07ccd3572a2f9df7e6782f52b0c6c90dcbb803ac4a167702d7d0dfe1e/python_dotenv-1.1.1.tar.gz", hash = "sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab", size = 41978, upload-time = "2025-06-24T04:21:07.341Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" },
]
[[package]]
name = "pytz"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
]
[[package]]
name = "pyyaml"
version = "6.0.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" },
{ url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" },
{ url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" },
{ url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" },
{ url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" },
{ url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" },
{ url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" },
{ url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" },
{ url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
]
[[package]]
name = "readability-lxml"
version = "0.8.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "chardet" },
{ name = "cssselect" },
{ name = "lxml", extra = ["html-clean"] },
]
sdist = { url = "https://files.pythonhosted.org/packages/55/3e/dc87d97532ddad58af786ec89c7036182e352574c1cba37bf2bf783d2b15/readability_lxml-0.8.4.1.tar.gz", hash = "sha256:9d2924f5942dd7f37fb4da353263b22a3e877ccf922d0e45e348e4177b035a53", size = 22874, upload-time = "2025-05-03T21:11:45.493Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/75/2cc58965097e351415af420be81c4665cf80da52a17ef43c01ffbe2caf91/readability_lxml-0.8.4.1-py3-none-any.whl", hash = "sha256:874c0cea22c3bf2b78c7f8df831bfaad3c0a89b7301d45a188db581652b4b465", size = 19912, upload-time = "2025-05-03T21:11:43.993Z" },
]
[[package]]
name = "redis"
version = "6.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0d/d6/e8b92798a5bd67d659d51a18170e91c16ac3b59738d91894651ee255ed49/redis-6.4.0.tar.gz", hash = "sha256:b01bc7282b8444e28ec36b261df5375183bb47a07eb9c603f284e89cbc5ef010", size = 4647399, upload-time = "2025-08-07T08:10:11.441Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/02/89e2ed7e85db6c93dfa9e8f691c5087df4e3551ab39081a4d7c6d1f90e05/redis-6.4.0-py3-none-any.whl", hash = "sha256:f0544fa9604264e9464cdf4814e7d4830f74b165d52f2a330a760a88dd248b7f", size = 279847, upload-time = "2025-08-07T08:10:09.84Z" },
]
[[package]]
name = "regex"
version = "2025.9.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b2/5a/4c63457fbcaf19d138d72b2e9b39405954f98c0349b31c601bfcb151582c/regex-2025.9.1.tar.gz", hash = "sha256:88ac07b38d20b54d79e704e38aa3bd2c0f8027432164226bdee201a1c0c9c9ff", size = 400852, upload-time = "2025-09-01T22:10:10.479Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/98/25/b2959ce90c6138c5142fe5264ee1f9b71a0c502ca4c7959302a749407c79/regex-2025.9.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bc6834727d1b98d710a63e6c823edf6ffbf5792eba35d3fa119531349d4142ef", size = 485932, upload-time = "2025-09-01T22:08:57.913Z" },
{ url = "https://files.pythonhosted.org/packages/49/2e/6507a2a85f3f2be6643438b7bd976e67ad73223692d6988eb1ff444106d3/regex-2025.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c3dc05b6d579875719bccc5f3037b4dc80433d64e94681a0061845bd8863c025", size = 289568, upload-time = "2025-09-01T22:08:59.258Z" },
{ url = "https://files.pythonhosted.org/packages/c7/d8/de4a4b57215d99868f1640e062a7907e185ec7476b4b689e2345487c1ff4/regex-2025.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22213527df4c985ec4a729b055a8306272d41d2f45908d7bacb79be0fa7a75ad", size = 286984, upload-time = "2025-09-01T22:09:00.835Z" },
{ url = "https://files.pythonhosted.org/packages/03/15/e8cb403403a57ed316e80661db0e54d7aa2efcd85cb6156f33cc18746922/regex-2025.9.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e3f6e3c5a5a1adc3f7ea1b5aec89abfc2f4fbfba55dafb4343cd1d084f715b2", size = 797514, upload-time = "2025-09-01T22:09:02.538Z" },
{ url = "https://files.pythonhosted.org/packages/e4/26/2446f2b9585fed61faaa7e2bbce3aca7dd8df6554c32addee4c4caecf24a/regex-2025.9.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcb89c02a0d6c2bec9b0bb2d8c78782699afe8434493bfa6b4021cc51503f249", size = 862586, upload-time = "2025-09-01T22:09:04.322Z" },
{ url = "https://files.pythonhosted.org/packages/fd/b8/82ffbe9c0992c31bbe6ae1c4b4e21269a5df2559102b90543c9b56724c3c/regex-2025.9.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b0e2f95413eb0c651cd1516a670036315b91b71767af83bc8525350d4375ccba", size = 910815, upload-time = "2025-09-01T22:09:05.978Z" },
{ url = "https://files.pythonhosted.org/packages/2f/d8/7303ea38911759c1ee30cc5bc623ee85d3196b733c51fd6703c34290a8d9/regex-2025.9.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a41dc039e1c97d3c2ed3e26523f748e58c4de3ea7a31f95e1cf9ff973fff5a", size = 802042, upload-time = "2025-09-01T22:09:07.865Z" },
{ url = "https://files.pythonhosted.org/packages/fc/0e/6ad51a55ed4b5af512bb3299a05d33309bda1c1d1e1808fa869a0bed31bc/regex-2025.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f0b4258b161094f66857a26ee938d3fe7b8a5063861e44571215c44fbf0e5df", size = 786764, upload-time = "2025-09-01T22:09:09.362Z" },
{ url = "https://files.pythonhosted.org/packages/8d/d5/394e3ffae6baa5a9217bbd14d96e0e5da47bb069d0dbb8278e2681a2b938/regex-2025.9.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bf70e18ac390e6977ea7e56f921768002cb0fa359c4199606c7219854ae332e0", size = 856557, upload-time = "2025-09-01T22:09:11.129Z" },
{ url = "https://files.pythonhosted.org/packages/cd/80/b288d3910c41194ad081b9fb4b371b76b0bbfdce93e7709fc98df27b37dc/regex-2025.9.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b84036511e1d2bb0a4ff1aec26951caa2dea8772b223c9e8a19ed8885b32dbac", size = 849108, upload-time = "2025-09-01T22:09:12.877Z" },
{ url = "https://files.pythonhosted.org/packages/d1/cd/5ec76bf626d0d5abdc277b7a1734696f5f3d14fbb4a3e2540665bc305d85/regex-2025.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c2e05dcdfe224047f2a59e70408274c325d019aad96227ab959403ba7d58d2d7", size = 788201, upload-time = "2025-09-01T22:09:14.561Z" },
{ url = "https://files.pythonhosted.org/packages/b5/36/674672f3fdead107565a2499f3007788b878188acec6d42bc141c5366c2c/regex-2025.9.1-cp313-cp313-win32.whl", hash = "sha256:3b9a62107a7441b81ca98261808fed30ae36ba06c8b7ee435308806bd53c1ed8", size = 264508, upload-time = "2025-09-01T22:09:16.193Z" },
{ url = "https://files.pythonhosted.org/packages/83/ad/931134539515eb64ce36c24457a98b83c1b2e2d45adf3254b94df3735a76/regex-2025.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:b38afecc10c177eb34cfae68d669d5161880849ba70c05cbfbe409f08cc939d7", size = 275469, upload-time = "2025-09-01T22:09:17.462Z" },
{ url = "https://files.pythonhosted.org/packages/24/8c/96d34e61c0e4e9248836bf86d69cb224fd222f270fa9045b24e218b65604/regex-2025.9.1-cp313-cp313-win_arm64.whl", hash = "sha256:ec329890ad5e7ed9fc292858554d28d58d56bf62cf964faf0aa57964b21155a0", size = 268586, upload-time = "2025-09-01T22:09:18.948Z" },
{ url = "https://files.pythonhosted.org/packages/21/b1/453cbea5323b049181ec6344a803777914074b9726c9c5dc76749966d12d/regex-2025.9.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:72fb7a016467d364546f22b5ae86c45680a4e0de6b2a6f67441d22172ff641f1", size = 486111, upload-time = "2025-09-01T22:09:20.734Z" },
{ url = "https://files.pythonhosted.org/packages/f6/0e/92577f197bd2f7652c5e2857f399936c1876978474ecc5b068c6d8a79c86/regex-2025.9.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c9527fa74eba53f98ad86be2ba003b3ebe97e94b6eb2b916b31b5f055622ef03", size = 289520, upload-time = "2025-09-01T22:09:22.249Z" },
{ url = "https://files.pythonhosted.org/packages/af/c6/b472398116cca7ea5a6c4d5ccd0fc543f7fd2492cb0c48d2852a11972f73/regex-2025.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c905d925d194c83a63f92422af7544ec188301451b292c8b487f0543726107ca", size = 287215, upload-time = "2025-09-01T22:09:23.657Z" },
{ url = "https://files.pythonhosted.org/packages/cf/11/f12ecb0cf9ca792a32bb92f758589a84149017467a544f2f6bfb45c0356d/regex-2025.9.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74df7c74a63adcad314426b1f4ea6054a5ab25d05b0244f0c07ff9ce640fa597", size = 797855, upload-time = "2025-09-01T22:09:25.197Z" },
{ url = "https://files.pythonhosted.org/packages/46/88/bbb848f719a540fb5997e71310f16f0b33a92c5d4b4d72d4311487fff2a3/regex-2025.9.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4f6e935e98ea48c7a2e8be44494de337b57a204470e7f9c9c42f912c414cd6f5", size = 863363, upload-time = "2025-09-01T22:09:26.705Z" },
{ url = "https://files.pythonhosted.org/packages/54/a9/2321eb3e2838f575a78d48e03c1e83ea61bd08b74b7ebbdeca8abc50fc25/regex-2025.9.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4a62d033cd9ebefc7c5e466731a508dfabee827d80b13f455de68a50d3c2543d", size = 910202, upload-time = "2025-09-01T22:09:28.906Z" },
{ url = "https://files.pythonhosted.org/packages/33/07/d1d70835d7d11b7e126181f316f7213c4572ecf5c5c97bdbb969fb1f38a2/regex-2025.9.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef971ebf2b93bdc88d8337238be4dfb851cc97ed6808eb04870ef67589415171", size = 801808, upload-time = "2025-09-01T22:09:30.733Z" },
{ url = "https://files.pythonhosted.org/packages/13/d1/29e4d1bed514ef2bf3a4ead3cb8bb88ca8af94130239a4e68aa765c35b1c/regex-2025.9.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d936a1db208bdca0eca1f2bb2c1ba1d8370b226785c1e6db76e32a228ffd0ad5", size = 786824, upload-time = "2025-09-01T22:09:32.61Z" },
{ url = "https://files.pythonhosted.org/packages/33/27/20d8ccb1bee460faaa851e6e7cc4cfe852a42b70caa1dca22721ba19f02f/regex-2025.9.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:7e786d9e4469698fc63815b8de08a89165a0aa851720eb99f5e0ea9d51dd2b6a", size = 857406, upload-time = "2025-09-01T22:09:34.117Z" },
{ url = "https://files.pythonhosted.org/packages/74/fe/60c6132262dc36430d51e0c46c49927d113d3a38c1aba6a26c7744c84cf3/regex-2025.9.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:6b81d7dbc5466ad2c57ce3a0ddb717858fe1a29535c8866f8514d785fdb9fc5b", size = 848593, upload-time = "2025-09-01T22:09:35.598Z" },
{ url = "https://files.pythonhosted.org/packages/cc/ae/2d4ff915622fabbef1af28387bf71e7f2f4944a348b8460d061e85e29bf0/regex-2025.9.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cd4890e184a6feb0ef195338a6ce68906a8903a0f2eb7e0ab727dbc0a3156273", size = 787951, upload-time = "2025-09-01T22:09:37.139Z" },
{ url = "https://files.pythonhosted.org/packages/85/37/dc127703a9e715a284cc2f7dbdd8a9776fd813c85c126eddbcbdd1ca5fec/regex-2025.9.1-cp314-cp314-win32.whl", hash = "sha256:34679a86230e46164c9e0396b56cab13c0505972343880b9e705083cc5b8ec86", size = 269833, upload-time = "2025-09-01T22:09:39.245Z" },
{ url = "https://files.pythonhosted.org/packages/83/bf/4bed4d3d0570e16771defd5f8f15f7ea2311edcbe91077436d6908956c4a/regex-2025.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:a1196e530a6bfa5f4bde029ac5b0295a6ecfaaffbfffede4bbaf4061d9455b70", size = 278742, upload-time = "2025-09-01T22:09:40.651Z" },
{ url = "https://files.pythonhosted.org/packages/cf/3e/7d7ac6fd085023312421e0d69dfabdfb28e116e513fadbe9afe710c01893/regex-2025.9.1-cp314-cp314-win_arm64.whl", hash = "sha256:f46d525934871ea772930e997d577d48c6983e50f206ff7b66d4ac5f8941e993", size = 271860, upload-time = "2025-09-01T22:09:42.413Z" },
]

[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]

[[package]]
name = "rich"
version = "14.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown-it-py" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441, upload-time = "2025-07-25T07:32:58.125Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e3/30/3c4d035596d3cf444529e0b2953ad0466f6049528a879d27534700580395/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", size = 243368, upload-time = "2025-07-25T07:32:56.73Z" },
]

[[package]]
name = "rq"
version = "2.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "croniter" },
{ name = "redis" },
]
sdist = { url = "https://files.pythonhosted.org/packages/48/1c/1c390fd8594e7367c1ee672297f7a877c0982b9c26877242c5a509ad27c0/rq-2.5.0.tar.gz", hash = "sha256:b55d328fcaeaf25823b8b8450283225f8048bd1c52abaaca192c99201ab5c687", size = 666978, upload-time = "2025-08-15T10:41:34.84Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/36/8917bcfc9794cbc4dd984962feb401f2dfeee0d89e1e40e3367420996f42/rq-2.5.0-py3-none-any.whl", hash = "sha256:90c74eb5b5793ff08e6c3391fd6deb7151f308ac8f04b6831580b38e90688155", size = 108377, upload-time = "2025-08-15T10:41:21.792Z" },
]

[[package]]
name = "ruff"
version = "0.12.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4a/45/2e403fa7007816b5fbb324cb4f8ed3c7402a927a0a0cb2b6279879a8bfdc/ruff-0.12.9.tar.gz", hash = "sha256:fbd94b2e3c623f659962934e52c2bea6fc6da11f667a427a368adaf3af2c866a", size = 5254702, upload-time = "2025-08-14T16:08:55.2Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/20/53bf098537adb7b6a97d98fcdebf6e916fcd11b2e21d15f8c171507909cc/ruff-0.12.9-py3-none-linux_armv6l.whl", hash = "sha256:fcebc6c79fcae3f220d05585229463621f5dbf24d79fdc4936d9302e177cfa3e", size = 11759705, upload-time = "2025-08-14T16:08:12.968Z" },
{ url = "https://files.pythonhosted.org/packages/20/4d/c764ee423002aac1ec66b9d541285dd29d2c0640a8086c87de59ebbe80d5/ruff-0.12.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aed9d15f8c5755c0e74467731a007fcad41f19bcce41cd75f768bbd687f8535f", size = 12527042, upload-time = "2025-08-14T16:08:16.54Z" },
{ url = "https://files.pythonhosted.org/packages/8b/45/cfcdf6d3eb5fc78a5b419e7e616d6ccba0013dc5b180522920af2897e1be/ruff-0.12.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5b15ea354c6ff0d7423814ba6d44be2807644d0c05e9ed60caca87e963e93f70", size = 11724457, upload-time = "2025-08-14T16:08:18.686Z" },
{ url = "https://files.pythonhosted.org/packages/72/e6/44615c754b55662200c48bebb02196dbb14111b6e266ab071b7e7297b4ec/ruff-0.12.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d596c2d0393c2502eaabfef723bd74ca35348a8dac4267d18a94910087807c53", size = 11949446, upload-time = "2025-08-14T16:08:21.059Z" },
{ url = "https://files.pythonhosted.org/packages/fd/d1/9b7d46625d617c7df520d40d5ac6cdcdf20cbccb88fad4b5ecd476a6bb8d/ruff-0.12.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b15599931a1a7a03c388b9c5df1bfa62be7ede6eb7ef753b272381f39c3d0ff", size = 11566350, upload-time = "2025-08-14T16:08:23.433Z" },
{ url = "https://files.pythonhosted.org/packages/59/20/b73132f66f2856bc29d2d263c6ca457f8476b0bbbe064dac3ac3337a270f/ruff-0.12.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d02faa2977fb6f3f32ddb7828e212b7dd499c59eb896ae6c03ea5c303575756", size = 13270430, upload-time = "2025-08-14T16:08:25.837Z" },
{ url = "https://files.pythonhosted.org/packages/a2/21/eaf3806f0a3d4c6be0a69d435646fba775b65f3f2097d54898b0fd4bb12e/ruff-0.12.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:17d5b6b0b3a25259b69ebcba87908496e6830e03acfb929ef9fd4c58675fa2ea", size = 14264717, upload-time = "2025-08-14T16:08:27.907Z" },
{ url = "https://files.pythonhosted.org/packages/d2/82/1d0c53bd37dcb582b2c521d352fbf4876b1e28bc0d8894344198f6c9950d/ruff-0.12.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:72db7521860e246adbb43f6ef464dd2a532ef2ef1f5dd0d470455b8d9f1773e0", size = 13684331, upload-time = "2025-08-14T16:08:30.352Z" },
{ url = "https://files.pythonhosted.org/packages/3b/2f/1c5cf6d8f656306d42a686f1e207f71d7cebdcbe7b2aa18e4e8a0cb74da3/ruff-0.12.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a03242c1522b4e0885af63320ad754d53983c9599157ee33e77d748363c561ce", size = 12739151, upload-time = "2025-08-14T16:08:32.55Z" },
{ url = "https://files.pythonhosted.org/packages/47/09/25033198bff89b24d734e6479e39b1968e4c992e82262d61cdccaf11afb9/ruff-0.12.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fc83e4e9751e6c13b5046d7162f205d0a7bac5840183c5beebf824b08a27340", size = 12954992, upload-time = "2025-08-14T16:08:34.816Z" },
{ url = "https://files.pythonhosted.org/packages/52/8e/d0dbf2f9dca66c2d7131feefc386523404014968cd6d22f057763935ab32/ruff-0.12.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:881465ed56ba4dd26a691954650de6ad389a2d1fdb130fe51ff18a25639fe4bb", size = 12899569, upload-time = "2025-08-14T16:08:36.852Z" },
{ url = "https://files.pythonhosted.org/packages/a0/bd/b614d7c08515b1428ed4d3f1d4e3d687deffb2479703b90237682586fa66/ruff-0.12.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:43f07a3ccfc62cdb4d3a3348bf0588358a66da756aa113e071b8ca8c3b9826af", size = 11751983, upload-time = "2025-08-14T16:08:39.314Z" },
{ url = "https://files.pythonhosted.org/packages/58/d6/383e9f818a2441b1a0ed898d7875f11273f10882f997388b2b51cb2ae8b5/ruff-0.12.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:07adb221c54b6bba24387911e5734357f042e5669fa5718920ee728aba3cbadc", size = 11538635, upload-time = "2025-08-14T16:08:41.297Z" },
{ url = "https://files.pythonhosted.org/packages/20/9c/56f869d314edaa9fc1f491706d1d8a47747b9d714130368fbd69ce9024e9/ruff-0.12.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f5cd34fabfdea3933ab85d72359f118035882a01bff15bd1d2b15261d85d5f66", size = 12534346, upload-time = "2025-08-14T16:08:43.39Z" },
{ url = "https://files.pythonhosted.org/packages/bd/4b/d8b95c6795a6c93b439bc913ee7a94fda42bb30a79285d47b80074003ee7/ruff-0.12.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f6be1d2ca0686c54564da8e7ee9e25f93bdd6868263805f8c0b8fc6a449db6d7", size = 13017021, upload-time = "2025-08-14T16:08:45.889Z" },
{ url = "https://files.pythonhosted.org/packages/c7/c1/5f9a839a697ce1acd7af44836f7c2181cdae5accd17a5cb85fcbd694075e/ruff-0.12.9-py3-none-win32.whl", hash = "sha256:cc7a37bd2509974379d0115cc5608a1a4a6c4bff1b452ea69db83c8855d53f93", size = 11734785, upload-time = "2025-08-14T16:08:48.062Z" },
{ url = "https://files.pythonhosted.org/packages/fa/66/cdddc2d1d9a9f677520b7cfc490d234336f523d4b429c1298de359a3be08/ruff-0.12.9-py3-none-win_amd64.whl", hash = "sha256:6fb15b1977309741d7d098c8a3cb7a30bc112760a00fb6efb7abc85f00ba5908", size = 12840654, upload-time = "2025-08-14T16:08:50.158Z" },
{ url = "https://files.pythonhosted.org/packages/ac/fd/669816bc6b5b93b9586f3c1d87cd6bc05028470b3ecfebb5938252c47a35/ruff-0.12.9-py3-none-win_arm64.whl", hash = "sha256:63c8c819739d86b96d500cce885956a1a48ab056bbcbc61b747ad494b2485089", size = 11949623, upload-time = "2025-08-14T16:08:52.233Z" },
]

[[package]]
name = "selectolax"
version = "0.3.34"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bf/8c/8bbe1b17098b4e2a63a251361870303c37ad4c3170536277096575c24ca4/selectolax-0.3.34.tar.gz", hash = "sha256:c2cdb30b60994f1e0b74574dd408f1336d2fadd68a3ebab8ea573740dcbf17e2", size = 4706599, upload-time = "2025-08-28T23:17:44.131Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/29/eeb77d1a77599023387d4d00655960dfa3d760557b42a65ef347e29b40b0/selectolax-0.3.34-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2bb74e079098d758bd3d5c77b1c66c90098de305e4084b60981e561acf52c12a", size = 2001199, upload-time = "2025-08-28T23:16:59.467Z" },
{ url = "https://files.pythonhosted.org/packages/21/80/326b9dd2901b64c3c654db9e8841ddc412b9c2af0047b7d43290bbb276be/selectolax-0.3.34-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc39822f714e6e434ceb893e1ccff873f3f88c8db8226ba2f8a5f4a7a0e2aa29", size = 1994171, upload-time = "2025-08-28T23:17:01.206Z" },
{ url = "https://files.pythonhosted.org/packages/15/af/1265e4f9429b3c3cf098ba08cb3264d7e16990ed3029d89e9890012aae76/selectolax-0.3.34-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181b67949ec23b4f11b6f2e426ba9904dd25c73d12c2cb22caf8fae21a363e99", size = 2196092, upload-time = "2025-08-28T23:17:02.574Z" },
{ url = "https://files.pythonhosted.org/packages/1c/41/e67100abd8b0b2a5e1d5d7fa864c31d31e9a2c0bbd08ce4e951235f13143/selectolax-0.3.34-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b09f9d7b22bbb633966ac2019ec059caf735a5bdb4a5784bab0f4db2198fd6a", size = 2233674, upload-time = "2025-08-28T23:17:03.928Z" },
{ url = "https://files.pythonhosted.org/packages/3a/24/7ad043805c9292b4f535071c223d10aad7703b4460d68de1dce9dcf21d3f/selectolax-0.3.34-cp313-cp313-win32.whl", hash = "sha256:6e2ae8a984f82c9373e8a5ec0450f67603fde843fed73675f5187986e9e45b59", size = 1686489, upload-time = "2025-08-28T23:17:05.341Z" },
{ url = "https://files.pythonhosted.org/packages/6b/79/62666fbfcd847c0cfc2b75b496bfa8382d765e7a3d5a2c792004760a6e61/selectolax-0.3.34-cp313-cp313-win_amd64.whl", hash = "sha256:96acd5414aaf0bb8677258ff7b0f494953b2621f71be1e3d69e01743545509ec", size = 1789924, upload-time = "2025-08-28T23:17:06.708Z" },
{ url = "https://files.pythonhosted.org/packages/5d/b5/0bb579210a7de36d97c359016e77119513d3e810c61e99ade72089bc1b4d/selectolax-0.3.34-cp313-cp313-win_arm64.whl", hash = "sha256:1d309fd17ba72bb46a282154f75752ed7746de6f00e2c1eec4cd421dcdadf008", size = 1737480, upload-time = "2025-08-28T23:17:08.575Z" },
{ url = "https://files.pythonhosted.org/packages/b8/5c/ab87e8ecb3c6aa1053d1c6d1eba0e47e292cc72aff0f6fbb89d920d4d87c/selectolax-0.3.34-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:3e9c4197563c9b62b56dd7545bfd993ce071fd40b8779736e9bc59813f014c23", size = 2000587, upload-time = "2025-08-28T23:17:10.327Z" },
{ url = "https://files.pythonhosted.org/packages/72/8e/5c08bd5628f73ab582696f8349138a569115a0fd6ab71842e4115ceec4ff/selectolax-0.3.34-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f96eaa0da764a4b9e08e792c0f17cce98749f1406ffad35e6d4835194570bdbf", size = 1994327, upload-time = "2025-08-28T23:17:11.709Z" },
{ url = "https://files.pythonhosted.org/packages/ac/29/02b22eff289b29ee3f869a85e4be4f7f3cf4b480d429bb18aab014848917/selectolax-0.3.34-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:412ce46d963444cd378e9f3197a2f30b05d858722677a361fc44ad244d2bb7db", size = 2201620, upload-time = "2025-08-28T23:17:13.538Z" },
{ url = "https://files.pythonhosted.org/packages/6d/d3/bdd3a94bb1276be4ef4371dbfd254137b22f5c54a94d051a8d72c3956dc6/selectolax-0.3.34-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:58dd7dc062b0424adb001817bf9b05476d165a4db1885a69cac66ca16b313035", size = 2233487, upload-time = "2025-08-28T23:17:14.921Z" },
{ url = "https://files.pythonhosted.org/packages/e6/6a/5d551c570f29bfca5815f45fa6e6a3310cc5bc6c9b1073a968d71f73612b/selectolax-0.3.34-cp314-cp314-win32.whl", hash = "sha256:4255558fa48e3685a13f3d9dfc84586146c7b0b86e44c899ac2ac263357c987f", size = 1779755, upload-time = "2025-08-28T23:17:16.322Z" },
{ url = "https://files.pythonhosted.org/packages/cc/dc/5def41b07cb3b917841022489e6bd6c3277363c23b44eca00a0ada93221c/selectolax-0.3.34-cp314-cp314-win_amd64.whl", hash = "sha256:6cbf2707d79afd7e15083f3f32c11c9b6e39a39026c8b362ce25959842a837b6", size = 1877332, upload-time = "2025-08-28T23:17:17.766Z" },
{ url = "https://files.pythonhosted.org/packages/19/0f/63da99be8f78bbfca0cb3f9ad71b7475ab97383f830c86a9abd29c6d3f25/selectolax-0.3.34-cp314-cp314-win_arm64.whl", hash = "sha256:3aa83e4d1f5f5534c9d9e44fc53640c82edc7d0eef6fca0829830cccc8df9568", size = 1831124, upload-time = "2025-08-28T23:17:19.744Z" },
{ url = "https://files.pythonhosted.org/packages/39/5c/07d8031c6c106de10ff42b4440ad7fa6a038650942bb2e194e4eb9ffec6d/selectolax-0.3.34-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:bb0b9002974ec7052f7eb1439b8e404e11a00a26affcbdd73fc53fc55beec809", size = 2023889, upload-time = "2025-08-28T23:17:21.222Z" },
{ url = "https://files.pythonhosted.org/packages/fd/80/fa8220c2eae44928b5ae73eccd44baedb328109f115c948d796c46d11048/selectolax-0.3.34-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:38e5fdffab6d08800a19671ac9641ff9ca6738fad42090f4dd0da76e4db29582", size = 2011882, upload-time = "2025-08-28T23:17:22.844Z" },
{ url = "https://files.pythonhosted.org/packages/f6/02/657089f68f59308bd90137102a7f6da0c3770128ae7245e1290e99f5a48d/selectolax-0.3.34-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:871d35e19dfde9ee83c1df139940c2e5cdf6a50ef3d147a0e9acf382b63b5b3e", size = 2221871, upload-time = "2025-08-28T23:17:24.259Z" },
{ url = "https://files.pythonhosted.org/packages/d2/56/1ad7877f9b2b12f616a8847eca0a3047c6b5ed14588f21fe1f6915357efb/selectolax-0.3.34-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f3f269bc53bc84ccc166704263712f4448130ec827a38a0df230cffe3dc46a9", size = 2241032, upload-time = "2025-08-28T23:17:25.76Z" },
{ url = "https://files.pythonhosted.org/packages/60/c0/30ce665b7382f663fdbb282748ddee392a61c85f51862776b128d8644d45/selectolax-0.3.34-cp314-cp314t-win32.whl", hash = "sha256:b957d105c2f3d86de872f61be1c9a92e1d84580a5ec89a413282f60ffb3f7bc1", size = 1828494, upload-time = "2025-08-28T23:17:27.447Z" },
{ url = "https://files.pythonhosted.org/packages/a4/9e/11d023ad74d0d1a48cefdddbb2d00365c4d9a97735d7c24c0f206cd1babb/selectolax-0.3.34-cp314-cp314t-win_amd64.whl", hash = "sha256:9c609d639ce09154d688063bb830dc351fb944fa52629e25717dbab45ad04327", size = 1951608, upload-time = "2025-08-28T23:17:29.327Z" },
{ url = "https://files.pythonhosted.org/packages/cc/20/a5f93b84e3e6de9756dc82465c0dff57b1c8a25b1815bca0817e4342494c/selectolax-0.3.34-cp314-cp314t-win_arm64.whl", hash = "sha256:6359e94d66fb4fce9fb7c9d18252c3d8cba28b90f7412da8ce610bd77746f750", size = 1852855, upload-time = "2025-08-28T23:17:30.746Z" },
]

[[package]]
name = "setuptools"
version = "80.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
]

[[package]]
name = "shellingham"
version = "1.5.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
]

[[package]]
name = "six"
version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
]

[[package]]
name = "sniffio"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]

[[package]]
name = "soupsieve"
version = "2.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" },
]

[[package]]
name = "stevedore"
version = "5.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pbr" },
]
sdist = { url = "https://files.pythonhosted.org/packages/28/3f/13cacea96900bbd31bb05c6b74135f85d15564fc583802be56976c940470/stevedore-5.4.1.tar.gz", hash = "sha256:3135b5ae50fe12816ef291baff420acb727fcd356106e3e9cbfa9e5985cd6f4b", size = 513858, upload-time = "2025-02-20T14:03:57.285Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f7/45/8c4ebc0c460e6ec38e62ab245ad3c7fc10b210116cea7c16d61602aa9558/stevedore-5.4.1-py3-none-any.whl", hash = "sha256:d10a31c7b86cba16c1f6e8d15416955fc797052351a56af15e608ad20811fcfe", size = 49533, upload-time = "2025-02-20T14:03:55.849Z" },
]

[[package]]
name = "tiktoken"
version = "0.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "regex" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" },
{ url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" },
{ url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" },
{ url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" },
{ url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" },
{ url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" },
{ url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" },
{ url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" },
{ url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" },
{ url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" },
{ url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" },
{ url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" },
{ url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" },
{ url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" },
{ url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" },
{ url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" },
{ url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" },
{ url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" },
{ url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" },
{ url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" },
{ url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" },
{ url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" },
{ url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" },
{ url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" },
{ url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" },
{ url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" },
{ url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" },
{ url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" },
]

[[package]]
name = "tld"
version = "0.13.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000, upload-time = "2025-05-21T22:18:29.341Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718, upload-time = "2025-05-21T22:18:25.811Z" },
]

[[package]]
name = "trafilatura"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "courlan" },
{ name = "htmldate" },
{ name = "justext" },
{ name = "lxml" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404, upload-time = "2024-12-03T15:23:24.16Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557, upload-time = "2024-12-03T15:23:21.41Z" },
]

[[package]]
name = "typer"
version = "0.16.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "rich" },
{ name = "shellingham" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/43/78/d90f616bf5f88f8710ad067c1f8705bf7618059836ca084e5bb2a0855d75/typer-0.16.1.tar.gz", hash = "sha256:d358c65a464a7a90f338e3bb7ff0c74ac081449e53884b12ba658cbd72990614", size = 102836, upload-time = "2025-08-18T19:18:22.898Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/76/06dbe78f39b2203d2a47d5facc5df5102d0561e2807396471b5f7c5a30a1/typer-0.16.1-py3-none-any.whl", hash = "sha256:90ee01cb02d9b8395ae21ee3368421faf21fa138cb2a541ed369c08cec5237c9", size = 46397, upload-time = "2025-08-18T19:18:21.663Z" },
]

[[package]]
name = "typing-extensions"
version = "4.14.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" },
]

[[package]]
name = "typing-inspection"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
]

[[package]]
name = "tzdata"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
]

[[package]]
name = "tzlocal"
version = "5.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" },
]

[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]

[[package]]
name = "uv-build"
version = "0.8.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/58/1d/109827cffcdd2430783450591083a3cc9b80c8d34f962ff86e00a7d73eaf/uv_build-0.8.12.tar.gz", hash = "sha256:49666685059bf5c62e5634371b00b2012ebe3e4e4d0f479cff0400bf66ad1e3a", size = 322245, upload-time = "2025-08-18T23:59:48.408Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6d/6e/75995ef959314680fc127c3d947bc2dec1fed57a0fb400b81270dda01132/uv_build-0.8.12-py3-none-linux_armv6l.whl", hash = "sha256:03cd118ae8731aeca7994a48d6f23a5d4aacef5ee9c88bc60daf99ad698cefae", size = 1318465, upload-time = "2025-08-18T23:59:19.615Z" },
{ url = "https://files.pythonhosted.org/packages/fc/55/fa65b463af6b2c1738b81d6153975ca3b1a07056552f0993c2cf7b324018/uv_build-0.8.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:23d3d46cd619640b4b3e2977cfe629fb898586d21b8b641c9385021b1755fde5", size = 1299484, upload-time = "2025-08-18T23:59:23.737Z" },
{ url = "https://files.pythonhosted.org/packages/55/21/14fb0309c64e324f13f309460fc5a1ebf4872c1f91be89d50039c8e3a91c/uv_build-0.8.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a6676b94db118f4b3e903acf52f4acc6e8b558330d576a8438181726b47bad15", size = 1177028, upload-time = "2025-08-18T23:59:25.052Z" },
{ url = "https://files.pythonhosted.org/packages/dc/ae/61ebacd6b43f97300409412ba99d274305919bbda367c44ea4b114c91ac5/uv_build-0.8.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:2135094eab1657c121a74176a41f2ad30066962f476dac11b6c48ad6cb279392", size = 1367327, upload-time = "2025-08-18T23:59:26.676Z" },
{ url = "https://files.pythonhosted.org/packages/d4/f7/d8c29e322ecb569774e90f3e9a1b8018465a4c88e62c6083aa91f7c53de9/uv_build-0.8.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20199b48eebf3a07046d5988b4eca8c3a8c83e50299e8e6bba085bf8f2e02611", size = 1274839, upload-time = "2025-08-18T23:59:28.034Z" },
{ url = "https://files.pythonhosted.org/packages/a3/be/63ef8eb542b98d3d4536b8519f9e4d4dbf8f52443975740be9f833fa4985/uv_build-0.8.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9fdd226820cfdba719779f4ccbf594258177f67ef1907141a8b959757c26d55c", size = 1426207, upload-time = "2025-08-18T23:59:29.687Z" },
{ url = "https://files.pythonhosted.org/packages/80/b0/3ea05c1cdbc32fd13e0e97d56e8b3be4cd350ed5e6d9aa137ebe65afb5ae/uv_build-0.8.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:9c76003c6af6c6949f796448458bb104c5d3f7d9a1ced3f3aeed613e2f47677e", size = 1577750, upload-time = "2025-08-18T23:59:30.983Z" },
{ url = "https://files.pythonhosted.org/packages/53/ed/1391d420efdbeb07353db1404e34830a322fe2efb64853c0d4fcda315276/uv_build-0.8.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dfe32cdb94c85981597d40efc08c01ff30267db18935df50ffcef1258e091d52", size = 1481257, upload-time = "2025-08-18T23:59:32.248Z" },
{ url = "https://files.pythonhosted.org/packages/26/28/bc6c7d00fb3a4713f85359c8687067111021542f379d5ff49136cfbe9b64/uv_build-0.8.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a96aa67f8071a025b41abc661ddd0cec2731d1530095479f2b810b1c04a09252", size = 1418075, upload-time = "2025-08-18T23:59:33.961Z" },
{ url = "https://files.pythonhosted.org/packages/23/05/39236c6e86a5d49a0d4c80064907665db34a8c180ba3110bca436ddbb8f3/uv_build-0.8.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6878f2179dafb1053a413ad41f2f9640655489972bec6211aaf8d492b49614af", size = 1421678, upload-time = "2025-08-18T23:59:35.653Z" },
{ url = "https://files.pythonhosted.org/packages/66/d7/731bec1f5955de6ea33cffcf568a81375dfe80e17215dd66cdf659fcd28c/uv_build-0.8.12-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:7bce23578e8abbb40fd70aebed1afd27d132915e451551322f10aa304dd8bf26", size = 1365561, upload-time = "2025-08-18T23:59:37.664Z" },
{ url = "https://files.pythonhosted.org/packages/bb/b8/1219fa9d21c1deacd8d8b9f4b4193596ea6cdbef718e299b371354c19897/uv_build-0.8.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a9c57674dd757f8208b6e4929abd5bcb6b63bab1ea5fab0f3feaa4c40236c7dd", size = 1375369, upload-time = "2025-08-18T23:59:38.948Z" },
{ url = "https://files.pythonhosted.org/packages/ae/31/700da060b59d4bb163f146d2f673292937595efa77e71a73842b945e49c7/uv_build-0.8.12-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:021a75dec60bf14f0bebdf10aafa08a03ad5d2c9bfd82565b77ac56a82316911", size = 1290573, upload-time = "2025-08-18T23:59:40.223Z" },
{ url = "https://files.pythonhosted.org/packages/d5/9b/711a875605583bed36ff18ccd5351f2582cafedef4720a667e90e6023e3a/uv_build-0.8.12-py3-none-musllinux_1_1_i686.whl", hash = "sha256:2884df52ef9c47bccebf0f616380b281078a4e50fd29a6d44e841f2e2532f687", size = 1380155, upload-time = "2025-08-18T23:59:41.868Z" },
{ url = "https://files.pythonhosted.org/packages/67/31/4b0269dbebd18e406ec565ead0c0b05909d255cd4650dfac1b198542e92d/uv_build-0.8.12-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:c8072519032f4c90e36ea4650fa4a86a30a6d3355082a31f996e7c9e6a6e92f6", size = 1462583, upload-time = "2025-08-18T23:59:43.164Z" },
{ url = "https://files.pythonhosted.org/packages/f3/01/2d47a047109ac53d40c3912d15a4aeadfa67c3937dcd7cd854f865e25fef/uv_build-0.8.12-py3-none-win32.whl", hash = "sha256:45830715e022b85994c06db03ea1a337684cef441ab3ecd38d4b03071845f662", size = 1251560, upload-time = "2025-08-18T23:59:44.425Z" },
{ url = "https://files.pythonhosted.org/packages/c6/11/d8a0a1b87e4cca37abbeb3756119260d9f84bc954cec0bfb04447138a19e/uv_build-0.8.12-py3-none-win_amd64.whl", hash = "sha256:b549a205e1a7487f278baa5fd59dae6901955be7af024dea9d17615e64312cf4", size = 1329565, upload-time = "2025-08-18T23:59:45.932Z" },
{ url = "https://files.pythonhosted.org/packages/d4/0d/c2b30dd90d9fbd0ddef6db4b0fc60e80643d0ef2501229078dcff79067f1/uv_build-0.8.12-py3-none-win_arm64.whl", hash = "sha256:f0c05d62de6c8cb59eb686ac8c6a4e9549f81603864df4f853923eefc850f674", size = 1236604, upload-time = "2025-08-18T23:59:47.094Z" },
]