Initial commit

This commit is contained in:
2025-10-05 13:55:28 +02:00
commit 68d521677a
767 changed files with 46947 additions and 0 deletions
+97
View File
@@ -0,0 +1,97 @@
# Fetching and crawling configuration.
# NOTE(review): confirm that `client` and `crawler` are both meant to be
# children of `fetch`.
fetch:
  # Settings grouped under the `client` key.
  client:
    timeout: 20  # presumably seconds — TODO confirm
    # Quoted so the parentheses, `+` and URL can never be re-typed or split.
    user_agent: "Basango/0.1 (+https://github.com/bernard-ng/basango)"
    follow_redirects: true
    verify_ssl: true
    rotate_user_agent: true
    # Retry tuning; the backoff values are presumably seconds — TODO confirm.
    max_retries: 3
    backoff_initial: 1.0
    backoff_multiplier: 2.0
    backoff_max: 30.0
    respect_retry_after: true
  # Settings grouped under the `crawler` key.
  crawler:
    notify: false
    use_multi_threading: false
    max_workers: 5
# Source configurations, grouped by source kind (`html`, `wordpress`).
# NOTE(review): confirm that `categories`, `pagination_template` and the
# `supports_*` / `requires_*` flags belong to the source entry itself rather
# than to `source_date` or `source_selectors`.
# NOTE(review): in the double-quoted `pattern` values, `\\d` loads as `\d` and
# `\/` (a YAML 1.2 escape) loads as a bare `/`, so the loaded regex has
# unescaped slashes between its `/` delimiters — confirm the consumer expects
# this.
sources:
  html:
    - source_id: "radiookapi.net"
      source_url: https://www.radiookapi.net
      source_date:
        pattern: "/(\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
        replacement: "$3-$2-$1 $4"
      source_selectors:
        articles: ".view-content > .views-row.content-row"
        article_title: ".views-field-title a"
        article_link: ".views-field-title a"
        article_body: ".field-name-body"
        article_date: ".views-field-created"
        article_categories: ".views-field-field-cat-gorie a"
        pagination: "ul.pagination > li a(:last-child)"
      pagination_template: "/actualite?page={page}"
      supports_categories: false
      requires_details: false
      requires_rate_limit: false

    - source_id: "7sur7.cd"
      source_url: https://7sur7.cd
      source_date:
        pattern: "/\\w{3} (\\d{2})\/(\\d{2})\/(\\d{4}) - (\\d{2}:\\d{2})/"
        replacement: "$3-$2-$1 $4"
      categories: ["politique", "economie", "culture", "sport", "societe"]
      source_selectors:
        articles: ".view-content > .row.views-row"
        article_title: ".views-field-title a"
        article_link: ".views-field-title a"
        article_body: ".field.field--name-body"
        article_date: ".views-field-created"
        pagination: "ul.pagination > li a(:last-child)"
      pagination_template: "/index.php/category/{category}?page={page}"
      supports_categories: true
      requires_details: false
      requires_rate_limit: false

    - source_id: "mediacongo.net"
      source_url: https://mediacongo.net
      source_date:
        format: "%d.%m.%Y %H:%M"
      source_selectors:
        articles: ".for_aitems > .article_other_item"
        article_title: "img"
        article_link: "a(:first-child)"
        article_categories: "a.color_link"
        article_body: ".article_ttext"
        article_date: ".article_other_about"
        pagination: ".nav > a(:last-child)"
      pagination_template: "/articles.html?page={page}"
      supports_categories: false
      requires_details: true
      requires_rate_limit: false

    - source_id: "actualite.cd"
      source_url: https://actualite.cd
      source_date:
        pattern: "/(\\d{1}) (\\d{1,2}) (\\d{2}) (\\d{4}) - (\\d{2}:\\d{2})/"
        replacement: "$4-$3-$2 $5"
      source_selectors:
        articles: "#views-bootstrap-taxonomy-term-page-2 > div > div"
        article_title: "#actu-titre a"
        article_link: "#actu-titre a"
        article_categories: "#actu-cat a"
        article_body: ".views-field.views-field-body"
        article_date: "#p-date"
      pagination_template: "/actualite?page={page}"
      supports_categories: false
      requires_details: true
      requires_rate_limit: false

  wordpress:
    - source_id: "beto.cd"
      source_url: https://beto.cd
      requires_rate_limit: true

    - source_id: "newscd.net"
      source_url: https://newscd.net