Files
basango/packages/domain/config/crawler.json
T

263 lines
8.6 KiB
JSON

{
"crawler": {
"backend": {
"endpoint": "%env(BASANGO_API_CRAWLER_ENDPOINT)%",
"token": "%env(BASANGO_API_CRAWLER_TOKEN)%"
},
"fetch": {
"async": {
"prefix": "basango:crawler",
"queues": {
"details": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_DETAILS)%",
"listing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_LISTING)%",
"processing": "%env(BASANGO_CRAWLER_ASYNC_QUEUE_PROCESSING)%"
},
"redisUrl": "%env(BASANGO_CRAWLER_ASYNC_REDIS_URL)%",
"ttl": {
"default": 600,
"failure": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_FAILURE)%",
"result": "%env(number:BASANGO_CRAWLER_ASYNC_TTL_RESULT)%"
}
},
"client": {
"backoffInitial": 1,
"backoffMax": 30,
"backoffMultiplier": 2,
"followRedirects": true,
"maxRetries": "%env(number:BASANGO_CRAWLER_FETCH_MAX_RETRIES)%",
"respectRetryAfter": "%env(boolean:BASANGO_CRAWLER_FETCH_RESPECT_RETRY_AFTER)%",
"rotate": true,
"timeout": 20,
"userAgent": "%env(BASANGO_CRAWLER_FETCH_USER_AGENT)%",
"verifySsl": true
},
"crawler": {
"direction": "%env(BASANGO_CRAWLER_UPDATE_DIRECTION)%",
"maxWorkers": 5,
"notify": false,
"useMultiThreading": false
}
},
"paths": {
"data": "%env(BASANGO_CRAWLER_DATA_PATH)%",
"root": "%env(BASANGO_CRAWLER_ROOT_PATH)%"
},
"sources": {
"html": [
{
"paginationTemplate": "actualite",
"requiresDetails": true,
"requiresRateLimit": false,
"sourceDate": {},
"sourceId": "radiookapi.net",
"sourceKind": "html",
"sourceSelectors": {
"articleBody": ".field-name-body",
"articleCategories": ".views-field-field-cat-gorie a",
"articleDate": "head > meta[property=\"article:published_time\"]",
"articleLink": ".views-field-title a",
"articles": ".view-content > .views-row.content-row",
"articleTitle": "h1.page-header",
"pagination": "ul.pagination > li.pager-last > a"
},
"sourceUrl": "https://www.radiookapi.net",
"supportsCategories": false
},
{
"categories": ["politique", "economie", "culture", "sport", "societe"],
"paginationTemplate": "index.php/category/{category}",
"requiresDetails": true,
"requiresRateLimit": false,
"sourceDate": {},
"sourceId": "7sur7.cd",
"sourceKind": "html",
"sourceSelectors": {
"articleBody": "div[property=\"schema:text\"].field.field--name-body",
"articleDate": "head > meta[property=\"article:published_time\"]",
"articleLink": ".views-field-title a",
"articles": ".view-content > .row.views-row",
"articleTitle": ".views-field-title a",
"pagination": "ul.pagination > li.pager__item.pager__item--last > a"
},
"sourceUrl": "https://7sur7.cd",
"supportsCategories": true
},
{
"paginationTemplate": "articles.html",
"requiresDetails": true,
"requiresRateLimit": false,
"sourceDate": {
"format": "dd.MM.yyyy"
},
"sourceId": "mediacongo.net",
"sourceKind": "html",
"sourceSelectors": {
"articleBody": ".article_ttext",
"articleCategories": "a.color_link",
"articleDate": ".article_other_about",
"articleLink": "a:first-child",
"articles": ".for_aitems > .article_other_item",
"articleTitle": "h1",
"pagination": "div.pagination > div > a:last-child"
},
"sourceUrl": "https://www.mediacongo.net",
"supportsCategories": false
},
{
"paginationTemplate": "actualite",
"requiresDetails": true,
"requiresRateLimit": false,
"sourceDate": {},
"sourceId": "actualite.cd",
"sourceKind": "html",
"sourceSelectors": {
"articleBody": ".views-field.views-field-body .field-content",
"articleCategories": "#actu-cat",
"articleDate": "head > meta[property=\"article:published_time\"]",
"articleLink": "#actu-titre a",
"articles": "#views-bootstrap-taxonomy-term-page-2 > div > div",
"articleTitle": "h1.page-title"
},
"sourceUrl": "https://actualite.cd",
"supportsCategories": false
}
],
"wordpress": [
{
"requiresRateLimit": true,
"sourceId": "beto.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://beto.cd"
},
{ "sourceId": "newscd.net", "sourceKind": "wordpress", "sourceUrl": "https://newscd.net" },
{
"sourceId": "africanewsrdc.net",
"sourceKind": "wordpress",
"sourceUrl": "https://www.africanewsrdc.net"
},
{
"sourceId": "angazainstitute.ac.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://angazainstitute.ac.cd"
},
{ "sourceId": "b-onetv.cd", "sourceKind": "wordpress", "sourceUrl": "https://b-onetv.cd" },
{
"sourceId": "bukavufm.com",
"sourceKind": "wordpress",
"sourceUrl": "https://bukavufm.com"
},
{
"sourceId": "changement7.net",
"sourceKind": "wordpress",
"sourceUrl": "https://changement7.net"
},
{
"sourceId": "congoactu.net",
"sourceKind": "wordpress",
"sourceUrl": "https://congoactu.net"
},
{
"sourceId": "congoindependant.com",
"sourceKind": "wordpress",
"sourceUrl": "https://www.congoindependant.com"
},
{
"sourceId": "congoquotidien.com",
"sourceKind": "wordpress",
"sourceUrl": "https://www.congoquotidien.com"
},
{
"sourceId": "cumulard.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://www.cumulard.cd"
},
{
"sourceId": "environews-rdc.net",
"sourceKind": "wordpress",
"sourceUrl": "https://environews-rdc.net"
},
{
"sourceId": "freemediardc.info",
"sourceKind": "wordpress",
"sourceUrl": "https://www.freemediardc.info"
},
{
"sourceId": "geopolismagazine.org",
"sourceKind": "wordpress",
"sourceUrl": "https://geopolismagazine.org"
},
{
"sourceId": "habarirdc.net",
"sourceKind": "wordpress",
"sourceUrl": "https://habarirdc.net"
},
{
"sourceId": "infordc.com",
"sourceKind": "wordpress",
"sourceUrl": "https://infordc.com"
},
{
"sourceId": "kilalopress.net",
"sourceKind": "wordpress",
"sourceUrl": "https://kilalopress.net"
},
{
"sourceId": "laprosperiteonline.net",
"sourceKind": "wordpress",
"sourceUrl": "https://laprosperiteonline.net"
},
{
"sourceId": "laprunellerdc.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://laprunellerdc.cd"
},
{
"sourceId": "lesmedias.net",
"sourceKind": "wordpress",
"sourceUrl": "https://lesmedias.net"
},
{
"sourceId": "lesvolcansnews.net",
"sourceKind": "wordpress",
"sourceUrl": "https://lesvolcansnews.net"
},
{
"sourceId": "netic-news.net",
"sourceKind": "wordpress",
"sourceUrl": "https://www.netic-news.net"
},
{
"sourceId": "objectif-infos.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://objectif-infos.cd"
},
{
"sourceId": "scooprdc.net",
"sourceKind": "wordpress",
"sourceUrl": "https://scooprdc.net"
},
{
"sourceId": "journaldekinshasa.com",
"sourceKind": "wordpress",
"sourceUrl": "https://www.journaldekinshasa.com"
},
{
"sourceId": "lepotentiel.cd",
"sourceKind": "wordpress",
"sourceUrl": "https://lepotentiel.cd"
},
{
"sourceId": "acturdc.com",
"sourceKind": "wordpress",
"sourceUrl": "https://acturdc.com"
},
{
"sourceId": "matininfos.net",
"sourceKind": "wordpress",
"sourceUrl": "https://matininfos.net"
}
]
}
}
}