Initial commit

This commit is contained in:
2025-10-05 13:55:28 +02:00
commit 68d521677a
767 changed files with 46947 additions and 0 deletions
+52
View File
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# Dump the MySQL database described by DATABASE_URL, gzip the dump and send
# the archive to a Telegram chat through the Bot API `sendDocument` method.
#
# Configuration is read from the environment and, when the file exists, from
# `../.env.local` (relative to this script):
#   DATABASE_URL  mysql://user:password@host:port/dbname[?options]  (required)
#   DEVY_CHANNEL  target Telegram chat id                           (required)
#   DEVY_TOKEN    Telegram bot token                                (required)
#   DEVY_TOPIC    message thread id inside the chat                 (optional)
#
# Exit status: 0 on success, non-zero as soon as any step fails.
set -euo pipefail

# Get the script directory and define data directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DATA_DIR="${SCRIPT_DIR}/../data"
DUMP_FILE="${DATA_DIR}/data.sql"
ARCHIVE_FILE="${DUMP_FILE}.gz"
ENV_FILE="${SCRIPT_DIR}/../.env.local"

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the dump and its archive; registered as an EXIT trap so the files
# never outlive the script, whether it succeeds or fails.
cleanup() {
  rm -f -- "$ARCHIVE_FILE" "$DUMP_FILE"
}

#######################################
# Export every KEY=VALUE line of an env file into the environment.
# Lines that are not assignments (comments, blanks) are ignored. One pair of
# surrounding single or double quotes is stripped from the value. Each
# variable is exported on its own, so several variables in the file can no
# longer be merged into the value of the first one.
# Arguments: $1 - path to the env file
#######################################
load_env_file() {
  local env_file=$1
  local assignment_re='^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
  local double_quoted_re='^"(.*)"$'
  local single_quoted_re="^'(.*)'\$"
  local line key value

  # `|| [[ -n "$line" ]]` keeps the last line when the file lacks a final newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'} # tolerate CRLF line endings
    [[ "$line" =~ $assignment_re ]] || continue
    key=${BASH_REMATCH[2]}
    value=${BASH_REMATCH[3]}
    if [[ "$value" =~ $double_quoted_re || "$value" =~ $single_quoted_re ]]; then
      value=${BASH_REMATCH[1]}
    fi
    export "${key}=${value}"
  done < "$env_file"
}

# Ensure data directory exists
mkdir -p "$DATA_DIR"

# Load environment variables from .env.local
if [[ -f "$ENV_FILE" ]]; then
  load_env_file "$ENV_FILE"
fi

# Parse DATABASE_URL into components
if [[ -z "${DATABASE_URL:-}" ]]; then
  die "DATABASE_URL is not set in .env.local"
fi

regex="^mysql:\/\/([^:]+):([^@]+)@([^:]+):([0-9]+)\/([^?]+)"
if [[ "$DATABASE_URL" =~ $regex ]]; then
  DB_USER="${BASH_REMATCH[1]}"
  DB_PASSWORD="${BASH_REMATCH[2]}"
  DB_HOST="${BASH_REMATCH[3]}"
  DB_PORT="${BASH_REMATCH[4]}"
  DB_NAME="${BASH_REMATCH[5]}"
else
  die "Invalid DATABASE_URL format"
fi

# Validate the Telegram settings before spending time on the dump.
: "${DEVY_CHANNEL:?DEVY_CHANNEL is not set}"
: "${DEVY_TOKEN:?DEVY_TOKEN is not set}"

# From here on files are created on disk: make sure they are always removed.
trap cleanup EXIT

# Step 1: Dump the database
# NOTE(review): the password is passed on the command line and may be visible
# to other local users through the process list; consider a client option
# file if that matters on this host.
mysqldump --host="${DB_HOST}" --port="${DB_PORT}" \
  --user="${DB_USER}" --password="${DB_PASSWORD}" \
  --max_allowed-packet=1G --net-buffer-length=32704 --skip-extended-insert \
  "${DB_NAME}" > "$DUMP_FILE"

gzip -f "$DUMP_FILE"

# Step 2: Send the file to Telegram
# --fail turns an HTTP error answer into a non-zero exit status so a rejected
# upload is not reported as a success.
CURL_ARGS=(--fail --show-error -F "chat_id=${DEVY_CHANNEL}")
if [[ -n "${DEVY_TOPIC:-}" ]]; then
  CURL_ARGS+=(-F "message_thread_id=${DEVY_TOPIC}")
fi
CURL_ARGS+=(-F "document=@${ARCHIVE_FILE}")

curl "${CURL_ARGS[@]}" "https://api.telegram.org/bot${DEVY_TOKEN}/sendDocument"

# Step 3: Clean up — performed by the EXIT trap registered above.
+18
View File
@@ -0,0 +1,18 @@
#!/usr/bin/env php
<?php

use App\SharedKernel\Infrastructure\Framework\Symfony\Kernel;
use Symfony\Bundle\FrameworkBundle\Console\Application;

// Console entry point: loads vendor/autoload_runtime.php and returns a closure
// that builds the console Application around the project Kernel.

// Bootstrap file expected one directory above this script, under vendor/.
$runtimeAutoloader = dirname(__DIR__).'/vendor/autoload_runtime.php';

// Fail early with an actionable message when the bootstrap file is absent.
if (false === is_file($runtimeAutoloader)) {
    throw new LogicException('Symfony Runtime is missing. Try running "composer require symfony/runtime".');
}

require_once $runtimeAutoloader;

// The closure receives a context array and reads its APP_ENV and APP_DEBUG
// entries (presumably supplied by the runtime loaded above — confirm).
return function (array $context) {
    $environment = $context['APP_ENV'];
    $debug = (bool) $context['APP_DEBUG'];

    $kernel = new Kernel($environment, $debug);

    return new Application($kernel);
};
+42
View File
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Start one detached `app:crawl` console process per news source listed in
# SOURCES. Each process writes its stdout and stderr to
# ${LOG_DIR}/crawling-<source>.log. The script returns as soon as every
# process has been launched; it does not wait for them to finish.

SOURCES=(
  "africanewsrdc.net"
  "angazainstitute.ac.cd"
  "b-onetv.cd"
  "bukavufm.com"
  "changement7.net"
  "congoactu.net"
  "congoindependant.com"
  "congoquotidien.com"
  "cumulard.cd"
  "environews-rdc.net"
  "freemediardc.info"
  "geopolismagazine.org"
  "habarirdc.net"
  "infordc.com"
  "kilalopress.net"
  "laprosperiteonline.net"
  "laprunellerdc.cd"
  "lesmedias.net"
  "lesvolcansnews.net"
  "netic-news.net"
  "objectif-infos.cd"
  "scooprdc.net"
  "journaldekinshasa.com"
  "lepotentiel.cd"
  "acturdc.com"
  "matininfos.net"
)

# Kept as an array so every word reaches the command as its own argument
# without relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:crawl)
LOG_DIR="/var/www/html/news.devscast.tech/var"

# Without the log directory every redirection below would fail while the
# script still announced success, so stop here instead.
if ! mkdir -p "$LOG_DIR"; then
  printf 'Cannot create log directory: %s\n' "$LOG_DIR" >&2
  exit 1
fi

# NOTE: this removes every *.log file directly inside LOG_DIR, including logs
# written there by anything else, not only the files created below.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  LOG_FILE="${LOG_DIR}/crawling-${SOURCE}.log"
  # nohup + & detaches the crawler so it survives the end of this script.
  nohup "${BASE_CMD[@]}" "$SOURCE" -vvv > "$LOG_FILE" 2>&1 &
done

echo "All crawlers started in the background."
+15
View File
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
#
# Start one detached `app:open-graph` console process per source listed in
# SOURCES. Each process writes its stdout and stderr to
# ${LOG_DIR}/<source>.log. The script returns once every process has been
# launched; it does not wait for them to finish.

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Kept as an array so every word reaches the command as its own argument
# without relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:open-graph)
LOG_DIR="/var/www/html/news.devscast.tech/var"

# Without the log directory every redirection below would fail while the
# script still announced success, so stop here instead.
if ! mkdir -p "$LOG_DIR"; then
  printf 'Cannot create log directory: %s\n' "$LOG_DIR" >&2
  exit 1
fi

# NOTE: this removes every *.log file directly inside LOG_DIR, including logs
# written there by anything else, not only the files created below.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  LOG_FILE="${LOG_DIR}/${SOURCE}.log"
  # nohup + & detaches the process so it survives the end of this script.
  nohup "${BASE_CMD[@]}" "$SOURCE" -vvv --no-interaction > "$LOG_FILE" 2>&1 &
done

echo "All open graph crawlers started in the background."
+3
View File
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
#
# Stop every running process whose command line contains "/bin/console app:".
# Processes first receive SIGTERM so they get a chance to shut down cleanly;
# whatever is still alive after the grace period is killed with SIGKILL.
# Exits 0 when nothing matches (nothing to stop).

PATTERN='/bin/console app:'
GRACE_SECONDS=5

# pgrep/pkill -f match against the full command line and never report
# themselves, which replaces the ps | grep | grep -v grep | awk chain.
pgrep -f "$PATTERN" > /dev/null || exit 0

pkill -TERM -f "$PATTERN"

# Poll once per second; leave as soon as every matching process is gone.
for ((elapsed = 0; elapsed < GRACE_SECONDS; elapsed++)); do
  pgrep -f "$PATTERN" > /dev/null || exit 0
  sleep 1
done

# Last resort for processes that ignored SIGTERM. Exit status 1 from pkill
# only means that nothing matched anymore, which is a success here.
pkill -KILL -f "$PATTERN" || [[ $? -eq 1 ]]
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#
# Start detached `app:update --direction=forward` console processes, one per
# source listed in SOURCES. For "7sur7.cd" one process is started per
# category instead. Output goes to ${LOG_DIR}/<source>.log, or
# ${LOG_DIR}/<source>.<category>.log for per-category runs. The script
# returns once every process has been launched; it does not wait for them.

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Kept as an array so every word reaches the command as its own argument
# without relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:update)
LOG_DIR="/var/www/html/news.devscast.tech/var"

# Without the log directory every redirection below would fail while the
# script still announced success, so stop here instead.
if ! mkdir -p "$LOG_DIR"; then
  printf 'Cannot create log directory: %s\n' "$LOG_DIR" >&2
  exit 1
fi

# NOTE: this removes every *.log file directly inside LOG_DIR, including logs
# written there by anything else, not only the files created below.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  if [[ "$SOURCE" == "7sur7.cd" ]]; then
    # This source is updated category by category.
    CATEGORIES=("politique" "economie" "culture" "sport" "societe")
    for CATEGORY in "${CATEGORIES[@]}"; do
      LOG_FILE="${LOG_DIR}/${SOURCE}.${CATEGORY}.log"
      nohup "${BASE_CMD[@]}" "$SOURCE" --direction=forward -vvv --category="$CATEGORY" > "$LOG_FILE" 2>&1 &
    done
  else
    LOG_FILE="${LOG_DIR}/${SOURCE}.log"
    nohup "${BASE_CMD[@]}" "$SOURCE" --direction=forward -vvv > "$LOG_FILE" 2>&1 &
  fi
done

echo "All crawlers started in the background."
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#
# Run `app:update --direction=forward` in the foreground, one source after
# the other, for every entry of SOURCES. For "7sur7.cd" the command is run
# once per category instead. Output is shown on the terminal and copied to
# ${LOG_DIR}/<source>.log, or ${LOG_DIR}/<source>.<category>.log for
# per-category runs.
#
# Exit status: 0 when every run succeeded, 1 when at least one run failed
# (the remaining runs are still executed).

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Kept as an array so every word reaches the command as its own argument
# without relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:update)
LOG_DIR="/var/www/html/news.devscast.tech/var"
FAILURES=0

#######################################
# Run BASE_CMD with the given arguments, mirroring its output to a log file.
# Globals:   BASE_CMD (read)
# Arguments: $1 - log file path; remaining arguments are passed to BASE_CMD
# Returns:   exit status of BASE_CMD itself (not of tee)
#######################################
run_and_log() {
  local log_file=$1
  shift
  "${BASE_CMD[@]}" "$@" 2>&1 | tee "$log_file"
  # A pipeline reports the status of its last stage (tee); PIPESTATUS[0]
  # holds the status of the console command.
  return "${PIPESTATUS[0]}"
}

# Without the log directory tee could not write any log file.
if ! mkdir -p "$LOG_DIR"; then
  printf 'Cannot create log directory: %s\n' "$LOG_DIR" >&2
  exit 1
fi

# NOTE: this removes every *.log file directly inside LOG_DIR, including logs
# written there by anything else, not only the files created below.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  if [[ "$SOURCE" == "7sur7.cd" ]]; then
    # This source is updated category by category.
    CATEGORIES=("politique" "economie" "culture" "sport" "societe")
    for CATEGORY in "${CATEGORIES[@]}"; do
      LOG_FILE="${LOG_DIR}/${SOURCE}.${CATEGORY}.log"
      if ! run_and_log "$LOG_FILE" "$SOURCE" --direction=forward -vvv --category="$CATEGORY"; then
        printf 'Crawler failed: %s (category %s), see %s\n' "$SOURCE" "$CATEGORY" "$LOG_FILE" >&2
        FAILURES=$((FAILURES + 1))
      fi
    done
  else
    LOG_FILE="${LOG_DIR}/${SOURCE}.log"
    if ! run_and_log "$LOG_FILE" "$SOURCE" --direction=forward -vvv; then
      printf 'Crawler failed: %s, see %s\n' "$SOURCE" "$LOG_FILE" >&2
      FAILURES=$((FAILURES + 1))
    fi
  fi
done

echo "All crawlers finished."

if ((FAILURES > 0)); then
  printf '%d crawler run(s) failed.\n' "$FAILURES" >&2
  exit 1
fi