Initial commit
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
#
# Dump the MySQL database described by DATABASE_URL, gzip the dump and send
# the archive to a Telegram chat as a document.
#
# Configuration is read from ../.env.local (relative to this script):
#   DATABASE_URL  mysql://user:password@host:port/dbname[?options]
#   DEVY_TOKEN    Telegram bot token
#   DEVY_CHANNEL  Telegram chat id
#   DEVY_TOPIC    Telegram message thread id (only sent when non-empty)

set -euo pipefail

# Get the script directory and define data directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DATA_DIR="${SCRIPT_DIR}/../data"
DUMP_FILE="${DATA_DIR}/data.sql"
ARCHIVE_FILE="${DUMP_FILE}.gz"
ENV_FILE="${SCRIPT_DIR}/../.env.local"

#######################################
# Remove the dump and its archive. Registered on EXIT so the files are
# deleted on failure paths as well as on success.
#######################################
cleanup() {
  rm -f -- "$ARCHIVE_FILE" "$DUMP_FILE"
}

#######################################
# Export every KEY=VALUE line of an env file into the environment.
# Blank lines, comment lines and lines without an assignment are skipped.
# Trailing whitespace and one pair of matching surrounding quotes are
# stripped from the value. Each variable is exported on its own, so values
# never bleed into one another.
# Arguments: $1 - path to the env file
#######################################
load_env_file() {
  local file=$1
  local line key value first last
  local assignment_re='^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$'

  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}" # tolerate CRLF line endings
    if [[ ! "$line" =~ $assignment_re ]]; then
      continue # comment, blank line, or not an assignment
    fi
    key="${BASH_REMATCH[2]}"
    value="${BASH_REMATCH[3]}"
    # Trim trailing whitespace.
    value="${value%"${value##*[![:space:]]}"}"
    if (( ${#value} >= 2 )); then
      first="${value:0:1}"
      last="${value: -1}"
      if [[ "$first" == "$last" && ( "$first" == '"' || "$first" == "'" ) ]]; then
        value="${value:1:${#value}-2}"
      fi
    fi
    export "${key}=${value}"
  done < "$file"
}

trap cleanup EXIT

# Ensure data directory exists
mkdir -p "$DATA_DIR"

# Load environment variables from .env.local
if [[ -f "$ENV_FILE" ]]; then
  load_env_file "$ENV_FILE"
fi

# Parse DATABASE_URL into components
if [[ -z "${DATABASE_URL:-}" ]]; then
  echo "DATABASE_URL is not set in .env.local" >&2
  exit 1
fi

# NOTE: percent-encoded characters in the URL are used as-is, not decoded.
regex='^mysql://([^:]+):([^@]+)@([^:]+):([0-9]+)/([^?]+)'
if [[ "$DATABASE_URL" =~ $regex ]]; then
  DB_USER="${BASH_REMATCH[1]}"
  DB_PASSWORD="${BASH_REMATCH[2]}"
  DB_HOST="${BASH_REMATCH[3]}"
  DB_PORT="${BASH_REMATCH[4]}"
  DB_NAME="${BASH_REMATCH[5]}"
else
  echo "Invalid DATABASE_URL format" >&2
  exit 1
fi

# Fail before dumping if the upload could not be performed anyway.
: "${DEVY_TOKEN:?DEVY_TOKEN is not set in .env.local}"
: "${DEVY_CHANNEL:?DEVY_CHANNEL is not set in .env.local}"

# Step 1: Dump the database
# The password is handed over through MYSQL_PWD instead of --password so it
# does not appear in the process argument list.
MYSQL_PWD="$DB_PASSWORD" mysqldump \
  --host="${DB_HOST}" --port="${DB_PORT}" \
  --user="${DB_USER}" \
  --max-allowed-packet=1G --net-buffer-length=32704 --skip-extended-insert \
  "${DB_NAME}" > "$DUMP_FILE"

gzip -f "$DUMP_FILE"

# Step 2: Send the file to Telegram
curl_args=(
  --fail --silent --show-error
  -F "chat_id=${DEVY_CHANNEL}"
)
if [[ -n "${DEVY_TOPIC:-}" ]]; then
  curl_args+=(-F "message_thread_id=${DEVY_TOPIC}")
fi
curl_args+=(-F "document=@${ARCHIVE_FILE}")

curl "${curl_args[@]}" "https://api.telegram.org/bot${DEVY_TOKEN}/sendDocument"

# Step 3: Clean up
# Handled by the EXIT trap (cleanup) so it also runs when a step above fails.
|
||||
Executable
+18
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env php
<?php

use App\SharedKernel\Infrastructure\Framework\Symfony\Kernel;
use Symfony\Bundle\FrameworkBundle\Console\Application;

// Location of the runtime autoloader, one directory above this script's folder.
$runtimeAutoloader = dirname(__DIR__).'/vendor/autoload_runtime.php';

// Stop early with an actionable message when the runtime autoloader is absent.
if (false === is_file($runtimeAutoloader)) {
    throw new LogicException('Symfony Runtime is missing. Try running "composer require symfony/runtime".');
}

require_once $runtimeAutoloader;

// Factory returned to the including code (presumably the Symfony Runtime
// component — confirm): builds the console application around a kernel
// configured from the APP_ENV and APP_DEBUG context entries.
return function (array $context) {
    $environment = $context['APP_ENV'];
    $debug = (bool) $context['APP_DEBUG'];

    return new Application(new Kernel($environment, $debug));
};
|
||||
Executable
+42
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
#
# Start one background `app:crawl` console process per news source. Each
# process is detached with nohup and writes stdout and stderr to
# ${LOG_DIR}/crawling-<source>.log.

set -euo pipefail

SOURCES=(
  "africanewsrdc.net"
  "angazainstitute.ac.cd"
  "b-onetv.cd"
  "bukavufm.com"
  "changement7.net"
  "congoactu.net"
  "congoindependant.com"
  "congoquotidien.com"
  "cumulard.cd"
  "environews-rdc.net"
  "freemediardc.info"
  "geopolismagazine.org"
  "habarirdc.net"
  "infordc.com"
  "kilalopress.net"
  "laprosperiteonline.net"
  "laprunellerdc.cd"
  "lesmedias.net"
  "lesvolcansnews.net"
  "netic-news.net"
  "objectif-infos.cd"
  "scooprdc.net"
  "journaldekinshasa.com"
  "lepotentiel.cd"
  "acturdc.com"
  "matininfos.net"
)

# Held as an array so each word reaches nohup as its own argument without
# relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:crawl)
LOG_DIR="/var/www/html/news.devscast.tech/var"

# Under `set -e` the script stops here if the log directory cannot be
# created, instead of launching crawlers whose redirections would all fail.
mkdir -p "$LOG_DIR"

# Every *.log file in LOG_DIR is removed before the crawlers start.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  LOG_FILE="${LOG_DIR}/crawling-${SOURCE}.log"
  nohup "${BASE_CMD[@]}" "$SOURCE" -vvv > "$LOG_FILE" 2>&1 &
done

echo "All crawlers started in the background."
|
||||
Executable
+15
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
#
# Start one background `app:open-graph` console process per news source.
# Each process is detached with nohup, runs with --no-interaction, and writes
# stdout and stderr to ${LOG_DIR}/<source>.log.

set -euo pipefail

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Held as an array so each word reaches nohup as its own argument without
# relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:open-graph)
LOG_DIR="/var/www/html/news.devscast.tech/var"

# Under `set -e` the script stops here if the log directory cannot be created.
mkdir -p "$LOG_DIR"

# Every *.log file in LOG_DIR is removed before the processes start.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  LOG_FILE="${LOG_DIR}/${SOURCE}.log"
  nohup "${BASE_CMD[@]}" "$SOURCE" -vvv --no-interaction > "$LOG_FILE" 2>&1 &
done

echo "All open graph crawlers started in the background."
|
||||
Executable
+3
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
#
# Stop every process whose command line contains "/bin/console app:".
# Processes are first asked to terminate with SIGTERM; any that are still
# alive after GRACE_SECONDS are killed with SIGKILL.

set -u

readonly PATTERN='/bin/console app:'
readonly GRACE_SECONDS=5

# pkill exit status: 0 = at least one process signalled, 1 = nothing matched.
pkill -TERM -f "$PATTERN"
status=$?
case "$status" in
  0) ;;
  1) exit 0 ;; # nothing is running
  *)
    printf 'pkill failed with status %s\n' "$status" >&2
    exit "$status"
    ;;
esac

# Wait for the processes to exit on their own.
for (( waited = 0; waited < GRACE_SECONDS; waited++ )); do
  if ! pgrep -f "$PATTERN" > /dev/null; then
    exit 0
  fi
  sleep 1
done

# Force-kill whatever ignored SIGTERM; status 1 means all were already gone.
pkill -KILL -f "$PATTERN"
status=$?
if (( status > 1 )); then
  printf 'pkill failed with status %s\n' "$status" >&2
  exit "$status"
fi
exit 0
|
||||
Executable
+24
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash
#
# Start `app:update --direction=forward` console processes in the background.
# The source "7sur7.cd" gets one process per category, logged to
# ${LOG_DIR}/<source>.<category>.log; every other source gets a single
# process logged to ${LOG_DIR}/<source>.log.

set -euo pipefail

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Held as an array so each word reaches nohup as its own argument without
# relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:update)
LOG_DIR="/var/www/html/news.devscast.tech/var"

#######################################
# Launch one detached update process.
# Globals:   BASE_CMD (read)
# Arguments: $1 - log file receiving stdout and stderr
#            $@ - remaining arguments appended to BASE_CMD
#######################################
start_update() {
  local log_file=$1
  shift
  nohup "${BASE_CMD[@]}" "$@" > "$log_file" 2>&1 &
}

# Under `set -e` the script stops here if the log directory cannot be created.
mkdir -p "$LOG_DIR"

# Every *.log file in LOG_DIR is removed before the processes start.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  if [[ "$SOURCE" == "7sur7.cd" ]]; then
    CATEGORIES=("politique" "economie" "culture" "sport" "societe")

    for CATEGORY in "${CATEGORIES[@]}"; do
      start_update "${LOG_DIR}/${SOURCE}.${CATEGORY}.log" \
        "$SOURCE" --direction=forward -vvv --category="$CATEGORY"
    done
  else
    start_update "${LOG_DIR}/${SOURCE}.log" \
      "$SOURCE" --direction=forward -vvv
  fi
done

echo "All crawlers started in the background."
|
||||
Executable
+24
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash
#
# Run `app:update --direction=forward` for every source, one after another,
# in the foreground. Output is shown on the terminal and copied to a log file
# with tee. The source "7sur7.cd" is run once per category and logged to
# ${LOG_DIR}/<source>.<category>.log; every other source is logged to
# ${LOG_DIR}/<source>.log.
#
# Exit status: 0 when every run succeeded, 1 when at least one run failed.
# A failing run does not prevent the remaining runs from being attempted.

# pipefail makes the pipeline report the console command's status rather
# than tee's. No `set -e`: a failing run must not abort the loop.
set -uo pipefail

SOURCES=("7sur7.cd" "actualite.cd" "radiookapi.net" "mediacongo.net" "newscd.net")

# Held as an array so each word is passed as its own argument without
# relying on unquoted word splitting.
BASE_CMD=(/usr/bin/php /var/www/html/news.devscast.tech/bin/console app:update)
LOG_DIR="/var/www/html/news.devscast.tech/var"
STATUS=0

#######################################
# Run one update in the foreground, duplicating its output to a log file.
# Globals:   BASE_CMD (read), STATUS (set to 1 on failure)
# Arguments: $1 - log file written by tee
#            $@ - remaining arguments appended to BASE_CMD
#######################################
run_update() {
  local log_file=$1
  shift
  if ! "${BASE_CMD[@]}" "$@" 2>&1 | tee "$log_file"; then
    printf 'Update failed: %s (see %s)\n' "$*" "$log_file" >&2
    STATUS=1
  fi
}

mkdir -p "$LOG_DIR" || exit 1

# Every *.log file in LOG_DIR is removed before the runs start.
rm -f -- "${LOG_DIR:?}"/*.log

for SOURCE in "${SOURCES[@]}"; do
  if [[ "$SOURCE" == "7sur7.cd" ]]; then
    CATEGORIES=("politique" "economie" "culture" "sport" "societe")

    for CATEGORY in "${CATEGORIES[@]}"; do
      run_update "${LOG_DIR}/${SOURCE}.${CATEGORY}.log" \
        "$SOURCE" --direction=forward -vvv --category="$CATEGORY"
    done
  else
    run_update "${LOG_DIR}/${SOURCE}.log" \
      "$SOURCE" --direction=forward -vvv
  fi
done

echo "All crawlers finished."
exit "$STATUS"
|
||||
Reference in New Issue
Block a user