[backend] accept articles from crawler

This commit is contained in:
2025-10-07 00:15:38 +02:00
parent 327ba5179d
commit 449718cdf2
397 changed files with 1490 additions and 3063 deletions
@@ -2,13 +2,13 @@
declare(strict_types=1);
namespace App\Aggregator\Application\EventListener;
namespace Basango\Aggregator\Application\EventListener;
use App\Aggregator\Application\Mailing\SourceCrawledEmail;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\SharedKernel\Application\Mailing\Mailer;
use App\SharedKernel\Domain\EventListener\EventListener;
use App\SharedKernel\Domain\Model\ValueObject\EmailAddress;
use Basango\Aggregator\Application\Mailing\SourceCrawledEmail;
use Basango\Aggregator\Domain\Event\SourceCrawled;
use Basango\SharedKernel\Application\Mailing\Mailer;
use Basango\SharedKernel\Domain\EventListener\EventListener;
use Basango\SharedKernel\Domain\Model\ValueObject\EmailAddress;
/**
* Class SourceFetchedListener.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\Mailing;
namespace Basango\Aggregator\Application\Mailing;
use App\SharedKernel\Application\Mailing\EmailDefinition;
use App\SharedKernel\Domain\Model\ValueObject\EmailAddress;
use Basango\SharedKernel\Application\Mailing\EmailDefinition;
use Basango\SharedKernel\Domain\Model\ValueObject\EmailAddress;
/**
* Class SourceFetched.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
namespace Basango\Aggregator\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class ExportedArticle.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
namespace Basango\Aggregator\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class SourceStatistics.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
namespace Basango\Aggregator\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Assert;
/**
* Class SourceStatisticsList.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
namespace Basango\Aggregator\Application\UseCase\Command;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
/**
* Class Save.
@@ -21,7 +21,7 @@ final readonly class CreateArticle
public string $body,
public string $source,
public int $timestamp,
public ?OpenGraphObject $metadata = null
public ?OpenGraph $metadata = null
) {
}
}
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
namespace Basango\Aggregator\Application\UseCase\Command;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
/**
* Class CreateSource.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
namespace Basango\Aggregator\Application\UseCase\Command;
/**
* Class DeleteArticles.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
namespace Basango\Aggregator\Application\UseCase\Command;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
* Class Export.
@@ -2,15 +2,15 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
namespace Basango\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\CreateArticle;
use App\Aggregator\Domain\Exception\DuplicatedArticle;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use App\Aggregator\Domain\Service\HashCalculator;
use App\SharedKernel\Application\Messaging\CommandHandler;
use Basango\Aggregator\Application\UseCase\Command\CreateArticle;
use Basango\Aggregator\Domain\Exception\DuplicatedArticle;
use Basango\Aggregator\Domain\Model\Entity\Article;
use Basango\Aggregator\Domain\Model\Repository\ArticleRepository;
use Basango\Aggregator\Domain\Model\Repository\SourceRepository;
use Basango\Aggregator\Domain\Service\HashCalculator;
use Basango\SharedKernel\Application\Messaging\CommandHandler;
/**
* Class CreateArticlesHandler.
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
namespace Basango\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\CreateSource;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use App\SharedKernel\Application\Messaging\CommandHandler;
use Basango\Aggregator\Application\UseCase\Command\CreateSource;
use Basango\Aggregator\Domain\Model\Entity\Source;
use Basango\Aggregator\Domain\Model\Repository\SourceRepository;
use Basango\SharedKernel\Application\Messaging\CommandHandler;
/**
* Class AddSourceHandler.
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
namespace Basango\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\DeleteArticles;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\SharedKernel\Application\Messaging\CommandHandler;
use Basango\Aggregator\Application\UseCase\Command\DeleteArticles;
use Basango\Aggregator\Domain\Model\Repository\ArticleRepository;
use Basango\SharedKernel\Application\Messaging\CommandHandler;
/**
* Class DeleteArticlesHandler.
@@ -2,15 +2,15 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
namespace Basango\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Command\ExportArticles;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\SharedKernel\Application\Messaging\CommandHandler;
use App\SharedKernel\Application\Messaging\QueryBus;
use App\SharedKernel\Domain\DataTransfert\DataExporter;
use App\SharedKernel\Domain\DataTransfert\TransfertSetting;
use Basango\Aggregator\Application\ReadModel\ArticleForExport;
use Basango\Aggregator\Application\UseCase\Command\ExportArticles;
use Basango\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use Basango\SharedKernel\Application\Messaging\CommandHandler;
use Basango\SharedKernel\Application\Messaging\QueryBus;
use Basango\SharedKernel\Domain\DataTransfert\DataExporter;
use Basango\SharedKernel\Domain\DataTransfert\TransfertSetting;
/**
* Class GetArticlesForExportHandler.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
namespace Basango\Aggregator\Application\UseCase\Query;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
* Class GetArticlesForExport.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
namespace Basango\Aggregator\Application\UseCase\Query;
/**
* Class GetEarliestPublicationDate.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
namespace Basango\Aggregator\Application\UseCase\Query;
/**
* Class GetLatestPublicationDate.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
namespace Basango\Aggregator\Application\UseCase\Query;
/**
* Class GetSourceStatisticsList.
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
namespace Basango\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\SharedKernel\Application\Messaging\QueryHandler;
use Basango\Aggregator\Application\ReadModel\ArticleForExport;
use Basango\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use Basango\SharedKernel\Application\Messaging\QueryHandler;
/**
* Class GetArticlesForExportHandler.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
namespace Basango\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\SharedKernel\Application\Messaging\QueryHandler;
use Basango\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use Basango\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetEarliestPublicationDateHandler.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
namespace Basango\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\SharedKernel\Application\Messaging\QueryHandler;
use Basango\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use Basango\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetLatestPublicationDateHandler.
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
namespace Basango\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\SharedKernel\Application\Messaging\QueryHandler;
use Basango\Aggregator\Application\ReadModel\SourceStatisticsList;
use Basango\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use Basango\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetSourceStatisticsListHandler.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Event;
namespace Basango\Aggregator\Domain\Event;
/**
* Class SourceFetched.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
namespace Basango\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\Exception\UserFacingError;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\SharedKernel\Domain\Exception\UserFacingError;
/**
* Class ArticleNotFound.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
namespace Basango\Aggregator\Domain\Exception;
use App\SharedKernel\Domain\Exception\UserFacingError;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\SharedKernel\Domain\Exception\UserFacingError;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
* Class ArticleOutOfRange.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
namespace Basango\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\SharedKernel\Domain\Exception\UserFacingError;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\SharedKernel\Domain\Exception\UserFacingError;
/**
* Class DuplicatedArticle.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
namespace Basango\Aggregator\Domain\Exception;
use App\SharedKernel\Domain\Exception\UserFacingError;
use Basango\SharedKernel\Domain\Exception\UserFacingError;
/**
* Class DuplicatedArticle.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
namespace Basango\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\Exception\UserFacingError;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\SharedKernel\Domain\Exception\UserFacingError;
/**
* Class SourceNotFound.
@@ -2,15 +2,14 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
namespace Basango\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
use Basango\Aggregator\Domain\Model\ValueObject\ReadingTime;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
/**
* Class Article.
@@ -73,22 +72,14 @@ class Article
return $this;
}
public function defineOpenGraph(?OpenGraphObject $object): self
public function defineOpenGraph(?OpenGraph $object): self
{
if ($object instanceof OpenGraphObject) {
$image = $object->images[0] ?? null;
$video = $object->videos[0] ?? null;
$audio = $object->audios[0] ?? null;
$this->metadata = new OpenGraph(
title: $object->title,
description: $object->description,
image: $image->url ?? $image?->secureUrl,
video: $video->url ?? $video?->secureUrl,
audio: $audio->url ?? $audio?->secureUrl,
locale: $object->locale
);
}
// $object is nullable: only replace the stored metadata when a payload was
// actually supplied, otherwise every property read below would happen on null.
if ($object instanceof OpenGraph) {
    $this->metadata = new OpenGraph(
        title: $object->title,
        description: $object->description,
        image: $object->image,
        locale: $object->locale ?? "fr"
    );
}
return $this;
}
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
namespace Basango\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\CategoryId;
use Basango\Aggregator\Domain\Model\Identity\CategoryId;
/**
* Class Category.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
namespace Basango\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
/**
* Class Source.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
namespace Basango\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
namespace Basango\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
namespace Basango\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Repository;
namespace Basango\Aggregator\Domain\Model\Repository;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\Aggregator\Domain\Model\Entity\Article;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
* Interface ArticleRepository.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Repository;
namespace Basango\Aggregator\Domain\Model\Repository;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Identity\SourceId;
use Basango\Aggregator\Domain\Model\Entity\Source;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
/**
* Interface SourceRepository.
@@ -1,26 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class CrawlingSettings.
 *
 * Immutable (final readonly) value object carrying the options of one crawl.
 * The constructor rejects an empty $id through Assert::notEmpty().
 *
 * NOTE(review): the meaning of each option below is inferred from its name and
 * type only — confirm against the crawler implementations.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CrawlingSettings
{
public function __construct(
// Identifier of the crawl target; must not be empty (asserted below).
public string $id,
// Optional page window; presumably restricts which pages are visited.
public ?PageRange $pageRange = null,
// Optional date window; presumably restricts which articles are kept.
public ?DateRange $dateRange = null,
// Optional category; presumably narrows the crawl to one section.
public ?string $category = null,
// Presumably toggles a notification once the crawl is done — TODO confirm.
public bool $notify = false
) {
// Fail fast: settings without an identifier are never valid.
Assert::notEmpty($this->id);
}
}
@@ -1,50 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
use App\SharedKernel\Domain\Assert;
/**
 * Class PageRange.
 *
 * Immutable range of page numbers, written as "start:end" in its string form.
 * Both bounds are non-negative and the end is strictly greater than the start.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class PageRange implements \Stringable
{
    public int $start;

    public int $end;

    /**
     * Instances are created through {@see PageRange::from()}.
     *
     * The three assertions enforce: start >= 0, end >= 0 and end > start.
     */
    private function __construct(int $start, int $end)
    {
        Assert::greaterThanEq($start, 0);
        Assert::greaterThanEq($end, 0);
        Assert::greaterThan($end, $start);

        $this->start = $start;
        $this->end = $end;
    }

    /**
     * Renders the range back into the "start:end" notation accepted by from().
     */
    #[\Override]
    public function __toString(): string
    {
        return $this->start . ':' . $this->end;
    }

    /**
     * Parses a "start:end" interval such as "1:10".
     *
     * The shape is validated up front so that a missing separator or a
     * non-numeric bound is reported explicitly instead of being silently
     * turned into 0 by an integer cast.
     *
     * @throws \InvalidArgumentException when the interval is not made of two
     *                                   non-negative integers separated by ":"
     */
    public static function from(string $interval): self
    {
        if (preg_match('/^\s*(\d+)\s*:\s*(\d+)\s*$/', $interval, $bounds) !== 1) {
            throw new \InvalidArgumentException(
                \sprintf('Invalid page range "%s": expected "<start>:<end>" with non-negative integers.', $interval)
            );
        }

        return new self((int) $bounds[1], (int) $bounds[2]);
    }

    /**
     * Tells whether the given page lies within the range, bounds included.
     */
    public function inRange(int $page): bool
    {
        return $page >= $this->start && $page <= $this->end;
    }
}
@@ -1,16 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
/**
 * Class UpdateDirection.
 *
 * String-backed enum with two cases, 'forward' and 'backward'.
 *
 * NOTE(review): presumably selects whether an update walks towards newer or
 * older content — the consumers are not visible here, confirm before relying
 * on that reading.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
enum UpdateDirection: string
{
// Backed by the literal 'forward'.
case FORWARD = 'forward';
// Backed by the literal 'backward'.
case BACKWARD = 'backward';
}
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject;
namespace Basango\Aggregator\Domain\Model\ValueObject;
use App\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Assert;
/**
* Class Link.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
namespace Basango\Aggregator\Domain\Model\ValueObject;
/**
* Class OpenGraphMeta.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject;
namespace Basango\Aggregator\Domain\Model\ValueObject;
/**
* Class ReadingTime.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
namespace Basango\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Class Bias.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
namespace Basango\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Class Credibility.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
namespace Basango\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Class Reliability.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
namespace Basango\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Sentiment.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
namespace Basango\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Transparency.
@@ -1,72 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling;
/**
 * Class DateParser.
 *
 * Converts date strings that may contain French day and month names into
 * Unix timestamps.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DateParser
{
    /**
     * French month names mapped to their two-digit month number.
     */
    public const array MONTHS = [
        'janvier' => '01',
        'février' => '02',
        'mars' => '03',
        'avril' => '04',
        'mai' => '05',
        'juin' => '06',
        'juillet' => '07',
        'août' => '08',
        'septembre' => '09',
        'octobre' => '10',
        'novembre' => '11',
        'décembre' => '12',
    ];

    /**
     * French day names mapped to a numeric day of week ('0' is Sunday).
     */
    public const array DAYS = [
        'dimanche' => '0',
        'lundi' => '1',
        'mardi' => '2',
        'mercredi' => '3',
        'jeudi' => '4',
        'vendredi' => '5',
        'samedi' => '6',
    ];

    public const string DEFAULT_DATE_FORMAT = 'Y-m-d H:i';

    /**
     * Formats tried, in order, when the caller asks for 'c': the value is
     * lower-cased and its "t" separator replaced by a space before parsing,
     * so the native 'c' format itself can no longer match.
     */
    private const array ISO_8601_FORMATS = ['Y-m-d H:i:s', 'Y-m-d H:i:sP'];

    /**
     * Builds a Unix timestamp (as a string) from a loosely formatted date.
     *
     * Steps: lower-case the input, replace day and month names by numbers,
     * optionally rewrite the result with a regular expression, then parse it
     * with the requested format.
     *
     * @param string      $date        raw date text
     * @param string|null $format      format understood by createFromFormat();
     *                                 defaults to DEFAULT_DATE_FORMAT, and 'c'
     *                                 is handled through ISO_8601_FORMATS
     * @param string|null $pattern     PCRE pattern applied after name
     *                                 substitution; ignored unless $replacement
     *                                 is given too
     * @param string|null $replacement replacement used together with $pattern
     *
     * @return string the timestamp of the parsed date, or of today's midnight
     *                when the date cannot be parsed
     *
     * @throws \Throwable
     */
    public function createTimeStamp(
        string $date,
        ?string $format = null,
        ?string $pattern = null,
        ?string $replacement = null
    ): string {
        // Multibyte-aware lower-casing: the byte-wise strtolower() leaves
        // accented capitals untouched, so "FÉVRIER" would never match MONTHS.
        $date = strtr(strtr(mb_strtolower($date, 'UTF-8'), self::DAYS), self::MONTHS);

        if ($pattern !== null && $replacement !== null) {
            // preg_replace() yields null on failure; keep the unmodified text
            // in that case instead of passing null further down.
            $date = preg_replace(
                pattern: $pattern,
                replacement: $replacement,
                subject: $date
            ) ?? $date;
        }

        $formats = [$format ?? self::DEFAULT_DATE_FORMAT];
        if ($format === 'c') {
            $date = str_replace('t', ' ', $date);
            // The second candidate accepts a trailing UTC offset, which the
            // first one rejects as trailing data.
            $formats = self::ISO_8601_FORMATS;
        }

        foreach ($formats as $candidate) {
            $datetime = \DateTimeImmutable::createFromFormat($candidate, $date);
            if ($datetime !== false) {
                return $datetime->format('U');
            }
        }

        // Best-effort fallback kept from the original behaviour.
        return (new \DateTimeImmutable('midnight'))->format('U');
    }
}
@@ -1,32 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Class Audio.
 *
 * Open Graph audio element holding a URL, a secure URL and a type, each of
 * which may be absent.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Audio extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null
    ) {
    }

    /**
     * Pairs every og:audio property key handled by this element with the
     * value currently stored for it (null when unset).
     */
    public function supportedProperties(): array
    {
        $map = [];
        $map[OpenGraphProperty::AUDIO_URL] = $this->url;
        $map[OpenGraphProperty::AUDIO_SECURE_URL] = $this->secureUrl;
        $map[OpenGraphProperty::AUDIO_TYPE] = $this->type;

        return $map;
    }
}
@@ -1,38 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Class Image.
 *
 * Open Graph image element holding a URL, a secure URL, a type, dimensions
 * and a user-generated flag, each of which may be absent.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Image extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null,
        public ?int $width = null,
        public ?int $height = null,
        public ?bool $userGenerated = null
    ) {
    }

    /**
     * Pairs every og:image property key handled by this element with the
     * value currently stored for it (null when unset).
     */
    public function supportedProperties(): array
    {
        $map = [];
        $map[OpenGraphProperty::IMAGE] = $this->url;
        $map[OpenGraphProperty::IMAGE_SECURE_URL] = $this->secureUrl;
        $map[OpenGraphProperty::IMAGE_TYPE] = $this->type;
        $map[OpenGraphProperty::IMAGE_WIDTH] = $this->width;
        $map[OpenGraphProperty::IMAGE_HEIGHT] = $this->height;
        $map[OpenGraphProperty::IMAGE_USER_GENERATED] = $this->userGenerated;

        return $map;
    }
}
@@ -1,36 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Class Video.
 *
 * Open Graph video element holding a URL, a secure URL, a type and
 * dimensions, each of which may be absent.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Video extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null,
        public ?int $width = null,
        public ?int $height = null
    ) {
    }

    /**
     * Pairs every og:video property key handled by this element with the
     * value currently stored for it (null when unset).
     */
    public function supportedProperties(): array
    {
        $map = [];
        $map[OpenGraphProperty::VIDEO_URL] = $this->url;
        $map[OpenGraphProperty::VIDEO_SECURE_URL] = $this->secureUrl;
        $map[OpenGraphProperty::VIDEO_TYPE] = $this->type;
        $map[OpenGraphProperty::VIDEO_WIDTH] = $this->width;
        $map[OpenGraphProperty::VIDEO_HEIGHT] = $this->height;

        return $map;
    }
}
@@ -1,16 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Objects;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
/**
 * Class Website.
 *
 * Concrete, final OpenGraphObject that adds no members of its own; all state
 * and behaviour come from the abstract parent.
 *
 * NOTE(review): presumably models the Open Graph "website" object type —
 * nothing in this class sets or checks the type, confirm with the consumer.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Website extends OpenGraphObject
{
}
@@ -1,17 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Interface OpenGraphConsumer.
 *
 * Contract for services that produce an OpenGraphObject from either a URL or
 * an HTML document. Both methods may return null.
 *
 * NOTE(review): the conditions under which null is returned (no metadata
 * found, fetch failure, ...) are defined by the implementations, which are
 * not visible here.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface OpenGraphConsumer
{
// Builds an object from the given URL; presumably fetches the page first.
public function consumeUrl(string $url): ?OpenGraphObject;
// Builds an object from markup already in memory.
// NOTE(review): $fallbackUrl is presumably used when the markup declares no URL — confirm.
public function consumeHtml(string $html, string $fallbackUrl): ?OpenGraphObject;
}
@@ -1,26 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Class OpenGraphElement.
 *
 * Base class for structured Open Graph elements. A concrete element exposes
 * its values through supportedProperties(); getProperties() turns the values
 * that are set into OpenGraphProperty objects.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
abstract class OpenGraphElement
{
    /**
     * Returns every property key handled by the element, mapped to the value
     * currently stored for it (null when unset).
     *
     * @return array<string, mixed>
     */
    abstract public function supportedProperties(): array;

    /**
     * Returns one OpenGraphProperty per supported property whose value is not
     * null, in the order given by supportedProperties().
     *
     * The result is a sequentially indexed list, and supportedProperties() is
     * evaluated only once.
     *
     * @return list<OpenGraphProperty>
     */
    public function getProperties(): array
    {
        $properties = [];

        foreach ($this->supportedProperties() as $key => $value) {
            if ($value === null) {
                continue;
            }

            // Array keys may come back as integers; the property key is a string.
            $properties[] = new OpenGraphProperty((string) $key, $value);
        }

        return $properties;
    }
}
@@ -1,283 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Audio;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Image;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Video;
/**
 * Class OpenGraphObject.
 *
 * Base class for Open Graph objects. It stores the basic `og:*` metadata and
 * converts between that state and a flat list of OpenGraphProperty items:
 * assignProperties() reads such a list, getProperties() produces one.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
abstract class OpenGraphObject
{
    /**
     * @param Audio[]  $audios          audio elements, in document order
     * @param Image[]  $images          image elements, in document order
     * @param string[] $localeAlternate values of og:locale:alternate
     * @param string[] $seeAlso         values of og:see_also
     * @param Video[]  $videos          video elements, in document order
     */
    public function __construct(
        public array $audios = [],
        public ?string $description = null,
        public ?string $determiner = null,
        public array $images = [],
        public ?string $locale = null,
        public array $localeAlternate = [],
        public ?bool $richAttachment = null,
        public array $seeAlso = [],
        public ?string $siteName = null,
        public ?string $title = null,
        public ?string $type = null,
        public ?\DateTimeImmutable $updatedTime = null,
        public ?string $url = null,
        public array $videos = []
    ) {
    }

    /**
     * Copies a list of properties into this object.
     *
     * Single-valued fields keep the first value seen; og:rich_attachment is
     * overwritten on every occurrence. List-valued fields collect every
     * occurrence. Structured attributes (og:image:width, ...) are applied to
     * the most recently added element of their kind.
     *
     * @param iterable<OpenGraphProperty> $properties properties to read
     * @param bool                        $debug      when true, a structured
     *                                                attribute that has no
     *                                                element to attach to
     *                                                raises an exception
     *                                                instead of being ignored
     *
     * @throws \UnexpectedValueException in debug mode, see $debug
     */
    public function assignProperties(array $properties, bool $debug = false): void
    {
        foreach ($properties as $property) {
            $name = $property->key;
            // getProperties() emits integers and booleans for some entries
            // while every consumer below expects a string; normalise scalars
            // so that its output can be fed back in under strict types.
            $value = \is_scalar($property->value) ? (string) $property->value : $property->value;

            switch ($name) {
                case OpenGraphProperty::AUDIO:
                case OpenGraphProperty::AUDIO_URL:
                    $this->audios[] = new Audio($value);
                    break;
                case OpenGraphProperty::AUDIO_SECURE_URL:
                case OpenGraphProperty::AUDIO_TYPE:
                    if ($this->audios !== []) {
                        $this->handleAudioAttribute($this->audios[array_key_last($this->audios)], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(
                            \sprintf(
                                "Found '%s' property but no audio was found before.",
                                $name
                            )
                        );
                    }
                    break;
                case OpenGraphProperty::DESCRIPTION:
                    if ($this->description === null) {
                        $this->description = $value;
                    }
                    break;
                case OpenGraphProperty::DETERMINER:
                    if ($this->determiner === null) {
                        $this->determiner = $value;
                    }
                    break;
                case OpenGraphProperty::IMAGE:
                case OpenGraphProperty::IMAGE_URL:
                    $this->images[] = new Image($value);
                    break;
                case OpenGraphProperty::IMAGE_HEIGHT:
                case OpenGraphProperty::IMAGE_SECURE_URL:
                case OpenGraphProperty::IMAGE_TYPE:
                case OpenGraphProperty::IMAGE_WIDTH:
                case OpenGraphProperty::IMAGE_USER_GENERATED:
                    if ($this->images !== []) {
                        $this->handleImageAttribute($this->images[array_key_last($this->images)], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(
                            \sprintf(
                                "Found '%s' property but no image was found before.",
                                $name
                            )
                        );
                    }
                    break;
                case OpenGraphProperty::LOCALE:
                    if ($this->locale === null) {
                        $this->locale = $value;
                    }
                    break;
                case OpenGraphProperty::LOCALE_ALTERNATE:
                    $this->localeAlternate[] = $value;
                    break;
                case OpenGraphProperty::RICH_ATTACHMENT:
                    $this->richAttachment = $this->convertToBoolean($value);
                    break;
                case OpenGraphProperty::SEE_ALSO:
                    $this->seeAlso[] = $value;
                    break;
                case OpenGraphProperty::SITE_NAME:
                    if ($this->siteName === null) {
                        $this->siteName = $value;
                    }
                    break;
                case OpenGraphProperty::TITLE:
                    if ($this->title === null) {
                        $this->title = $value;
                    }
                    break;
                case OpenGraphProperty::TYPE:
                    // Emitted by getProperties(), so it has to be readable
                    // here too; a type already set is left untouched.
                    if ($this->type === null) {
                        $this->type = $value;
                    }
                    break;
                case OpenGraphProperty::UPDATED_TIME:
                    if (! $this->updatedTime instanceof \DateTimeImmutable) {
                        $this->updatedTime = $this->convertToDateTime($value);
                    }
                    break;
                case OpenGraphProperty::URL:
                    if ($this->url === null) {
                        $this->url = $value;
                    }
                    break;
                case OpenGraphProperty::VIDEO:
                case OpenGraphProperty::VIDEO_URL:
                    $this->videos[] = new Video($value);
                    break;
                case OpenGraphProperty::VIDEO_HEIGHT:
                case OpenGraphProperty::VIDEO_SECURE_URL:
                case OpenGraphProperty::VIDEO_TYPE:
                case OpenGraphProperty::VIDEO_WIDTH:
                    if ($this->videos !== []) {
                        $this->handleVideoAttribute($this->videos[array_key_last($this->videos)], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(\sprintf(
                            "Found '%s' property but no video was found before.",
                            $name
                        ));
                    }
                    break;
                default:
                    // Properties outside the basic og:* set are ignored.
                    break;
            }
        }
    }

    /**
     * Flattens the object into a list of properties.
     *
     * Fields that are null and lists that are empty contribute nothing.
     *
     * @return list<OpenGraphProperty>
     */
    public function getProperties(): array
    {
        $properties = [];

        foreach ($this->audios as $audio) {
            $properties = array_merge($properties, $audio->getProperties());
        }

        if ($this->title !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::TITLE, $this->title);
        }

        if ($this->description !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::DESCRIPTION, $this->description);
        }

        if ($this->determiner !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::DETERMINER, $this->determiner);
        }

        foreach ($this->images as $image) {
            $properties = array_merge($properties, $image->getProperties());
        }

        if ($this->locale !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::LOCALE, $this->locale);
        }

        foreach ($this->localeAlternate as $locale) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::LOCALE_ALTERNATE, $locale);
        }

        if ($this->richAttachment !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::RICH_ATTACHMENT, (int) $this->richAttachment);
        }

        foreach ($this->seeAlso as $seeAlso) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::SEE_ALSO, $seeAlso);
        }

        if ($this->siteName !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::SITE_NAME, $this->siteName);
        }

        if ($this->type !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::TYPE, $this->type);
        }

        if ($this->updatedTime instanceof \DateTimeImmutable) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::UPDATED_TIME, $this->updatedTime->format('c'));
        }

        if ($this->url !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::URL, $this->url);
        }

        foreach ($this->videos as $video) {
            $properties = array_merge($properties, $video->getProperties());
        }

        return $properties;
    }

    /**
     * Interprets '1' and 'true' (case-insensitive) as true, anything else as false.
     */
    protected function convertToBoolean(string $value): bool
    {
        return match (strtolower($value)) {
            '1', 'true' => true,
            default => false,
        };
    }

    /**
     * Parses a date string; returns null when the value cannot be parsed.
     */
    protected function convertToDateTime(string $value): ?\DateTimeImmutable
    {
        try {
            return new \DateTimeImmutable($value);
        } catch (\Throwable) {
            // Deliberate best effort: an unreadable date is treated as absent.
            return null;
        }
    }

    /**
     * Applies an og:audio:* attribute to the given audio element.
     */
    private function handleAudioAttribute(Audio $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::AUDIO_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::AUDIO_SECURE_URL:
                $element->secureUrl = $value;
                break;
        }
    }

    /**
     * Applies an og:image:* attribute to the given image element.
     */
    private function handleImageAttribute(Image $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::IMAGE_HEIGHT:
                $element->height = (int) $value;
                break;
            case OpenGraphProperty::IMAGE_WIDTH:
                $element->width = (int) $value;
                break;
            case OpenGraphProperty::IMAGE_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::IMAGE_SECURE_URL:
                $element->secureUrl = $value;
                break;
            case OpenGraphProperty::IMAGE_USER_GENERATED:
                $element->userGenerated = $this->convertToBoolean($value);
                break;
        }
    }

    /**
     * Applies an og:video:* attribute to the given video element.
     */
    private function handleVideoAttribute(Video $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::VIDEO_HEIGHT:
                $element->height = (int) $value;
                break;
            case OpenGraphProperty::VIDEO_WIDTH:
                $element->width = (int) $value;
                break;
            case OpenGraphProperty::VIDEO_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::VIDEO_SECURE_URL:
                $element->secureUrl = $value;
                break;
        }
    }
}
@@ -1,75 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Class OpenGraphProperty.
 *
 * Immutable key/value pair describing one Open Graph property, together with
 * the string constants naming the `og:*` keys used across this namespace.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class OpenGraphProperty
{
// Audio keys.
public const string AUDIO = 'og:audio';
public const string AUDIO_SECURE_URL = 'og:audio:secure_url';
public const string AUDIO_TYPE = 'og:audio:type';
public const string AUDIO_URL = 'og:audio:url';
// Basic descriptive keys.
public const string DESCRIPTION = 'og:description';
public const string DETERMINER = 'og:determiner';
// Image keys.
public const string IMAGE = 'og:image';
public const string IMAGE_HEIGHT = 'og:image:height';
public const string IMAGE_SECURE_URL = 'og:image:secure_url';
public const string IMAGE_TYPE = 'og:image:type';
public const string IMAGE_URL = 'og:image:url';
public const string IMAGE_WIDTH = 'og:image:width';
public const string IMAGE_USER_GENERATED = 'og:image:user_generated';
// Locale keys.
public const string LOCALE = 'og:locale';
public const string LOCALE_ALTERNATE = 'og:locale:alternate';
// Remaining object-level keys.
public const string RICH_ATTACHMENT = 'og:rich_attachment';
public const string SEE_ALSO = 'og:see_also';
public const string SITE_NAME = 'og:site_name';
public const string TITLE = 'og:title';
public const string TYPE = 'og:type';
public const string UPDATED_TIME = 'og:updated_time';
public const string URL = 'og:url';
// Video keys.
public const string VIDEO = 'og:video';
public const string VIDEO_HEIGHT = 'og:video:height';
public const string VIDEO_SECURE_URL = 'og:video:secure_url';
public const string VIDEO_TYPE = 'og:video:type';
public const string VIDEO_URL = 'og:video:url';
public const string VIDEO_WIDTH = 'og:video:width';
/**
 * @param string $key   property name, normally one of the constants above
 * @param mixed  $value property value; untyped, callers pass strings,
 *                      integers and booleans
 */
public function __construct(
public string $key,
public mixed $value,
) {
}
}
@@ -1,22 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Interface SourceCrawler.
 *
 * Contract for a crawler: it can say whether it handles a given source
 * identifier, crawl according to a settings object, and process one
 * raw document.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface SourceCrawler
{
    /**
     * Runs a crawl configured by the given settings.
     */
    public function fetch(CrawlingSettings $settings): void;

    /**
     * Processes one raw document, optionally constrained by a date range.
     *
     * NOTE(review): despite the parameter name, the payload format looks
     * implementation-defined — confirm with the implementers.
     */
    public function fetchOne(string $html, ?DateRange $dateRange = null): void;

    /**
     * Tells whether this crawler handles the given source identifier.
     */
    public function supports(string $source): bool;
}
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Service;
namespace Basango\Aggregator\Domain\Service;
/**
* Class HashCalculator.
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Scoring;
namespace Basango\Aggregator\Domain\Service\Scoring;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
/**
* Interface CredibilityAnalyser.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Scoring;
namespace Basango\Aggregator\Domain\Service\Scoring;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
/**
* Interface SentimentAnalyser.
@@ -1,65 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler;
use Psr\Log\LoggerInterface;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Class HttpClientFactory.
 *
 * Derives HTTP clients from the injected base client, giving each one a
 * random User-Agent and, when the USE_PROXY environment variable is enabled,
 * a proxy picked at random from `<projectDir>/data/proxies.txt`.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class HttpClientFactory
{
    public function __construct(
        private string $projectDir,
        private Filesystem $filesystem,
        private HttpClientInterface $client,
        private LoggerInterface $logger
    ) {
    }

    /**
     * Returns a copy of the base client with a random User-Agent header and
     * an optional `https://` proxy.
     */
    public function create(): HttpClientInterface
    {
        $proxy = $this->getProxy();

        return $this->client->withOptions([
            'headers' => [
                'User-Agent' => UserAgents::random(),
            ],
            'proxy' => $proxy !== null ? 'https://' . $proxy : null,
        ]);
    }

    /**
     * Picks a random proxy from the proxy file.
     *
     * @return string|null the proxy, or null when proxies are disabled, the
     *                     file cannot be read, or it holds no usable entry
     */
    private function getProxy(): ?string
    {
        if (! $this->isProxyEnabled()) {
            return null;
        }

        $filename = sprintf('%s/data/proxies.txt', $this->projectDir);

        try {
            $content = $this->filesystem->readFile($filename);
        } catch (\Throwable $e) {
            $this->logger->error('Unable to read proxy file', [
                'exception' => $e,
            ]);

            return null;
        }

        // One proxy per line; surrounding whitespace and blank lines are dropped.
        $lines = preg_split('/\r\n|\n|\r/', $content) ?: [];
        $proxies = array_values(array_filter(
            array_map(trim(...), $lines),
            static fn (string $proxy): bool => $proxy !== '' && $proxy !== '0',
        ));

        // array_rand() throws on an empty array, so report this case explicitly
        // instead of letting it surface as a misleading "unable to read" error.
        if ($proxies === []) {
            $this->logger->warning('Proxy file contains no usable proxy', [
                'file' => $filename,
            ]);

            return null;
        }

        $proxy = $proxies[array_rand($proxies)];
        $this->logger->info('HttpClient is using proxy: ' . $proxy);

        return $proxy;
    }

    /**
     * Reads the USE_PROXY flag as a real boolean: "1", "true", "on" and "yes"
     * enable it, while "0", "false", "off", "no", an empty or an unset
     * variable disable it.
     */
    private function isProxyEnabled(): bool
    {
        return filter_var(getenv('USE_PROXY'), FILTER_VALIDATE_BOOLEAN);
    }
}
@@ -1,128 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\OpenGraph;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Objects\Website;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
use App\Aggregator\Infrastructure\Crawler\HttpClientFactory;
use App\Aggregator\Infrastructure\Crawler\UserAgents;
use Psr\Log\LoggerInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Class DomCrawlerConsumer.
 *
 * OpenGraphConsumer implementation that extracts `og:*` meta tags from an
 * HTML document with Symfony's DomCrawler. In fallback mode, a missing url,
 * title or description is taken from the canonical link, the `<title>`
 * element and the description meta tag respectively.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DomCrawlerConsumer implements OpenGraphConsumer
{
    private HttpClientInterface $client;

    public function __construct(
        HttpClientFactory $clientFactory,
        private LoggerInterface $logger,
        private bool $useFallbackMode = true,
        private bool $debug = false,
    ) {
        $this->client = $clientFactory->create();
    }

    /**
     * Downloads the page at $url and extracts its Open Graph data.
     *
     * @return OpenGraphObject|null null when the request or the parsing fails
     *                              (the failure is logged)
     */
    public function consumeUrl(string $url): ?OpenGraphObject
    {
        try {
            $response = $this->client->request('GET', $url, [
                'headers' => [
                    'User-Agent' => UserAgents::OPEN_GRAPH->value,
                ],
            ])->getContent();

            return $this->consumeHtml($response, $url);
        } catch (\Throwable $e) {
            $this->logger->error(
                'Unable to consume OpenGraph URL',
                [
                    'url' => $url,
                    'exception' => $e,
                ]
            );

            return null;
        }
    }

    /**
     * Extracts Open Graph data from an HTML string.
     *
     * In fallback mode, $fallbackUrl is used when the document provides
     * neither an `og:url` nor a canonical link.
     *
     * @return OpenGraphObject|null null when parsing fails (the failure is logged)
     */
    public function consumeHtml(string $html, string $fallbackUrl): ?OpenGraphObject
    {
        try {
            $object = $this->consume($html);
            if ($this->useFallbackMode && $object->url === null) {
                $object->url = $fallbackUrl;
            }

            return $object;
        } catch (\Throwable $e) {
            $this->logger->error(
                'Unable to consume OpenGraph HTML',
                [
                    'html' => $html,
                    'exception' => $e,
                ]
            );

            return null;
        }
    }

    /**
     * Collects every `og:*` meta tag (declared through either the `name` or
     * the `property` attribute) and applies the fallbacks.
     */
    private function consume(string $content): OpenGraphObject
    {
        $crawler = new Crawler($content);
        $object = new Website(type: 'website');

        $properties = [];
        foreach (['name', 'property'] as $attribute) {
            /** @var \DOMElement $tag */
            foreach ($crawler->filter(sprintf("meta[%s^='og:']", $attribute)) as $tag) {
                $properties[] = new OpenGraphProperty(
                    strtolower(trim($tag->getAttribute($attribute))),
                    trim($tag->getAttribute('content')),
                );
            }
        }
        $object->assignProperties($properties, $this->debug);

        if (! $this->useFallbackMode) {
            return $object;
        }

        // Fallback for url
        if ($object->url === null) {
            $canonical = $this->firstAttribute($crawler, "link[rel='canonical']", 'href');
            if ($canonical !== null) {
                $object->url = $canonical;
            }
        }

        // Fallback for title
        if ($object->title === null) {
            $titleElement = $crawler->filter('title')->first();
            $title = $titleElement->count() > 0 ? trim($titleElement->text()) : '';
            if ($title !== '') {
                $object->title = $title;
            }
        }

        // Fallback for description: the standard tag is <meta name="description">,
        // the `property` variant is kept for pages that use it.
        if ($object->description === null) {
            $description = $this->firstAttribute(
                $crawler,
                "meta[name='description'], meta[property='description']",
                'content'
            );
            if ($description !== null) {
                $object->description = $description;
            }
        }

        return $object;
    }

    /**
     * Returns the trimmed attribute of the first element matching the
     * selector, or null when there is no match or the value is empty. An
     * empty value must not count as "found", otherwise later fallbacks
     * (such as the fallback URL) would never apply.
     */
    private function firstAttribute(Crawler $crawler, string $selector, string $attribute): ?string
    {
        $element = $crawler->filter($selector)->first();
        if ($element->count() === 0) {
            return null;
        }

        $value = trim($element->attr($attribute) ?? '');

        return $value !== '' ? $value : null;
    }
}
@@ -1,154 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Application\UseCase\Command\CreateArticle;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\Aggregator\Domain\Exception\ArticleOutOfRange;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Service\Crawling\DateParser;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\Aggregator\Domain\Service\Crawling\SourceCrawler;
use App\Aggregator\Infrastructure\Crawler\HttpClientFactory;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Psr\EventDispatcher\EventDispatcherInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\DependencyInjection\Attribute\AutoconfigureTag;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Stopwatch\Stopwatch;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Class Source.
 *
 * Base class for crawlable sources. It provides the HTTP client, a stopwatch
 * around a crawl, article persistence through the command bus, and helpers
 * shared by concrete sources. Subclasses supply the ID and URL constants and
 * the pagination logic.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
#[AutoconfigureTag('app.data_source')]
abstract class Source implements SourceCrawler
{
    protected const string URL = 'url';

    protected const string ID = 'id';

    private const string WATCH_EVENT_NAME = 'crawling';

    protected Stopwatch $stopwatch;

    protected HttpClientInterface $client;

    public function __construct(
        HttpClientFactory $clientFactory,
        protected EventDispatcherInterface $dispatcher,
        protected LoggerInterface $logger,
        protected DateParser $dateParser,
        protected CommandBus $commandBus,
        protected OpenGraphConsumer $openGraphConsumer
    ) {
        $this->stopwatch = new Stopwatch();
        $this->client = $clientFactory->create();
    }

    /**
     * A source supports exactly its own identifier.
     */
    #[\Override]
    public function supports(string $source): bool
    {
        return $source === $this->getId();
    }

    /**
     * Returns the range of pages available for this source.
     */
    abstract public function getPagination(?string $category = null): PageRange;

    protected function getId(): string
    {
        return static::ID;
    }

    protected function getUrl(): string
    {
        return static::URL;
    }

    /**
     * Downloads $url and wraps the response body in a DomCrawler.
     *
     * @param int|null $page page number, used for logging only
     *
     * @throws \Throwable
     */
    protected function crawle(string $url, ?int $page = null): Crawler
    {
        if ($page !== null) {
            $this->logger->notice('> Page ' . $page);
        }

        $response = $this->client->request('GET', $url)->getContent();

        return new Crawler($response);
    }

    /**
     * Persists one article through the command bus.
     *
     * Failures are logged, never rethrown, so one bad article does not stop
     * the crawl.
     */
    protected function save(
        string $title,
        string $link,
        string $categories,
        string $body,
        string $timestamp,
        ?OpenGraphObject $metadata = null
    ): void {
        try {
            $this->commandBus->handle(
                new CreateArticle(
                    title: $title,
                    link: Link::from($link, $this->getId()),
                    categories: $categories,
                    body: $body,
                    source: $this->getId(),
                    timestamp: (int) $timestamp,
                    metadata: $metadata
                )
            );
            $this->logger->notice(sprintf('> %s ✅', $title));
        } catch (\Throwable $e) {
            // Keep the article and the exception in the context so the
            // failure can be traced back to its origin.
            $this->logger->error(sprintf('> %s [Failed] ❌', $e->getMessage()), [
                'title' => $title,
                'link' => $link,
                'exception' => $e,
            ]);
        }
    }

    /**
     * Starts the crawl stopwatch.
     */
    protected function initialize(): void
    {
        $this->stopwatch->start(self::WATCH_EVENT_NAME);
        $this->logger->notice('Initialized');
    }

    /**
     * Stops the stopwatch and dispatches the SourceCrawled event.
     */
    protected function completed(bool $notify = false): void
    {
        $event = $this->stopwatch->stop(self::WATCH_EVENT_NAME);

        $this->dispatcher->dispatch(new SourceCrawled((string) $event, $this->getId(), $notify));
        $this->logger->notice('Done');
    }

    /**
     * Logs a skipped article, or throws when DateRange::outRange() reports
     * the timestamp as out of range.
     *
     * @throws ArticleOutOfRange
     */
    protected function skip(DateRange $dateRange, string $timestamp, string $title, string $date): void
    {
        if ($dateRange->outRange((int) $timestamp)) {
            throw ArticleOutOfRange::with($timestamp, $dateRange);
        }

        $this->logger->notice(sprintf('> %s [Skipped %s]', $title, $date));
    }

    /**
     * Reads the last page number from the `page` query parameter of the last
     * `ul.pagination > li a` link.
     *
     * @return int the page number, or 0 when the link has no `page` parameter
     *
     * @throws \Throwable when the page cannot be downloaded or holds no
     *                    usable pagination link
     */
    protected function getLastPage(?string $url = null): int
    {
        $target = $url ?? $this->getUrl();
        $href = $this->crawle($target)
            ->filter('ul.pagination > li a')
            ->last()
            ->attr('href');

        // parse_url() returns null when the link has no query string (and false
        // for malformed URLs); passing either to parse_str() would raise a
        // TypeError, so fail with an explicit message instead.
        $query = is_string($href) ? parse_url($href, PHP_URL_QUERY) : null;
        if (! is_string($query)) {
            throw new \RuntimeException(sprintf('No paginated link with a query string found on "%s"', $target));
        }

        $result = [];
        parse_str($query, $result);

        return (int) ($result['page'] ?? 0);
    }
}
@@ -1,63 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Service\Crawling\SourceCrawler as SourceCrawlerInterface;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\DependencyInjection\Attribute\AutowireIterator;
/**
 * Class SourceCrawler.
 *
 * Aggregate crawler: holds every service tagged `app.data_source` and
 * delegates to the ones supporting the requested source identifier.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceCrawler implements SourceCrawlerInterface
{
    /**
     * @var iterable<SourceCrawlerInterface>
     */
    private iterable $sources;

    public function __construct(
        #[AutowireIterator('app.data_source')] \Traversable $sources
    ) {
        $this->sources = iterator_to_array($sources);
    }

    /**
     * Delegates the crawl to every registered source supporting the
     * requested identifier.
     *
     * @throws \RuntimeException when no registered source supports it, so a
     *                           mistyped identifier is not reported as a
     *                           successful crawl (consistent with get())
     */
    #[\Override]
    public function fetch(CrawlingSettings $settings): void
    {
        $handled = false;
        foreach ($this->sources as $source) {
            if ($source->supports($settings->id)) {
                $source->fetch($settings);
                $handled = true;
            }
        }

        if (! $handled) {
            throw new \RuntimeException(sprintf('Source "%s" not found', $settings->id));
        }
    }

    /**
     * The aggregate accepts any identifier; resolution happens in fetch().
     */
    #[\Override]
    public function supports(string $source): bool
    {
        return true;
    }

    #[\Override]
    public function fetchOne(string $html, ?DateRange $dateRange = null): void
    {
        throw new \RuntimeException('Not implemented');
    }

    /**
     * Returns the first registered Source supporting the given identifier.
     *
     * @throws \RuntimeException when there is none
     */
    public function get(string $id): Source
    {
        foreach ($this->sources as $source) {
            // Check the type for real rather than trusting an annotation: a
            // tagged service that is not a Source must not break the return type.
            if ($source instanceof Source && $source->supports($id)) {
                return $source;
            }
        }

        throw new \RuntimeException('Source not found');
    }
}
@@ -1,146 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Domain\Exception\ArticleOutOfRange;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class WordPressJson.
 *
 * Some WordPress websites expose their data in JSON format,
 * this class will help to fetch data from those websites.
 *
 * @see https://developer.wordpress.org/rest-api/
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
class WordPressJson extends Source
{
    public const string POST_QUERY = '_fields=date,slug,link,title.rendered,content.rendered,categories&orderby=date&order=desc';

    public const string CATEGORY_QUERY = '_fields=id,slug,count&orderby=count&order=desc&per_page=100';

    public const string TOTAL_PAGES_HEADER = 'x-wp-totalpages';

    public const string TOTAL_POSTS_HEADER = 'x-wp-total';

    /**
     * Category id => slug, filled on first use by fetchCategories().
     *
     * @var array<int, string>
     */
    private array $categoryMap = [];

    /**
     * Reads the page and post totals from the WordPress pagination headers.
     *
     * A missing header counts as 0.
     */
    #[\Override]
    public function getPagination(?string $category = null): PageRange
    {
        $response = $this->client->request('GET', sprintf('%s/wp-json/wp/v2/posts?_fields=id&per_page=100', $this->getUrl()));
        $headers = $response->getHeaders();

        $pages = (int) ($headers[self::TOTAL_PAGES_HEADER][0] ?? 0);
        $posts = (int) ($headers[self::TOTAL_POSTS_HEADER][0] ?? 0);

        $this->logger->notice(sprintf('WordPressJson %d posts, %d pages', $posts, $pages));

        return PageRange::from(sprintf('1:%d', $pages));
    }

    /**
     * Crawls the requested page range, 100 posts per page.
     *
     * A page that cannot be downloaded or decoded is logged and skipped; the
     * crawl stops early once an article falls out of the date range.
     */
    #[\Override]
    public function fetch(CrawlingSettings $settings): void
    {
        $this->initialize();
        $page = $settings->pageRange ?? $this->getPagination();

        for ($i = $page->start; $i <= $page->end; $i++) {
            try {
                $response = $this->client->request(
                    method: 'GET',
                    url: sprintf('%s/wp-json/wp/v2/posts?%s&page=%d&per_page=100', $this->getUrl(), self::POST_QUERY, $i)
                );

                // Decode strictly inside the try block: an invalid payload must
                // be reported for this page, not turn into a null fed to foreach.
                $articles = json_decode(
                    $this->removeMisconfigurationError($response->getContent()),
                    true,
                    flags: JSON_THROW_ON_ERROR
                );
                if (! is_array($articles)) {
                    throw new \UnexpectedValueException('Unexpected JSON payload, expected a list of posts');
                }
            } catch (\Throwable $e) {
                $this->logger->error(sprintf('> page %d => %s [Failed] ❌', $i, $e->getMessage()));
                continue;
            }

            try {
                foreach ($articles as $article) {
                    $this->fetchOne((string) json_encode($article), $settings->dateRange);
                }
            } catch (ArticleOutOfRange) {
                $this->logger->notice('No more articles to fetch in this range.');
                break;
            }
        }

        $this->completed($settings->notify);
    }

    /**
     * Saves one post given as a JSON-encoded WordPress REST post object.
     *
     * @param string $html JSON document (not HTML) describing one post
     *
     * @throws ArticleOutOfRange rethrown from skip(); every other failure is
     *                           logged and swallowed
     */
    #[\Override]
    public function fetchOne(string $html, ?DateRange $dateRange = null): void
    {
        try {
            /**
             * @var array{
             *      link:string,
             *      title:array{rendered:string},
             *      content:array{rendered:string},
             *      date:string,
             *      categories:int[]
             * } $data
             */
            $data = json_decode($html, true, flags: JSON_THROW_ON_ERROR);
            $link = str_replace($this->getUrl(), '', $data['link']);
            $title = strip_tags($data['title']['rendered']);
            $body = strip_tags($data['content']['rendered']);
            $timestamp = $this->dateParser->createTimeStamp($data['date'], format: 'c');
            $categories = $this->mapCategories($data['categories']);

            if (! $dateRange instanceof DateRange || $dateRange->inRange((int) $timestamp)) {
                $metadata = $this->openGraphConsumer->consumeUrl($data['link']);
                $this->save($title, $link, $categories, $body, $timestamp, $metadata);
            } else {
                $this->skip($dateRange, $timestamp, $title, $data['date']);
            }
        } catch (ArticleOutOfRange $e) {
            throw $e;
        } catch (\Throwable $e) {
            $this->logger->error(sprintf('> %s [Failed] ❌', $e->getMessage()));

            return;
        }
    }

    /**
     * edge case for some politico.cd website
     * this invalidates the json, so we have to remove it
     */
    private function removeMisconfigurationError(string $content): string
    {
        // The notice spans two lines in the response; the line break is spelled
        // out so the match does not depend on this file's indentation.
        $error = '<br />' . "\n"
            . '<b>Notice</b>: ob_end_flush(): Failed to send buffer of zlib output compression (0) in <b>/home/politico/public_html/wp-includes/functions.php</b> on line <b>5427</b><br />';

        return str_replace($error, '', $content);
    }

    /**
     * Loads the id => slug map from the categories endpoint.
     */
    private function fetchCategories(): void
    {
        $response = $this->client->request('GET', sprintf('%s/wp-json/wp/v2/categories?%s', $this->getUrl(), self::CATEGORY_QUERY));

        /** @var array{id: int, slug: string}[] $categories */
        $categories = json_decode($response->getContent(), true, flags: JSON_THROW_ON_ERROR);

        foreach ($categories as $category) {
            $this->categoryMap[$category['id']] = $category['slug'];
        }
    }

    /**
     * Converts category ids to a lowercase, comma-separated list of slugs.
     *
     * Ids missing from the map (the category query requests at most 100
     * entries) are skipped rather than raising an undefined-index warning
     * and leaving empty slugs in the list.
     *
     * @param array<int|string> $categories
     */
    private function mapCategories(array $categories): string
    {
        if ($this->categoryMap === []) {
            $this->fetchCategories();
        }

        $slugs = [];
        foreach ($categories as $category) {
            if (isset($this->categoryMap[$category])) {
                $slugs[] = $this->categoryMap[$category];
            }
        }

        return strtolower(implode(',', $slugs));
    }
}
@@ -1,31 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler;
/**
 * Class UserAgents.
 *
 * User-Agent strings available to the crawler's HTTP clients.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
enum UserAgents: string
{
    case OPEN_GRAPH = 'facebookexternalhit/1.1';
    case IPHONE = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5';
    case LINUX = 'Mozilla/5.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7';
    case ANDROID = 'Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0';
    case CHROME_WINDOWS = 'Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533';
    case EXPLORER = 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)';
    case MAC_FIREFOX = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4';
    case CHROME_LINUX = 'Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601';
    case MAC_FIREFOX_OLD = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9';
    case MOBILE_IPHONE = 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7';
    case MOBILE_IPOD = 'Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2';

    /**
     * Returns the value of one case picked at random among all cases.
     */
    public static function random(): string
    {
        $candidates = self::cases();
        $picked = $candidates[array_rand($candidates)];

        return $picked->value;
    }
}
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\Aggregator\Application\UseCase\QueryHandler\GetArticlesForExportHandler;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\Aggregator\Application\ReadModel\ArticleForExport;
use Basango\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use Basango\Aggregator\Application\UseCase\QueryHandler\GetArticlesForExportHandler;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
use Doctrine\DBAL\Connection;
/**
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\Aggregator\Application\UseCase\QueryHandler\GetEarliestPublicationDateHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Basango\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use Basango\Aggregator\Application\UseCase\QueryHandler\GetEarliestPublicationDateHandler;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\Aggregator\Application\UseCase\QueryHandler\GetLatestPublicationDateHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Basango\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use Basango\Aggregator\Application\UseCase\QueryHandler\GetLatestPublicationDateHandler;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\Aggregator\Application\UseCase\QueryHandler\GetSourceStatisticsListHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Basango\Aggregator\Application\ReadModel\SourceStatisticsList;
use Basango\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use Basango\Aggregator\Application\UseCase\QueryHandler\GetSourceStatisticsListHandler;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
/**
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Symfony\Bridge\Doctrine\Types\AbstractUidType;
/**
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
use Doctrine\DBAL\Platforms\AbstractPlatform;
use Doctrine\DBAL\Types\ConversionException;
use Doctrine\DBAL\Types\Type;
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\Identity\SourceId;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Symfony\Bridge\Doctrine\Types\AbstractUidType;
/**
@@ -2,13 +2,13 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
use App\Aggregator\Domain\Exception\ArticleNotFound;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\Aggregator\Domain\Exception\ArticleNotFound;
use Basango\Aggregator\Domain\Model\Entity\Article;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\Repository\ArticleRepository;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
use App\Aggregator\Domain\Exception\SourceNotFound;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use Basango\Aggregator\Domain\Exception\SourceNotFound;
use Basango\Aggregator\Domain\Model\Entity\Source;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\Aggregator\Domain\Model\Repository\SourceRepository;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
@@ -1,117 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\SharedKernel\Domain\EventDispatcher\EventDispatcher;
use Doctrine\DBAL\Exception;
use Doctrine\ORM\EntityManagerInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Stopwatch\Stopwatch;
/**
 * Console command that fills in missing OpenGraph metadata for the stored
 * articles of one source, flushing the entity manager in batches.
 */
#[AsCommand(
    name: 'app:open-graph',
    description: 'Update OpenGraph data for articles',
)]
class ConsumeOpenGraphConsole extends Command
{
    private const string WATCH_EVENT_NAME = 'open-graph-consume';

    private SymfonyStyle $io;

    public function __construct(
        private readonly OpenGraphConsumer $openGraphConsumer,
        private readonly EntityManagerInterface $entityManager,
        private readonly LoggerInterface $logger,
        private readonly EventDispatcher $eventDispatcher,
        private readonly Stopwatch $stopwatch = new Stopwatch(false)
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'The source to crawl');
        $this->addOption('batch', null, InputOption::VALUE_OPTIONAL, 'Batch size', 50);
    }

    /**
     * Iterates over the articles of the source that have no metadata, fetches
     * their OpenGraph data and flushes every `batch` articles.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setProcessTitle('[DRC News] OpenGraph Consumer');
        if ($input->getOption('no-interaction') === false && ! $this->io->confirm('This is a long process, do you want to continue ?', false)) {
            $this->io->warning('Process aborted');

            return Command::SUCCESS;
        }

        $index = 0;
        // The option comes from the command line as a string; force a positive
        // integer so the modulo below can neither divide by zero nor receive a
        // non-numeric operand.
        $batchSize = max(1, (int) ($input->getOption('batch') ?? 50));
        $source = $input->getArgument('source');

        try {
            $this->entityManager->getConnection()->executeQuery('SET SESSION interactive_timeout = 86400;');
            $this->entityManager->getConnection()->executeQuery('SET SESSION wait_timeout = 86400;');
        } catch (Exception $e) {
            $this->logger->critical('Unable to set session timeout', [
                'exception' => $e,
            ]);

            return Command::FAILURE;
        }

        // NOTE(review): the LEFT JOIN below has no join condition linking the
        // article to its source — confirm the query only returns articles of
        // the requested source.
        $query = $this->entityManager
            ->createQuery(<<<'DQL'
            SELECT a
            FROM App\Aggregator\Domain\Model\Entity\Article a
            LEFT JOIN App\Aggregator\Domain\Model\Entity\Source s
            WHERE s.name = :source AND a.metadata IS NULL
            ORDER BY a.publishedAt DESC
            DQL)
            ->setParameter('source', $source);

        $this->stopwatch->start(self::WATCH_EVENT_NAME);

        /** @var Article $article */
        foreach ($query->toIterable() as $article) {
            $object = $this->openGraphConsumer->consumeUrl((string) $article->link);
            if ($object instanceof OpenGraphObject) {
                $article->defineOpenGraph($object);
                $this->logger->notice(sprintf('> %s ✅', $article->title));
            } else {
                $this->logger->notice(sprintf('> %s ❌', $article->title));
            }

            ++$index;
            if ($index % $batchSize === 0) {
                $this->entityManager->flush();
                $this->entityManager->clear();
            }
        }

        // Flush whatever remains of the last, incomplete batch.
        $this->entityManager->flush();

        $event = $this->stopwatch->stop(self::WATCH_EVENT_NAME);
        $this->eventDispatcher->dispatch([new SourceCrawled((string) $event, 'open-graph')]);
        $this->logger->notice('OpenGraph data fetched successfully');

        return Command::SUCCESS;
    }
}
@@ -1,121 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\Aggregator\Infrastructure\Crawler\Source\SourceCrawler;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Process\PhpSubprocess;
/**
 * Console command that crawls one source, either in-process or split across
 * several PHP sub-processes that each handle a slice of the page range.
 */
#[AsCommand(
    name: 'app:crawl',
    description: 'crawle a news website',
)]
class CrawlConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly SourceCrawler $sourceCrawler
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addOption('date', null, InputOption::VALUE_OPTIONAL, 'Date interval to crawle');
        $this->addOption('page', null, InputOption::VALUE_OPTIONAL, 'PageRange interval to crawle');
        $this->addOption('category', null, InputOption::VALUE_OPTIONAL, 'the category to crawle');
        $this->addOption('parallel', null, InputOption::VALUE_OPTIONAL, 'the number of parallel requests', default: 1);
        $this->addOption('notify', null, InputOption::VALUE_NONE, 'enable notifications');
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Crawls the source in-process, or hands over to parallel() when more
     * than one worker is requested.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        /** @var string $source */
        $source = $input->getArgument('source');

        /** @var string|null $page */
        $page = $input->getOption('page');

        /** @var string|null $date */
        $date = $input->getOption('date');

        /** @var string|null $category */
        $category = $input->getOption('category');

        /** @var string $parallel */
        $parallel = $input->getOption('parallel');
        $parallel = intval($parallel);

        if ($parallel > 1) {
            return $this->parallel($parallel, $source, $category);
        }

        $this->sourceCrawler->fetch(
            settings: new CrawlingSettings(
                id: $source,
                pageRange: $page !== null ? PageRange::from($page) : null,
                dateRange: $date !== null ? DateRange::from($date) : null,
                category: $category,
                // A VALUE_NONE option yields true/false, never null, so a
                // `!== null` test would enable notifications unconditionally.
                notify: (bool) $input->getOption('notify')
            )
        );

        $this->io->success('website crawled successfully');

        return Command::SUCCESS;
    }

    /**
     * Splits the page range of the source into contiguous slices and crawls
     * each slice in its own `app:crawl` sub-process.
     *
     * @return int Command::SUCCESS when every worker succeeded,
     *             Command::FAILURE otherwise
     */
    private function parallel(int $workers, string $source, ?string $category): int
    {
        $fetcher = $this->sourceCrawler->get($source);
        $range = $fetcher->getPagination($category);

        $totalPages = $range->end - $range->start + 1;
        $workPerWorker = (int) ceil($totalPages / $workers);

        $this->io->title(sprintf('Crawling %d pages with %d workers, %d pages per worker', $totalPages, $workers, $workPerWorker));

        $processes = [];
        for ($i = 0; $i < $workers; $i++) {
            $start = $range->start + ($i * $workPerWorker);
            // Stop before spawning: with fewer slices than workers, the next
            // slice would start beyond the last page.
            if ($start > $range->end) {
                break;
            }
            $end = min($start + $workPerWorker - 1, $range->end);

            $command = ['bin/console', 'app:crawl', $source, sprintf('--page=%d:%d', $start, $end)];
            if ($category !== null) {
                // Workers must crawl the category the pagination was computed for.
                $command[] = sprintf('--category=%s', $category);
            }
            $command[] = '-v';

            $process = new PhpSubprocess($command);
            $process->start();
            $processes[] = $process;
        }

        $failures = 0;
        foreach ($processes as $process) {
            // Block until the worker exits instead of spinning on isRunning().
            $process->wait();
            $this->io->writeln($process->getOutput());

            if (! $process->isSuccessful()) {
                ++$failures;
                $this->io->error(sprintf('Worker failed: %s', $process->getErrorOutput()));
            }
        }

        if ($failures > 0) {
            $this->io->error(sprintf('%d worker(s) failed', $failures));

            return Command::FAILURE;
        }

        $this->io->success('Website crawled successfully');

        return Command::SUCCESS;
    }
}
@@ -2,15 +2,15 @@
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
namespace Basango\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\CreateSource;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Presentation\Console\AskArgumentFeature;
use Basango\Aggregator\Application\UseCase\Command\CreateSource;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use Basango\SharedKernel\Application\Messaging\CommandBus;
use Basango\SharedKernel\Presentation\Console\AskArgumentFeature;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
namespace Basango\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\DeleteArticles;
use App\SharedKernel\Application\Messaging\CommandBus;
use Basango\Aggregator\Application\UseCase\Command\DeleteArticles;
use Basango\SharedKernel\Application\Messaging\CommandBus;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
namespace Basango\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\ExportArticles;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Basango\Aggregator\Application\UseCase\Command\ExportArticles;
use Basango\SharedKernel\Application\Messaging\CommandBus;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
namespace Basango\Aggregator\Presentation\Console;
use App\Aggregator\Application\ReadModel\SourceStatistics;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\SharedKernel\Application\Messaging\QueryBus;
use Basango\Aggregator\Application\ReadModel\SourceStatistics;
use Basango\Aggregator\Application\ReadModel\SourceStatisticsList;
use Basango\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use Basango\SharedKernel\Application\Messaging\QueryBus;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
@@ -1,90 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\UpdateDirection;
use App\Aggregator\Infrastructure\Crawler\Source\SourceCrawler;
use App\SharedKernel\Application\Messaging\QueryBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'app:update',
description: 'crawl a news website based on last update',
)]
/**
 * Console command `app:update`.
 *
 * Crawls a source incrementally: the date range to crawl is derived from the
 * latest (forward) or earliest (backward) publication date returned by the
 * query bus for the given source and category.
 */
class UpdateConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly SourceCrawler $sourceCrawler,
        private readonly QueryBus $queryBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addOption('category', null, InputOption::VALUE_OPTIONAL, 'the category to crawle');
        $this->addOption('direction', null, InputOption::VALUE_OPTIONAL, 'the direction to crawle', 'forward', ['forward', 'backward']);
        $this->addOption('days', null, InputOption::VALUE_OPTIONAL, 'the number of days to crawle');
        $this->addOption('notify', null, InputOption::VALUE_OPTIONAL, 'enable notifications', default: false);
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Resolves the date range for the requested direction and runs the crawler.
     *
     * @return int Command::SUCCESS once the crawler returns
     *
     * @throws \ValueError when `--direction` is not a valid UpdateDirection value
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Console options arrive as strings: normalise to the int|null the
        // date range computation is annotated to receive.
        /** @var string|null $rawDays */
        $rawDays = $input->getOption('days');
        $days = $rawDays !== null ? (int) $rawDays : null;

        /** @var string $source */
        $source = $input->getArgument('source');

        /** @var string|null $category */
        $category = $input->getOption('category');

        /** @var string $direction */
        $direction = $input->getOption('direction');
        $direction = UpdateDirection::from($direction);

        /** @var \DateTimeImmutable $date */
        $date = $this->queryBus->handle(match ($direction) {
            UpdateDirection::FORWARD => new GetLatestPublicationDate($source, $category),
            UpdateDirection::BACKWARD => new GetEarliestPublicationDate($source, $category),
        });

        $dateRange = $direction === UpdateDirection::FORWARD
            ? DateRange::forward($date)
            : DateRange::backward($date, $days);

        $this->io->title(sprintf('[%s] Updating with range %s', $direction->value, $dateRange->format()));

        // `notify` is VALUE_OPTIONAL with default `false`: the option yields
        // false when absent, null for a bare `--notify`, and a string for
        // `--notify=value`. Comparing against null therefore enabled
        // notifications when the flag was NOT passed; compare against the
        // default instead.
        $notify = $input->getOption('notify') !== false;

        $this->sourceCrawler->fetch(new CrawlingSettings(
            $source,
            dateRange: $dateRange,
            category: $category,
            notify: $notify
        ));

        $this->io->success('website crawled successfully');

        return Command::SUCCESS;
    }
}
@@ -1,66 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\SharedKernel\Domain\Assert;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Contracts\HttpClient\HttpClientInterface;
#[AsCommand(
name: 'app:update-proxies',
description: 'get an updated list of proxies',
)]
/**
 * Console command `app:update-proxies`.
 *
 * Downloads the proxy list published at UPDATE_URL and stores it in
 * `<projectDir>/data/proxies.txt`.
 */
final class UpdateProxiesConsole extends Command
{
    private const string UPDATE_URL = 'https://github.com/zloi-user/hideip.me/raw/refs/heads/master/https.txt';

    private SymfonyStyle $io;

    public function __construct(
        private readonly string $projectDir,
        private readonly HttpClientInterface $client,
        private readonly Filesystem $filesystem,
        private readonly LoggerInterface $logger
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Fetches the remote list, trims each matching line down to `ip:port`
     * and writes the result to disk.
     *
     * Any failure is logged at critical level and reported through the
     * Command::FAILURE exit code.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $target = $this->projectDir . '/data/proxies.txt';

        try {
            $raw = $this->client->request('GET', self::UPDATE_URL)->getContent();

            // Lines shaped like `ip:port:<anything>` keep only their `ip:port` prefix;
            // preg_replace() yields null on error, which the assertion below rejects.
            $proxies = preg_replace('/^([0-9\.]+:[0-9]+):.*$/m', '$1', $raw);
            Assert::string($proxies);

            $this->filesystem->dumpFile($target, $proxies);
        } catch (\Throwable $failure) {
            $this->logger->critical('Failed to update proxies', ['exception' => $failure]);

            return Command::FAILURE;
        }

        $this->io->success('Proxies updated successfully.');

        return Command::SUCCESS;
    }
}
@@ -0,0 +1,61 @@
<?php
declare(strict_types=1);
namespace Basango\Aggregator\Presentation\Web\Controller;
use Basango\Aggregator\Application\UseCase\Command\CreateArticle;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
use Basango\Aggregator\Presentation\WriteModel\AddArticleModel;
use Basango\SharedKernel\Presentation\Web\Controller\AbstractController;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\HttpKernel\Attribute\MapQueryParameter;
use Symfony\Component\HttpKernel\Attribute\MapRequestPayload;
use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\Routing\Requirement\Requirement;
/**
* Class AddArticleController.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final class AddArticleController extends AbstractController
{
    /**
     * @param string $token shared secret read from the BASANGO_CRAWLER_TOKEN environment variable
     */
    public function __construct(
        #[Autowire(env: "BASANGO_CRAWLER_TOKEN")] private readonly string $token
    ) {
    }

    /**
     * Accepts an article pushed by the crawler and dispatches a CreateArticle command.
     *
     * @param string          $token `token` query parameter, checked against the configured secret
     * @param AddArticleModel $model request payload mapped to the write model
     *
     * @return JsonResponse empty 201 response once the command has been handled
     */
    #[Route(
        path: '/api/aggregator/articles',
        name: 'aggregator_add_article',
        requirements: [
            'token' => Requirement::ASCII_SLUG
        ],
        methods: ['POST']
    )]
    public function __invoke(
        #[MapQueryParameter] string $token,
        #[MapRequestPayload] AddArticleModel $model
    ): JsonResponse {
        // Fail closed when no secret is configured (an empty secret would
        // otherwise match an empty `?token=`), and compare in constant time
        // so the secret cannot be guessed through response timing.
        if ($this->token === '' || ! hash_equals($this->token, $token)) {
            throw $this->createAccessDeniedException();
        }

        $this->handleCommand(new CreateArticle(
            $model->title,
            Link::from($model->link),
            // Categories are flattened into a single comma-separated string.
            implode(', ', $model->categories),
            $model->body,
            $model->source,
            $model->timestamp,
            $model->metadata,
        ));

        return new JsonResponse(status: Response::HTTP_CREATED);
    }
}
@@ -0,0 +1,35 @@
<?php
declare(strict_types=1);
namespace Basango\Aggregator\Presentation\WriteModel;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
use Symfony\Component\Validator\Constraints as Assert;
/**
* Class AddArticleModel.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final class AddArticleModel
{
    #[Assert\NotBlank]
    public string $title;

    #[Assert\NotBlank]
    public string $link;

    #[Assert\NotBlank]
    public string $body;

    #[Assert\NotBlank]
    public string $source;

    #[Assert\NotBlank]
    public int $timestamp;

    /**
     * Category labels of the article.
     *
     * Every element must be a string: AddArticleController joins the list
     * into a single comma-separated value, so a non-string element must be
     * rejected during validation rather than fail at that point.
     *
     * @var list<string>
     */
    #[Assert\All([
        new Assert\Type('string'),
    ])]
    public array $categories = [];

    public ?OpenGraph $metadata = null;
}
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\Cache;
namespace Basango\FeedManagement\Application\Cache;
/**
* Enum SourceCacheAttributes.
@@ -2,18 +2,18 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
use Basango\Aggregator\Domain\Model\ValueObject\ReadingTime;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class ArticleDetails.
@@ -2,12 +2,12 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\ValueObject\Link;
use Basango\Aggregator\Domain\Model\ValueObject\ReadingTime;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class ArticleOverview.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
use Basango\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
* Class ArticleOverviewList.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\FeedManagement\Domain\Model\Identity\BookmarkId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\FeedManagement\Domain\Model\Identity\BookmarkId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class Bookmark.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
use Basango\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
* Class BookmarkList.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
/**
* Class CategoryShare.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Assert;
/**
* Class CategoryShares.
@@ -2,11 +2,11 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\FeedManagement\Domain\Model\Identity\CommentId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use Basango\FeedManagement\Domain\Model\Identity\CommentId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class Comment.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
use Basango\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
* Class CommentList.
@@ -2,7 +2,7 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
/**
* Class DallyEntry.
@@ -2,9 +2,9 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Assert;
/**
* Class PublicationGraph.
@@ -2,14 +2,14 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class SourceDetails.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class SourceOverview.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
use Basango\SharedKernel\Domain\Assert;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
* Class SourceOverviewList.
@@ -2,10 +2,10 @@
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
namespace Basango\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
use Basango\Aggregator\Domain\Model\Identity\SourceId;
use Basango\SharedKernel\Domain\DataTransfert\DataMapping;
/**
* Class SourceReference.

Some files were not shown because too many files have changed in this diff Show More