Initial commit

This commit is contained in:
2025-10-05 13:55:28 +02:00
commit 68d521677a
767 changed files with 46947 additions and 0 deletions
@@ -0,0 +1,38 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\EventListener;
use App\Aggregator\Application\Mailing\SourceCrawledEmail;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\SharedKernel\Application\Mailing\Mailer;
use App\SharedKernel\Domain\EventListener\EventListener;
use App\SharedKernel\Domain\Model\ValueObject\EmailAddress;
/**
 * Class SourceCrawledListener.
 *
 * Reacts to a SourceCrawled event by emailing a notification when the event asks for one.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceCrawledListener implements EventListener
{
    /**
     * @param Mailer $mailer                    mailer used to send the notification
     * @param string $crawlingNotificationEmail address the notification is sent to
     */
    public function __construct(
        private Mailer $mailer,
        private string $crawlingNotificationEmail,
    ) {
    }

    /**
     * Sends a SourceCrawledEmail built from the event, but only when its notify flag is set.
     */
    public function __invoke(SourceCrawled $event): void
    {
        if (! $event->notify) {
            return;
        }

        $recipient = EmailAddress::from($this->crawlingNotificationEmail);

        $this->mailer->send(new SourceCrawledEmail($recipient, $event->event, $event->source));
    }
}
@@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\Mailing;
use App\SharedKernel\Application\Mailing\EmailDefinition;
use App\SharedKernel\Domain\Model\ValueObject\EmailAddress;
/**
 * Class SourceCrawledEmail.
 *
 * Email definition for the "source crawled" notification.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceCrawledEmail implements EmailDefinition
{
    /**
     * @param EmailAddress $recipient address returned by recipient()
     * @param string       $event     value handed to the template as "event"
     * @param string       $source    value handed to the template as "source"
     */
    public function __construct(
        private EmailAddress $recipient,
        private string $event,
        private string $source,
    ) {
    }

    /** Address the email is sent to. */
    #[\Override]
    public function recipient(): EmailAddress
    {
        return $this->recipient;
    }

    /** Identifier of the subject line. */
    #[\Override]
    public function subject(): string
    {
        return 'aggregator.emails.source_crawled.subject';
    }

    /** The subject takes no variables. */
    #[\Override]
    public function subjectVariables(): array
    {
        return [];
    }

    /** Name of the template used for the body. */
    #[\Override]
    public function template(): string
    {
        return 'aggregator/source_crawled';
    }

    /** Variables handed to the template: the crawled source and the event. */
    #[\Override]
    public function templateVariables(): array
    {
        return [
            'source' => $this->source,
            'event' => $this->event,
        ];
    }

    /** Locale of the email. */
    #[\Override]
    public function locale(): string
    {
        return 'fr';
    }

    /** Domain returned for this email. */
    #[\Override]
    public function getDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,44 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class ArticleForExport.
 *
 * Read model holding the article fields used for export.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ArticleForExport
{
    public function __construct(
        public ArticleId $id,
        public string $title,
        public string $link,
        public string $categories,
        public string $body,
        public string $source,
        public string $hash,
        public \DateTimeImmutable $publishedAt,
        public \DateTimeImmutable $crawledAt,
    ) {
    }

    /**
     * Builds the read model from a row keyed by "article_*" names.
     *
     * The identifier is read from its binary form; every other field goes through DataMapping.
     *
     * @param array $item row containing the article_id, article_title, article_link, article_categories,
     *                    article_body, article_source, article_hash, article_published_at and
     *                    article_crawled_at keys
     */
    public static function create(array $item): self
    {
        return new self(
            id: ArticleId::fromBinary($item['article_id']),
            title: DataMapping::string($item, 'article_title'),
            link: DataMapping::string($item, 'article_link'),
            categories: DataMapping::string($item, 'article_categories'),
            body: DataMapping::string($item, 'article_body'),
            source: DataMapping::string($item, 'article_source'),
            hash: DataMapping::string($item, 'article_hash'),
            publishedAt: DataMapping::datetime($item, 'article_published_at'),
            crawledAt: DataMapping::datetime($item, 'article_crawled_at'),
        );
    }
}
@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class SourceStatistics.
 *
 * Read model holding per-source counters and the optional crawl date.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceStatistics
{
    public function __construct(
        public SourceId $id,
        public string $name,
        public int $articlesCount,
        public int $metadataAvailable,
        public ?\DateTimeImmutable $crawledAt = null,
    ) {
    }

    /**
     * Builds the read model from a row.
     *
     * @param array $item row containing the source_id (binary), source_name, articles_count,
     *                    article_metadata_available and source_crawled_at keys
     */
    public static function create(array $item): self
    {
        return new self(
            id: SourceId::fromBinary($item['source_id']),
            name: DataMapping::string($item, 'source_name'),
            articlesCount: DataMapping::integer($item, 'articles_count'),
            metadataAvailable: DataMapping::integer($item, 'article_metadata_available'),
            crawledAt: DataMapping::nullableDatetime($item, 'source_crawled_at'),
        );
    }
}
@@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
/**
 * Class SourceStatisticsList.
 *
 * Collection of SourceStatistics read models.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceStatisticsList
{
    /**
     * @param array $items every element must be a SourceStatistics instance (asserted here)
     */
    public function __construct(
        public array $items,
    ) {
        Assert::allIsInstanceOf($items, SourceStatistics::class);
    }

    /**
     * Builds the list from raw rows, converting each row with SourceStatistics::create()
     * and keeping the original keys.
     *
     * @param array $items rows, each one an array accepted by SourceStatistics::create()
     */
    public static function create(array $items): self
    {
        $statistics = [];
        foreach ($items as $key => $row) {
            $statistics[$key] = SourceStatistics::create($row);
        }

        return new self($statistics);
    }
}
@@ -0,0 +1,27 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
/**
 * Class CreateArticle.
 *
 * Command carrying the data needed to create an article.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CreateArticle
{
    /**
     * @param string               $source    source name; the handler looks the source up by this name
     * @param int                  $timestamp publication time; the handler parses it with the "U" format
     * @param OpenGraphObject|null $metadata  optional Open Graph data attached to the article
     */
    public function __construct(
        public string $title,
        public Link $link,
        public string $categories,
        public string $body,
        public string $source,
        public int $timestamp,
        public ?OpenGraphObject $metadata = null,
    ) {
    }
}
@@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
/**
 * Class CreateSource.
 *
 * Command carrying the data needed to create a source.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CreateSource
{
    /**
     * @param string $name source name; the handler also derives the source URL from it
     */
    public function __construct(
        public string $name,
        public Credibility $credibility,
        public ?string $displayName = null,
        public ?string $description = null,
    ) {
    }
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
/**
 * Class DeleteArticles.
 *
 * Command asking to delete the articles of a source, optionally limited to one category.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DeleteArticles
{
    public function __construct(
        public string $source,
        public ?string $category = null,
    ) {
    }
}
@@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Command;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class ExportArticles.
 *
 * Command asking to export articles, optionally filtered by source and date range.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ExportArticles
{
    public function __construct(
        public ?string $source = null,
        public ?DateRange $date = null,
    ) {
    }
}
@@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\CreateArticle;
use App\Aggregator\Domain\Exception\DuplicatedArticle;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use App\Aggregator\Domain\Service\HashCalculator;
use App\SharedKernel\Application\Messaging\CommandHandler;
/**
 * Class CreateArticleHandler.
 *
 * Creates and stores an article from a CreateArticle command.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CreateArticleHandler implements CommandHandler
{
    public function __construct(
        private SourceRepository $sourceRepository,
        private ArticleRepository $articleRepository,
        private HashCalculator $hashCalculator,
    ) {
    }

    /**
     * Rejects duplicates (same link hash), then builds the article, attaches its Open Graph
     * metadata, computes its reading time and adds it to the repository.
     *
     * @throws DuplicatedArticle when an article with the same link hash is already stored
     */
    public function __invoke(CreateArticle $command): void
    {
        // The hash of the link is what identifies a duplicate.
        $linkHash = $this->hashCalculator->calculate((string) $command->link);
        if ($this->articleRepository->getByHash($linkHash) instanceof Article) {
            throw DuplicatedArticle::withLink($command->link);
        }

        /** @var \DateTimeImmutable $publicationDate */
        $publicationDate = \DateTimeImmutable::createFromFormat('U', (string) $command->timestamp);
        $owningSource = $this->sourceRepository->getByName($command->source);

        // Categories are stored lower-cased.
        $created = new Article(
            title: $command->title,
            link: $command->link,
            body: $command->body,
            hash: $linkHash,
            categories: mb_strtolower($command->categories),
            source: $owningSource,
            publishedAt: $publicationDate
        );
        $created->defineOpenGraph($command->metadata);
        $created->computeReadingTime();

        $this->articleRepository->add($created);
    }
}
@@ -0,0 +1,32 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\CreateSource;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use App\SharedKernel\Application\Messaging\CommandHandler;
/**
 * Class CreateSourceHandler.
 *
 * Creates and stores a source from a CreateSource command.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CreateSourceHandler implements CommandHandler
{
    public function __construct(
        private SourceRepository $sourceRepository,
    ) {
    }

    /**
     * Builds the source (its URL is "https://" followed by the source name), applies the
     * credibility and profile information from the command, then adds it to the repository.
     */
    public function __invoke(CreateSource $command): void
    {
        $url = sprintf('https://%s', $command->name);

        $created = Source::create($command->name, $url);
        $created->defineCredibility($command->credibility);
        $created->defineProfileInfos($command->displayName, $command->description);

        $this->sourceRepository->add($created);
    }
}
@@ -0,0 +1,27 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\UseCase\Command\DeleteArticles;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\SharedKernel\Application\Messaging\CommandHandler;
/**
 * Class DeleteArticlesHandler.
 *
 * Delegates a DeleteArticles command to the article repository.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DeleteArticlesHandler implements CommandHandler
{
    public function __construct(
        private ArticleRepository $articleRepository,
    ) {
    }

    /**
     * Clears the articles of the given source (and optional category).
     *
     * @return int the value returned by ArticleRepository::clear()
     */
    public function __invoke(DeleteArticles $command): int
    {
        $cleared = $this->articleRepository->clear($command->source, $command->category);

        return $cleared;
    }
}
@@ -0,0 +1,42 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\CommandHandler;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Command\ExportArticles;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\SharedKernel\Application\Messaging\CommandHandler;
use App\SharedKernel\Application\Messaging\QueryBus;
use App\SharedKernel\Domain\DataTransfert\DataExporter;
use App\SharedKernel\Domain\DataTransfert\TransfertSetting;
/**
 * Class ExportArticlesHandler.
 *
 * Exports the articles matching an ExportArticles command to a CSV file.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ExportArticlesHandler implements CommandHandler
{
    /**
     * @param string $projectDir directory under which "data/export-<timestamp>.csv" is written
     */
    public function __construct(
        private QueryBus $queryBus,
        private DataExporter $exporter,
        private string $projectDir,
    ) {
    }

    /**
     * Fetches the articles through the query bus and hands them to the exporter.
     * The target file name embeds the current Unix timestamp.
     */
    public function __invoke(ExportArticles $command): void
    {
        $timestamp = (new \DateTimeImmutable('now'))->format('U');
        $target = sprintf('%s/data/export-%s.csv', $this->projectDir, $timestamp);

        /** @var iterable<ArticleForExport> $articles */
        $articles = $this->queryBus->handle(new GetArticlesForExport($command->source, $command->date));

        $this->exporter->export($articles, new TransfertSetting($target));
    }
}
@@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class GetArticlesForExport.
 *
 * Query for the articles to export, optionally filtered by source and date range.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetArticlesForExport
{
    public function __construct(
        public ?string $source = null,
        public ?DateRange $date = null,
    ) {
    }
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
/**
 * Class GetEarliestPublicationDate.
 *
 * Query for the earliest publication date of a source, optionally limited to one category.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetEarliestPublicationDate
{
    public function __construct(
        public string $source,
        public ?string $category = null,
    ) {
    }
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
/**
 * Class GetLatestPublicationDate.
 *
 * Query for the latest publication date of a source, optionally limited to one category.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetLatestPublicationDate
{
    public function __construct(
        public string $source,
        public ?string $category = null,
    ) {
    }
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\Query;
/**
* Class GetSourceStatisticsList.
*
* Query without parameters; GetSourceStatisticsListHandler answers it with a SourceStatisticsList.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final readonly class GetSourceStatisticsList
{
}
@@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetArticlesForExportHandler.
*
* Contract for the handler of the GetArticlesForExport query.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface GetArticlesForExportHandler extends QueryHandler
{
/**
* Handles the query.
*
* @return iterable<ArticleForExport>
*/
public function __invoke(GetArticlesForExport $query): iterable;
}
@@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetEarliestPublicationDateHandler.
*
* Contract for the handler of the GetEarliestPublicationDate query.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface GetEarliestPublicationDateHandler extends QueryHandler
{
/**
* Handles the query and returns a date.
* NOTE(review): the return type is non-nullable; what happens when the source has no article is not visible here — confirm in implementations.
*/
public function __invoke(GetEarliestPublicationDate $query): \DateTimeImmutable;
}
@@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetLatestPublicationDateHandler.
*
* Contract for the handler of the GetLatestPublicationDate query.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface GetLatestPublicationDateHandler extends QueryHandler
{
/**
* Handles the query and returns a date.
* NOTE(review): the return type is non-nullable; what happens when the source has no article is not visible here — confirm in implementations.
*/
public function __invoke(GetLatestPublicationDate $query): \DateTimeImmutable;
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Application\UseCase\QueryHandler;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\SharedKernel\Application\Messaging\QueryHandler;
/**
* Interface GetSourceStatisticsListHandler.
*
* Contract for the handler of the GetSourceStatisticsList query.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface GetSourceStatisticsListHandler extends QueryHandler
{
/**
* Handles the query and returns the statistics as a SourceStatisticsList.
*/
public function __invoke(GetSourceStatisticsList $query): SourceStatisticsList;
}
@@ -0,0 +1,20 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Event;
/**
 * Class SourceCrawled.
 *
 * Event describing a crawl of a source.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceCrawled
{
    /**
     * @param string $event  event label, forwarded to the notification email
     * @param string $source source label, forwarded to the notification email
     * @param bool   $notify whether a notification email should be sent for this event
     */
    public function __construct(
        public string $event,
        public string $source,
        public bool $notify = false,
    ) {
    }
}
@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\Exception\UserFacingError;
/**
 * Class ArticleNotFound.
 *
 * Domain exception for an article that could not be found; it also exposes translation details.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class ArticleNotFound extends \DomainException implements UserFacingError
{
    /**
     * Named constructor: the message mentions the identifier that was looked up.
     */
    public static function withId(ArticleId $id): self
    {
        $message = sprintf('article with id %s was not found', $id->toString());

        return new self($message);
    }

    /** Translation key of the user-facing message. */
    public function translationId(): string
    {
        return 'aggregator.exceptions.article_not_found';
    }

    /** The translation takes no parameters. */
    public function translationParameters(): array
    {
        return [];
    }

    /** Translation domain holding the key. */
    public function translationDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,40 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
use App\SharedKernel\Domain\Exception\UserFacingError;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class ArticleOutOfRange.
 *
 * Domain exception for an article whose timestamp is outside a date range; it also exposes
 * translation details.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class ArticleOutOfRange extends \DomainException implements UserFacingError
{
    /**
     * Named constructor: the message shows the article date and the range, both formatted
     * as "Y-m-d H:i:s".
     *
     * @param string $timestamp Unix timestamp of the article
     */
    public static function with(string $timestamp, DateRange $dateRange): self
    {
        $pattern = 'Y-m-d H:i:s';
        $articleDate = new \DateTimeImmutable('@' . $timestamp);

        $date = $articleDate->format($pattern);
        $range = $dateRange->format($pattern);

        return new self(sprintf('article with timestamp %s is out of range %s', $date, $range));
    }

    /** Translation key of the user-facing message. */
    public function translationId(): string
    {
        return 'aggregator.exceptions.article_out_of_range';
    }

    /** The translation takes no parameters. */
    public function translationParameters(): array
    {
        return [];
    }

    /** Translation domain holding the key. */
    public function translationDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\SharedKernel\Domain\Exception\UserFacingError;
/**
 * Class DuplicatedArticle.
 *
 * Domain exception for an article that already exists; it also exposes translation details.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class DuplicatedArticle extends \DomainException implements UserFacingError
{
    /**
     * Named constructor: the message mentions the duplicated link.
     */
    public static function withLink(Link $link): self
    {
        $message = sprintf('duplicate article with %s link', (string) $link);

        return new self($message);
    }

    /** Translation key of the user-facing message. */
    public function translationId(): string
    {
        return 'aggregator.exceptions.duplicate_article';
    }

    /** The translation takes no parameters. */
    public function translationParameters(): array
    {
        return [];
    }

    /** Translation domain holding the key. */
    public function translationDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,35 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
use App\SharedKernel\Domain\Exception\UserFacingError;
/**
 * Class DuplicatedSource.
 *
 * Domain exception for a source that already exists; it also exposes translation details.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class DuplicatedSource extends \DomainException implements UserFacingError
{
    /**
     * Named constructor: the message mentions the duplicated source name.
     */
    public static function withName(string $name): self
    {
        $message = sprintf('duplicate source with %s name', $name);

        return new self($message);
    }

    /** Translation key of the user-facing message. */
    public function translationId(): string
    {
        return 'aggregator.exceptions.duplicate_source';
    }

    /** The translation takes no parameters. */
    public function translationParameters(): array
    {
        return [];
    }

    /** Translation domain holding the key. */
    public function translationDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,41 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Exception;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\Exception\UserFacingError;
/**
 * Class SourceNotFound.
 *
 * Domain exception for a source that could not be found; it also exposes translation details.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class SourceNotFound extends \DomainException implements UserFacingError
{
    /**
     * Named constructor for a lookup by name.
     */
    public static function withName(string $name): self
    {
        $message = sprintf('source with name %s was not found', $name);

        return new self($message);
    }

    /**
     * Named constructor for a lookup by identifier.
     */
    public static function withId(SourceId $sourceId): self
    {
        $message = sprintf('source with id %s was not found', $sourceId->toString());

        return new self($message);
    }

    /** Translation key of the user-facing message. */
    public function translationId(): string
    {
        return 'aggregator.exceptions.source_not_found';
    }

    /** The translation takes no parameters. */
    public function translationParameters(): array
    {
        return [];
    }

    /** Translation domain holding the key. */
    public function translationDomain(): string
    {
        return 'aggregator';
    }
}
@@ -0,0 +1,95 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
/**
 * Class Article.
 *
 * Article entity. Properties declared with private(set) are readable from outside but can only
 * be changed through the methods below; each mutator returns the article itself for chaining.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
class Article
{
    public readonly ArticleId $id;

    /**
     * A fresh ArticleId is generated for every new instance.
     */
    public function __construct(
        public readonly string $title,
        public readonly Link $link,
        public readonly string $body,
        public readonly string $hash,
        private(set) string $categories,
        public readonly Source $source,
        public readonly \DateTimeImmutable $publishedAt,
        public readonly \DateTimeImmutable $crawledAt = new \DateTimeImmutable(),
        private(set) Credibility $credibility = new Credibility(),
        private(set) Sentiment $sentiment = Sentiment::NEUTRAL,
        private(set) ?OpenGraph $metadata = null,
        private(set) ?ReadingTime $readingTime = null,
        private(set) ?\DateTimeImmutable $updatedAt = null,
        public readonly ?string $image = null,
        public readonly ?string $excerpt = null,
    ) {
        $this->id = new ArticleId();
    }

    /** Replaces the credibility and records the update time. */
    public function defineCredibility(Credibility $credibility): self
    {
        $this->credibility = $credibility;

        return $this->touch();
    }

    /** Replaces the sentiment and records the update time. */
    public function defineSentiment(Sentiment $sentiment): self
    {
        $this->sentiment = $sentiment;

        return $this->touch();
    }

    /** Replaces the categories and records the update time. */
    public function assignCategories(string $categories): self
    {
        $this->categories = $categories;

        return $this->touch();
    }

    /** Derives the reading time from the body and records the update time. */
    public function computeReadingTime(): self
    {
        $this->readingTime = ReadingTime::fromContent($this->body);

        return $this->touch();
    }

    /**
     * Stores an OpenGraph value object built from the crawled Open Graph data.
     *
     * Only the first image, video and audio are kept; for each of them the plain URL is
     * preferred and the secure URL is the fallback. Passing null leaves the article untouched.
     * Unlike the other mutators, this one does not change the update time.
     */
    public function defineOpenGraph(?OpenGraphObject $object): self
    {
        if (! $object instanceof OpenGraphObject) {
            return $this;
        }

        $firstImage = $object->images[0] ?? null;
        $firstVideo = $object->videos[0] ?? null;
        $firstAudio = $object->audios[0] ?? null;

        $this->metadata = new OpenGraph(
            title: $object->title,
            description: $object->description,
            image: $firstImage?->url ?? $firstImage?->secureUrl,
            video: $firstVideo?->url ?? $firstVideo?->secureUrl,
            audio: $firstAudio?->url ?? $firstAudio?->secureUrl,
            locale: $object->locale
        );

        return $this;
    }

    /** Stamps the update time with the current date and returns the article. */
    private function touch(): self
    {
        $this->updatedAt = new \DateTimeImmutable();

        return $this;
    }
}
@@ -0,0 +1,27 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\CategoryId;
/**
 * Class Category.
 *
 * Category entity with a generated identifier and publicly writable fields.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
class Category
{
    public readonly CategoryId $id;

    /**
     * A fresh CategoryId is generated for every new instance.
     */
    public function __construct(
        public string $name,
        public string $slug,
        public array $children = [],
        public ?string $description = null,
        public ?string $image = null,
    ) {
        $this->id = new CategoryId();
    }
}
@@ -0,0 +1,52 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Entity;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
/**
 * Class Source.
 *
 * Source entity. Properties declared with private(set) are readable from outside but can only
 * be changed through the methods below; each mutator returns the source itself for chaining.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
class Source
{
    public readonly SourceId $id;

    /**
     * A fresh SourceId is generated for every new instance.
     */
    public function __construct(
        public readonly string $name,
        public readonly string $url,
        private(set) Credibility $credibility = new Credibility(),
        private(set) ?string $displayName = null,
        private(set) ?string $description = null,
        private(set) ?\DateTimeImmutable $updatedAt = null,
    ) {
        $this->id = new SourceId();
    }

    /** Named constructor using the default credibility and no profile information. */
    public static function create(string $name, string $url): self
    {
        return new self(name: $name, url: $url);
    }

    /** Replaces the credibility and records the update time. */
    public function defineCredibility(Credibility $credibility): self
    {
        $this->credibility = $credibility;

        return $this->touch();
    }

    /** Replaces the display name and description (both may be null) and records the update time. */
    public function defineProfileInfos(?string $displayName, ?string $description): self
    {
        $this->displayName = $displayName;
        $this->description = $description;

        return $this->touch();
    }

    /** Stamps the update time with the current date and returns the source. */
    private function touch(): self
    {
        $this->updatedAt = new \DateTimeImmutable();

        return $this;
    }
}
@@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
/**
* Class ArticleId.
*
* Identifier type for articles; a UuidV7 subclass that adds no members of its own.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final class ArticleId extends UuidV7
{
}
@@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
/**
* Class CategoryId.
*
* Identifier type for categories; a UuidV7 subclass that adds no members of its own.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final class CategoryId extends UuidV7
{
}
@@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Identity;
use Symfony\Component\Uid\UuidV7;
/**
* Class SourceId.
*
* Identifier type for sources; a UuidV7 subclass that adds no members of its own.
*
* @author bernard-ng <bernard@devscast.tech>
*/
final class SourceId extends UuidV7
{
}
@@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Repository;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
* Interface ArticleRepository.
*
* Storage contract for Article entities.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface ArticleRepository
{
/** Adds an article to the repository. */
public function add(Article $article): void;
/** Removes an article from the repository. */
public function remove(Article $article): void;
/**
* Returns the article with the given identifier.
* NOTE(review): the return type is non-nullable, so a missing article presumably raises ArticleNotFound — confirm in implementations.
*/
public function getById(ArticleId $id): Article;
/** Returns the article stored under the given hash, or null (CreateArticleHandler relies on this to detect duplicates). */
public function getByHash(string $hash): ?Article;
/**
* Yields articles for export, optionally filtered by source and date range.
* NOTE(review): the type of the yielded items is not visible here — confirm in implementations.
*/
public function export(?string $source, ?DateRange $date): \Generator;
/**
* Clears the articles of a source, optionally limited to one category.
* NOTE(review): the returned int is presumably the number of removed articles — confirm in implementations.
*/
public function clear(string $source, ?string $category): int;
}
@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\Repository;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Identity\SourceId;
/**
* Interface SourceRepository.
*
* Storage contract for Source entities.
*
* @author bernard-ng <bernard@devscast.tech>
*/
interface SourceRepository
{
/** Adds a source to the repository. */
public function add(Source $source): void;
/** Removes a source from the repository. */
public function remove(Source $source): void;
/**
* Returns the source with the given name.
* NOTE(review): the return type is non-nullable, so a missing source presumably raises SourceNotFound — confirm in implementations.
*/
public function getByName(string $name): Source;
/**
* Returns the source with the given identifier.
* NOTE(review): the return type is non-nullable, so a missing source presumably raises SourceNotFound — confirm in implementations.
*/
public function getById(SourceId $sourceId): Source;
}
@@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Class CrawlingSettings.
 *
 * Immutable settings for one crawl.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CrawlingSettings
{
    /**
     * @param string         $id        identifier; must not be empty (asserted here)
     * @param PageRange|null $pageRange optional page range
     * @param DateRange|null $dateRange optional date range
     * @param string|null    $category  optional category
     * @param bool           $notify    notification flag, off by default
     */
    public function __construct(
        public string $id,
        public ?PageRange $pageRange = null,
        public ?DateRange $dateRange = null,
        public ?string $category = null,
        public bool $notify = false,
    ) {
        Assert::notEmpty($id);
    }
}
@@ -0,0 +1,58 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
/**
 * Class OpenGraph.
 *
 * Immutable set of Open Graph fields that can be read from and written to JSON.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class OpenGraph implements \JsonSerializable
{
    public function __construct(
        public ?string $title = null,
        public ?string $description = null,
        public ?string $image = null,
        public ?string $video = null,
        public ?string $audio = null,
        public ?string $locale = null,
    ) {
    }

    /**
     * Builds the object from a JSON string.
     *
     * Missing keys become null. Returns null for a null input and whenever decoding or
     * construction throws (for example on malformed JSON).
     */
    public static function tryFrom(?string $value): ?self
    {
        if ($value === null) {
            return null;
        }

        try {
            $decoded = \json_decode($value, true, 512, JSON_THROW_ON_ERROR);

            return new self(
                title: $decoded['title'] ?? null,
                description: $decoded['description'] ?? null,
                image: $decoded['image'] ?? null,
                video: $decoded['video'] ?? null,
                audio: $decoded['audio'] ?? null,
                locale: $decoded['locale'] ?? null,
            );
        } catch (\Throwable) {
            // Best effort: any failure means "no usable metadata".
            return null;
        }
    }

    /**
     * Exposes the six fields under the same keys tryFrom() reads.
     */
    #[\Override]
    public function jsonSerialize(): array
    {
        return [
            'title' => $this->title,
            'description' => $this->description,
            'image' => $this->image,
            'video' => $this->video,
            'audio' => $this->audio,
            'locale' => $this->locale,
        ];
    }
}
@@ -0,0 +1,50 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
use App\SharedKernel\Domain\Assert;
/**
 * Class PageRange.
 *
 * Immutable range of page numbers written as "start:end".
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class PageRange implements \Stringable
{
    public int $start;

    public int $end;

    /**
     * Both bounds must be zero or greater and the end must be strictly greater than the start.
     */
    private function __construct(int $start, int $end)
    {
        Assert::greaterThanEq($start, 0);
        Assert::greaterThanEq($end, 0);
        Assert::greaterThan($end, $start);

        $this->start = $start;
        $this->end = $end;
    }

    /**
     * Renders the range in the "start:end" form accepted by from().
     */
    #[\Override]
    public function __toString(): string
    {
        return $this->start . ':' . $this->end;
    }

    /**
     * Parses a "start:end" string.
     *
     * Each side is cast to int. A string without a ":" separator is rejected by an assertion
     * instead of triggering an undefined-offset warning and a misleading bounds failure.
     */
    public static function from(string $interval): self
    {
        $parts = explode(':', $interval);
        Assert::notNull($parts[1] ?? null, 'A page range must use the "start:end" format.');

        return new self((int) $parts[0], (int) $parts[1]);
    }

    /**
     * Tells whether the page lies within the range, both bounds included.
     */
    public function inRange(int $page): bool
    {
        return $page >= $this->start && $page <= $this->end;
    }
}
@@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Crawling;
/**
* Enum UpdateDirection.
*
* String-backed enum with two directions: "forward" and "backward".
*
* @author bernard-ng <bernard@devscast.tech>
*/
enum UpdateDirection: string
{
case FORWARD = 'forward';
case BACKWARD = 'backward';
}
@@ -0,0 +1,42 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject;
use App\SharedKernel\Domain\Assert;
/**
 * Class Link.
 *
 * Immutable absolute link. Relative URLs are resolved against a source host.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class Link implements \Stringable, \JsonSerializable
{
    public string $link;

    /**
     * @param string      $url    absolute URL (kept untouched) or a path relative to the source
     * @param string|null $source host used to build "https://<source>/<path>"; required when
     *                            the URL is not absolute
     */
    private function __construct(string $url, ?string $source = null)
    {
        // Only a real "http://" or "https://" scheme (any letter case) makes a URL absolute;
        // a bare "http" prefix would also match relative paths such as "httpd/intro".
        if (preg_match('#^https?://#i', $url) === 1) {
            $this->link = $url;

            return;
        }

        Assert::notNull($source, 'You must provide a source if the URL is not absolute.');
        $this->link = sprintf('https://%s/%s', $source, trim($url, '/'));
    }

    /** Returns the absolute link. */
    public function __toString(): string
    {
        return $this->link;
    }

    /**
     * Named constructor.
     *
     * @param string      $url    absolute URL or path relative to the source
     * @param string|null $source host used to resolve a relative URL
     */
    public static function from(string $url, ?string $source = null): self
    {
        return new self($url, $source);
    }

    /** Serializes the link as a plain string. */
    public function jsonSerialize(): string
    {
        return $this->link;
    }
}
@@ -0,0 +1,55 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject;
/**
 * Class ReadingTime.
 *
 * The average reading rate is actually 238, but 200 is a nice compromise and is easier to remember.
 *
 * Here's the formula:
 * Get your total word count (including the headline and subhead).
 * Divide total word count by 200. The number before the decimal is your minutes.
 * Take the decimal points and multiply that number by .60. That will give you your seconds.
 *
 * Example:
 * 783 words ÷ 200 = 3.915 (3 = 3 minutes)
 * .915 × .60 = .549 (a little over 54 seconds, so I'd bump it up to 60 seconds, or a full minute)
 * reading time for this example article is 4 minutes
 *
 * Implementation: the quotient is rounded to the nearest minute (so the example above gives 4)
 * and a text is never reported as shorter than one minute.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ReadingTime implements \Stringable, \JsonSerializable
{
    public const int WORDS_PER_MINUTE = 200;

    /** Reading time in minutes. */
    public int $readingTime;

    /**
     * @param string|int $value text whose reading time is computed, or an already known number
     *                          of minutes (stored as given)
     */
    public function __construct(
        string|int $value
    ) {
        $this->readingTime = is_string($value)
            ? max(1, (int) round(self::countWords($value) / self::WORDS_PER_MINUTE))
            : $value;
    }

    /** Returns the number of minutes as a string. */
    public function __toString(): string
    {
        return (string) $this->readingTime;
    }

    /**
     * Wraps a known number of minutes; null falls back to one minute.
     */
    public static function create(?int $value): self
    {
        return new self($value ?? 1);
    }

    /**
     * Computes the reading time of a text.
     */
    public static function fromContent(string $content): self
    {
        return new self($content);
    }

    /** Serializes the number of minutes as a string. */
    public function jsonSerialize(): string
    {
        return (string) $this->readingTime;
    }

    /**
     * Counts words as runs of Unicode letters, apostrophes and hyphens, so accented words
     * count once. Falls back to str_word_count() when the text is not valid UTF-8.
     */
    private static function countWords(string $content): int
    {
        $count = preg_match_all('/[\p{L}\p{M}\'-]+/u', $content);

        return $count === false ? str_word_count($content) : $count;
    }
}
@@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Bias.
*
* String-backed enum of bias levels; Credibility uses NEUTRAL as its default.
*
* @author bernard-ng <bernard@devscast.tech>
*/
enum Bias: string
{
case NEUTRAL = 'neutral';
case SLIGHTLY = 'slightly';
case PARTISAN = 'partisan';
case EXTREME = 'extreme';
}
@@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
/**
 * Class Credibility.
 *
 * Immutable credibility score made of a bias, a reliability and a transparency level.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class Credibility implements \JsonSerializable
{
    /**
     * Defaults: neutral bias, reliable, medium transparency.
     */
    public function __construct(
        public Bias $bias = Bias::NEUTRAL,
        public Reliability $reliability = Reliability::RELIABLE,
        public Transparency $transparency = Transparency::MEDIUM,
    ) {
    }

    /**
     * Serializes each level as the backing string of its enum case.
     */
    public function jsonSerialize(): mixed
    {
        $bias = $this->bias->value;
        $reliability = $this->reliability->value;
        $transparency = $this->transparency->value;

        return [
            'bias' => $bias,
            'reliability' => $reliability,
            'transparency' => $transparency,
        ];
    }
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Reliability.
*
* String-backed enum of reliability levels; Credibility uses RELIABLE as its default.
*
* @author bernard-ng <bernard@devscast.tech>
*/
enum Reliability: string
{
case TRUSTED = 'trusted';
case RELIABLE = 'reliable';
case AVERAGE = 'average';
case LOW_TRUST = 'low_trust';
case UNRELIABLE = 'unreliable';
}
@@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Sentiment.
*
* String-backed enum of sentiments; Article uses NEUTRAL as its default.
*
* @author bernard-ng <bernard@devscast.tech>
*/
enum Sentiment: string
{
case NEGATIVE = 'negative';
case POSITIVE = 'positive';
case NEUTRAL = 'neutral';
}
@@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Model\ValueObject\Scoring;
/**
* Enum Transparency.
*
* String-backed enum of transparency levels; Credibility uses MEDIUM as its default.
*
* @author bernard-ng <bernard@devscast.tech>
*/
enum Transparency: string
{
case HIGH = 'high';
case MEDIUM = 'medium';
case LOW = 'low';
}
@@ -0,0 +1,72 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling;
/**
 * Class DateParser.
 *
 * Turns date strings that may contain French day and month names into Unix timestamps.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DateParser
{
    /** French month names mapped to their two-digit month number. */
    public const array MONTHS = [
        'janvier' => '01',
        'février' => '02',
        'mars' => '03',
        'avril' => '04',
        'mai' => '05',
        'juin' => '06',
        'juillet' => '07',
        'août' => '08',
        'septembre' => '09',
        'octobre' => '10',
        'novembre' => '11',
        'décembre' => '12',
    ];

    /** French day names mapped to a digit, "dimanche" being 0. */
    public const array DAYS = [
        'dimanche' => '0',
        'lundi' => '1',
        'mardi' => '2',
        'mercredi' => '3',
        'jeudi' => '4',
        'vendredi' => '5',
        'samedi' => '6',
    ];

    public const string DEFAULT_DATE_FORMAT = 'Y-m-d H:i';

    /**
     * Converts a date string into a Unix timestamp.
     *
     * Steps: lower-case the input, replace French day and month names with numbers, apply the
     * optional regex replacement, then parse the result with the given format.
     *
     * @param string      $date        raw date text
     * @param string|null $format      date format; defaults to DEFAULT_DATE_FORMAT. The value "c"
     *                                 is parsed as "Y-m-d H:i:s" after the "t" separator is
     *                                 replaced by a space
     * @param string|null $pattern     regex applied to the normalized text (used only together
     *                                 with $replacement)
     * @param string|null $replacement replacement for $pattern
     *
     * @return string Unix timestamp; when the text cannot be parsed, the timestamp of today's midnight
     *
     * @throws \Throwable
     */
    public function createTimeStamp(
        string $date,
        ?string $format = null,
        ?string $pattern = null,
        ?string $replacement = null
    ): string {
        // Multibyte-aware lower-casing so upper-case accented names ("DÉCEMBRE", "AOÛT") match the tables.
        $date = strtr(strtr(mb_strtolower($date, 'UTF-8'), self::DAYS), self::MONTHS);

        if ($pattern !== null && $replacement !== null) {
            // preg_replace() returns null on a PCRE error; keep the text as it was in that case.
            $date = preg_replace($pattern, $replacement, $date) ?? $date;
        }

        if ($format === 'c') {
            // The text is lower-cased at this point, hence "t" rather than "T".
            $date = str_replace('t', ' ', $date);
            $format = 'Y-m-d H:i:s';
        }

        $datetime = \DateTimeImmutable::createFromFormat($format ?? self::DEFAULT_DATE_FORMAT, $date);

        return $datetime !== false
            ? $datetime->format('U')
            : (new \DateTimeImmutable('midnight'))->format('U');
    }
}
@@ -0,0 +1,32 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Open Graph audio element: an audio URL plus its optional structured attributes.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Audio extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null
    ) {
    }

    /**
     * Maps every "og:audio:*" property name handled by this element to its current value.
     *
     * @return array<string, string|null> a null value marks an attribute that is not set
     */
    #[\Override]
    public function supportedProperties(): array
    {
        return [
            OpenGraphProperty::AUDIO_URL => $this->url,
            OpenGraphProperty::AUDIO_SECURE_URL => $this->secureUrl,
            OpenGraphProperty::AUDIO_TYPE => $this->type,
        ];
    }
}
@@ -0,0 +1,38 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Open Graph image element: an image URL plus its optional structured attributes.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Image extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null,
        public ?int $width = null,
        public ?int $height = null,
        public ?bool $userGenerated = null
    ) {
    }

    /**
     * Maps every "og:image*" property name handled by this element to its current value.
     *
     * Note that the URL is exposed under "og:image" (not "og:image:url").
     *
     * @return array<string, string|int|bool|null> a null value marks an attribute that is not set
     */
    #[\Override]
    public function supportedProperties(): array
    {
        return [
            OpenGraphProperty::IMAGE => $this->url,
            OpenGraphProperty::IMAGE_SECURE_URL => $this->secureUrl,
            OpenGraphProperty::IMAGE_TYPE => $this->type,
            OpenGraphProperty::IMAGE_WIDTH => $this->width,
            OpenGraphProperty::IMAGE_HEIGHT => $this->height,
            OpenGraphProperty::IMAGE_USER_GENERATED => $this->userGenerated,
        ];
    }
}
@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphElement;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
/**
 * Open Graph video element: a video URL plus its optional structured attributes.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Video extends OpenGraphElement
{
    public function __construct(
        public ?string $url = null,
        public ?string $secureUrl = null,
        public ?string $type = null,
        public ?int $width = null,
        public ?int $height = null
    ) {
    }

    /**
     * Maps every "og:video:*" property name handled by this element to its current value.
     *
     * @return array<string, string|int|null> a null value marks an attribute that is not set
     */
    #[\Override]
    public function supportedProperties(): array
    {
        return [
            OpenGraphProperty::VIDEO_URL => $this->url,
            OpenGraphProperty::VIDEO_SECURE_URL => $this->secureUrl,
            OpenGraphProperty::VIDEO_TYPE => $this->type,
            OpenGraphProperty::VIDEO_WIDTH => $this->width,
            OpenGraphProperty::VIDEO_HEIGHT => $this->height,
        ];
    }
}
@@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph\Objects;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
/**
 * Concrete Open Graph object that adds no member of its own: all state and
 * behaviour are inherited from OpenGraphObject.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class Website extends OpenGraphObject
{
}
@@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Contract for services that turn a page (given by URL or by raw HTML) into an
 * OpenGraphObject.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface OpenGraphConsumer
{
/**
 * Produces the Open Graph object for the page located at $url.
 *
 * @return OpenGraphObject|null null when no object could be produced (the DomCrawlerConsumer
 *                              implementation returns null when fetching or parsing fails)
 */
public function consumeUrl(string $url): ?OpenGraphObject;
/**
 * Produces the Open Graph object described by an HTML document.
 *
 * @param string $html        HTML document to inspect
 * @param string $fallbackUrl URL an implementation may use when the document does not
 *                            declare one itself
 *
 * @return OpenGraphObject|null null when no object could be produced
 */
public function consumeHtml(string $html, string $fallbackUrl): ?OpenGraphObject;
}
@@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Base class for structured Open Graph elements (audio, image, video).
 *
 * A subclass exposes its state through supportedProperties(); getProperties()
 * turns the attributes that are set into OpenGraphProperty objects.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
abstract class OpenGraphElement
{
    /**
     * Returns every property handled by the element, keyed by property name.
     *
     * @return array<string, mixed> a null value marks an attribute that is not set
     */
    abstract public function supportedProperties(): array;

    /**
     * Builds one OpenGraphProperty per attribute whose value is not null.
     *
     * supportedProperties() is read exactly once and the result is a plain list
     * (sequential keys), in the order supportedProperties() declares them.
     *
     * @return list<OpenGraphProperty>
     */
    public function getProperties(): array
    {
        $properties = [];

        foreach ($this->supportedProperties() as $key => $value) {
            if ($value === null) {
                continue;
            }

            $properties[] = new OpenGraphProperty((string) $key, $value);
        }

        return $properties;
    }
}
@@ -0,0 +1,283 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Audio;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Image;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Elements\Video;
/**
 * Base class for Open Graph objects.
 *
 * Holds the basic "og:*" metadata of a page and converts between that state
 * and a flat list of OpenGraphProperty items.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
abstract class OpenGraphObject
{
    /**
     * @param Audio[]  $audios
     * @param Image[]  $images
     * @param string[] $localeAlternate
     * @param string[] $seeAlso
     * @param Video[]  $videos
     */
    public function __construct(
        public array $audios = [],
        public ?string $description = null,
        public ?string $determiner = null,
        public array $images = [],
        public ?string $locale = null,
        public array $localeAlternate = [],
        public ?bool $richAttachment = null,
        public array $seeAlso = [],
        public ?string $siteName = null,
        public ?string $title = null,
        public ?string $type = null,
        public ?\DateTimeImmutable $updatedTime = null,
        public ?string $url = null,
        public array $videos = []
    ) {
    }

    /**
     * Copies a list of properties onto this object.
     *
     * Rules applied while reading the list in order:
     * - single-valued fields (title, description, type, url, ...) keep the first value seen
     *   and are never overwritten once set; rich_attachment is the exception and takes the
     *   last value seen;
     * - audio/image/video URL properties start a new element; structured attributes
     *   (type, width, ...) are attached to the most recently started element;
     * - property names without a matching case are ignored.
     *
     * @param iterable<OpenGraphProperty> $properties items exposing ->key and ->value
     * @param bool                        $debug      when true, a structured attribute that
     *                                                appears before any element of its kind
     *                                                throws instead of being ignored
     *
     * @throws \UnexpectedValueException in debug mode, for an orphan structured attribute
     */
    public function assignProperties(array $properties, bool $debug = false): void
    {
        foreach ($properties as $property) {
            $name = $property->key;
            $value = $property->value;

            switch ($name) {
                case OpenGraphProperty::AUDIO:
                case OpenGraphProperty::AUDIO_URL:
                    $this->audios[] = new Audio($value);
                    break;
                case OpenGraphProperty::AUDIO_SECURE_URL:
                case OpenGraphProperty::AUDIO_TYPE:
                    if ($this->audios !== []) {
                        $this->handleAudioAttribute($this->audios[\count($this->audios) - 1], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(
                            \sprintf(
                                "Found '%s' property but no audio was found before.",
                                $name
                            )
                        );
                    }
                    break;
                case OpenGraphProperty::DESCRIPTION:
                    if ($this->description === null) {
                        $this->description = $value;
                    }
                    break;
                case OpenGraphProperty::DETERMINER:
                    if ($this->determiner === null) {
                        $this->determiner = $value;
                    }
                    break;
                case OpenGraphProperty::IMAGE:
                case OpenGraphProperty::IMAGE_URL:
                    $this->images[] = new Image($value);
                    break;
                case OpenGraphProperty::IMAGE_HEIGHT:
                case OpenGraphProperty::IMAGE_SECURE_URL:
                case OpenGraphProperty::IMAGE_TYPE:
                case OpenGraphProperty::IMAGE_WIDTH:
                case OpenGraphProperty::IMAGE_USER_GENERATED:
                    if ($this->images !== []) {
                        $this->handleImageAttribute($this->images[\count($this->images) - 1], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(
                            \sprintf(
                                "Found '%s' property but no image was found before.",
                                $name
                            )
                        );
                    }
                    break;
                case OpenGraphProperty::LOCALE:
                    if ($this->locale === null) {
                        $this->locale = $value;
                    }
                    break;
                case OpenGraphProperty::LOCALE_ALTERNATE:
                    $this->localeAlternate[] = $value;
                    break;
                case OpenGraphProperty::RICH_ATTACHMENT:
                    $this->richAttachment = $this->convertToBoolean($value);
                    break;
                case OpenGraphProperty::SEE_ALSO:
                    $this->seeAlso[] = $value;
                    break;
                case OpenGraphProperty::SITE_NAME:
                    if ($this->siteName === null) {
                        $this->siteName = $value;
                    }
                    break;
                case OpenGraphProperty::TITLE:
                    if ($this->title === null) {
                        $this->title = $value;
                    }
                    break;
                case OpenGraphProperty::TYPE:
                    // getProperties() emits og:type, so it has to be readable too. A type
                    // given to the constructor still wins because of the null guard.
                    if ($this->type === null) {
                        $this->type = $value;
                    }
                    break;
                case OpenGraphProperty::UPDATED_TIME:
                    if (! $this->updatedTime instanceof \DateTimeImmutable) {
                        $this->updatedTime = $this->convertToDateTime($value);
                    }
                    break;
                case OpenGraphProperty::URL:
                    if ($this->url === null) {
                        $this->url = $value;
                    }
                    break;
                case OpenGraphProperty::VIDEO:
                case OpenGraphProperty::VIDEO_URL:
                    $this->videos[] = new Video($value);
                    break;
                case OpenGraphProperty::VIDEO_HEIGHT:
                case OpenGraphProperty::VIDEO_SECURE_URL:
                case OpenGraphProperty::VIDEO_TYPE:
                case OpenGraphProperty::VIDEO_WIDTH:
                    if ($this->videos !== []) {
                        $this->handleVideoAttribute($this->videos[\count($this->videos) - 1], $name, $value);
                    } elseif ($debug) {
                        throw new \UnexpectedValueException(\sprintf(
                            "Found '%s' property but no video was found before.",
                            $name
                        ));
                    }
                    break;
            }
        }
    }

    /**
     * Flattens the object back into a list of properties.
     *
     * Only fields that are set are emitted. Order: audios, title, description,
     * determiner, images, locale, alternate locales, rich attachment (as 0/1),
     * see-also links, site name, type, updated time (ISO 8601), url, videos.
     *
     * @return OpenGraphProperty[]
     */
    public function getProperties(): array
    {
        $properties = [];

        foreach ($this->audios as $audio) {
            $properties = array_merge($properties, $audio->getProperties());
        }

        if ($this->title !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::TITLE, $this->title);
        }

        if ($this->description !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::DESCRIPTION, $this->description);
        }

        if ($this->determiner !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::DETERMINER, $this->determiner);
        }

        foreach ($this->images as $image) {
            $properties = array_merge($properties, $image->getProperties());
        }

        if ($this->locale !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::LOCALE, $this->locale);
        }

        foreach ($this->localeAlternate as $locale) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::LOCALE_ALTERNATE, $locale);
        }

        if ($this->richAttachment !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::RICH_ATTACHMENT, (int) $this->richAttachment);
        }

        foreach ($this->seeAlso as $seeAlso) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::SEE_ALSO, $seeAlso);
        }

        if ($this->siteName !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::SITE_NAME, $this->siteName);
        }

        if ($this->type !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::TYPE, $this->type);
        }

        if ($this->updatedTime instanceof \DateTimeImmutable) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::UPDATED_TIME, $this->updatedTime->format('c'));
        }

        if ($this->url !== null) {
            $properties[] = new OpenGraphProperty(OpenGraphProperty::URL, $this->url);
        }

        foreach ($this->videos as $video) {
            $properties = array_merge($properties, $video->getProperties());
        }

        return $properties;
    }

    /**
     * Interprets "1" and "true" (case-insensitive) as true, anything else as false.
     */
    protected function convertToBoolean(string $value): bool
    {
        return match (strtolower($value)) {
            '1', 'true' => true,
            default => false,
        };
    }

    /**
     * Parses a date string; returns null instead of failing when it is not parseable.
     */
    protected function convertToDateTime(string $value): ?\DateTimeImmutable
    {
        try {
            return new \DateTimeImmutable($value);
        } catch (\Throwable) {
            return null;
        }
    }

    /**
     * Stores a structured audio attribute (type or secure URL) on $element.
     */
    private function handleAudioAttribute(Audio $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::AUDIO_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::AUDIO_SECURE_URL:
                $element->secureUrl = $value;
                break;
        }
    }

    /**
     * Stores a structured image attribute on $element; dimensions are cast to int
     * and the user-generated flag goes through convertToBoolean().
     */
    private function handleImageAttribute(Image $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::IMAGE_HEIGHT:
                $element->height = (int) $value;
                break;
            case OpenGraphProperty::IMAGE_WIDTH:
                $element->width = (int) $value;
                break;
            case OpenGraphProperty::IMAGE_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::IMAGE_SECURE_URL:
                $element->secureUrl = $value;
                break;
            case OpenGraphProperty::IMAGE_USER_GENERATED:
                $element->userGenerated = $this->convertToBoolean($value);
                break;
        }
    }

    /**
     * Stores a structured video attribute on $element; dimensions are cast to int.
     */
    private function handleVideoAttribute(Video $element, string $name, string $value): void
    {
        switch ($name) {
            case OpenGraphProperty::VIDEO_HEIGHT:
                $element->height = (int) $value;
                break;
            case OpenGraphProperty::VIDEO_WIDTH:
                $element->width = (int) $value;
                break;
            case OpenGraphProperty::VIDEO_TYPE:
                $element->type = $value;
                break;
            case OpenGraphProperty::VIDEO_SECURE_URL:
                $element->secureUrl = $value;
                break;
        }
    }
}
@@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling\OpenGraph;
/**
 * Immutable key/value pair describing one Open Graph property.
 *
 * The constants below list the "og:*" property names used by the crawling code.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class OpenGraphProperty
{
// Audio element and its structured attributes.
public const string AUDIO = 'og:audio';
public const string AUDIO_SECURE_URL = 'og:audio:secure_url';
public const string AUDIO_TYPE = 'og:audio:type';
public const string AUDIO_URL = 'og:audio:url';
// Basic page metadata.
public const string DESCRIPTION = 'og:description';
public const string DETERMINER = 'og:determiner';
// Image element and its structured attributes.
public const string IMAGE = 'og:image';
public const string IMAGE_HEIGHT = 'og:image:height';
public const string IMAGE_SECURE_URL = 'og:image:secure_url';
public const string IMAGE_TYPE = 'og:image:type';
public const string IMAGE_URL = 'og:image:url';
public const string IMAGE_WIDTH = 'og:image:width';
public const string IMAGE_USER_GENERATED = 'og:image:user_generated';
// Locale and miscellaneous page metadata.
public const string LOCALE = 'og:locale';
public const string LOCALE_ALTERNATE = 'og:locale:alternate';
public const string RICH_ATTACHMENT = 'og:rich_attachment';
public const string SEE_ALSO = 'og:see_also';
public const string SITE_NAME = 'og:site_name';
public const string TITLE = 'og:title';
public const string TYPE = 'og:type';
public const string UPDATED_TIME = 'og:updated_time';
public const string URL = 'og:url';
// Video element and its structured attributes.
public const string VIDEO = 'og:video';
public const string VIDEO_HEIGHT = 'og:video:height';
public const string VIDEO_SECURE_URL = 'og:video:secure_url';
public const string VIDEO_TYPE = 'og:video:type';
public const string VIDEO_URL = 'og:video:url';
public const string VIDEO_WIDTH = 'og:video:width';
/**
 * @param string $key   property name (one of the constants above, or any other name)
 * @param mixed  $value property value; its type is not constrained here
 */
public function __construct(
public string $key,
public mixed $value,
) {
}
}
@@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Crawling;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Contract implemented by every crawler able to fetch articles from a source.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface SourceCrawler
{
/**
 * Crawls a source according to the given settings.
 */
public function fetch(CrawlingSettings $settings): void;
/**
 * Processes a single article.
 *
 * @param string         $html      raw payload of the article; despite the name it is not
 *                                  necessarily HTML (WordPressJson passes a JSON-encoded post)
 * @param DateRange|null $dateRange optional date filter; null means no date restriction
 */
public function fetchOne(string $html, ?DateRange $dateRange = null): void;
/**
 * Tells whether this crawler handles the source identified by $source.
 */
public function supports(string $source): bool;
}
@@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service;
/**
 * Computes the digest used to fingerprint a piece of data.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class HashCalculator
{
    /**
     * Returns the MD5 digest of $data as 32 lowercase hexadecimal characters.
     */
    public function calculate(string $data): string
    {
        $digest = md5(string: $data);

        return $digest;
    }
}
@@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Scoring;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
/**
 * Contract for services that rate the credibility of a piece of content.
 *
 * Each getter rates one dimension; analyse() returns a Credibility value object
 * (presumably combining the three dimensions — confirm against implementations).
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface CredibilityAnalyser
{
/**
 * Rates the bias of $content.
 */
public function getBias(string $content): Bias;
/**
 * Rates the transparency of $content.
 */
public function getTransparency(string $content): Transparency;
/**
 * Rates the reliability of $content.
 */
public function getReliability(string $content): Reliability;
/**
 * Produces the overall credibility assessment of $content.
 */
public function analyse(string $content): Credibility;
}
@@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Domain\Service\Scoring;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
/**
 * Contract for services that classify the tone of a piece of content.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
interface SentimentAnalyser
{
/**
 * Classifies $content as one of the Sentiment cases.
 */
public function analyse(string $content): Sentiment;
}
@@ -0,0 +1,65 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler;
use Psr\Log\LoggerInterface;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Builds the HTTP client used for crawling.
 *
 * The client gets a random User-Agent and, when the USE_PROXY environment
 * variable enables it, a proxy picked at random from
 * "<projectDir>/data/proxies.txt" (one proxy per line).
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class HttpClientFactory
{
    public function __construct(
        private string $projectDir,
        private Filesystem $filesystem,
        private HttpClientInterface $client,
        private LoggerInterface $logger
    ) {
    }

    /**
     * Returns a copy of the injected client configured with a random User-Agent
     * and, when one is available, a proxy.
     */
    public function create(): HttpClientInterface
    {
        $proxy = $this->getProxy();

        return $this->client->withOptions([
            'headers' => [
                'User-Agent' => UserAgents::random(),
            ],
            'proxy' => $proxy !== null ? $this->toProxyUrl($proxy) : null,
        ]);
    }

    /**
     * Adds the default "https://" scheme unless the entry already carries one.
     */
    private function toProxyUrl(string $proxy): string
    {
        return str_contains($proxy, '://') ? $proxy : 'https://' . $proxy;
    }

    /**
     * Reads the USE_PROXY flag.
     *
     * Boolean-looking values ("1", "true", "on", "yes" / "0", "false", "off", "no", "")
     * are interpreted as such, so USE_PROXY=false really disables the proxy. Any other
     * value keeps PHP's truthiness rules.
     */
    private function isProxyEnabled(): bool
    {
        $raw = getenv('USE_PROXY');

        return filter_var($raw, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE) ?? (bool) $raw;
    }

    /**
     * Picks a random proxy from the proxy file.
     *
     * @return string|null null when proxies are disabled, when the file holds no usable
     *                     entry, or when it cannot be read (the failure is logged)
     */
    private function getProxy(): ?string
    {
        if (! $this->isProxyEnabled()) {
            return null;
        }

        try {
            $filename = sprintf('%s/data/proxies.txt', $this->projectDir);
            $content = $this->filesystem->readFile($filename);

            $lines = preg_split('/\r\n|\n|\r/', $content) ?: [];
            // Ignore blank lines (and the literal "0"), tolerating surrounding whitespace.
            $proxies = array_values(array_filter(
                array_map(trim(...), $lines),
                static fn (string $proxy): bool => $proxy !== '' && $proxy !== '0'
            ));

            if ($proxies === []) {
                // array_rand() throws on an empty array; report the real cause instead.
                $this->logger->warning('Proxy file does not contain any proxy', [
                    'file' => $filename,
                ]);

                return null;
            }

            $proxy = $proxies[array_rand($proxies)];
            $this->logger->info('HttpClient is using proxy: ' . $proxy);

            return $proxy;
        } catch (\Throwable $e) {
            $this->logger->error('Unable to read proxy file', [
                'exception' => $e,
            ]);

            return null;
        }
    }
}
@@ -0,0 +1,128 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\OpenGraph;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\Objects\Website;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphProperty;
use App\Aggregator\Infrastructure\Crawler\HttpClientFactory;
use App\Aggregator\Infrastructure\Crawler\UserAgents;
use Psr\Log\LoggerInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * OpenGraphConsumer implementation based on Symfony's DomCrawler.
 *
 * Reads the "og:*" meta tags of a page and, in fallback mode, completes missing
 * url/title/description from standard HTML tags.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class DomCrawlerConsumer implements OpenGraphConsumer
{
    private HttpClientInterface $client;

    /**
     * @param bool $useFallbackMode complete missing url/title/description from non-OG tags
     * @param bool $debug           forwarded to OpenGraphObject::assignProperties()
     */
    public function __construct(
        HttpClientFactory $clientFactory,
        private LoggerInterface $logger,
        private bool $useFallbackMode = true,
        private bool $debug = false,
    ) {
        $this->client = $clientFactory->create();
    }

    /**
     * Downloads $url (announcing the Open Graph crawler User-Agent) and parses it.
     *
     * @return OpenGraphObject|null null when the request or the parsing fails (logged)
     */
    public function consumeUrl(string $url): ?OpenGraphObject
    {
        try {
            $response = $this->client->request('GET', $url, [
                'headers' => [
                    'User-Agent' => UserAgents::OPEN_GRAPH->value,
                ],
            ])->getContent();

            return $this->consumeHtml($response, $url);
        } catch (\Throwable $e) {
            $this->logger->error(
                'Unable to consume OpenGraph URL',
                [
                    'url' => $url,
                    'exception' => $e,
                ]
            );

            return null;
        }
    }

    /**
     * Parses an HTML document; in fallback mode $fallbackUrl is used when the
     * document declares no URL of its own.
     *
     * @return OpenGraphObject|null null when the parsing fails (logged)
     */
    public function consumeHtml(string $html, string $fallbackUrl): ?OpenGraphObject
    {
        try {
            $object = $this->consume($html);

            if ($this->useFallbackMode && $object->url === null) {
                $object->url = $fallbackUrl;
            }

            return $object;
        } catch (\Throwable $e) {
            $this->logger->error(
                'Unable to consume OpenGraph HTML',
                [
                    'html' => $html,
                    'exception' => $e,
                ]
            );

            return null;
        }
    }

    /**
     * Extracts the Open Graph data of $content into a Website object.
     */
    private function consume(string $content): OpenGraphObject
    {
        $crawler = new Crawler($content);
        $object = new Website(type: 'website');

        // Open Graph tags are looked up on both the "name" and the "property" attribute;
        // all "name" matches come first, then all "property" matches.
        $properties = [];
        foreach (['name', 'property'] as $attribute) {
            /** @var \DOMElement $tag */
            foreach ($crawler->filter(sprintf("meta[%s^='og:']", $attribute)) as $tag) {
                $properties[] = new OpenGraphProperty(
                    strtolower(trim($tag->getAttribute($attribute))),
                    trim($tag->getAttribute('content'))
                );
            }
        }

        $object->assignProperties($properties, $this->debug);

        if (! $this->useFallbackMode) {
            return $object;
        }

        // Fallback for url: canonical link. An empty href is ignored so that url stays
        // null and consumeHtml() can still apply its own fallback URL.
        if ($object->url === null) {
            $canonical = $crawler->filter("link[rel='canonical']")->first();
            if ($canonical->count() > 0) {
                $href = trim($canonical->attr('href') ?? '');
                if ($href !== '') {
                    $object->url = $href;
                }
            }
        }

        // Fallback for title: the document <title>.
        if ($object->title === null) {
            $titleElement = $crawler->filter('title')->first();
            if ($titleElement->count() > 0) {
                $object->title = trim($titleElement->text());
            }
        }

        // Fallback for description: the standard <meta name="description"> tag, plus the
        // "property" variant that was the only one checked before.
        if ($object->description === null) {
            $descriptionElement = $crawler
                ->filter("meta[name='description'], meta[property='description']")
                ->first();
            if ($descriptionElement->count() > 0) {
                $object->description = trim($descriptionElement->attr('content') ?? '');
            }
        }

        return $object;
    }
}
@@ -0,0 +1,154 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Application\UseCase\Command\CreateArticle;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\Aggregator\Domain\Exception\ArticleOutOfRange;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Service\Crawling\DateParser;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\Aggregator\Domain\Service\Crawling\SourceCrawler;
use App\Aggregator\Infrastructure\Crawler\HttpClientFactory;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Psr\EventDispatcher\EventDispatcherInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\DependencyInjection\Attribute\AutoconfigureTag;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Stopwatch\Stopwatch;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Base class for concrete source crawlers.
 *
 * Provides the shared plumbing: HTTP client, page download, article
 * persistence through the command bus, timing and completion event.
 * Subclasses override the ID and URL constants and implement fetch(),
 * fetchOne() and getPagination().
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
#[AutoconfigureTag('app.data_source')]
abstract class Source implements SourceCrawler
{
    /**
     * Base URL of the source; meant to be overridden by subclasses (read via static::).
     */
    protected const string URL = 'url';

    /**
     * Identifier of the source; meant to be overridden by subclasses (read via static::).
     */
    protected const string ID = 'id';

    private const string WATCH_EVENT_NAME = 'crawling';

    protected Stopwatch $stopwatch;

    protected HttpClientInterface $client;

    public function __construct(
        HttpClientFactory $clientFactory,
        protected EventDispatcherInterface $dispatcher,
        protected LoggerInterface $logger,
        protected DateParser $dateParser,
        protected CommandBus $commandBus,
        protected OpenGraphConsumer $openGraphConsumer
    ) {
        $this->stopwatch = new Stopwatch();
        $this->client = $clientFactory->create();
    }

    /**
     * A source supports exactly its own identifier.
     */
    #[\Override]
    public function supports(string $source): bool
    {
        return $source === $this->getId();
    }

    /**
     * Returns the range of pages available for the source (optionally for one category).
     */
    abstract public function getPagination(?string $category = null): PageRange;

    protected function getId(): string
    {
        return static::ID;
    }

    protected function getUrl(): string
    {
        return static::URL;
    }

    /**
     * Downloads $url and wraps the response body in a DomCrawler.
     *
     * @param int|null $page page number, only used for logging
     *
     * @throws \Throwable when the HTTP request fails
     */
    protected function crawle(string $url, ?int $page = null): Crawler
    {
        if ($page !== null) {
            $this->logger->notice('> Page ' . $page);
        }

        $response = $this->client->request('GET', $url)->getContent();

        return new Crawler($response);
    }

    /**
     * Dispatches a CreateArticle command; a failure is logged and never propagated.
     */
    protected function save(
        string $title,
        string $link,
        string $categories,
        string $body,
        string $timestamp,
        ?OpenGraphObject $metadata = null
    ): void {
        try {
            $this->commandBus->handle(
                new CreateArticle(
                    title: $title,
                    link: Link::from($link, $this->getId()),
                    categories: $categories,
                    body: $body,
                    source: $this->getId(),
                    timestamp: (int) $timestamp,
                    metadata: $metadata
                )
            );
            $this->logger->notice(sprintf('> %s ✅', $title));
        } catch (\Throwable $e) {
            $this->logger->error(sprintf('> %s [Failed] ❌', $e->getMessage()));
        }
    }

    /**
     * Starts the crawl stopwatch.
     */
    protected function initialize(): void
    {
        $this->stopwatch->start(self::WATCH_EVENT_NAME);
        $this->logger->notice('Initialized');
    }

    /**
     * Stops the stopwatch and dispatches SourceCrawled with the timing summary.
     */
    protected function completed(bool $notify = false): void
    {
        $event = $this->stopwatch->stop(self::WATCH_EVENT_NAME);

        $this->dispatcher->dispatch(new SourceCrawled((string) $event, $this->getId(), $notify));
        $this->logger->notice('Done');
    }

    /**
     * Logs a skipped article, or aborts when the date range reports the timestamp
     * as out of range.
     *
     * @throws ArticleOutOfRange when $dateRange->outRange() is true for $timestamp
     */
    protected function skip(DateRange $dateRange, string $timestamp, string $title, string $date): void
    {
        if ($dateRange->outRange((int) $timestamp)) {
            throw ArticleOutOfRange::with($timestamp, $dateRange);
        }

        $this->logger->notice(sprintf('> %s [Skipped %s]', $title, $date));
    }

    /**
     * Reads the number of the last page from the last "ul.pagination" link.
     *
     * @return int value of the link's "page" query parameter, 0 when the link has none
     *
     * @throws \UnexpectedValueException when the link has no href or no query string
     * @throws \Throwable                when the page cannot be downloaded or holds no pagination link
     */
    protected function getLastPage(?string $url = null): int
    {
        $href = $this->crawle($url ?? $this->getUrl())
            ->filter('ul.pagination > li a')
            ->last()
            ->attr('href');

        // Fail with an explicit message instead of a TypeError from parse_url()/parse_str().
        $query = \is_string($href) ? parse_url($href, PHP_URL_QUERY) : null;
        if (! \is_string($query)) {
            throw new \UnexpectedValueException(
                sprintf("Unable to read the last page from pagination link '%s'.", (string) $href)
            );
        }

        $result = [];
        parse_str($query, $result);

        return (int) ($result['page'] ?? 0);
    }
}
@@ -0,0 +1,63 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Service\Crawling\SourceCrawler as SourceCrawlerInterface;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\DependencyInjection\Attribute\AutowireIterator;
/**
 * Composite crawler: forwards a crawl request to the registered source(s)
 * supporting the requested identifier.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceCrawler implements SourceCrawlerInterface
{
    /**
     * Every service tagged "app.data_source", materialised once at construction.
     *
     * @var iterable<SourceCrawlerInterface>
     */
    private iterable $sources;

    public function __construct(
        #[AutowireIterator('app.data_source')] \Traversable $sources
    ) {
        $this->sources = iterator_to_array($sources);
    }

    /**
     * Runs fetch() on every registered source that supports $settings->id.
     */
    #[\Override]
    public function fetch(CrawlingSettings $settings): void
    {
        foreach ($this->sources as $candidate) {
            if (! $candidate->supports($settings->id)) {
                continue;
            }

            $candidate->fetch($settings);
        }
    }

    /**
     * The composite accepts any identifier; the real check happens per source.
     */
    #[\Override]
    public function supports(string $source): bool
    {
        return true;
    }

    /**
     * Not available on the composite.
     *
     * @throws \RuntimeException always
     */
    #[\Override]
    public function fetchOne(string $html, ?DateRange $dateRange = null): void
    {
        throw new \RuntimeException('Not implemented');
    }

    /**
     * Returns the first registered source supporting $id.
     *
     * @throws \RuntimeException when no source supports $id
     */
    public function get(string $id): Source
    {
        /** @var Source $candidate */
        foreach ($this->sources as $candidate) {
            if (! $candidate->supports($id)) {
                continue;
            }

            return $candidate;
        }

        throw new \RuntimeException('Source not found');
    }
}
@@ -0,0 +1,146 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler\Source;
use App\Aggregator\Domain\Exception\ArticleOutOfRange;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
/**
 * Crawler for WordPress websites exposing their posts through the REST API.
 *
 * Posts are read page by page (100 per page, newest first) from
 * "/wp-json/wp/v2/posts"; category ids are translated to slugs through
 * "/wp-json/wp/v2/categories".
 *
 * @see https://developer.wordpress.org/rest-api/
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
class WordPressJson extends Source
{
    public const string POST_QUERY = '_fields=date,slug,link,title.rendered,content.rendered,categories&orderby=date&order=desc';

    public const string CATEGORY_QUERY = '_fields=id,slug,count&orderby=count&order=desc&per_page=100';

    public const string TOTAL_PAGES_HEADER = 'x-wp-totalpages';

    public const string TOTAL_POSTS_HEADER = 'x-wp-total';

    /**
     * Category id => slug, filled lazily by fetchCategories().
     *
     * @var array<int, string>
     */
    private array $categoryMap = [];

    /**
     * Set once the categories were downloaded, so a site without any category is
     * not queried again for every article.
     */
    private bool $categoriesLoaded = false;

    /**
     * Reads the total number of pages from the response headers of the posts endpoint.
     */
    #[\Override]
    public function getPagination(?string $category = null): PageRange
    {
        $response = $this->client->request('GET', sprintf('%s/wp-json/wp/v2/posts?_fields=id&per_page=100', $this->getUrl()));
        $headers = $response->getHeaders();

        $pages = (int) $headers[self::TOTAL_PAGES_HEADER][0];
        $posts = (int) $headers[self::TOTAL_POSTS_HEADER][0];

        $this->logger->notice(sprintf('WordPressJson %d posts, %d pages', $posts, $pages));

        return PageRange::from(sprintf('1:%d', $pages));
    }

    /**
     * Crawls every page of the range; a page that cannot be downloaded or decoded is
     * logged and skipped, and the crawl stops at the first out-of-range article.
     */
    #[\Override]
    public function fetch(CrawlingSettings $settings): void
    {
        $this->initialize();
        $page = $settings->pageRange ?? $this->getPagination();

        for ($i = $page->start; $i <= $page->end; $i++) {
            try {
                $response = $this->client->request(
                    method: 'GET',
                    url: sprintf('%s/wp-json/wp/v2/posts?%s&page=%d&per_page=100', $this->getUrl(), self::POST_QUERY, $i)
                );

                // Decode inside the guarded block: invalid JSON must skip the page, not
                // reach the foreach below as null.
                $articles = json_decode(
                    $this->removeMisconfigurationError($response->getContent()),
                    true,
                    flags: JSON_THROW_ON_ERROR
                );

                if (! \is_array($articles)) {
                    throw new \UnexpectedValueException('Unexpected payload, a list of posts was expected');
                }
            } catch (\Throwable $e) {
                $this->logger->error(sprintf('> page %d => %s [Failed] ❌', $i, $e->getMessage()));
                continue;
            }

            try {
                foreach ($articles as $article) {
                    $this->fetchOne((string) json_encode($article), $settings->dateRange);
                }
            } catch (ArticleOutOfRange) {
                $this->logger->notice('No more articles to fetch in this range.');
                break;
            }
        }

        $this->completed($settings->notify);
    }

    /**
     * Saves one post, or skips it when it falls outside $dateRange.
     *
     * @param string $html JSON-encoded post (not HTML) as returned by the REST API
     *
     * @throws ArticleOutOfRange propagated from skip(); every other failure is logged
     */
    #[\Override]
    public function fetchOne(string $html, ?DateRange $dateRange = null): void
    {
        try {
            /**
             * @var array{
             *     link:string,
             *     title:array{rendered:string},
             *     content:array{rendered:string},
             *     date:string,
             *     categories:int[]
             * } $data
             */
            $data = json_decode($html, true);
            $link = str_replace($this->getUrl(), '', $data['link']);
            $title = strip_tags($data['title']['rendered']);
            $body = strip_tags($data['content']['rendered']);
            $timestamp = $this->dateParser->createTimeStamp($data['date'], format: 'c');
            $categories = $this->mapCategories($data['categories']);

            if (! $dateRange instanceof DateRange || $dateRange->inRange((int) $timestamp)) {
                $metadata = $this->openGraphConsumer->consumeUrl($data['link']);
                $this->save($title, $link, $categories, $body, $timestamp, $metadata);
            } else {
                $this->skip($dateRange, $timestamp, $title, $data['date']);
            }
        } catch (ArticleOutOfRange $e) {
            throw $e;
        } catch (\Throwable $e) {
            $this->logger->error(sprintf('> %s [Failed] ❌', $e->getMessage()));
            return;
        }
    }

    /**
     * edge case for some politico.cd website
     * this invalidates the json, so we have to remove it
     */
    private function removeMisconfigurationError(string $content): string
    {
        $error = '<br />
<b>Notice</b>: ob_end_flush(): Failed to send buffer of zlib output compression (0) in <b>/home/politico/public_html/wp-includes/functions.php</b> on line <b>5427</b><br />';

        return str_replace($error, '', $content);
    }

    /**
     * Downloads the categories (at most the 100 most used, see CATEGORY_QUERY) and
     * fills the id => slug map.
     */
    private function fetchCategories(): void
    {
        $response = $this->client->request('GET', sprintf('%s/wp-json/wp/v2/categories?%s', $this->getUrl(), self::CATEGORY_QUERY));

        /** @var array{id: int, slug: string}[]|null $categories */
        $categories = json_decode($response->getContent(), true);

        foreach (\is_array($categories) ? $categories : [] as $category) {
            $this->categoryMap[$category['id']] = $category['slug'];
        }

        $this->categoriesLoaded = true;
    }

    /**
     * Translates category ids into a lowercase, comma-separated list of slugs.
     *
     * Ids missing from the map (only the 100 most used categories are loaded) are left
     * out instead of triggering an undefined-key warning.
     */
    private function mapCategories(array $categories): string
    {
        if (! $this->categoriesLoaded) {
            $this->fetchCategories();
        }

        $slugs = [];
        foreach ($categories as $category) {
            if (isset($this->categoryMap[$category])) {
                $slugs[] = $this->categoryMap[$category];
            }
        }

        return strtolower(implode(',', $slugs));
    }
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Crawler;
/**
 * User-Agent strings sent by the crawler's HTTP clients.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
enum UserAgents: string
{
    case OPEN_GRAPH = 'facebookexternalhit/1.1';
    case IPHONE = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5';
    case LINUX = 'Mozilla/5.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7';
    case ANDROID = 'Mozilla/5.0 (Linux; U; Android 5.0; SM-P815 Build/LRX22G) AppleWebKit/600.4 (KHTML, like Gecko) Chrome/48.0.1562.260 Mobile Safari/600.0';
    case CHROME_WINDOWS = 'Mozilla/5.0 (Windows; U; Windows NT 6.3;) AppleWebKit/533.34 (KHTML, like Gecko) Chrome/51.0.1883.215 Safari/533';
    case EXPLORER = 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; x64; en-US Trident/4.0)';
    case MAC_FIREFOX = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_10_3) Gecko/20100101 Firefox/63.4';
    case CHROME_LINUX = 'Mozilla/5.0 (Linux; Linux x86_64; en-US) AppleWebKit/603.50 (KHTML, like Gecko) Chrome/55.0.2226.116 Safari/601';
    case MAC_FIREFOX_OLD = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 7_8_3; en-US) Gecko/20100101 Firefox/68.9';
    case MOBILE_IPHONE = 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_9_8; like Mac OS X) AppleWebKit/603.34 (KHTML, like Gecko) Chrome/47.0.1126.107 Mobile Safari/602.7';
    case MOBILE_IPOD = 'Mozilla/5.0 (iPod; CPU iPod OS 8_2_0; like Mac OS X) AppleWebKit/601.40 (KHTML, like Gecko) Chrome/47.0.1590.178 Mobile Safari/535.2';

    /**
     * Returns the User-Agent string of one case picked at random among all cases
     * (OPEN_GRAPH included).
     */
    public static function random(): string
    {
        $pool = [];
        foreach (self::cases() as $agent) {
            $pool[] = $agent->value;
        }

        $picked = array_rand($pool);

        return $pool[$picked];
    }
}
@@ -0,0 +1,76 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\ReadModel\ArticleForExport;
use App\Aggregator\Application\UseCase\Query\GetArticlesForExport;
use App\Aggregator\Application\UseCase\QueryHandler\GetArticlesForExportHandler;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Doctrine\DBAL\Connection;
/**
 * DBAL implementation of the "articles for export" query.
 *
 * Streams the matching articles (newest first) as ArticleForExport read
 * models, loading them from the database in batches.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetArticlesForExportDbalHandler implements GetArticlesForExportHandler
{
    /**
     * Number of rows loaded per database round trip.
     */
    private const int BATCH_SIZE = 1000;

    public function __construct(
        private Connection $connection
    ) {
    }

    /**
     * Lazily yields every article matching the optional source and date filters.
     *
     * @return iterable<ArticleForExport>
     */
    #[\Override]
    public function __invoke(GetArticlesForExport $query): iterable
    {
        $qb = $this->connection->createQueryBuilder()
            ->select(
                'a.id as article_id',
                'a.title as article_title',
                'a.link as article_link',
                'a.categories as article_categories',
                'a.body as article_body',
                's.name as article_source',
                'a.hash as article_hash',
                'a.published_at as article_published_at',
                'a.crawled_at as article_crawled_at'
            )
            ->from('article', 'a')
            ->innerJoin('a', 'source', 's', 'a.source_id = s.id')
            ->orderBy('a.published_at', 'DESC')
            // Tie-breaker: published_at is not unique, and OFFSET pagination over a
            // non-deterministic order can repeat or miss rows between batches.
            ->addOrderBy('a.id', 'DESC');

        if ($query->source !== null) {
            $qb->andWhere('s.name = :source')
                ->setParameter('source', $query->source);
        }

        if ($query->date instanceof DateRange) {
            $qb->andWhere('a.published_at BETWEEN :start AND :end')
                ->setParameter('start', $query->date->start)
                ->setParameter('end', $query->date->end);
        }

        for ($offset = 0; ; $offset += self::BATCH_SIZE) {
            $qb->setFirstResult($offset);
            $qb->setMaxResults(self::BATCH_SIZE);

            /** @var array<array<string, mixed>> $data */
            $data = $qb->executeQuery()->fetchAllAssociative();

            foreach ($data as $article) {
                yield ArticleForExport::create($article);
            }

            // A short (or empty) batch is the last one: no need for one more query.
            if (count($data) < self::BATCH_SIZE) {
                break;
            }
        }
    }
}
@@ -0,0 +1,51 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\Aggregator\Application\UseCase\QueryHandler\GetEarliestPublicationDateHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
/**
 * Class GetEarliestPublicationDateDBalHandler.
 *
 * Reads MIN(published_at) over the articles of one source, optionally narrowed to a category.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetEarliestPublicationDateDBalHandler implements GetEarliestPublicationDateHandler
{
    public function __construct(
        private Connection $connection,
        private LoggerInterface $logger
    ) {
    }

    /**
     * Returns the earliest publication date found, or the current date and time when the query yields NULL.
     *
     * Any failure while querying or building the date is logged and rethrown as NoResult.
     */
    #[\Override]
    public function __invoke(GetEarliestPublicationDate $query): \DateTimeImmutable
    {
        $builder = $this->connection->createQueryBuilder();
        $builder
            ->select('MIN(a.published_at)')
            ->from('article', 'a')
            ->innerJoin('a', 'source', 's', 'a.source_id = s.id')
            ->where('s.name = :source')
            ->setParameter('source', $query->source);

        $category = $query->category;
        if ($category !== null) {
            // Substring match on the stored categories column.
            $builder
                ->andWhere('a.categories LIKE :category')
                ->setParameter('category', '%' . $category . '%');
        }

        try {
            /** @var string|null $earliest */
            $earliest = $builder->executeQuery()->fetchOne();

            return new \DateTimeImmutable($earliest ?? 'now');
        } catch (\Throwable $failure) {
            $this->logger->critical('Unable to fetch earliest publication date');

            throw NoResult::forQuery($builder->getSQL(), $builder->getParameters(), $failure);
        }
    }
}
@@ -0,0 +1,51 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\Aggregator\Application\UseCase\QueryHandler\GetLatestPublicationDateHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
/**
 * Class GetLatestPublicationDateDBalHandler.
 *
 * Reads MAX(published_at) over the articles of one source, optionally narrowed to a category.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetLatestPublicationDateDBalHandler implements GetLatestPublicationDateHandler
{
    public function __construct(
        private Connection $connection,
        private LoggerInterface $logger
    ) {
    }

    /**
     * Returns the latest publication date found, or the current date and time when the query yields NULL.
     *
     * Any failure while querying or building the date is logged and rethrown as NoResult.
     */
    #[\Override]
    public function __invoke(GetLatestPublicationDate $query): \DateTimeImmutable
    {
        $builder = $this->connection->createQueryBuilder();
        $builder
            ->select('MAX(a.published_at)')
            ->from('article', 'a')
            ->innerJoin('a', 'source', 's', 'a.source_id = s.id')
            ->where('s.name = :source')
            ->setParameter('source', $query->source);

        $category = $query->category;
        if ($category !== null) {
            // Substring match on the stored categories column.
            $builder
                ->andWhere('a.categories LIKE :category')
                ->setParameter('category', '%' . $category . '%');
        }

        try {
            /** @var string|null $latest */
            $latest = $builder->executeQuery()->fetchOne();

            return new \DateTimeImmutable($latest ?? 'now');
        } catch (\Throwable $failure) {
            $this->logger->critical('Unable to fetch latest publication date');

            throw NoResult::forQuery($builder->getSQL(), $builder->getParameters(), $failure);
        }
    }
}
@@ -0,0 +1,48 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\Aggregator\Application\UseCase\QueryHandler\GetSourceStatisticsListHandler;
use App\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
/**
 * Class GetSourceStatisticsListDbalHandler.
 *
 * Aggregates, per source, the article count, the number of articles with metadata
 * and the most recent crawl date.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class GetSourceStatisticsListDbalHandler implements GetSourceStatisticsListHandler
{
    public function __construct(
        private Connection $connection
    ) {
    }

    /**
     * Runs the aggregation query and hands the raw rows to SourceStatisticsList::create().
     *
     * A query failure is rethrown as NoResult.
     */
    public function __invoke(GetSourceStatisticsList $query): SourceStatisticsList
    {
        $columns = [
            's.id as source_id',
            's.name as source_name',
            'MAX(a.crawled_at) as source_crawled_at',
            'COUNT(a.id) as articles_count',
            'SUM(CASE WHEN a.metadata IS NOT NULL THEN 1 ELSE 0 END) as article_metadata_available',
        ];

        $builder = $this->connection->createQueryBuilder()
            ->select(...$columns)
            ->from('source', 's')
            // LEFT JOIN: sources without any article still produce a row.
            ->leftJoin('s', 'article', 'a', 'a.source_id = s.id')
            ->groupBy('s.id')
            ->orderBy('s.name', 'ASC');

        try {
            $rows = $builder->executeQuery()->fetchAllAssociative();
        } catch (\Throwable $failure) {
            throw NoResult::forQuery($builder->getSQL(), $builder->getParameters(), $failure);
        }

        return SourceStatisticsList::create($rows);
    }
}
@@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use Symfony\Bridge\Doctrine\Types\AbstractUidType;
/**
 * Class ArticleIdType.
 *
 * Doctrine type registered under the name "article_id" and backed by the ArticleId identity class.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class ArticleIdType extends AbstractUidType
{
    /**
     * Identity class handled by this type.
     */
    #[\Override]
    protected function getUidClass(): string
    {
        return ArticleId::class;
    }

    /**
     * Name of this Doctrine type.
     */
    #[\Override]
    public function getName(): string
    {
        return 'article_id';
    }
}
@@ -0,0 +1,66 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use Doctrine\DBAL\Platforms\AbstractPlatform;
use Doctrine\DBAL\Types\ConversionException;
use Doctrine\DBAL\Types\Type;
/**
 * Class OpenGraphType.
 *
 * Doctrine type "open_graph": persists an OpenGraph value object in a nullable JSON column.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class OpenGraphType extends Type
{
    /**
     * Declares the column using the platform's JSON type.
     */
    #[\Override]
    public function getSQLDeclaration(array $column, AbstractPlatform $platform): string
    {
        return $platform->getJsonTypeDeclarationSQL([
            'nullable' => true,
        ]);
    }

    /**
     * Name of this Doctrine type.
     */
    public function getName(): string
    {
        return 'open_graph';
    }

    /**
     * Converts the stored value into an OpenGraph through OpenGraph::tryFrom().
     *
     * @throws ConversionException when the value is neither null nor a string, or when tryFrom() throws
     */
    #[\Override]
    public function convertToPHPValue(mixed $value, AbstractPlatform $platform): ?OpenGraph
    {
        if ($value === null) {
            return null;
        }

        if (! \is_string($value)) {
            throw ConversionException::conversionFailedInvalidType($value, $this->getName(), ['null', 'string', OpenGraph::class]);
        }

        try {
            return OpenGraph::tryFrom($value);
        } catch (\Throwable $e) {
            throw ConversionException::conversionFailed($value, $this->getName(), $e);
        }
    }

    /**
     * Serializes an OpenGraph to JSON; null and the empty string are stored as NULL.
     *
     * @throws ConversionException for any other value, or when JSON encoding fails
     */
    #[\Override]
    public function convertToDatabaseValue($value, AbstractPlatform $platform): ?string
    {
        if ($value instanceof OpenGraph) {
            try {
                // Fail loudly: a swallowed encoding error would silently persist NULL and lose the metadata.
                return json_encode($value, JSON_THROW_ON_ERROR);
            } catch (\JsonException $e) {
                throw ConversionException::conversionFailedSerialization($value, 'json', $e->getMessage(), $e);
            }
        }

        if ($value === null || $value === '') {
            return null;
        }

        if (! \is_string($value)) {
            throw ConversionException::conversionFailedInvalidType($value, $this->getName(), ['null', 'string', OpenGraph::class]);
        }

        // Non-empty strings are rejected: only OpenGraph instances may be persisted.
        throw ConversionException::conversionFailed($value, $this->getName());
    }
}
@@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
use App\Aggregator\Domain\Model\Identity\SourceId;
use Symfony\Bridge\Doctrine\Types\AbstractUidType;
/**
 * Class SourceIdType.
 *
 * Doctrine type registered under the name "source_id" and backed by the SourceId identity class.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class SourceIdType extends AbstractUidType
{
    /**
     * Name of this Doctrine type.
     */
    #[\Override]
    public function getName(): string
    {
        return 'source_id';
    }

    /**
     * Identity class handled by this type.
     */
    #[\Override]
    protected function getUidClass(): string
    {
        return SourceId::class;
    }
}
@@ -0,0 +1,130 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
use App\Aggregator\Domain\Exception\ArticleNotFound;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\Repository\ArticleRepository;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
/**
 * Class ArticleOrmRepository.
 *
 * Doctrine ORM implementation of the ArticleRepository.
 *
 * @extends ServiceEntityRepository<Article>
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class ArticleOrmRepository extends ServiceEntityRepository implements ArticleRepository
{
    public function __construct(ManagerRegistry $registry)
    {
        parent::__construct($registry, Article::class);
    }

    /**
     * Persists the article and flushes immediately.
     */
    #[\Override]
    public function add(Article $article): void
    {
        $this->getEntityManager()->persist($article);
        $this->getEntityManager()->flush();
    }

    /**
     * Removes the article and flushes immediately.
     */
    #[\Override]
    public function remove(Article $article): void
    {
        $this->getEntityManager()->remove($article);
        $this->getEntityManager()->flush();
    }

    /**
     * @throws ArticleNotFound when no article has the given id
     */
    #[\Override]
    public function getById(ArticleId $id): Article
    {
        /** @var Article|null $article */
        $article = $this->findOneBy([
            'id' => $id,
        ]);

        return $article ?? throw ArticleNotFound::withId($id);
    }

    /**
     * Yields articles newest first, in pages of 1000, optionally filtered by source name and date range.
     *
     * Each article is detached from the entity manager once the consumer asks for the next one.
     */
    #[\Override]
    public function export(?string $source, ?DateRange $date): \Generator
    {
        $qb = $this->createQueryBuilder('a')
            ->orderBy('a.publishedAt', 'DESC')
            // Tie-breaker: LIMIT/OFFSET paging over a non-unique sort key may repeat or skip rows.
            ->addOrderBy('a.id', 'DESC');

        if ($source !== null) {
            $qb
                ->leftJoin('a.source', 's')
                ->andWhere('s.name = :source')
                ->setParameter('source', $source);
        }

        if ($date instanceof DateRange) {
            $qb->andWhere('a.publishedAt BETWEEN FROM_UNIXTIME(:start) AND FROM_UNIXTIME(:end)')
                ->setParameter('start', $date->start)
                ->setParameter('end', $date->end);
        }

        $limit = 1000;
        $offset = 0;

        do {
            $qb->setFirstResult($offset);
            $qb->setMaxResults($limit);

            /** @var Article[] $articles */
            $articles = $qb->getQuery()->getResult();

            foreach ($articles as $article) {
                yield $article;
                $this->getEntityManager()->detach($article);
            }

            $offset += $limit;
            // A short (or empty) page means there is nothing left to fetch.
        } while (count($articles) === $limit);
    }

    /**
     * Returns the article with the given hash, or null when there is none.
     */
    #[\Override]
    public function getByHash(string $hash): ?Article
    {
        /** @var Article|null $article */
        $article = $this->findOneBy([
            'hash' => $hash,
        ]);

        return $article;
    }

    /**
     * Bulk-deletes the articles of a source, optionally limited to a category substring.
     *
     * @return int the value returned by executing the DELETE query
     */
    #[\Override]
    public function clear(string $source, ?string $category): int
    {
        // The ORM QueryBuilder drops JOIN parts when building a DELETE statement, so the source
        // is matched through a subselect rather than through a join alias.
        $qb = $this->getEntityManager()->createQueryBuilder()
            ->delete(Article::class, 'a')
            ->where('a.source IN (SELECT s.id FROM App\Aggregator\Domain\Model\Entity\Source s WHERE s.name = :source)')
            ->setParameter('source', $source);

        if ($category !== null) {
            $qb->andWhere('a.categories LIKE :category')
                ->setParameter('category', sprintf('%%%s%%', $category));
        }

        /** @var int $result */
        $result = $qb->getQuery()->execute();

        return $result;
    }
}
@@ -0,0 +1,65 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Infrastructure\Persistence\Doctrine\ORM;
use App\Aggregator\Domain\Exception\SourceNotFound;
use App\Aggregator\Domain\Model\Entity\Source;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\Repository\SourceRepository;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
/**
 * Class SourceOrmRepository.
 *
 * Doctrine ORM implementation of the SourceRepository.
 *
 * @extends ServiceEntityRepository<Source>
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class SourceOrmRepository extends ServiceEntityRepository implements SourceRepository
{
    public function __construct(ManagerRegistry $registry)
    {
        parent::__construct($registry, Source::class);
    }

    /**
     * Persists the source and flushes immediately.
     */
    public function add(Source $source): void
    {
        $manager = $this->getEntityManager();
        $manager->persist($source);
        $manager->flush();
    }

    /**
     * Removes the source and flushes immediately.
     */
    public function remove(Source $source): void
    {
        $manager = $this->getEntityManager();
        $manager->remove($source);
        $manager->flush();
    }

    /**
     * @throws SourceNotFound when no source has the given name
     */
    public function getByName(string $name): Source
    {
        return $this->findOneBy(['name' => $name]) ?? throw SourceNotFound::withName($name);
    }

    /**
     * @throws SourceNotFound when no source has the given id
     */
    public function getById(SourceId $sourceId): Source
    {
        return $this->findOneBy(['id' => $sourceId]) ?? throw SourceNotFound::withId($sourceId);
    }
}
@@ -0,0 +1,117 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Domain\Event\SourceCrawled;
use App\Aggregator\Domain\Model\Entity\Article;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphConsumer;
use App\Aggregator\Domain\Service\Crawling\OpenGraph\OpenGraphObject;
use App\SharedKernel\Domain\EventDispatcher\EventDispatcher;
use Doctrine\DBAL\Exception;
use Doctrine\ORM\EntityManagerInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Stopwatch\Stopwatch;
#[AsCommand(
    name: 'app:open-graph',
    description: 'Update OpenGraph data for articles',
)]
/**
 * Console command that fetches OpenGraph data for the articles of one source that have no metadata yet.
 */
class ConsumeOpenGraphConsole extends Command
{
    private const string WATCH_EVENT_NAME = 'open-graph-consume';

    private SymfonyStyle $io;

    public function __construct(
        private readonly OpenGraphConsumer $openGraphConsumer,
        private readonly EntityManagerInterface $entityManager,
        private readonly LoggerInterface $logger,
        private readonly EventDispatcher $eventDispatcher,
        private readonly Stopwatch $stopwatch = new Stopwatch(false)
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'The source to crawl');
        $this->addOption('batch', null, InputOption::VALUE_OPTIONAL, 'Batch size', 50);
    }

    /**
     * Iterates over the source's articles without metadata, consumes each article link and
     * flushes the entity manager every "batch" articles.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setProcessTitle('[DRC News] OpenGraph Consumer');
        if ($input->getOption('no-interaction') === false && ! $this->io->confirm('This is a long process, do you want to continue ?', false)) {
            $this->io->warning('Process aborted');
            return Command::SUCCESS;
        }

        // Command-line values arrive as strings; the batch size is the divisor of the modulo
        // below, so it must be a strictly positive integer.
        $batchSize = (int) ($input->getOption('batch') ?? 50);
        if ($batchSize < 1) {
            $this->io->error('Batch size must be a positive integer');
            return Command::INVALID;
        }

        $index = 0;
        /** @var string $source */
        $source = $input->getArgument('source');

        try {
            // Raise the session timeouts so the connection survives between flushes.
            $this->entityManager->getConnection()->executeQuery('SET SESSION interactive_timeout = 86400;');
            $this->entityManager->getConnection()->executeQuery('SET SESSION wait_timeout = 86400;');
        } catch (Exception $e) {
            $this->logger->critical('Unable to set session timeout', [
                'exception' => $e,
            ]);
            return Command::FAILURE;
        }

        // Join through the article's own association: joining the Source entity without a
        // condition does not tie the selected articles to the requested source.
        $query = $this->entityManager
            ->createQuery(<<<'DQL'
                SELECT a
                FROM App\Aggregator\Domain\Model\Entity\Article a
                INNER JOIN a.source s
                WHERE s.name = :source AND a.metadata IS NULL
                ORDER BY a.publishedAt DESC
            DQL)
            ->setParameter('source', $source);

        $this->stopwatch->start(self::WATCH_EVENT_NAME);

        /** @var Article $article */
        foreach ($query->toIterable() as $article) {
            $object = $this->openGraphConsumer->consumeUrl((string) $article->link);
            if ($object instanceof OpenGraphObject) {
                $article->defineOpenGraph($object);
                $this->logger->notice(sprintf('> %s ✅', $article->title));
            } else {
                $this->logger->notice(sprintf('> %s ❌', $article->title));
            }

            ++$index;
            if ($index % $batchSize === 0) {
                $this->entityManager->flush();
                $this->entityManager->clear();
            }
        }

        // Persist the last, possibly incomplete, batch.
        $this->entityManager->flush();

        $event = $this->stopwatch->stop(self::WATCH_EVENT_NAME);
        $this->eventDispatcher->dispatch([new SourceCrawled((string) $event, 'open-graph')]);
        $this->logger->notice('OpenGraph data fetched successfully');

        return Command::SUCCESS;
    }
}
@@ -0,0 +1,121 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\PageRange;
use App\Aggregator\Infrastructure\Crawler\Source\SourceCrawler;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Process\PhpSubprocess;
#[AsCommand(
    name: 'app:crawl',
    description: 'crawle a news website',
)]
/**
 * Console command that crawls one source, either in-process or split across worker sub-processes.
 */
class CrawlConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly SourceCrawler $sourceCrawler
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addOption('date', null, InputOption::VALUE_OPTIONAL, 'Date interval to crawle');
        $this->addOption('page', null, InputOption::VALUE_OPTIONAL, 'PageRange interval to crawle');
        $this->addOption('category', null, InputOption::VALUE_OPTIONAL, 'the category to crawle');
        $this->addOption('parallel', null, InputOption::VALUE_OPTIONAL, 'the number of parallel requests', default: 1);
        $this->addOption('notify', null, InputOption::VALUE_NONE, 'enable notifications');
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Crawls the source in-process, or delegates to parallel() when --parallel is greater than 1.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        /** @var string $source */
        $source = $input->getArgument('source');
        /** @var string|null $page */
        $page = $input->getOption('page');
        /** @var string|null $date */
        $date = $input->getOption('date');
        /** @var string|null $category */
        $category = $input->getOption('category');
        $parallel = (int) $input->getOption('parallel');

        if ($parallel > 1) {
            return $this->parallel($parallel, $source, $category);
        }

        $this->sourceCrawler->fetch(
            settings: new CrawlingSettings(
                id: $source,
                pageRange: $page !== null ? PageRange::from($page) : null,
                dateRange: $date !== null ? DateRange::from($date) : null,
                category: $category,
                // A VALUE_NONE option is a boolean flag and never null, so a null check always enabled it.
                notify: (bool) $input->getOption('notify')
            )
        );

        $this->io->success('website crawled successfully');
        return Command::SUCCESS;
    }

    /**
     * Splits the source's pagination into contiguous page windows and crawls each one in its
     * own `app:crawl` sub-process.
     *
     * @return int Command::SUCCESS when every worker exits successfully, Command::FAILURE otherwise
     */
    private function parallel(int $workers, string $source, ?string $category): int
    {
        $fetcher = $this->sourceCrawler->get($source);
        $range = $fetcher->getPagination($category);

        $pages = $range->end - $range->start + 1;
        $workPerWorker = (int) ceil($pages / $workers);
        $this->io->title(sprintf('Crawling %d pages with %d workers, %d pages per worker', $pages, $workers, $workPerWorker));

        /** @var array<string, PhpSubprocess> $processes keyed by their "start:end" page window */
        $processes = [];
        for ($i = 0; $i < $workers; $i++) {
            $start = $range->start + ($i * $workPerWorker);
            // Rounding up can leave the last workers without pages: stop before spawning them.
            if ($start > $range->end) {
                break;
            }
            $end = min($start + $workPerWorker - 1, $range->end);

            $command = ['bin/console', 'app:crawl', $source, sprintf('--page=%d:%d', $start, $end), '-v'];
            if ($category !== null) {
                // The page windows were computed for this category, so workers must crawl it too.
                $command[] = sprintf('--category=%s', $category);
            }

            $process = new PhpSubprocess($command);
            $process->start();
            $processes[sprintf('%d:%d', $start, $end)] = $process;
        }

        $failed = false;
        foreach ($processes as $window => $process) {
            // Block until the worker exits instead of spinning on isRunning().
            $process->wait();
            $this->io->writeln($process->getOutput());

            if (! $process->isSuccessful()) {
                $failed = true;
                $this->io->error(sprintf('Worker for pages %s failed: %s', $window, $process->getErrorOutput()));
            }
        }

        if ($failed) {
            return Command::FAILURE;
        }

        $this->io->success('Website crawled successfully');
        return Command::SUCCESS;
    }
}
@@ -0,0 +1,95 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\CreateSource;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Presentation\Console\AskArgumentFeature;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
    name: 'app:create-source',
    description: 'add a new data source'
)]
/**
 * Console command that registers a new source together with its credibility scoring.
 */
class CreateSourceConsole extends Command
{
    use AskArgumentFeature;

    private SymfonyStyle $io;

    public function __construct(
        private readonly CommandBus $commandBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addArgument('displayName', InputArgument::OPTIONAL, 'the display name of the source');
        $this->addArgument('description', InputArgument::OPTIONAL, 'the description of the source');

        // Option modes come from InputOption. VALUE_REQUIRED has the same numeric value as the
        // InputArgument::OPTIONAL constant used before, so the option behaviour is unchanged.
        $this->addOption('bias', 'b', InputOption::VALUE_REQUIRED, 'bias of the source', Bias::NEUTRAL->value);
        $this->addOption('reliability', 'r', InputOption::VALUE_REQUIRED, 'reliability of the source', Reliability::AVERAGE->value);
        $this->addOption('transparency', 't', InputOption::VALUE_REQUIRED, 'transparency of the source', Transparency::MEDIUM->value);
    }

    /**
     * Prompts for the source, display name and scoring options through the AskArgumentFeature helpers.
     */
    #[\Override]
    protected function interact(InputInterface $input, OutputInterface $output): void
    {
        // $this->io is already set: Symfony calls initialize() before interact().
        $this->io->title('Create a new data source');
        $this->askArgument($input, 'source');
        $this->askArgument($input, 'displayName');
        $this->askOption($input, 'bias');
        $this->askOption($input, 'reliability');
        $this->askOption($input, 'transparency');
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Asks for a final confirmation, then dispatches CreateSource on the command bus.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        if (! $this->io->confirm('Do you want to continue?', false)) {
            $this->io->warning('Process aborted');
            return Command::FAILURE;
        }

        /** @var string $source */
        $source = $input->getArgument('source');
        /** @var string|null $displayName */
        $displayName = $input->getArgument('displayName');
        /** @var string|null $description */
        $description = $input->getArgument('description');

        $credibility = new Credibility(
            bias: Bias::from($input->getOption('bias')),
            reliability: Reliability::from($input->getOption('reliability')),
            transparency: Transparency::from($input->getOption('transparency')),
        );

        $this->commandBus->handle(new CreateSource($source, $credibility, $displayName, $description));

        $this->io->success('Source add successfully');
        return Command::SUCCESS;
    }
}
@@ -0,0 +1,71 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\DeleteArticles;
use App\SharedKernel\Application\Messaging\CommandBus;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Question\Question;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
    name: 'app:delete-articles',
    description: 'remove all articles from the database by source',
)]
/**
 * Console command that deletes the articles of a source after a three-step confirmation.
 */
class DeleteArticlesConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly CommandBus $commandBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addOption('category', null, InputOption::VALUE_OPTIONAL, 'the category to crawle');
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Requires two confirmations and a re-typed source name before dispatching DeleteArticles.
     *
     * Always returns Command::SUCCESS, whether or not anything was deleted.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        /** @var string $source */
        $source = $input->getArgument('source');
        /** @var string|null $category */
        $category = $input->getOption('category');

        // The second prompt is only shown when the first one is accepted.
        $confirmed = $this->io->confirm('Delete all articles ?', false)
            && $this->io->confirm('Are you sure ?', false);
        if (! $confirmed) {
            return Command::SUCCESS;
        }

        $typedSource = $this->io->askQuestion(new Question('Specify the source to confirm : '));
        if ($typedSource !== $source) {
            $this->io->warning('Source does not match, aborting !');

            return Command::SUCCESS;
        }

        /** @var int $removed */
        $removed = $this->commandBus->handle(new DeleteArticles($source, $category));
        $this->io->success(sprintf('%d articles from %s removed', $removed, $source));

        return Command::SUCCESS;
    }
}
@@ -0,0 +1,65 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Command\ExportArticles;
use App\SharedKernel\Application\Messaging\CommandBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
    name: 'app:export-articles',
    description: 'export crawled news website',
)]
/**
 * Console command that dispatches an ExportArticles command, optionally limited to a source and a date range.
 */
final class ExportArticlesConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly CommandBus $commandBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::OPTIONAL, 'the website source to crawle');
        $this->addOption('date', null, InputOption::VALUE_OPTIONAL, 'Date interval to crawle', null);
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Asks for confirmation, then runs the export; declining aborts without exporting.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        /** @var string|null $source */
        $source = $input->getArgument('source');
        /** @var string|null $date */
        $date = $input->getOption('date');

        if (! $this->io->confirm('This can take a while, would like to continue ?', false)) {
            // Nothing was exported, so do not report a success.
            $this->io->warning('Process aborted');

            return Command::SUCCESS;
        }

        $this->commandBus->handle(new ExportArticles(
            source: $source,
            date: $date !== null ? DateRange::from($date) : null
        ));

        $this->io->success('articles exported successfully');
        return Command::SUCCESS;
    }
}
@@ -0,0 +1,63 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\ReadModel\SourceStatistics;
use App\Aggregator\Application\ReadModel\SourceStatisticsList;
use App\Aggregator\Application\UseCase\Query\GetSourceStatisticsList;
use App\SharedKernel\Application\Messaging\QueryBus;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Stopwatch\Stopwatch;
#[AsCommand(
    name: 'app:stats',
    description: 'show stats about the articles in the database',
)]
/**
 * Console command that prints a per-source statistics table followed by the elapsed time.
 */
class GetSourceStatisticsListConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly QueryBus $queryBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Queries the source statistics, renders them as a table and prints the stopwatch summary.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Start timing before the query so the reported duration covers it as well as the rendering.
        $stopWatch = new Stopwatch(true);
        $stopWatch->start('app:stats');

        /** @var SourceStatisticsList $stats */
        $stats = $this->queryBus->handle(new GetSourceStatisticsList());

        $this->io->table(
            ['Source', 'Articles', 'Metadata', 'CrawledAt'],
            array_map(
                static fn (SourceStatistics $source): array => [
                    $source->name,
                    number_format($source->articlesCount, decimal_separator: '.', thousands_separator: ','),
                    number_format($source->metadataAvailable, decimal_separator: '.', thousands_separator: ','),
                    $source->crawledAt?->format('Y-m-d H:i:s') ?? 'Never',
                ],
                $stats->items
            )
        );
        $this->io->text((string) $stopWatch->stop('app:stats'));

        return Command::SUCCESS;
    }
}
@@ -0,0 +1,90 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\Aggregator\Application\UseCase\Query\GetEarliestPublicationDate;
use App\Aggregator\Application\UseCase\Query\GetLatestPublicationDate;
use App\Aggregator\Domain\Model\ValueObject\Crawling\CrawlingSettings;
use App\Aggregator\Domain\Model\ValueObject\Crawling\UpdateDirection;
use App\Aggregator\Infrastructure\Crawler\Source\SourceCrawler;
use App\SharedKernel\Application\Messaging\QueryBus;
use App\SharedKernel\Domain\Model\ValueObject\DateRange;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
    name: 'app:update',
    description: 'crawl a news website based on last update',
)]
/**
 * Console command that crawls a source starting from its latest (forward) or earliest (backward)
 * stored publication date.
 */
class UpdateConsole extends Command
{
    private SymfonyStyle $io;

    public function __construct(
        private readonly SourceCrawler $sourceCrawler,
        private readonly QueryBus $queryBus
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function configure(): void
    {
        $this->addArgument('source', InputArgument::REQUIRED, 'the website source to crawle');
        $this->addOption('category', null, InputOption::VALUE_OPTIONAL, 'the category to crawle');
        $this->addOption('direction', null, InputOption::VALUE_OPTIONAL, 'the direction to crawle', 'forward', ['forward', 'backward']);
        $this->addOption('days', null, InputOption::VALUE_OPTIONAL, 'the number of days to crawle');
        $this->addOption('notify', null, InputOption::VALUE_OPTIONAL, 'enable notifications', default: false);
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Resolves the reference publication date for the chosen direction, builds the matching
     * date range and crawls the source over it.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Option values arrive as strings from the command line; convert before handing them on.
        // NOTE(review): assumes DateRange::backward() takes an int|null number of days — confirm.
        $days = $input->getOption('days');
        $days = $days !== null ? (int) $days : null;

        /** @var string $source */
        $source = $input->getArgument('source');
        /** @var string|null $category */
        $category = $input->getOption('category');
        /** @var string $direction */
        $direction = $input->getOption('direction');
        $direction = UpdateDirection::from($direction);

        // With VALUE_OPTIONAL and default false: option absent => false, bare `--notify` => null,
        // `--notify=<value>` => the given string. Only the last two may enable notifications.
        $notifyOption = $input->getOption('notify');
        $notify = $notifyOption === null || filter_var($notifyOption, FILTER_VALIDATE_BOOLEAN);

        /** @var \DateTimeImmutable $date */
        $date = $this->queryBus->handle(match ($direction) {
            UpdateDirection::FORWARD => new GetLatestPublicationDate($source, $category),
            UpdateDirection::BACKWARD => new GetEarliestPublicationDate($source, $category),
        });

        $dateRange = $direction === UpdateDirection::FORWARD ?
            DateRange::forward($date) :
            DateRange::backward($date, $days);

        $this->io->title(sprintf('[%s] Updating with range %s', $direction->value, $dateRange->format()));
        $this->sourceCrawler->fetch(new CrawlingSettings(
            $source,
            dateRange: $dateRange,
            category: $category,
            notify: $notify
        ));

        $this->io->success('website crawled successfully');
        return Command::SUCCESS;
    }
}
@@ -0,0 +1,66 @@
<?php
declare(strict_types=1);
namespace App\Aggregator\Presentation\Console;
use App\SharedKernel\Domain\Assert;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Contracts\HttpClient\HttpClientInterface;
#[AsCommand(
    name: 'app:update-proxies',
    description: 'get an updated list of proxies',
)]
/**
 * Console command that downloads a proxy list and writes it to data/proxies.txt in the project directory.
 */
final class UpdateProxiesConsole extends Command
{
    private const string UPDATE_URL = 'https://github.com/zloi-user/hideip.me/raw/refs/heads/master/https.txt';

    private SymfonyStyle $io;

    public function __construct(
        private readonly string $projectDir,
        private readonly HttpClientInterface $client,
        private readonly Filesystem $filesystem,
        private readonly LoggerInterface $logger
    ) {
        parent::__construct();
    }

    #[\Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Refreshes the proxy file; any error is logged as critical and reported as Command::FAILURE.
     */
    #[\Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        try {
            $this->refreshProxyFile();
        } catch (\Throwable $failure) {
            $this->logger->critical('Failed to update proxies', [
                'exception' => $failure,
            ]);

            return Command::FAILURE;
        }

        $this->io->success('Proxies updated successfully.');

        return Command::SUCCESS;
    }

    /**
     * Downloads the list, reduces each matching line to its leading "address:port" part and dumps the result.
     */
    private function refreshProxyFile(): void
    {
        $raw = $this->client->request('GET', self::UPDATE_URL)->getContent();

        // Lines shaped like "<digits and dots>:<digits>:<anything>" keep only their first capture group.
        $proxies = preg_replace('/^([0-9\.]+:[0-9]+):.*$/m', '$1', $raw);
        Assert::string($proxies);

        $this->filesystem->dumpFile(
            filename: $this->projectDir . '/data/proxies.txt',
            content: $proxies
        );
    }
}
@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\Cache;
/**
 * Enum SourceCacheAttributes.
 *
 * Cache key prefixes, plus the TTL constant that accompanies them.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
enum SourceCacheAttributes: string
{
    public const int CACHE_TTL = 86400;

    case CATEGORIES = 'categories_shares';
    case PUBLICATIONS = 'publications_graph';

    /**
     * Builds a key of the form "<case value>_<id>".
     */
    public function withId(string $id): string
    {
        return $this->value . '_' . $id;
    }
}
@@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Crawling\OpenGraph;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class ArticleDetails.
 *
 * Read model holding the full representation of one article: content, source
 * reference, scoring values, optional Open Graph metadata and timestamps.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ArticleDetails
{
    /**
     * @param string[] $categories category names (free of empty entries when built through create())
     */
    public function __construct(
        public ArticleId $id,
        public string $title,
        public Link $link,
        public array $categories,
        public string $body,
        public SourceReference $source,
        public string $hash,
        public Credibility $credibility,
        public Sentiment $sentiment,
        public ?OpenGraph $metadata,
        public ReadingTime $readingTime,
        public \DateTimeImmutable $publishedAt,
        public \DateTimeImmutable $crawledAt,
        public ?\DateTimeImmutable $updatedAt,
        public bool $bookmarked = false
    ) {
    }

    /**
     * Builds the read model from a flat associative array keyed by `article_*`
     * names; the same array is handed to SourceReference::create().
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            ArticleId::fromBinary($item['article_id']),
            DataMapping::string($item, 'article_title'),
            Link::from(DataMapping::string($item, 'article_link')),
            self::splitCategories(DataMapping::string($item, 'article_categories')),
            DataMapping::string($item, 'article_body'),
            SourceReference::create($item),
            DataMapping::string($item, 'article_hash'),
            new Credibility(
                DataMapping::enum($item, 'article_bias', Bias::class),
                DataMapping::enum($item, 'article_reliability', Reliability::class),
                DataMapping::enum($item, 'article_transparency', Transparency::class)
            ),
            DataMapping::enum($item, 'article_sentiment', Sentiment::class),
            OpenGraph::tryFrom(DataMapping::nullableString($item, 'article_metadata')),
            ReadingTime::create(DataMapping::nullableInteger($item, 'article_reading_time')),
            DataMapping::datetime($item, 'article_published_at'),
            DataMapping::datetime($item, 'article_crawled_at'),
            DataMapping::nullableDatetime($item, 'article_updated_at'),
            DataMapping::boolean($item, 'article_is_bookmarked'),
        );
    }

    /**
     * Splits a comma-separated category string into a list, dropping empty segments.
     *
     * explode() on an empty string returns [''], which would otherwise show up
     * as a single blank category.
     *
     * @return list<string>
     */
    private static function splitCategories(string $categories): array
    {
        return array_values(array_filter(
            explode(',', $categories),
            static fn (string $category): bool => $category !== '',
        ));
    }
}
@@ -0,0 +1,48 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\Aggregator\Domain\Model\ValueObject\Link;
use App\Aggregator\Domain\Model\ValueObject\ReadingTime;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class ArticleOverview.
 *
 * Read model holding the condensed representation of one article: title,
 * link, categories, excerpt, source reference, optional image, reading time
 * and publication date.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ArticleOverview
{
    /**
     * @param string[] $categories category names (free of empty entries when built through create())
     */
    public function __construct(
        public ArticleId $id,
        public string $title,
        public Link $link,
        public array $categories,
        public string $excerpt,
        public SourceReference $source,
        public ?string $image,
        public ReadingTime $readingTime,
        public \DateTimeImmutable $publishedAt,
        public bool $bookmarked = false
    ) {
    }

    /**
     * Builds the read model from a flat associative array keyed by `article_*`
     * names; the same array is handed to SourceReference::create(). The excerpt
     * is trimmed of surrounding whitespace.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            ArticleId::fromBinary($item['article_id']),
            DataMapping::string($item, 'article_title'),
            Link::from(DataMapping::string($item, 'article_link')),
            self::splitCategories(DataMapping::string($item, 'article_categories')),
            trim(DataMapping::string($item, 'article_excerpt')),
            SourceReference::create($item),
            DataMapping::nullableString($item, 'article_image'),
            ReadingTime::create(DataMapping::nullableInteger($item, 'article_reading_time')),
            DataMapping::datetime($item, 'article_published_at'),
            DataMapping::boolean($item, 'article_is_bookmarked'),
        );
    }

    /**
     * Splits a comma-separated category string into a list, dropping empty segments.
     *
     * explode() on an empty string returns [''], which would otherwise show up
     * as a single blank category.
     *
     * @return list<string>
     */
    private static function splitCategories(string $categories): array
    {
        return array_values(array_filter(
            explode(',', $categories),
            static fn (string $category): bool => $category !== '',
        ));
    }
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
 * Class ArticleOverviewList.
 *
 * Collection of ArticleOverview read models paired with pagination information.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class ArticleOverviewList
{
    /**
     * @param ArticleOverview[] $items      every element is asserted to be an ArticleOverview
     * @param PaginationInfo    $pagination pagination information accompanying the items
     */
    public function __construct(
        public array $items,
        public PaginationInfo $pagination
    ) {
        Assert::allIsInstanceOf($items, ArticleOverview::class);
    }

    /**
     * Maps each raw row through ArticleOverview::create() and wraps the result.
     *
     * @param array<array-key, array<string, mixed>> $items raw rows, one per article
     */
    public static function create(array $items, PaginationInfo $pagination): self
    {
        $overviews = array_map(ArticleOverview::create(...), $items);

        return new self(items: $overviews, pagination: $pagination);
    }
}
@@ -0,0 +1,40 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\FeedManagement\Domain\Model\Identity\BookmarkId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class Bookmark.
 *
 * Read model describing a bookmark: identity, name, optional description,
 * article count, visibility flag and timestamps.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class Bookmark
{
    public function __construct(
        public BookmarkId $id,
        public string $name,
        public \DateTimeImmutable $createdAt,
        public ?string $description = null,
        public int $articlesCount = 0,
        public bool $isPublic = false,
        public ?\DateTimeImmutable $updatedAt = null
    ) {
    }

    /**
     * Builds the read model from a flat associative array keyed by `bookmark_*` names.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            id: BookmarkId::fromBinary($item['bookmark_id']),
            name: DataMapping::string($item, 'bookmark_name'),
            createdAt: DataMapping::datetime($item, 'bookmark_created_at'),
            description: DataMapping::nullableString($item, 'bookmark_description'),
            articlesCount: DataMapping::integer($item, 'bookmark_articles_count'),
            isPublic: DataMapping::boolean($item, 'bookmark_is_public'),
            updatedAt: DataMapping::nullableDatetime($item, 'bookmark_updated_at'),
        );
    }
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
 * Class BookmarkList.
 *
 * Collection of Bookmark read models paired with pagination information.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class BookmarkList
{
    /**
     * @param Bookmark[]     $items      every element is asserted to be a Bookmark
     * @param PaginationInfo $pagination pagination information accompanying the items
     */
    public function __construct(
        public array $items,
        public PaginationInfo $pagination
    ) {
        Assert::allIsInstanceOf($items, Bookmark::class);
    }

    /**
     * Maps each raw row through Bookmark::create() and wraps the result.
     *
     * @param array<array-key, array<string, mixed>> $items raw rows, one per bookmark
     */
    public static function create(array $items, PaginationInfo $pagination): self
    {
        $bookmarks = array_map(Bookmark::create(...), $items);

        return new self(items: $bookmarks, pagination: $pagination);
    }
}
@@ -0,0 +1,20 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
/**
 * Class CategoryShare.
 *
 * Immutable holder for one category together with a count and a percentage.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CategoryShare
{
    /**
     * @param string $category   category label
     * @param int    $count      count attached to the category (presumably a number of articles — confirm with the producer)
     * @param float  $percentage share of the category; the scale (0–1 or 0–100) is not fixed here — TODO confirm
     */
    public function __construct(
        public string $category,
        public int $count,
        public float $percentage,
    ) {}
}
@@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
/**
 * Class CategoryShares.
 *
 * Collection of CategoryShare entries accompanied by a total.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CategoryShares
{
    /**
     * @param CategoryShare[] $items every element is asserted to be a CategoryShare
     * @param int             $total total associated with the entries (its exact meaning is defined by the producer)
     */
    public function __construct(
        public array $items = [],
        public int $total = 0
    ) {
        Assert::allIsInstanceOf($items, CategoryShare::class);
    }
}
@@ -0,0 +1,37 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
use App\FeedManagement\Domain\Model\Identity\CommentId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class Comment.
 *
 * Read model describing a comment: identity, author reference, sentiment,
 * content and creation date.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class Comment
{
    public function __construct(
        public CommentId $id,
        public UserReference $user,
        public Sentiment $sentiment,
        public string $content,
        public \DateTimeImmutable $createdAt,
    ) {
    }

    /**
     * Builds the read model from a flat associative array keyed by `comment_*`
     * names; the same array is handed to UserReference::create().
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            CommentId::fromBinary($item['comment_id']),
            UserReference::create($item),
            DataMapping::enum($item, 'comment_sentiment', Sentiment::class),
            DataMapping::string($item, 'comment_content'),
            // Spelled like the other read models (datetime / nullableDatetime); method names are case-insensitive.
            DataMapping::datetime($item, 'comment_created_at')
        );
    }
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
 * Class CommentList.
 *
 * Collection of Comment read models paired with pagination information.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class CommentList
{
    /**
     * @param Comment[]      $items      every element is asserted to be a Comment
     * @param PaginationInfo $pagination pagination information accompanying the items
     */
    public function __construct(
        public array $items,
        public PaginationInfo $pagination
    ) {
        Assert::allIsInstanceOf($items, Comment::class);
    }

    /**
     * Maps each raw row through Comment::create() and wraps the result.
     *
     * @param array<array-key, array<string, mixed>> $items raw rows, one per comment
     */
    public static function create(array $items, PaginationInfo $pagination): self
    {
        $comments = array_map(Comment::create(...), $items);

        return new self(items: $comments, pagination: $pagination);
    }
}
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
/**
 * Class PublicationEntry.
 *
 * Immutable holder pairing a date string with a count.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class PublicationEntry
{
    /**
     * @param string $date  date label kept as a string; the format is not enforced here — TODO confirm with the producer
     * @param int    $count count attached to that date (presumably a number of publications)
     */
    public function __construct(
        public string $date,
        public int $count,
    ) {}
}
@@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
/**
 * Class PublicationGraph.
 *
 * Collection of PublicationEntry items accompanied by a total.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class PublicationGraph
{
    /**
     * @param PublicationEntry[] $items every element is asserted to be a PublicationEntry
     * @param int                $total total associated with the entries (its exact meaning is defined by the producer)
     */
    public function __construct(
        public array $items = [],
        public int $total = 0
    ) {
        Assert::allIsInstanceOf($items, PublicationEntry::class);
    }
}
@@ -0,0 +1,62 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Bias;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Reliability;
use App\Aggregator\Domain\Model\ValueObject\Scoring\Transparency;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class SourceDetails.
 *
 * Read model holding the full representation of a source: identity,
 * credibility scoring, publication graph, category shares, counters and
 * presentation fields.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceDetails
{
    public function __construct(
        public SourceId $id,
        public string $name,
        public string $url,
        public Credibility $credibility,
        public PublicationGraph $publicationGraph,
        public CategoryShares $categoryShares,
        public int $articlesCount,
        public string $crawledAt,
        public ?string $displayName = null,
        public ?string $description = null,
        public ?string $updatedAt = null,
        public int $metadataAvailable = 0,
        public bool $followed = false,
        public ?string $image = null,
    ) {
    }

    /**
     * Builds the read model from a flat associative array plus the two
     * pre-built aggregates, which are passed through untouched.
     *
     * The crawl and update dates are read as plain strings, not date objects.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item, PublicationGraph $publicationGraph, CategoryShares $categoryShares): self
    {
        return new self(
            id: SourceId::fromBinary($item['source_id']),
            name: DataMapping::string($item, 'source_name'),
            url: DataMapping::string($item, 'source_url'),
            credibility: new Credibility(
                DataMapping::enum($item, 'source_bias', Bias::class),
                DataMapping::enum($item, 'source_reliability', Reliability::class),
                DataMapping::enum($item, 'source_transparency', Transparency::class)
            ),
            publicationGraph: $publicationGraph,
            categoryShares: $categoryShares,
            articlesCount: DataMapping::integer($item, 'articles_count'),
            crawledAt: DataMapping::string($item, 'source_crawled_at'),
            displayName: DataMapping::nullableString($item, 'source_display_name'),
            description: DataMapping::nullableString($item, 'source_description'),
            updatedAt: DataMapping::nullableString($item, 'source_updated_at'),
            metadataAvailable: DataMapping::integer($item, 'articles_metadata_available'),
            followed: DataMapping::boolean($item, 'source_is_followed'),
            image: DataMapping::nullableString($item, 'source_image'),
        );
    }
}
@@ -0,0 +1,38 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class SourceOverview.
 *
 * Read model holding the condensed representation of a source: identity,
 * name, URL, optional display name and image, and a followed flag.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceOverview
{
    public function __construct(
        public SourceId $id,
        public string $name,
        public string $url,
        public ?string $displayName = null,
        public bool $followed = false,
        public ?string $image = null
    ) {
    }

    /**
     * Builds the read model from a flat associative array keyed by `source_*` names.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            id: SourceId::fromBinary($item['source_id']),
            name: DataMapping::string($item, 'source_name'),
            url: DataMapping::string($item, 'source_url'),
            displayName: DataMapping::nullableString($item, 'source_display_name'),
            followed: DataMapping::boolean($item, 'source_is_followed'),
            image: DataMapping::nullableString($item, 'source_image'),
        );
    }
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\SharedKernel\Domain\Assert;
use App\SharedKernel\Domain\Model\Pagination\PaginationInfo;
/**
 * Class SourceOverviewList.
 *
 * Collection of SourceOverview read models paired with pagination information.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceOverviewList
{
    /**
     * @param SourceOverview[] $items      every element is asserted to be a SourceOverview
     * @param PaginationInfo   $pagination pagination information accompanying the items
     */
    public function __construct(
        public array $items,
        public PaginationInfo $pagination,
    ) {
        Assert::allIsInstanceOf($this->items, SourceOverview::class);
    }

    /**
     * Maps each raw row through SourceOverview::create() and wraps the result.
     *
     * @param array<array-key, array<string, mixed>> $items raw rows, one per source
     */
    public static function create(array $items, PaginationInfo $pagination): self
    {
        $overviews = array_map(SourceOverview::create(...), $items);

        return new self(items: $overviews, pagination: $pagination);
    }
}
@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\Aggregator\Domain\Model\Identity\SourceId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class SourceReference.
 *
 * Lightweight reference to a source: identity, name, optional display name
 * and image, and URL.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class SourceReference
{
    public function __construct(
        public SourceId $id,
        public string $name,
        public ?string $displayName,
        public ?string $image,
        public string $url
    ) {
    }

    /**
     * Builds the reference from the `source_*` keys of a flat associative array.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            id: SourceId::fromBinary($item['source_id']),
            name: DataMapping::string($item, 'source_name'),
            displayName: DataMapping::nullableString($item, 'source_display_name'),
            image: DataMapping::nullableString($item, 'source_image'),
            url: DataMapping::string($item, 'source_url'),
        );
    }
}
@@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\ReadModel;
use App\IdentityAndAccess\Domain\Model\Identity\UserId;
use App\SharedKernel\Domain\DataTransfert\DataMapping;
/**
 * Class UserReference.
 *
 * Lightweight reference to a user: identity and name.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class UserReference
{
    public function __construct(
        public UserId $id,
        public string $name,
    ) {
    }

    /**
     * Builds the reference from the `user_id` and `user_name` keys of a flat associative array.
     *
     * @param array<string, mixed> $item flat row (presumably a database result row — confirm with the caller)
     */
    public static function create(array $item): self
    {
        return new self(
            id: UserId::fromBinary($item['user_id']),
            name: DataMapping::string($item, 'user_name'),
        );
    }
}
@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
namespace App\FeedManagement\Application\UseCase\Command;
use App\Aggregator\Domain\Model\Identity\ArticleId;
use App\FeedManagement\Domain\Model\Identity\BookmarkId;
use App\IdentityAndAccess\Domain\Model\Identity\UserId;
/**
 * Class AddArticleToBookmark.
 *
 * Immutable command message carrying the three identifiers involved:
 * a user, an article and a bookmark.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final readonly class AddArticleToBookmark
{
    /**
     * @param UserId     $userId     user the command is issued for (presumably the bookmark owner — confirm in the handler)
     * @param ArticleId  $articleId  article to add
     * @param BookmarkId $bookmarkId bookmark receiving the article
     */
    public function __construct(
        public UserId $userId,
        public ArticleId $articleId,
        public BookmarkId $bookmarkId,
    ) {}
}

Some files were not shown because too many files have changed in this diff Show More