[backend, crawler] feat: support token statistics
This commit is contained in:
@@ -6,6 +6,7 @@ namespace Basango\Aggregator\Application\UseCase\Command;
|
||||
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\Link;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\TokenStatistics;
|
||||
|
||||
/**
|
||||
* Class Save.
|
||||
@@ -17,11 +18,12 @@ final readonly class CreateArticle
|
||||
public function __construct(
|
||||
public string $title,
|
||||
public Link $link,
|
||||
public string $categories,
|
||||
public array $categories,
|
||||
public string $body,
|
||||
public string $source,
|
||||
public int $timestamp,
|
||||
public ?OpenGraph $metadata = null
|
||||
public ?OpenGraph $metadata = null,
|
||||
public ?TokenStatistics $tokenStatistics = null
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
+2
-1
@@ -43,12 +43,13 @@ final readonly class CreateArticleHandler implements CommandHandler
|
||||
link: $command->link,
|
||||
body: $command->body,
|
||||
hash: $hash,
|
||||
categories: mb_strtolower($command->categories),
|
||||
categories: $command->categories,
|
||||
source: $source,
|
||||
publishedAt: $publishedAt
|
||||
);
|
||||
$article
|
||||
->defineOpenGraph($command->metadata)
|
||||
->defineTokenStatistics($command->tokenStatistics)
|
||||
->computeReadingTime();
|
||||
|
||||
$this->articleRepository->add($article);
|
||||
|
||||
@@ -10,6 +10,7 @@ use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\ReadingTime;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Credibility;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\Scoring\Sentiment;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\TokenStatistics;
|
||||
|
||||
/**
|
||||
* Class Article.
|
||||
@@ -25,13 +26,14 @@ class Article
|
||||
public readonly Link $link,
|
||||
public readonly string $body,
|
||||
public readonly string $hash,
|
||||
private(set) string $categories,
|
||||
private(set) array $categories,
|
||||
public readonly Source $source,
|
||||
public readonly \DateTimeImmutable $publishedAt,
|
||||
public readonly \DateTimeImmutable $crawledAt = new \DateTimeImmutable(),
|
||||
private(set) Credibility $credibility = new Credibility(),
|
||||
private(set) Sentiment $sentiment = Sentiment::NEUTRAL,
|
||||
private(set) ?OpenGraph $metadata = null,
|
||||
private(set) ?TokenStatistics $tokenStatistics = null,
|
||||
private(set) ?ReadingTime $readingTime = null,
|
||||
private(set) ?\DateTimeImmutable $updatedAt = null,
|
||||
public readonly ?string $image = null,
|
||||
@@ -56,7 +58,7 @@ class Article
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function assignCategories(string $categories): self
|
||||
public function assignCategories(array $categories): self
|
||||
{
|
||||
$this->categories = $categories;
|
||||
$this->updatedAt = new \DateTimeImmutable();
|
||||
@@ -83,4 +85,11 @@ class Article
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Attaches token statistics to this article.
 *
 * Passing null clears any statistics previously attached.
 *
 * @return $this so calls can be chained
 */
public function defineTokenStatistics(?TokenStatistics $statistics): self
{
    $this->tokenStatistics = $statistics;

    return $this;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Basango\Aggregator\Domain\Model\ValueObject;
|
||||
|
||||
/**
 * Class TokenStatistics.
 *
 * Value object holding optional per-field token counts (title, body, excerpt,
 * categories) together with a derived total.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class TokenStatistics implements \JsonSerializable
{
    /**
     * Sum of every known count; a missing (null) count contributes zero.
     *
     * Virtual property: computed on each read and never stored.
     */
    public ?int $total {
        get => ($this->title ?? 0)
            + ($this->body ?? 0)
            + ($this->excerpt ?? 0)
            + ($this->categories ?? 0);
    }

    public function __construct(
        public readonly ?int $title = null,
        public readonly ?int $body = null,
        public readonly ?int $excerpt = null,
        public readonly ?int $categories = null,
    ) {
    }

    /**
     * Builds an instance from a JSON document, or returns null when that is not possible.
     *
     * Null is returned when $value is null, is not valid JSON, does not decode to an
     * array (e.g. a bare number, string, boolean or JSON null), or carries a count
     * that cannot be used as an integer.
     *
     * @param string|null $value JSON object with optional "title", "body", "excerpt" and "categories" keys
     */
    public static function tryFrom(?string $value): ?self
    {
        if ($value === null) {
            return null;
        }

        try {
            $payload = \json_decode($value, true, flags: \JSON_THROW_ON_ERROR);

            // Valid JSON that is not an object/array would otherwise be read as
            // "all counts missing" and produce a meaningless empty instance.
            if (! \is_array($payload)) {
                return null;
            }

            return new self(
                $payload['title'] ?? null,
                $payload['body'] ?? null,
                $payload['excerpt'] ?? null,
                $payload['categories'] ?? null,
            );
        } catch (\JsonException|\TypeError) {
            // JsonException: malformed document; TypeError: a count of the wrong type.
            return null;
        }
    }

    /**
     * @return array{title: ?int, body: ?int, excerpt: ?int, categories: ?int, total: ?int}
     */
    #[\Override]
    public function jsonSerialize(): array
    {
        return [
            'title' => $this->title,
            'body' => $this->body,
            'excerpt' => $this->excerpt,
            'categories' => $this->categories,
            'total' => $this->total,
        ];
    }
}
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Basango\Aggregator\Infrastructure\Persistence\Doctrine\DBAL\Types;
|
||||
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\TokenStatistics;
|
||||
use Doctrine\DBAL\Platforms\AbstractPlatform;
|
||||
use Doctrine\DBAL\Types\ConversionException;
|
||||
use Doctrine\DBAL\Types\Type;
|
||||
|
||||
/**
 * Class TokenStatisticsType.
 *
 * Doctrine DBAL type that stores a TokenStatistics value object in a JSON column
 * and restores it when reading.
 *
 * @author bernard-ng <bernard@devscast.tech>
 */
final class TokenStatisticsType extends Type
{
    /**
     * Asks the platform for its JSON column declaration, requesting the "jsonb" variant.
     */
    #[\Override]
    public function getSQLDeclaration(array $column, AbstractPlatform $platform): string
    {
        return $platform->getJsonTypeDeclarationSQL([
            'nullable' => true,
            'jsonb' => true,
        ]);
    }

    public function getName(): string
    {
        return 'token_statistics';
    }

    /**
     * Converts the stored JSON string into a TokenStatistics instance.
     *
     * Null stays null and an existing TokenStatistics instance is returned unchanged.
     * The result is also null when TokenStatistics::tryFrom() cannot build an
     * instance from the string.
     *
     * @throws ConversionException when $value is neither null, a string nor a TokenStatistics
     */
    #[\Override]
    public function convertToPHPValue(mixed $value, AbstractPlatform $platform): ?TokenStatistics
    {
        if ($value === null) {
            return null;
        }

        // Already converted: nothing to do. The type list in the error below
        // names this class as accepted, so honour it.
        if ($value instanceof TokenStatistics) {
            return $value;
        }

        if (! \is_string($value)) {
            throw ConversionException::conversionFailedInvalidType(
                $value,
                $this->getName(),
                ['null', 'string', TokenStatistics::class]
            );
        }

        try {
            return TokenStatistics::tryFrom($value);
        } catch (\Throwable $e) {
            throw ConversionException::conversionFailed($value, $this->getName(), $e);
        }
    }

    /**
     * Serializes a TokenStatistics instance to its JSON representation.
     *
     * Null and the empty string are stored as NULL.
     *
     * @throws ConversionException when $value is of an unsupported type, is a non-empty
     *                             string, or cannot be encoded as JSON
     */
    #[\Override]
    public function convertToDatabaseValue(mixed $value, AbstractPlatform $platform): ?string
    {
        if ($value instanceof TokenStatistics) {
            try {
                return \json_encode($value, \JSON_THROW_ON_ERROR);
            } catch (\JsonException $e) {
                // Surface the failure instead of silently persisting NULL.
                throw ConversionException::conversionFailedSerialization($value, 'json', $e->getMessage(), $e);
            }
        }

        if ($value === null || $value === '') {
            return null;
        }

        if (! \is_string($value)) {
            throw ConversionException::conversionFailedInvalidType(
                $value,
                $this->getName(),
                ['null', 'string', TokenStatistics::class]
            );
        }

        // Raw non-empty strings are rejected: only value objects may be persisted.
        throw ConversionException::conversionFailed($value, $this->getName());
    }
}
|
||||
+2
-1
@@ -47,11 +47,12 @@ final class AddArticleController extends AbstractController
|
||||
$this->handleCommand(new CreateArticle(
|
||||
$model->title,
|
||||
Link::from($model->link),
|
||||
implode(', ', $model->categories),
|
||||
$model->categories,
|
||||
$model->body,
|
||||
$model->source,
|
||||
$model->timestamp,
|
||||
$model->metadata,
|
||||
$model->tokenStatistics
|
||||
));
|
||||
|
||||
return new JsonResponse(status: Response::HTTP_CREATED);
|
||||
|
||||
@@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace Basango\Aggregator\Presentation\WriteModel;
|
||||
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\OpenGraph;
|
||||
use Basango\Aggregator\Domain\Model\ValueObject\TokenStatistics;
|
||||
use Symfony\Component\Validator\Constraints as Assert;
|
||||
|
||||
/**
|
||||
@@ -32,4 +33,6 @@ final class AddArticleModel
|
||||
public array $categories = [];
|
||||
|
||||
public ?OpenGraph $metadata = null;
|
||||
|
||||
public ?TokenStatistics $tokenStatistics = null;
|
||||
}
|
||||
|
||||
+1
-1
@@ -42,7 +42,7 @@ final readonly class GetArticleOverviewListDbalHandler implements GetArticleOver
|
||||
|
||||
$qb->from('article', 'a')
|
||||
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
|
||||
//->orderBy('a.published_at', $query->filters->sortDirection->value)
|
||||
->orderBy('a.published_at', $query->filters->sortDirection->value)
|
||||
->setParameter('userId', $query->userId->toString())
|
||||
;
|
||||
|
||||
|
||||
+6
-4
@@ -62,15 +62,17 @@ trait ArticleQuery
|
||||
private function applyArticleFilters(QueryBuilder $qb, ArticleFilters $filters): QueryBuilder
|
||||
{
|
||||
if ($filters->category !== null) {
|
||||
// PostgreSQL array containment for single value
|
||||
$qb->andWhere(':category = ANY(a.categories)')
|
||||
->setParameter('category', $filters->category);
|
||||
}
|
||||
|
||||
if ($filters->search !== null) {
|
||||
// Case-insensitive search in PostgreSQL
|
||||
$qb->andWhere('a.title ILIKE :search')
|
||||
->setParameter('search', sprintf('%%%s%%', $filters->search));
|
||||
$qb
|
||||
->addSelect("ts_rank(a.tsv, to_tsquery('french', :search)) AS rank")
|
||||
->andWhere("a.tsv @@ to_tsquery('french', :search)")
|
||||
->setParameter('search', $filters->search)
|
||||
->resetOrderBy()
|
||||
->orderBy('rank', $filters->sortDirection->value);
|
||||
}
|
||||
|
||||
if ($filters->dateRange instanceof DateRange) {
|
||||
|
||||
@@ -4,8 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Basango\IdentityAndAccess\Domain\Model\Entity;
|
||||
|
||||
use Gesdinet\JWTRefreshTokenBundle\Entity\RefreshToken as BaseRefreshToken;
|
||||
use Gesdinet\JWTRefreshTokenBundle\Model\AbstractRefreshToken;
|
||||
|
||||
class RefreshToken extends BaseRefreshToken
|
||||
class RefreshToken extends AbstractRefreshToken
|
||||
{
|
||||
}
|
||||
|
||||
@@ -11,15 +11,15 @@ namespace Basango\SharedKernel\Domain;
|
||||
*/
|
||||
final class Application
|
||||
{
|
||||
public string $name = 'DRC News Corpus';
|
||||
public string $name = 'Basango';
|
||||
|
||||
public string $website = 'https://research.devscast.org/drc-news-corpus';
|
||||
public string $website = 'https://basango.ngandu.dev';
|
||||
|
||||
public string $emailAddress = 'contact@devscast.tech';
|
||||
|
||||
public string $infoAddress = 'contact@devscast.tech';
|
||||
|
||||
public string $emailName = 'DRC News Corpus';
|
||||
public string $emailName = 'Basango';
|
||||
|
||||
public string $legalName = 'Devscast Software SàSu';
|
||||
|
||||
|
||||
+2
-3
@@ -48,14 +48,13 @@ trait PaginationQuery
|
||||
PaginatorKeyset $keyset,
|
||||
SortDirection $direction = SortDirection::DESC
|
||||
): QueryBuilder {
|
||||
$orderDirection = strtoupper($direction->value);
|
||||
$comparisonOperator = $direction === SortDirection::ASC ? '>' : '<';
|
||||
|
||||
if ($keyset->date !== null) {
|
||||
$qb->addOrderBy($keyset->date, $orderDirection);
|
||||
$qb->addOrderBy($keyset->date, $direction->value);
|
||||
}
|
||||
|
||||
$qb->addOrderBy($keyset->id, $orderDirection);
|
||||
$qb->addOrderBy($keyset->id, $direction->value);
|
||||
|
||||
$cursor = PaginationCursor::decode($page->cursor);
|
||||
if (! $cursor instanceof PaginationCursor) {
|
||||
|
||||
@@ -22,9 +22,9 @@ final class DefaultController extends AbstractController
|
||||
public function __invoke(): JsonResponse
|
||||
{
|
||||
return $this->json([
|
||||
'repository' => 'https://github.com/bernard-ng/drc-news-corpus',
|
||||
'title' => 'DRC News Corpus : Towards a scalable and intelligent system for Congolese News curation',
|
||||
'description' => 'The DRC News Corpus is a structured and scalable dataset of news articles sourced from major media outlets covering diverse aspects of the Democratic Republic of Congo (DRC). Designed for efficiency, this system enables the automated collection, processing, and organization of news stories spanning politics, economy, society, culture, environment, and international affairs.',
|
||||
'repository' => 'https://github.com/bernard-ng/basango',
|
||||
'title' => 'Basango : Towards a scalable and intelligent system for Congolese News curation',
|
||||
'description' => 'The Basango is a structured and scalable dataset of news articles sourced from major media outlets covering diverse aspects of the Democratic Republic of Congo (DRC). Designed for efficiency, this system enables the automated collection, processing, and organization of news stories spanning politics, economy, society, culture, environment, and international affairs.',
|
||||
'status' => 200,
|
||||
]);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user