[backend] from mariadb to postgres

This commit is contained in:
2025-10-20 13:50:56 +02:00
parent 49733e03b7
commit c334452426
69 changed files with 1082 additions and 1506 deletions
@@ -32,7 +32,7 @@ final readonly class GetArticlesForExportDbalHandler implements GetArticlesForEx
'a.id as article_id',
'a.title as article_title',
'a.link as article_link',
'a.categories as article_categories',
"array_to_string(a.categories, ',') as article_categories",
'a.body as article_body',
's.name as article_source',
'a.hash as article_hash',
@@ -49,7 +49,7 @@ final readonly class GetArticlesForExportDbalHandler implements GetArticlesForEx
}
if ($query->date instanceof DateRange) {
$qb->andWhere('a.published_at BETWEEN :start AND :end')
$qb->andWhere('a.published_at BETWEEN to_timestamp(:start) AND to_timestamp(:end)')
->setParameter('start', $query->date->start)
->setParameter('end', $query->date->end);
}
@@ -34,8 +34,8 @@ final readonly class GetEarliestPublicationDateDBalHandler implements GetEarlies
->setParameter('source', $query->source);
if ($query->category !== null) {
$qb->andWhere('a.categories LIKE :category')
->setParameter('category', sprintf('%%%s%%', $query->category));
$qb->andWhere(':category = ANY(a.categories)')
->setParameter('category', $query->category);
}
try {
@@ -34,8 +34,8 @@ final readonly class GetLatestPublicationDateDBalHandler implements GetLatestPub
->setParameter('source', $query->source);
if ($query->category !== null) {
$qb->andWhere('a.categories LIKE :category')
->setParameter('category', sprintf('%%%s%%', $query->category));
$qb->andWhere(':category = ANY(a.categories)')
->setParameter('category', $query->category);
}
try {
@@ -34,7 +34,7 @@ final readonly class GetSourceStatisticsListDbalHandler implements GetSourceStat
)
->from('source', 's')
->leftJoin('s', 'article', 'a', 'a.source_id = s.id')
->groupBy('s.id')
->groupBy('s.id, s.name')
->orderBy('s.name', 'ASC');
try {
@@ -20,6 +20,7 @@ final class OpenGraphType extends Type
{
return $platform->getJsonTypeDeclarationSQL([
'nullable' => true,
'jsonb' => true,
]);
}
@@ -9,6 +9,7 @@ use Basango\Aggregator\Domain\Model\Entity\Article;
use Basango\Aggregator\Domain\Model\Identity\ArticleId;
use Basango\Aggregator\Domain\Model\Repository\ArticleRepository;
use Basango\SharedKernel\Domain\Model\ValueObject\DateRange;
use DateTimeImmutable;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
@@ -69,9 +70,9 @@ final class ArticleOrmRepository extends ServiceEntityRepository implements Arti
}
if ($date instanceof DateRange) {
$qb->andWhere('a.publishedAt BETWEEN FROM_UNIXTIME(:start) AND FROM_UNIXTIME(:end)')
->setParameter('start', $date->start)
->setParameter('end', $date->end);
$qb->andWhere('a.publishedAt BETWEEN :startDate AND :endDate')
->setParameter('startDate', new DateTimeImmutable()->setTimestamp($date->start))
->setParameter('endDate', new DateTimeImmutable()->setTimestamp($date->end));
}
$limit = 1000;
@@ -11,7 +11,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetArticleCommentListDbalHandler.
@@ -41,7 +40,7 @@ final readonly class GetArticleCommentListDbalHandler implements GetArticleComme
->innerJoin('c', 'user', 'u', 'c.user_id = u.id')
->where('c.article_id = :articleId')
->orderBy('c.created_at', 'DESC')
->setParameter('articleId', $query->articleId->toBinary(), ParameterType::BINARY);
->setParameter('articleId', $query->articleId->toRfc4122());
$qb = $this->applyCursorPagination($qb, $query->page, new PaginatorKeyset('c.id', 'c.created_at'));
@@ -13,7 +13,6 @@ use Basango\FeedManagement\Infrastructure\Persistence\Doctrine\DBAL\Queries\Book
use Basango\FeedManagement\Infrastructure\Persistence\Doctrine\DBAL\Queries\SourceQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetArticleDetailsDbalHandler.
@@ -42,8 +41,8 @@ final readonly class GetArticleDetailsDbalHandler implements GetArticleDetailsHa
$qb->innerJoin('a', 'source', 's', 'a.source_id = s.id')
->from('article', 'a')
->where('a.id = :articleId')
->setParameter('articleId', $query->id->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('articleId', $query->id->toRfc4122())
->setParameter('userId', $query->userId->toRfc4122())
;
try {
@@ -14,7 +14,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetArticleOverviewListDbalHandler.
@@ -44,7 +43,7 @@ final readonly class GetArticleOverviewListDbalHandler implements GetArticleOver
$qb->from('article', 'a')
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
//->orderBy('a.published_at', $query->filters->sortDirection->value)
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toRfc4122())
;
$qb = $this->applyArticleFilters($qb, $query->filters);
@@ -12,7 +12,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetBookmarkListDbalHandler.
@@ -39,7 +38,7 @@ final readonly class GetBookmarkListDbalHandler implements GetBookmarkListHandle
->where('b.user_id = :userId')
->groupBy('b.id')
->orderBy('b.id', 'DESC')
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toRfc4122())
;
$qb = $this->applyCursorPagination($qb, $query->page, new PaginatorKeyset('b.id'));
@@ -13,7 +13,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetBookmarkedArticleListDbalHandler.
@@ -44,8 +43,8 @@ final readonly class GetBookmarkedArticleListDbalHandler implements GetBookmarke
->innerJoin('ba', 'bookmark', 'b', 'b.id = ba.bookmark_id AND b.user_id = :userId')
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
->where('b.id = :bookmarkId')
->setParameter('bookmarkId', $query->bookmarkId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('bookmarkId', $query->bookmarkId->toRfc4122())
->setParameter('userId', $query->userId->toRfc4122())
;
$qb = $this->applyArticleFilters($qb, $query->filters);
@@ -14,7 +14,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetArticleOverviewListDbalHandler.
@@ -45,8 +44,8 @@ final readonly class GetSourceArticleOverviewListDbalHandler implements GetSourc
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
->where('s.id = :sourceId')
->orderBy('a.published_at', $query->filters->sortDirection->value)
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('sourceId', $query->sourceId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toRfc4122())
->setParameter('sourceId', $query->sourceId->toRfc4122())
;
$qb = $this->applyArticleFilters($qb, $query->filters);
@@ -49,8 +49,10 @@ final readonly class GetSourceDetailsDbalHandler implements GetSourceDetailsHand
$qb->from('source', 's')
->leftJoin('s', 'article', 'a', 'a.source_id = s.id')
->where('s.id = :sourceId')
->setParameter('sourceId', $query->sourceId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY);
->setParameter('sourceId', $query->sourceId->toRfc4122())
->setParameter('userId', $query->userId->toRfc4122());
// Aggregate columns are selected; include non-aggregated columns in GROUP BY for PostgreSQL
$qb->groupBy('s.id, s.name, s.description, s.url, s.updated_at, s.display_name, s.bias, s.reliability, s.transparency');
try {
$data = $qb->executeQuery()->fetchAssociative();
@@ -79,10 +81,10 @@ final readonly class GetSourceDetailsDbalHandler implements GetSourceDetailsHand
->from('article', 'a')
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
->where(' s.id = :sourceId')
->andWhere('a.published_at BETWEEN FROM_UNIXTIME(:start) AND FROM_UNIXTIME(:end)')
->andWhere('a.published_at BETWEEN to_timestamp(:start) AND to_timestamp(:end)')
->groupBy('day')
->orderBy('day', 'ASC')
->setParameter('sourceId', $query->sourceId->toBinary(), ParameterType::BINARY)
->setParameter('sourceId', $query->sourceId->toRfc4122())
->setParameter('start', $dateRange->start, ParameterType::INTEGER)
->setParameter('end', $dateRange->end, ParameterType::INTEGER)
->enableResultCache(new QueryCacheProfile(SourceCacheAttributes::CACHE_TTL, $cacheKey));
@@ -120,11 +122,11 @@ final readonly class GetSourceDetailsDbalHandler implements GetSourceDetailsHand
{
$cacheKey = SourceCacheAttributes::CATEGORIES->withId($query->sourceId->toString());
$qb = $this->connection->createQueryBuilder()
->select('a.categories')
->select("array_to_string(a.categories, ',') AS categories")
->from('article', 'a')
->innerJoin('a', 'source', 's', 'a.source_id = s.id')
->where('s.id = :sourceId')
->setParameter('sourceId', $query->sourceId->toBinary(), ParameterType::BINARY)
->setParameter('sourceId', $query->sourceId->toRfc4122())
->enableResultCache(new QueryCacheProfile(SourceCacheAttributes::CACHE_TTL, $cacheKey));
try {
@@ -12,7 +12,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\Features\PaginationQuery;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\DBAL\NoResult;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetSourceOverviewListDbalHandler.
@@ -37,8 +36,7 @@ final readonly class GetSourceOverviewListDbalHandler implements GetSourceOvervi
$qb = $this->addFollowedSourceExistsQuery($qb);
$qb->from('source', 's')
->groupBy('s.name')
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $query->userId->toRfc4122())
;
$qb = $this->applyCursorPagination($qb, $query->page, new PaginatorKeyset('s.id', 's.created_at'));
@@ -22,7 +22,7 @@ trait ArticleQuery
'a.id as article_id',
'a.title as article_title',
'a.link as article_link',
'a.categories as article_categories',
"array_to_string(a.categories, ',') as article_categories",
'a.excerpt as article_excerpt',
'a.published_at as article_published_at',
'a.image as article_image',
@@ -36,7 +36,7 @@ trait ArticleQuery
'a.id as article_id',
'a.title as article_title',
'a.link as article_link',
'a.categories as article_categories',
"array_to_string(a.categories, ',') as article_categories",
'a.body as article_body',
'a.hash as article_hash',
'a.published_at as article_published_at',
@@ -62,17 +62,19 @@ trait ArticleQuery
private function applyArticleFilters(QueryBuilder $qb, ArticleFilters $filters): QueryBuilder
{
if ($filters->category !== null) {
$qb->andWhere('a.categories LIKE :category')
->setParameter('category', sprintf('%%%s%%', $filters->category));
// PostgreSQL array containment for single value
$qb->andWhere(':category = ANY(a.categories)')
->setParameter('category', $filters->category);
}
if ($filters->search !== null) {
$qb->andWhere('a.title LIKE :search')
// Case-insensitive search in PostgreSQL
$qb->andWhere('a.title ILIKE :search')
->setParameter('search', sprintf('%%%s%%', $filters->search));
}
if ($filters->dateRange instanceof DateRange) {
$qb->andWhere('a.published_at BETWEEN FROM_UNIXTIME(:start) AND FROM_UNIXTIME(:end)')
$qb->andWhere('a.published_at BETWEEN to_timestamp(:start) AND to_timestamp(:end)')
->setParameter('start', $filters->dateRange->start, ParameterType::INTEGER)
->setParameter('end', $filters->dateRange->end, ParameterType::INTEGER);
}
@@ -40,7 +40,7 @@ trait SourceQuery
"CONCAT('https://devscast.org/images/sources/', s.name, '.png') as source_image",
'COUNT(a.hash) AS articles_count',
'MAX(a.crawled_at) AS source_crawled_at',
'COUNT(CASE WHEN a.metadata IS NOT NULL THEN 1 ELSE NULL END) AS articles_metadata_available',
'COUNT(*) FILTER (WHERE a.metadata IS NOT NULL) AS articles_metadata_available',
);
}
@@ -9,7 +9,6 @@ use Basango\FeedManagement\Domain\Model\Entity\FollowedSource;
use Basango\FeedManagement\Domain\Model\Repository\FollowedSourceRepository;
use Basango\IdentityAndAccess\Domain\Model\Identity\UserId;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\DBAL\ParameterType;
use Doctrine\Persistence\ManagerRegistry;
/**
@@ -41,10 +40,10 @@ final class FollowedSourceOrmRepository extends ServiceEntityRepository implemen
public function getByUserId(UserId $userId, SourceId $sourceId): ?FollowedSource
{
return $this->createQueryBuilder('fs')
->andWhere('fs.follower = :userId')
->andWhere('fs.source = :sourceId')
->setParameter('sourceId', $sourceId->toBinary(), ParameterType::BINARY)
->setParameter('userId', $userId->toBinary(), ParameterType::BINARY)
->andWhere('IDENTITY(fs.follower) = :userId')
->andWhere('IDENTITY(fs.source) = :sourceId')
->setParameter('sourceId', $sourceId->toRfc4122())
->setParameter('userId', $userId->toRfc4122())
->getQuery()
->getOneOrNullResult();
}
@@ -33,7 +33,7 @@ class User
public readonly UserId $id;
private function __construct(
public function __construct(
private(set) string $name,
private(set) EmailAddress $email,
private(set) Roles $roles,
@@ -9,7 +9,6 @@ use Basango\IdentityAndAccess\Application\UseCase\Query\GetUserProfile;
use Basango\IdentityAndAccess\Application\UseCase\QueryHandler\GetUserProfileHandler;
use Basango\IdentityAndAccess\Domain\Exception\UserNotFound;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\ParameterType;
/**
* Class GetUserProfileDbalHandler.
@@ -35,7 +34,7 @@ final readonly class GetUserProfileDbalHandler implements GetUserProfileHandler
)
->from('user', 'u')
->where('u.id = :userId')
->setParameter('userId', $query->userId->toBinary(), ParameterType::BINARY);
->setParameter('userId', $query->userId->toRfc4122());
/** @var array<string, mixed>|false $data */
$data = $qb->executeQuery()->fetchAssociative();
@@ -52,7 +52,7 @@ final class LoginAttemptOrmRepository extends ServiceEntityRepository implements
$this->createQueryBuilder('la')
->delete(LoginAttempt::class, 'la')
->where('la.user = :user')
->setParameter('user', $user->id->toBinary())
->setParameter('user', $user)
->getQuery()
->execute();
}
@@ -8,7 +8,6 @@ use Basango\SharedKernel\Domain\Model\Pagination\Page;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationCursor;
use Basango\SharedKernel\Domain\Model\Pagination\PaginationInfo;
use Basango\SharedKernel\Domain\Model\Pagination\PaginatorKeyset;
use Doctrine\DBAL\ParameterType;
use Doctrine\DBAL\Query\QueryBuilder;
/**
@@ -41,12 +40,12 @@ trait PaginationQuery
if ($keyset->date === null) {
$qb
->andWhere(sprintf('%s <= :cursorLastId', $keyset->id))
->setParameter('cursorLastId', $cursor->id->toString(), ParameterType::BINARY);
->setParameter('cursorLastId', $cursor->id->toRfc4122());
} else {
$qb
->andWhere(sprintf('(%s, %s) <= (:cursorLastDate, :cursorLastId)', $keyset->date, $keyset->id))
->setParameter('cursorLastDate', $cursor->id->toBinary(), ParameterType::BINARY)
->setParameter('cursorLastId', $cursor->date->format('Y-m-d H:i:s'));
->setParameter('cursorLastDate', $cursor->date->format('Y-m-d H:i:s'))
->setParameter('cursorLastId', $cursor->id->toRfc4122());
}
return $qb->setMaxResults($page->limit + 1);
@@ -0,0 +1,301 @@
<?php
declare(strict_types=1);
namespace Basango\SharedKernel\Infrastructure\Persistence\Doctrine\Importer;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Statement;
use Doctrine\ORM\EntityManagerInterface;
use Generator;
use PDO;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\Uid\Uuid;
use Throwable;
/**
* ImportEngine: unified, naming-accurate API for migrating data
* from a source database (old MariaDB over PDO) to a target database
* (new PostgreSQL via Doctrine DBAL/ORM).
*
* - Source: MariaDB/MySQL via PDO (unbuffered)
* - Target: PostgreSQL via Doctrine DBAL/ORM
*
* Memory tactics:
* - Reuse a fixed-size params array for inserts (no per-row allocations)
* - Stream source rows unbuffered; close cursor in finally
* - Batch transactions; commit regularly
* - Disable DBAL middlewares/loggers; disable PDO emulate prepares
* - Periodic gc_collect_cycles() on long runs
*/
final readonly class ImportEngine
{
    /**
     * Columns to ignore per target table.
     * Key = normalized table name (lowercase, unquoted),
     * Value = list of column names to exclude from insert.
     *
     * @var array<string, list<string>>
     */
    private const array IGNORE_COLUMNS = [
        'article' => ['tsv', 'image', 'excerpt'],
    ];

    /** Target connection, taken from the injected entity manager. */
    private Connection $targetConnection;

    /** Source connection, opened over PDO with the mysql driver. */
    private PDO $sourceConnection;

    /**
     * Wires both connections.
     *
     * @param EntityManagerInterface $em   entity manager whose connection is used as the target
     * @param string                 $host source database host
     * @param string                 $user source database user
     * @param string                 $pass source database password
     * @param int                    $port source database port
     * @param string                 $name source database name
     */
    public function __construct(
        private EntityManagerInterface $em,
        #[Autowire(env: 'SOURCE_DATABASE_HOST')] private string $host,
        #[Autowire(env: 'SOURCE_DATABASE_USER')] private string $user,
        #[Autowire(env: 'SOURCE_DATABASE_PASS')] private string $pass,
        #[Autowire(env: 'SOURCE_DATABASE_PORT')] private int $port = 3306,
        #[Autowire(env: 'SOURCE_DATABASE_NAME')] private string $name = 'app',
    ) {
        // Target (PostgreSQL via Doctrine DBAL)
        $this->targetConnection = $this->em->getConnection();
        $this->targetConnection->getConfiguration()->setMiddlewares([]);

        // If DBAL exposes a native PDO, harden it for low memory
        try {
            $native = $this->targetConnection->getNativeConnection();
            if ($native instanceof PDO) {
                // Use server-side prepares; avoids driver-side buffering
                $native->setAttribute(PDO::ATTR_EMULATE_PREPARES, false);
                $native->setAttribute(PDO::ATTR_STRINGIFY_FETCHES, false);
                $native->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
            }
        } catch (Throwable) {
            // Deliberate best effort: if the platform/driver doesn't expose a PDO, ignore safely
        }

        // Source (MariaDB/MySQL via PDO), unbuffered
        $this->sourceConnection = new PDO(
            dsn: sprintf('mysql:host=%s;port=%d;dbname=%s;charset=utf8mb4', $this->host, $this->port, $this->name),
            username: $this->user,
            password: $this->pass,
            options: [
                PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
                PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
            ]
        );

        // Unbuffered cursor (critical for memory); the constant only exists when pdo_mysql is loaded
        if (defined('PDO::MYSQL_ATTR_USE_BUFFERED_QUERY')) {
            $this->sourceConnection->setAttribute(constant('PDO::MYSQL_ATTR_USE_BUFFERED_QUERY'), false);
        }
    }

    /**
     * Replaces the content of a target table with the rows of the same-named source table.
     *
     * @param string $table     table name, identical on both sides
     * @param int    $batchSize number of rows inserted per target transaction
     *
     * @return int number of rows inserted into the target table
     */
    public function import(string $table, int $batchSize = 1000): int
    {
        // Open the source cursor BEFORE truncating the target: if the source table
        // cannot be read, the exception is raised while the target is still intact.
        $rows = $this->copy($table);
        $this->reset($table);

        return $this->paste($table, $rows, $batchSize);
    }

    /**
     * Truncate target table safely with replication role toggling.
     *
     * The role switch, the TRUNCATE and the switch back run in one transaction
     * which is rolled back if any of the statements fails.
     */
    private function reset(string $tableName): void
    {
        $platform = $this->targetConnection->getDatabasePlatform();
        $this->targetConnection->beginTransaction();

        try {
            $this->targetConnection->executeStatement("SET session_replication_role = 'replica'");
            $this->targetConnection->executeStatement($platform->getTruncateTableSQL($tableName, true));
            $this->targetConnection->executeStatement("SET session_replication_role = 'origin'");
            $this->targetConnection->commit();
        } catch (Throwable $e) {
            if ($this->targetConnection->isTransactionActive()) {
                $this->targetConnection->rollBack();
            }

            throw $e;
        }
    }

    /**
     * Stream rows from MySQL unbuffered; ensure cursor is always closed.
     *
     * The query is issued immediately; rows are fetched lazily while the
     * returned generator is iterated.
     *
     * @return iterable<array<string, mixed>>
     */
    private function copy(string $table): iterable
    {
        // Backticks are stripped so the table name cannot break out of the quoted identifier
        $sql = sprintf('SELECT * FROM `%s`', str_replace('`', '', $table));
        $stmt = $this->sourceConnection->query($sql);

        if ($stmt === false) {
            // Never report "zero rows" for a failed read: the caller would
            // truncate the target and import nothing without noticing.
            throw new \RuntimeException(sprintf('Unable to read source table "%s".', $table));
        }

        return (static function () use ($stmt): Generator {
            try {
                while (($row = $stmt->fetch(PDO::FETCH_ASSOC)) !== false) {
                    yield $row;
                }
            } finally {
                // Free server resources ASAP
                $stmt->closeCursor();
            }
        })();
    }

    /**
     * Insert rows into PostgreSQL with minimal allocations.
     * - Fixed-size $params array reused per row
     * - Batch transactions to limit peak memory
     * - Periodic GC for long streams
     *
     * A transaction is opened lazily right before the first insert of each batch
     * and is always committed (or rolled back on error) before this method
     * returns, including when the row count is an exact multiple of the batch size.
     *
     * @param iterable<array<string, mixed>> $rows      source rows keyed by column name
     * @param int                            $batchSize rows per transaction; values <= 0 fall back to 1000
     *
     * @return int number of rows inserted
     */
    private function paste(string $table, iterable $rows, int $batchSize = 1000): int
    {
        if ($batchSize <= 0) {
            $batchSize = 1000;
        }

        $platform = $this->targetConnection->getDatabasePlatform();
        $quote = static fn (string|int $id): string => $platform->quoteIdentifier((string) $id);
        $ignoredFlip = array_flip($this->ignoredColumnsFor($table));

        $columns = null;
        $statement = null;
        $params = null; // fixed-size, reused
        $total = 0;
        $inBatch = 0;
        $transactionOpen = false; // true only while a transaction started here is pending

        try {
            foreach ($rows as $row) {
                // Build statement on first row (after ignoring columns)
                if ($columns === null) {
                    if ($ignoredFlip !== []) {
                        $row = array_diff_key($row, $ignoredFlip);
                    }

                    /** @var list<string> $columns */
                    $columns = array_map(static fn (int|string $k): string => (string) $k, array_keys($row));
                    $sql = sprintf(
                        'INSERT INTO %s (%s) VALUES (%s)',
                        $quote($table),
                        implode(', ', array_map($quote, $columns)),
                        implode(', ', array_fill(0, count($columns), '?')),
                    );
                    $statement = $this->targetConnection->prepare($sql);

                    // Allocate params array once, with fixed size
                    $params = array_fill(0, count($columns), null);
                }

                if (! $statement instanceof Statement) {
                    throw new \LogicException('Insert statement not initialized.');
                }

                // Start a batch transaction only when there is a row to put in it
                if (! $transactionOpen) {
                    $this->targetConnection->beginTransaction();
                    $transactionOpen = true;
                }

                // Fill params by index (avoid per-row array allocs)
                $i = 0;
                foreach ($columns as $col) {
                    $params[$i++] = $this->convertValue($col, $row[$col] ?? null);
                }

                // NOTE(review): passing parameters to executeStatement() looks DBAL 3.x specific —
                // confirm against the installed doctrine/dbal version.
                // @phpstan-ignore-next-line
                $statement->executeStatement($params);
                $total++;
                $inBatch++;

                if ($inBatch >= $batchSize) {
                    $this->targetConnection->commit();
                    $transactionOpen = false;
                    $inBatch = 0;

                    // Help GC on very long imports
                    if (($total % ($batchSize * 5)) === 0) {
                        gc_collect_cycles();
                    }
                }
            }

            // Commit trailing rows if any
            if ($transactionOpen) {
                $this->targetConnection->commit();
                $transactionOpen = false;
            }
        } catch (Throwable $e) {
            if ($transactionOpen && $this->targetConnection->isTransactionActive()) {
                $this->targetConnection->rollBack();
            }

            // Keep failure payloads small to avoid memory spikes
            throw $e;
        } finally {
            // Release large references promptly
            $statement = null;
            $columns = null;
            $params = null;
            gc_collect_cycles();
        }

        return $total;
    }

    /**
     * Converts one source value to the representation inserted into the target.
     *
     * - null stays null
     * - `id` / `*_id` columns: binary UUID to RFC 4122 string
     * - `*_at` columns holding the zero date: replaced by the current date-time
     * - `categories`: comma-separated list to a PostgreSQL array literal
     * - any other string: passed through ensureUtf8String()
     */
    private function convertValue(string $column, mixed $value): mixed
    {
        if ($value === null) {
            return null;
        }

        // Convert BINARY(16) UUIDs to canonical RFC4122
        if ($column === 'id' || str_ends_with($column, '_id')) {
            return Uuid::fromBinary($value)->toRfc4122();
        }

        // Convert invalid date to now()
        if (str_ends_with($column, '_at') && $value === '0000-00-00 00:00:00') {
            return (new \DateTimeImmutable('now'))->format('Y-m-d H:i:s');
        }

        if ($column === 'categories') {
            return $this->toPgTextArrayLiteral(
                is_string($value) ? $this->ensureUtf8String($value) : (string) $value
            );
        }

        return is_string($value) ? $this->ensureUtf8String($value) : $value;
    }

    /**
     * Builds a PostgreSQL array literal from a comma-separated list.
     *
     * Every element is trimmed, empty elements are dropped, and each remaining
     * element is double-quoted with `\` and `"` escaped, so braces, quotes,
     * backslashes or the word NULL inside a category cannot corrupt the literal.
     */
    private function toPgTextArrayLiteral(string $csv): string
    {
        $elements = [];
        foreach (explode(',', $csv) as $item) {
            $item = trim($item);
            if ($item === '') {
                continue;
            }

            $elements[] = '"' . strtr($item, ['\\' => '\\\\', '"' => '\\"']) . '"';
        }

        return '{' . implode(',', $elements) . '}';
    }

    /**
     * Returns the columns of IGNORE_COLUMNS registered for the given table.
     *
     * @return list<string> empty when the table has no ignored columns
     */
    private function ignoredColumnsFor(string $table): array
    {
        // Normalize: strip surrounding backticks/double quotes, compare case-insensitively
        $normalized = strtolower(trim($table, '`"'));

        return self::IGNORE_COLUMNS[$normalized] ?? [];
    }

    /**
     * Keep it cheap: fast-path valid UTF-8; otherwise minimal conversions.
     *
     * Returns the input unchanged when it is valid UTF-8 or when every
     * conversion attempt fails.
     */
    private function ensureUtf8String(string $value): string
    {
        // Fast path: valid UTF-8
        if (@preg_match('//u', $value) === 1) {
            return $value;
        }

        // Try common legacy encodings with transliteration
        $converted = @iconv('CP1252', 'UTF-8//TRANSLIT', $value);
        if ($converted === false) {
            $converted = @iconv('ISO-8859-1', 'UTF-8//TRANSLIT', $value);
        }

        if ($converted === false) {
            // Last resort: drop invalid sequences
            $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $value);
        }

        return $converted !== false ? $converted : $value;
    }
}
@@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
namespace Basango\SharedKernel\Presentation\Console;
use Basango\SharedKernel\Infrastructure\Persistence\Doctrine\Importer\ImportEngine;
use Override;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that copies a fixed list of tables through the ImportEngine.
 *
 * The command asks for confirmation first; each import starts by emptying the
 * target table (see ImportEngine::import()).
 */
#[AsCommand(
    name: 'app:sync-import',
    description: 'from mariadb to postgres'
)]
class SyncImport extends Command
{
    use AskArgumentFeature;

    /** Console style helper, created in initialize() before execute() runs. */
    private SymfonyStyle $io;

    public function __construct(
        private readonly ImportEngine $importEngine
    ) {
        parent::__construct();
    }

    #[Override]
    protected function initialize(InputInterface $input, OutputInterface $output): void
    {
        $this->io = new SymfonyStyle($input, $output);
    }

    /**
     * Imports every table in turn and reports the number of rows written.
     *
     * @return int Command::SUCCESS when all tables were imported,
     *             Command::FAILURE when the confirmation prompt was declined
     */
    #[Override]
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Defaults to "no": the import is only run after an explicit confirmation
        if (! $this->io->confirm('Do you want to continue?', false)) {
            $this->io->warning('Process aborted');

            return Command::FAILURE;
        }

        // NOTE(review): presumably ordered so that referenced tables (user, source)
        // are loaded before article — confirm against the schema's foreign keys.
        $tables = ['user', 'source', 'article'];

        foreach ($tables as $table) {
            $count = $this->importEngine->import($table);
            $this->io->text(sprintf('Imported %d records into %s table.', $count, $table));
        }

        $this->io->success('Import completed successfully');

        return Command::SUCCESS;
    }
}