feat(db): resumable updates

This commit is contained in:
2025-11-12 17:02:01 +02:00
parent a3f46b6b38
commit b5a24c886a
+86 -9
View File
@@ -18,6 +18,7 @@ type TargetOptions = {
batchSize?: number; batchSize?: number;
pageSize?: number; pageSize?: number;
ignoreColumns?: Record<string, string[]>; ignoreColumns?: Record<string, string[]>;
resume?: boolean;
}; };
const DEFAULT_IGNORE: Record<string, string[]> = { const DEFAULT_IGNORE: Record<string, string[]> = {
@@ -30,6 +31,7 @@ class Engine {
private readonly ignore: Record<string, string[]>; private readonly ignore: Record<string, string[]>;
private readonly pageSize: number; private readonly pageSize: number;
private readonly batchSize: number; private readonly batchSize: number;
private readonly resume: boolean;
constructor( constructor(
private readonly sourceOptions: SourceOptions, private readonly sourceOptions: SourceOptions,
@@ -43,8 +45,9 @@ class Engine {
this.ignore = { ...DEFAULT_IGNORE, ...(this.targetOptions.ignoreColumns ?? {}) }; this.ignore = { ...DEFAULT_IGNORE, ...(this.targetOptions.ignoreColumns ?? {}) };
this.pageSize = this.targetOptions.pageSize ?? 1000; this.pageSize = this.targetOptions.pageSize ?? 1000;
this.batchSize = Math.max(1, this.targetOptions.batchSize ?? 50); this.batchSize = Math.max(1, this.targetOptions.batchSize ?? 50);
this.resume = !!this.targetOptions.resume;
console.log( console.log(
`Engine initialized with pageSize=${this.pageSize} and batchSize=${this.batchSize}`, `Engine initialized with pageSize=${this.pageSize} and batchSize=${this.batchSize} (resume=${this.resume})`,
); );
} }
@@ -53,11 +56,21 @@ class Engine {
} }
async import(table: string): Promise<number> { async import(table: string): Promise<number> {
await this.reset(table); await this.ensureProgressTable();
return await this.paste(table, this.copy(table));
let startOffset = 0;
if (this.resume) {
startOffset = await this.getProgressOffset(table);
console.log(`Resuming import for ${table} from offset=${startOffset}`);
} else {
await this.reset(table);
await this.setProgressOffset(table, 0);
}
return await this.paste(table, this.copy(table, startOffset), startOffset);
} }
private async *copy(table: string): AsyncGenerator<Record<string, unknown>> { private async *copy(table: string, startOffset = 0): AsyncGenerator<Record<string, unknown>> {
const mysql = await import("mysql2/promise"); const mysql = await import("mysql2/promise");
const source = mysql.createPool({ const source = mysql.createPool({
@@ -70,12 +83,12 @@ class Engine {
user: this.sourceOptions.user, user: this.sourceOptions.user,
}); });
let offset = 0; let offset = startOffset;
const size = this.pageSize; const size = this.pageSize;
try { try {
while (true) { while (true) {
const [rows] = await source.query<RowDataPacket[]>( const [rows] = await source.query<RowDataPacket[]>(
`SELECT * FROM \`${this.escapeBacktick(table)}\` LIMIT ? OFFSET ?`, `SELECT * FROM \`${this.escapeBacktick(table)}\` ORDER BY \`id\` LIMIT ? OFFSET ?`,
[size, offset], [size, offset],
); );
@@ -98,12 +111,14 @@ class Engine {
private async paste( private async paste(
table: string, table: string,
rows: AsyncGenerator<Record<string, unknown>>, rows: AsyncGenerator<Record<string, unknown>>,
startOffset = 0,
): Promise<number> { ): Promise<number> {
const target = await this.target.connect(); const target = await this.target.connect();
let total = 0; let total = 0;
let inBatch = 0; let inBatch = 0;
let columns: string[] | null = null; let columns: string[] | null = null;
let insertSql = ""; let insertSql = "";
let upsertSql = "";
const ignored = this.ignoredColumnsFor(table); const ignored = this.ignoredColumnsFor(table);
const ignoredSet = new Set(ignored); const ignoredSet = new Set(ignored);
@@ -131,6 +146,12 @@ class Engine {
const placeholders = columns.map((_, i) => `$${i + 1}`).join(", "); const placeholders = columns.map((_, i) => `$${i + 1}`).join(", ");
insertSql = `INSERT INTO ${this.quote(table)} (${colsSql}) VALUES (${placeholders})`; insertSql = `INSERT INTO ${this.quote(table)} (${colsSql}) VALUES (${placeholders})`;
const updateCols = columns.filter((c) => c !== "id");
const setSql = updateCols
.map((c) => `${this.quote(c)} = EXCLUDED.${this.quote(c)}`)
.join(", ");
upsertSql = `${insertSql} ON CONFLICT ("id") DO UPDATE SET ${setSql}`;
await target.query("BEGIN"); await target.query("BEGIN");
} }
@@ -139,14 +160,14 @@ class Engine {
const params = columns!.map((c) => this.valueForColumn(c, transformed)); const params = columns!.map((c) => this.valueForColumn(c, transformed));
try { try {
await target.query(insertSql, params); await target.query(upsertSql, params);
} catch (err: unknown) { } catch (err: unknown) {
// Fallback: coerce all *_at params to now() and retry once // Fallback: coerce all *_at params to now() and retry once
// This will never happen in production but anyway let's keep it safe // This will never happen in production but anyway let's keep it safe
const msg = String((err as Error)?.message ?? ""); const msg = String((err as Error)?.message ?? "");
if (msg.includes("invalid input syntax for type timestamp")) { if (msg.includes("invalid input syntax for type timestamp")) {
const fixed = columns!.map((c, i) => (c.endsWith("_at") ? new Date() : params[i])); const fixed = columns!.map((c, i) => (c.endsWith("_at") ? new Date() : params[i]));
await target.query(insertSql, fixed); await target.query(upsertSql, fixed);
} else { } else {
throw err; throw err;
} }
@@ -155,6 +176,7 @@ class Engine {
inBatch++; inBatch++;
if (inBatch >= this.batchSize) { if (inBatch >= this.batchSize) {
await this.setProgressOffset(table, startOffset + total, target);
await target.query("COMMIT"); await target.query("COMMIT");
inBatch = 0; inBatch = 0;
await target.query("BEGIN"); await target.query("BEGIN");
@@ -163,6 +185,7 @@ class Engine {
} }
if (inBatch > 0) { if (inBatch > 0) {
await this.setProgressOffset(table, startOffset + total, target);
await target.query("COMMIT"); await target.query("COMMIT");
} }
} catch (e) { } catch (e) {
@@ -385,6 +408,56 @@ class Engine {
private escapeBacktick(id: string) { private escapeBacktick(id: string) {
return id.replaceAll("`", "``"); return id.replaceAll("`", "``");
} }
private async ensureProgressTable() {
const client = await this.target.connect();
try {
await client.query(
`CREATE TABLE IF NOT EXISTS "__sync_state" (
table_name text PRIMARY KEY,
last_offset integer NOT NULL DEFAULT 0,
updated_at timestamp NOT NULL DEFAULT NOW()
)`,
);
} finally {
client.release();
}
}
/**
 * Look up the last committed import offset recorded for `table` in
 * `__sync_state`. Returns 0 when no progress row exists yet.
 */
private async getProgressOffset(table: string): Promise<number> {
  const conn = await this.target.connect();
  try {
    const result = await conn.query<{ last_offset: number }>(
      `SELECT last_offset FROM "__sync_state" WHERE table_name = $1`,
      [this.normalizedName(table)],
    );
    const row = result.rows[0];
    return row === undefined ? 0 : row.last_offset;
  } finally {
    conn.release();
  }
}
/**
 * Record `offset` as the resume point for `table` in `__sync_state`
 * (insert-or-update keyed by table name).
 *
 * When `client` is supplied the write runs on that connection — so the
 * caller can include it in an open transaction and commit it atomically
 * with the imported rows — and the connection is NOT released here.
 * Otherwise a pooled connection is acquired and released internally.
 */
private async setProgressOffset(
  table: string,
  offset: number,
  client?: PoolClient,
): Promise<void> {
  // Borrow the caller's client when given; otherwise own one for this call.
  const conn = client ?? (await this.target.connect());
  try {
    await conn.query(
      `INSERT INTO "__sync_state" (table_name, last_offset, updated_at)
VALUES ($1, $2, NOW())
ON CONFLICT (table_name) DO UPDATE SET last_offset = EXCLUDED.last_offset, updated_at = NOW()`,
      [this.normalizedName(table), offset],
    );
  } finally {
    // Only release connections we acquired ourselves.
    if (client === undefined) conn.release();
  }
}
} }
function bufferToUuid(buf: Buffer): string { function bufferToUuid(buf: Buffer): string {
@@ -410,6 +483,10 @@ async function safeRollback(client: PoolClient) {
} }
async function main() { async function main() {
const argv = process.argv.slice(2);
const resume = argv.includes("--resume") || argv.includes("-r");
const tables = argv.filter((a) => !a.startsWith("-"));
const engine = new Engine( const engine = new Engine(
{ {
database: env("BASANGO_SOURCE_DATABASE_NAME"), database: env("BASANGO_SOURCE_DATABASE_NAME"),
@@ -419,11 +496,11 @@ async function main() {
}, },
{ {
database: env("BASANGO_DATABASE_URL"), database: env("BASANGO_DATABASE_URL"),
resume,
}, },
); );
try { try {
const tables = process.argv.slice(2);
if (tables.length === 0) tables.push("user", "source", "article"); if (tables.length === 0) tables.push("user", "source", "article");
for (const t of tables) { for (const t of tables) {
const count = await engine.import(t); const count = await engine.import(t);