feat(db): resumable updates
This commit is contained in:
@@ -18,6 +18,7 @@ type TargetOptions = {
|
|||||||
batchSize?: number;
|
batchSize?: number;
|
||||||
pageSize?: number;
|
pageSize?: number;
|
||||||
ignoreColumns?: Record<string, string[]>;
|
ignoreColumns?: Record<string, string[]>;
|
||||||
|
resume?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
const DEFAULT_IGNORE: Record<string, string[]> = {
|
const DEFAULT_IGNORE: Record<string, string[]> = {
|
||||||
@@ -30,6 +31,7 @@ class Engine {
|
|||||||
private readonly ignore: Record<string, string[]>;
|
private readonly ignore: Record<string, string[]>;
|
||||||
private readonly pageSize: number;
|
private readonly pageSize: number;
|
||||||
private readonly batchSize: number;
|
private readonly batchSize: number;
|
||||||
|
private readonly resume: boolean;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly sourceOptions: SourceOptions,
|
private readonly sourceOptions: SourceOptions,
|
||||||
@@ -43,8 +45,9 @@ class Engine {
|
|||||||
this.ignore = { ...DEFAULT_IGNORE, ...(this.targetOptions.ignoreColumns ?? {}) };
|
this.ignore = { ...DEFAULT_IGNORE, ...(this.targetOptions.ignoreColumns ?? {}) };
|
||||||
this.pageSize = this.targetOptions.pageSize ?? 1000;
|
this.pageSize = this.targetOptions.pageSize ?? 1000;
|
||||||
this.batchSize = Math.max(1, this.targetOptions.batchSize ?? 50);
|
this.batchSize = Math.max(1, this.targetOptions.batchSize ?? 50);
|
||||||
|
this.resume = !!this.targetOptions.resume;
|
||||||
console.log(
|
console.log(
|
||||||
`Engine initialized with pageSize=${this.pageSize} and batchSize=${this.batchSize}`,
|
`Engine initialized with pageSize=${this.pageSize} and batchSize=${this.batchSize} (resume=${this.resume})`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,11 +56,21 @@ class Engine {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async import(table: string): Promise<number> {
|
async import(table: string): Promise<number> {
|
||||||
await this.reset(table);
|
await this.ensureProgressTable();
|
||||||
return await this.paste(table, this.copy(table));
|
|
||||||
|
let startOffset = 0;
|
||||||
|
if (this.resume) {
|
||||||
|
startOffset = await this.getProgressOffset(table);
|
||||||
|
console.log(`Resuming import for ${table} from offset=${startOffset}`);
|
||||||
|
} else {
|
||||||
|
await this.reset(table);
|
||||||
|
await this.setProgressOffset(table, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return await this.paste(table, this.copy(table, startOffset), startOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async *copy(table: string): AsyncGenerator<Record<string, unknown>> {
|
private async *copy(table: string, startOffset = 0): AsyncGenerator<Record<string, unknown>> {
|
||||||
const mysql = await import("mysql2/promise");
|
const mysql = await import("mysql2/promise");
|
||||||
|
|
||||||
const source = mysql.createPool({
|
const source = mysql.createPool({
|
||||||
@@ -70,12 +83,12 @@ class Engine {
|
|||||||
user: this.sourceOptions.user,
|
user: this.sourceOptions.user,
|
||||||
});
|
});
|
||||||
|
|
||||||
let offset = 0;
|
let offset = startOffset;
|
||||||
const size = this.pageSize;
|
const size = this.pageSize;
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
const [rows] = await source.query<RowDataPacket[]>(
|
const [rows] = await source.query<RowDataPacket[]>(
|
||||||
`SELECT * FROM \`${this.escapeBacktick(table)}\` LIMIT ? OFFSET ?`,
|
`SELECT * FROM \`${this.escapeBacktick(table)}\` ORDER BY \`id\` LIMIT ? OFFSET ?`,
|
||||||
[size, offset],
|
[size, offset],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -98,12 +111,14 @@ class Engine {
|
|||||||
private async paste(
|
private async paste(
|
||||||
table: string,
|
table: string,
|
||||||
rows: AsyncGenerator<Record<string, unknown>>,
|
rows: AsyncGenerator<Record<string, unknown>>,
|
||||||
|
startOffset = 0,
|
||||||
): Promise<number> {
|
): Promise<number> {
|
||||||
const target = await this.target.connect();
|
const target = await this.target.connect();
|
||||||
let total = 0;
|
let total = 0;
|
||||||
let inBatch = 0;
|
let inBatch = 0;
|
||||||
let columns: string[] | null = null;
|
let columns: string[] | null = null;
|
||||||
let insertSql = "";
|
let insertSql = "";
|
||||||
|
let upsertSql = "";
|
||||||
|
|
||||||
const ignored = this.ignoredColumnsFor(table);
|
const ignored = this.ignoredColumnsFor(table);
|
||||||
const ignoredSet = new Set(ignored);
|
const ignoredSet = new Set(ignored);
|
||||||
@@ -131,6 +146,12 @@ class Engine {
|
|||||||
const placeholders = columns.map((_, i) => `$${i + 1}`).join(", ");
|
const placeholders = columns.map((_, i) => `$${i + 1}`).join(", ");
|
||||||
insertSql = `INSERT INTO ${this.quote(table)} (${colsSql}) VALUES (${placeholders})`;
|
insertSql = `INSERT INTO ${this.quote(table)} (${colsSql}) VALUES (${placeholders})`;
|
||||||
|
|
||||||
|
const updateCols = columns.filter((c) => c !== "id");
|
||||||
|
const setSql = updateCols
|
||||||
|
.map((c) => `${this.quote(c)} = EXCLUDED.${this.quote(c)}`)
|
||||||
|
.join(", ");
|
||||||
|
upsertSql = `${insertSql} ON CONFLICT ("id") DO UPDATE SET ${setSql}`;
|
||||||
|
|
||||||
await target.query("BEGIN");
|
await target.query("BEGIN");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,14 +160,14 @@ class Engine {
|
|||||||
const params = columns!.map((c) => this.valueForColumn(c, transformed));
|
const params = columns!.map((c) => this.valueForColumn(c, transformed));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await target.query(insertSql, params);
|
await target.query(upsertSql, params);
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
// Fallback: coerce all *_at params to now() and retry once
|
// Fallback: coerce all *_at params to now() and retry once
|
||||||
// This will never happen in production but anyway let's keep it safe
|
// This will never happen in production but anyway let's keep it safe
|
||||||
const msg = String((err as Error)?.message ?? "");
|
const msg = String((err as Error)?.message ?? "");
|
||||||
if (msg.includes("invalid input syntax for type timestamp")) {
|
if (msg.includes("invalid input syntax for type timestamp")) {
|
||||||
const fixed = columns!.map((c, i) => (c.endsWith("_at") ? new Date() : params[i]));
|
const fixed = columns!.map((c, i) => (c.endsWith("_at") ? new Date() : params[i]));
|
||||||
await target.query(insertSql, fixed);
|
await target.query(upsertSql, fixed);
|
||||||
} else {
|
} else {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
@@ -155,6 +176,7 @@ class Engine {
|
|||||||
inBatch++;
|
inBatch++;
|
||||||
|
|
||||||
if (inBatch >= this.batchSize) {
|
if (inBatch >= this.batchSize) {
|
||||||
|
await this.setProgressOffset(table, startOffset + total, target);
|
||||||
await target.query("COMMIT");
|
await target.query("COMMIT");
|
||||||
inBatch = 0;
|
inBatch = 0;
|
||||||
await target.query("BEGIN");
|
await target.query("BEGIN");
|
||||||
@@ -163,6 +185,7 @@ class Engine {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inBatch > 0) {
|
if (inBatch > 0) {
|
||||||
|
await this.setProgressOffset(table, startOffset + total, target);
|
||||||
await target.query("COMMIT");
|
await target.query("COMMIT");
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -385,6 +408,56 @@ class Engine {
|
|||||||
private escapeBacktick(id: string) {
|
private escapeBacktick(id: string) {
|
||||||
return id.replaceAll("`", "``");
|
return id.replaceAll("`", "``");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async ensureProgressTable() {
|
||||||
|
const client = await this.target.connect();
|
||||||
|
try {
|
||||||
|
await client.query(
|
||||||
|
`CREATE TABLE IF NOT EXISTS "__sync_state" (
|
||||||
|
table_name text PRIMARY KEY,
|
||||||
|
last_offset integer NOT NULL DEFAULT 0,
|
||||||
|
updated_at timestamp NOT NULL DEFAULT NOW()
|
||||||
|
)`,
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
client.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async getProgressOffset(table: string): Promise<number> {
|
||||||
|
const client = await this.target.connect();
|
||||||
|
try {
|
||||||
|
const { rows } = await client.query<{ last_offset: number }>(
|
||||||
|
`SELECT last_offset FROM "__sync_state" WHERE table_name = $1`,
|
||||||
|
[this.normalizedName(table)],
|
||||||
|
);
|
||||||
|
return rows[0]?.last_offset ?? 0;
|
||||||
|
} finally {
|
||||||
|
client.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async setProgressOffset(
|
||||||
|
table: string,
|
||||||
|
offset: number,
|
||||||
|
client?: PoolClient,
|
||||||
|
): Promise<void> {
|
||||||
|
const run = async (c: PoolClient) => {
|
||||||
|
await c.query(
|
||||||
|
`INSERT INTO "__sync_state" (table_name, last_offset, updated_at)
|
||||||
|
VALUES ($1, $2, NOW())
|
||||||
|
ON CONFLICT (table_name) DO UPDATE SET last_offset = EXCLUDED.last_offset, updated_at = NOW()`,
|
||||||
|
[this.normalizedName(table), offset],
|
||||||
|
);
|
||||||
|
};
|
||||||
|
if (client) return run(client);
|
||||||
|
const c = await this.target.connect();
|
||||||
|
try {
|
||||||
|
await run(c);
|
||||||
|
} finally {
|
||||||
|
c.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function bufferToUuid(buf: Buffer): string {
|
function bufferToUuid(buf: Buffer): string {
|
||||||
@@ -410,6 +483,10 @@ async function safeRollback(client: PoolClient) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
|
const argv = process.argv.slice(2);
|
||||||
|
const resume = argv.includes("--resume") || argv.includes("-r");
|
||||||
|
const tables = argv.filter((a) => !a.startsWith("-"));
|
||||||
|
|
||||||
const engine = new Engine(
|
const engine = new Engine(
|
||||||
{
|
{
|
||||||
database: env("BASANGO_SOURCE_DATABASE_NAME"),
|
database: env("BASANGO_SOURCE_DATABASE_NAME"),
|
||||||
@@ -419,11 +496,11 @@ async function main() {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
database: env("BASANGO_DATABASE_URL"),
|
database: env("BASANGO_DATABASE_URL"),
|
||||||
|
resume,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const tables = process.argv.slice(2);
|
|
||||||
if (tables.length === 0) tables.push("user", "source", "article");
|
if (tables.length === 0) tables.push("user", "source", "article");
|
||||||
for (const t of tables) {
|
for (const t of tables) {
|
||||||
const count = await engine.import(t);
|
const count = await engine.import(t);
|
||||||
|
|||||||
Reference in New Issue
Block a user