migrate article embeddings to support multi-model architecture and enhance data integrity

This commit is contained in:
ImBenji 2026-04-18 17:32:21 +01:00
parent 4616998064
commit d0f0495c5c

View file

@@ -111,6 +111,29 @@ function rebuildVec0IfModelChanged() {
SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1
`).get(EMBEDDING_MODEL);
// Backfill step: find embeddings for the current EMBEDDING_MODEL that exist in
// article_embedding_store but have no row with the same article_id in
// article_embeddings (the table this file refers to as "vec0"), and copy them over.
const missing = db.prepare(`
SELECT article_id, embedding FROM article_embedding_store
WHERE model = ?
AND NOT EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = article_embedding_store.article_id)
`).all(EMBEDDING_MODEL);
if (missing.length > 0) {
// Both statements use INSERT OR IGNORE, which asks SQLite to skip a row that
// would violate a uniqueness constraint instead of failing the statement.
const insertVec = db.prepare(`INSERT OR IGNORE INTO article_embeddings (article_id, embedding) VALUES (?, ?)`);
const insertMeta = db.prepare(`INSERT OR IGNORE INTO article_embedding_meta (article_id, model) VALUES (?, ?)`);
// All inserts for the missing rows are issued from one db.transaction(...) callback.
// NOTE(review): presumably this commits or rolls back the whole batch together — confirm against the db driver in use.
const sync = db.transaction(() => {
for (const row of missing) {
// Reinterpret the stored bytes as 32-bit floats (4 bytes per element); this is a
// view over the same memory, not a copy.
// NOTE(review): assumes row.embedding is a Buffer/typed array of raw float32 data
// whose byteOffset is 4-byte aligned — confirm against the code that writes the store.
const vals = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
// article_id is converted to BigInt for this insert only; the meta insert below
// passes it unchanged. The embedding is first passed through padEmbeddingForVec0
// (defined elsewhere in this file).
// NOTE(review): presumably the vec0 table needs an integer-typed key binding — confirm.
insertVec.run(BigInt(row.article_id), padEmbeddingForVec0(vals));
// Record which model produced the embedding just copied.
insertMeta.run(row.article_id, EMBEDDING_MODEL);
}
});
sync();
// Logs the number of candidate rows selected above; a row skipped by OR IGNORE
// would still be included in this count.
console.log(`synced ${missing.length} store embeddings into vec0`);
}
if (!stale) return;
const storeCount = db.prepare(`