migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
parent
4616998064
commit
d0f0495c5c
1 changed file with 23 additions and 0 deletions
|
|
@@ -111,6 +111,29 @@ function rebuildVec0IfModelChanged() {
|
||||||
SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1
|
SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1
|
||||||
`).get(EMBEDDING_MODEL);
|
`).get(EMBEDDING_MODEL);
|
||||||
|
|
||||||
|
// also sync any store entries that didn't make it into vec0
|
||||||
|
const missing = db.prepare(`
|
||||||
|
SELECT article_id, embedding FROM article_embedding_store
|
||||||
|
WHERE model = ?
|
||||||
|
AND NOT EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = article_embedding_store.article_id)
|
||||||
|
`).all(EMBEDDING_MODEL);
|
||||||
|
|
||||||
|
if (missing.length > 0) {
|
||||||
|
const insertVec = db.prepare(`INSERT OR IGNORE INTO article_embeddings (article_id, embedding) VALUES (?, ?)`);
|
||||||
|
const insertMeta = db.prepare(`INSERT OR IGNORE INTO article_embedding_meta (article_id, model) VALUES (?, ?)`);
|
||||||
|
|
||||||
|
const sync = db.transaction(() => {
|
||||||
|
for (const row of missing) {
|
||||||
|
const vals = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
|
||||||
|
insertVec.run(BigInt(row.article_id), padEmbeddingForVec0(vals));
|
||||||
|
insertMeta.run(row.article_id, EMBEDDING_MODEL);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
sync();
|
||||||
|
console.log(`synced ${missing.length} store embeddings into vec0`);
|
||||||
|
}
|
||||||
|
|
||||||
if (!stale) return;
|
if (!stale) return;
|
||||||
|
|
||||||
const storeCount = db.prepare(`
|
const storeCount = db.prepare(`
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue