migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
parent
4616998064
commit
d0f0495c5c
1 changed file with 23 additions and 0 deletions
|
|
@@ -111,6 +111,29 @@ function rebuildVec0IfModelChanged() {
|
|||
SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1
|
||||
`).get(EMBEDDING_MODEL);
|
||||
|
||||
// also sync any store entries that didn't make it into vec0
|
||||
const missing = db.prepare(`
|
||||
SELECT article_id, embedding FROM article_embedding_store
|
||||
WHERE model = ?
|
||||
AND NOT EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = article_embedding_store.article_id)
|
||||
`).all(EMBEDDING_MODEL);
|
||||
|
||||
if (missing.length > 0) {
|
||||
const insertVec = db.prepare(`INSERT OR IGNORE INTO article_embeddings (article_id, embedding) VALUES (?, ?)`);
|
||||
const insertMeta = db.prepare(`INSERT OR IGNORE INTO article_embedding_meta (article_id, model) VALUES (?, ?)`);
|
||||
|
||||
const sync = db.transaction(() => {
|
||||
for (const row of missing) {
|
||||
const vals = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
|
||||
insertVec.run(BigInt(row.article_id), padEmbeddingForVec0(vals));
|
||||
insertMeta.run(row.article_id, EMBEDDING_MODEL);
|
||||
}
|
||||
});
|
||||
|
||||
sync();
|
||||
console.log(`synced ${missing.length} store embeddings into vec0`);
|
||||
}
|
||||
|
||||
if (!stale) return;
|
||||
|
||||
const storeCount = db.prepare(`
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue