migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
parent
3e74bd7286
commit
1e442df426
1 changed files with 41 additions and 0 deletions
|
|
@ -117,6 +117,47 @@ try {
|
|||
console.error('embedding store backfill failed:', err);
|
||||
}
|
||||
|
||||
// probe the API to get the real dimension count for the current model, then purge
|
||||
// any store entries that don't match — handles the case where old embeddings
|
||||
// got stamped with the wrong model name during migration
|
||||
/**
 * Removes stored embeddings for EMBEDDING_MODEL whose blob size does not match
 * the vector size the embedding API currently returns.
 *
 * Requests one probe embedding via `requestEmbedding('probe')`, takes its length
 * as the expected dimension count, and treats `dims * 4` as the expected blob
 * size in bytes (presumably 4-byte floats per dimension — confirm against the
 * code that writes `article_embedding_store.embedding`). Every store row for the
 * current model with a different `LENGTH(embedding)` is deleted, together with
 * the matching rows in `article_embedding_meta` and `article_embeddings`, inside
 * a single transaction.
 *
 * Returns without doing anything when no OpenRouter API key is configured.
 * Errors from the probe request or the database calls are caught and logged.
 *
 * @returns {Promise<void>}
 */
async function purgeWrongSizeEmbeddings() {
  const apiKey = config.openRouter && config.openRouter.apiKey
    ? String(config.openRouter.apiKey).trim()
    : '';

  // the probe needs the API; with no key there is nothing to validate against
  if (!apiKey) return;

  try {
    const probe = await requestEmbedding('probe');
    const dims = probe == null ? 0 : probe.length;

    // An empty or malformed probe would make the expected size 0, and the query
    // below would then match every non-empty embedding stored for this model.
    // Refuse to purge anything when the probe is not a usable reference.
    if (!Number.isInteger(dims) || dims <= 0) {
      console.error('embedding size validation failed: probe embedding has no dimensions');
      return;
    }

    const expectedBytes = dims * 4;

    const stale = db.prepare(`
      SELECT article_id FROM article_embedding_store
      WHERE model = ? AND LENGTH(embedding) != ?
    `).all(EMBEDDING_MODEL, expectedBytes);

    if (stale.length === 0) return;

    const deleteStore = db.prepare(`DELETE FROM article_embedding_store WHERE article_id = ? AND model = ?`);
    // NOTE(review): the meta and vec rows are removed by article_id only, not by
    // model — confirm those tables hold a single row per article.
    const deleteMeta = db.prepare(`DELETE FROM article_embedding_meta WHERE article_id = ?`);
    const deleteVec = db.prepare(`DELETE FROM article_embeddings WHERE article_id = ?`);

    // one transaction so the three tables are updated together
    db.transaction(() => {
      for (const row of stale) {
        deleteStore.run(row.article_id, EMBEDDING_MODEL);
        deleteMeta.run(row.article_id);
        deleteVec.run(BigInt(row.article_id));
      }
    })();

    console.log(`purged ${stale.length} wrong-size embeddings for model ${EMBEDDING_MODEL} (expected ${dims} dims)`);
  } catch (err) {
    console.error('embedding size validation failed:', err);
  }
}
|
||||
|
||||
// Started without awaiting: failures in the probe request or the deletes are
// caught and logged inside the function itself.
// NOTE(review): code that runs right after this line may execute before the
// purge has finished — confirm that ordering is acceptable.
purgeWrongSizeEmbeddings();
|
||||
|
||||
// if the config model changed, rebuild the vec0 search index from store.
|
||||
// only proceeds if the store actually has embeddings for the new model.
|
||||
function rebuildVec0IfModelChanged() {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue