migrate article embeddings to support multi-model architecture and enhance data integrity

This commit is contained in:
ImBenji 2026-04-18 18:00:00 +01:00
parent d0f0495c5c
commit bdc7325387

View file

@ -85,6 +85,19 @@ const upsertQueryEmbedding = db.prepare(`
created_at = datetime('now')
`);
// Fixed component count that padEmbeddingForVec0 pads shorter embeddings up to.
const VEC0_DIM = 8192;

/**
 * Packs embedding components into a Buffer of 32-bit floats.
 *
 * The values are first copied into a new Float32Array, so each component
 * occupies 4 bytes in the returned Buffer.
 *
 * @param {ArrayLike<number>} values - Embedding components.
 * @returns {Buffer} Buffer backed by the Float32Array's ArrayBuffer.
 */
function serializeEmbedding(values) {
  return Buffer.from(new Float32Array(values).buffer);
}

/**
 * Serializes an embedding as exactly VEC0_DIM float32 components,
 * zero-filling the tail when the input is shorter than VEC0_DIM.
 *
 * @param {ArrayLike<number>} values - Embedding components (at most VEC0_DIM).
 * @returns {Buffer} Buffer of VEC0_DIM * 4 bytes.
 * @throws {RangeError} If `values` has more than VEC0_DIM components.
 */
function padEmbeddingForVec0(values) {
  const { length } = values;

  // Already the target width: no padding needed.
  if (length === VEC0_DIM) return serializeEmbedding(values);

  // TypedArray#set would reject an oversized source anyway, but only with a
  // generic "offset is out of bounds" message. Fail early and say what was
  // wrong so the offending embedding can be identified.
  if (length > VEC0_DIM) {
    throw new RangeError(
      `embedding length ${length} exceeds VEC0_DIM (${VEC0_DIM})`,
    );
  }

  // A new Float32Array is zero-initialized, so copying the values to the
  // front leaves the remaining slots as zero padding.
  const padded = new Float32Array(VEC0_DIM);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
// backfill store from vec0 for any embeddings that predate multi-model support.
// only runs when store is completely empty, so we never stamp the wrong model on existing data.
try {
@ -193,19 +206,6 @@ function buildEmbeddingInput(article) {
return [title, description, content].join('\n\n');
}
// Fixed component count that padEmbeddingForVec0 pads shorter embeddings up to.
const VEC0_DIM = 8192;

/**
 * Packs embedding components into a Buffer of 32-bit floats.
 *
 * The values are first copied into a new Float32Array, so each component
 * occupies 4 bytes in the returned Buffer.
 *
 * @param {ArrayLike<number>} values - Embedding components.
 * @returns {Buffer} Buffer backed by the Float32Array's ArrayBuffer.
 */
function serializeEmbedding(values) {
  return Buffer.from(new Float32Array(values).buffer);
}

/**
 * Serializes an embedding as exactly VEC0_DIM float32 components,
 * zero-filling the tail when the input is shorter than VEC0_DIM.
 *
 * @param {ArrayLike<number>} values - Embedding components (at most VEC0_DIM).
 * @returns {Buffer} Buffer of VEC0_DIM * 4 bytes.
 * @throws {RangeError} If `values` has more than VEC0_DIM components.
 */
function padEmbeddingForVec0(values) {
  const { length } = values;

  // Already the target width: no padding needed.
  if (length === VEC0_DIM) return serializeEmbedding(values);

  // TypedArray#set would reject an oversized source anyway, but only with a
  // generic "offset is out of bounds" message. Fail early and say what was
  // wrong so the offending embedding can be identified.
  if (length > VEC0_DIM) {
    throw new RangeError(
      `embedding length ${length} exceeds VEC0_DIM (${VEC0_DIM})`,
    );
  }

  // A new Float32Array is zero-initialized, so copying the values to the
  // front leaves the remaining slots as zero padding.
  const padded = new Float32Array(VEC0_DIM);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
function normalizeQuery(input) {
return String(input || '')
.trim()