From bdc7325387c979d9bd858eab31c3830bf9726d61 Mon Sep 17 00:00:00 2001
From: ImBenji
Date: Sat, 18 Apr 2026 18:00:00 +0100
Subject: [PATCH] migrate article embeddings to support multi-model architecture and enhance data integrity

---
Review notes (below the three-dash line, so not part of the commit message):

  * This diff is a pure move: VEC0_DIM, serializeEmbedding() and
    padEmbeddingForVec0() are relocated, bodies unchanged, from after
    buildEmbeddingInput() to just above the top-level backfill `try` block
    (13 insertions, 13 deletions).
  * NOTE(review): presumably the backfill block needs these helpers at
    module-evaluation time; a `const` binding such as VEC0_DIM cannot be read
    before its declaration has executed. Confirm against the backfill body,
    which is not visible in this patch.
  * NOTE(review): the subject line describes a broader migration than the
    diff contains; consider a subject that names the helper move.
  * NOTE(review): line structure and leading whitespace were reconstructed
    from a copy whose line breaks had been collapsed. Verify with
    `git apply --check`, or apply with `--ignore-whitespace`.

 src/embeddings.js | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/embeddings.js b/src/embeddings.js
index c76676b..ff2f5f6 100644
--- a/src/embeddings.js
+++ b/src/embeddings.js
@@ -85,6 +85,19 @@ const upsertQueryEmbedding = db.prepare(`
     created_at = datetime('now')
 `);
 
+const VEC0_DIM = 8192;
+
+function serializeEmbedding(values) {
+  return Buffer.from(new Float32Array(values).buffer);
+}
+
+function padEmbeddingForVec0(values) {
+  if (values.length === VEC0_DIM) return serializeEmbedding(values);
+  const padded = new Float32Array(VEC0_DIM);
+  padded.set(values);
+  return Buffer.from(padded.buffer);
+}
+
 // backfill store from vec0 for any embeddings that predate multi-model support.
 // only runs when store is completely empty, so we never stamp the wrong model on existing data.
 try {
@@ -193,19 +206,6 @@ function buildEmbeddingInput(article) {
   return [title, description, content].join('\n\n');
 }
 
-const VEC0_DIM = 8192;
-
-function serializeEmbedding(values) {
-  return Buffer.from(new Float32Array(values).buffer);
-}
-
-function padEmbeddingForVec0(values) {
-  if (values.length === VEC0_DIM) return serializeEmbedding(values);
-  const padded = new Float32Array(VEC0_DIM);
-  padded.set(values);
-  return Buffer.from(padded.buffer);
-}
-
 function normalizeQuery(input) {
   return String(input || '')
     .trim()