// File-viewer header (extraction residue): 155 lines, 4.7 KiB, JavaScript.
const db = require('../db');
|
|
const {
|
|
findArticlesByEmbedding,
|
|
findSimilarArticles,
|
|
getEmbeddingBuffer,
|
|
getOrCreateQueryEmbedding,
|
|
} = require('../embeddings');
|
|
|
|
/**
 * Builds the parameterized SQL statement used for the plain article listing.
 *
 * Recognized keys on `query` (all optional):
 *  - `keyword`: substring matched literally against title, description and content.
 *  - `source`: exact match on the `source` column.
 *  - `from` / `to`: inclusive bounds compared against `pub_date`.
 *  - `limit`: positive integer, capped at 100; anything else falls back to 20.
 *  - `offset`: non-negative integer; anything else falls back to 0.
 *  - `include_embedding`: when it is the string "true" (case-insensitive), the
 *    `embedding` column is added to the SELECT list.
 *
 * @param {object} query - Parsed query-string values.
 * @returns {{ sql: string, params: Array<string|number> }} SQL text with `?`
 *   placeholders and the values to bind, in placeholder order (filters first,
 *   then limit, then offset).
 */
function buildArticlesQuery(query) {
  const conditions = [];
  const params = [];
  const includeEmbedding = String(query.include_embedding || '').toLowerCase() === 'true';

  if (query.keyword) {
    // Escape LIKE metacharacters (and the escape character itself) so that a
    // keyword such as "100%" or "a_b" is searched for literally instead of
    // being interpreted as a wildcard pattern.
    const escapedKeyword = String(query.keyword).replace(/[\\%_]/g, '\\$&');
    const pattern = `%${escapedKeyword}%`;
    conditions.push(
      "(title LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')"
    );
    params.push(pattern, pattern, pattern);
  }

  if (query.source) {
    conditions.push('source = ?');
    params.push(query.source);
  }

  if (query.from) {
    conditions.push('pub_date >= ?');
    params.push(query.from);
  }

  if (query.to) {
    conditions.push('pub_date <= ?');
    params.push(query.to);
  }

  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
  const limit = Number.parseInt(query.limit, 10);
  const offset = Number.parseInt(query.offset, 10);

  // LIMIT and OFFSET are always bound last, matching the trailing placeholders.
  params.push(Number.isFinite(limit) && limit > 0 ? Math.min(limit, 100) : 20);
  params.push(Number.isFinite(offset) && offset >= 0 ? offset : 0);

  return {
    sql: `
      SELECT id, title, description, content, image, ${includeEmbedding ? 'embedding,' : ''} url, normalized_title, source, pub_date, ingested_at
      FROM articles
      ${whereClause}
      ORDER BY COALESCE(pub_date, ingested_at) DESC, id DESC
      LIMIT ? OFFSET ?
    `,
    params,
  };
}
|
|
|
|
/** Column list shared by every article SELECT in these routes; `embedding` is intentionally absent. */
const ARTICLE_COLUMNS =
  'id, title, description, content, image, url, normalized_title, source, pub_date, ingested_at';

/** Error body returned whenever a client asks for raw embeddings. */
const EMBEDDING_NOT_EXPOSED_ERROR =
  'Embeddings are not returned directly. Use similar_to_article for vector search.';

/**
 * Tells whether the client explicitly asked for embeddings.
 * Only the string "true" (case-insensitive) counts, so `include_embedding=false`
 * is treated the same as omitting the parameter on every route.
 *
 * @param {object} query - Parsed query-string values.
 * @returns {boolean}
 */
function isEmbeddingRequested(query) {
  return String(query.include_embedding || '').toLowerCase() === 'true';
}

/**
 * Parses the `limit` query value for vector searches.
 *
 * @param {unknown} rawLimit - Raw `limit` query value.
 * @returns {number} A positive integer capped at 100, or 20 when missing/invalid.
 */
function parseNeighborLimit(rawLimit) {
  const limit = Number.parseInt(rawLimit, 10);
  return Number.isFinite(limit) && limit > 0 ? Math.min(limit, 100) : 20;
}

/**
 * Loads the article rows for a list of vector-search hits and merges each row
 * with its `distance`, preserving the order of `neighbors`. Hits whose article
 * row is not found are dropped.
 *
 * @param {Array<{ articleId: number, distance: number }>} neighbors - Search hits.
 * @returns {Array<object>} Article rows extended with a `distance` property.
 */
function loadNeighborArticles(neighbors) {
  if (neighbors.length === 0) {
    return [];
  }

  const ids = neighbors.map((row) => row.articleId);
  // One `?` per id keeps the IN (...) list fully parameterized.
  const placeholders = ids.map(() => '?').join(', ');
  const articles = db
    .prepare(`SELECT ${ARTICLE_COLUMNS} FROM articles WHERE id IN (${placeholders})`)
    .all(...ids);
  const byId = new Map(articles.map((article) => [article.id, article]));

  const results = [];
  for (const { articleId, distance } of neighbors) {
    const article = byId.get(articleId);
    if (article) {
      results.push({ ...article, distance });
    }
  }
  return results;
}

/**
 * Registers the article endpoints on a Fastify instance.
 *
 * `GET /articles` supports three modes, checked in this order:
 *  1. `semantic=<text>`: vector search using an embedding obtained for the text.
 *  2. `similar_to_article=<id>`: vector search for neighbours of a stored article.
 *  3. Otherwise: filtered, paginated listing built by `buildArticlesQuery`.
 *
 * `GET /articles/:id` returns one article or a 404 error body.
 *
 * Both routes answer 400 when `include_embedding=true` is requested.
 *
 * @param {object} fastify - Fastify instance (only `fastify.get` is used).
 * @returns {Promise<void>}
 */
async function articleRoutes(fastify) {
  fastify.get('/articles', async (request, reply) => {
    const query = request.query || {};

    // Same rule as GET /articles/:id: only an explicit "true" is rejected, so
    // `include_embedding=false` no longer fails with a 400.
    if (isEmbeddingRequested(query)) {
      reply.code(400);
      return { error: EMBEDDING_NOT_EXPOSED_ERROR };
    }

    if (query.semantic !== undefined) {
      const embedding = await getOrCreateQueryEmbedding(query.semantic);

      // A falsy embedding is reported to the client as an empty semantic query.
      if (!embedding) {
        reply.code(400);
        return { error: 'Semantic query must not be empty' };
      }

      const neighbors = findArticlesByEmbedding(embedding, parseNeighborLimit(query.limit));
      return loadNeighborArticles(neighbors);
    }

    if (query.similar_to_article) {
      const articleId = Number.parseInt(query.similar_to_article, 10);

      // Reject unparsable ids up front instead of handing NaN to the vector helpers.
      if (!Number.isInteger(articleId)) {
        reply.code(400);
        return { error: 'similar_to_article must be an integer article id' };
      }

      const neighbors = findSimilarArticles(articleId, parseNeighborLimit(query.limit));

      // No hits and no stored embedding means the article cannot be searched from;
      // no hits with an embedding present is a legitimate empty result.
      if (neighbors.length === 0 && !getEmbeddingBuffer(articleId)) {
        reply.code(404);
        return { error: 'Embedding not found for article' };
      }

      return loadNeighborArticles(neighbors);
    }

    const { sql, params } = buildArticlesQuery(query);
    return db.prepare(sql).all(...params);
  });

  fastify.get('/articles/:id', async (request, reply) => {
    if (isEmbeddingRequested(request.query || {})) {
      reply.code(400);
      return { error: EMBEDDING_NOT_EXPOSED_ERROR };
    }

    const article = db
      .prepare(`SELECT ${ARTICLE_COLUMNS} FROM articles WHERE id = ?`)
      .get(request.params.id);

    if (!article) {
      reply.code(404);
      return { error: 'Article not found' };
    }

    return article;
  });
}
|
|
|
|
module.exports = articleRoutes;
|