// Source: Duriin-API/src/routes/articles.js
// (File-viewer metadata: 155 lines, 4.7 KiB, JavaScript.)

const db = require('../db');
const {
findArticlesByEmbedding,
findSimilarArticles,
getEmbeddingBuffer,
getOrCreateQueryEmbedding,
} = require('../embeddings');
/**
 * Builds the parameterized SQL statement for the plain article listing.
 *
 * Recognized keys on `query` (all optional; filters are combined with AND):
 *   - `keyword`: literal substring matched against title, description and content
 *   - `source`: exact match on the `source` column
 *   - `from` / `to`: inclusive lower / upper bound on `pub_date`
 *   - `limit`: positive integer, capped at 100, defaults to 20
 *   - `offset`: non-negative integer, defaults to 0
 *   - `include_embedding`: when the string 'true' (case-insensitive), the
 *     `embedding` column is added to the SELECT list
 *
 * Every user-supplied value is bound through a `?` placeholder; the only text
 * interpolated into the SQL is built from fixed strings in this function.
 *
 * @param {object} query - Parsed query-string values.
 * @returns {{ sql: string, params: Array<string|number> }} The statement text
 *   and its positional parameters; the last two parameters are always the
 *   LIMIT and OFFSET values.
 */
function buildArticlesQuery(query) {
  const conditions = [];
  const params = [];
  const includeEmbedding = String(query.include_embedding || '').toLowerCase() === 'true';

  if (query.keyword) {
    // Escape the LIKE metacharacters (and the escape character itself) so a
    // keyword such as "50%" or "a_b" is matched literally instead of acting
    // as a wildcard pattern. The ESCAPE clause tells the database which
    // character introduces an escaped metacharacter.
    const escapedKeyword = String(query.keyword).replace(/[\\%_]/g, '\\$&');
    const pattern = `%${escapedKeyword}%`;
    conditions.push(
      "(title LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')"
    );
    params.push(pattern, pattern, pattern);
  }

  if (query.source) {
    conditions.push('source = ?');
    params.push(query.source);
  }

  if (query.from) {
    conditions.push('pub_date >= ?');
    params.push(query.from);
  }

  if (query.to) {
    conditions.push('pub_date <= ?');
    params.push(query.to);
  }

  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

  // Unparseable or out-of-range pagination values fall back to the defaults
  // rather than producing an error.
  const limit = Number.parseInt(query.limit, 10);
  const offset = Number.parseInt(query.offset, 10);
  params.push(Number.isFinite(limit) && limit > 0 ? Math.min(limit, 100) : 20);
  params.push(Number.isFinite(offset) && offset >= 0 ? offset : 0);

  const columns = [
    'id',
    'title',
    'description',
    'content',
    'image',
    ...(includeEmbedding ? ['embedding'] : []),
    'url',
    'normalized_title',
    'source',
    'pub_date',
    'ingested_at',
  ].join(', ');

  return {
    sql: `
SELECT ${columns}
FROM articles
${whereClause}
ORDER BY COALESCE(pub_date, ingested_at) DESC, id DESC
LIMIT ? OFFSET ?
`,
    params,
  };
}
// Columns returned for every article payload; the raw embedding is never included.
const ARTICLE_COLUMNS =
  'id, title, description, content, image, url, normalized_title, source, pub_date, ingested_at';

// Shared 400 response text for requests that ask for raw embeddings.
const EMBEDDING_NOT_EXPOSED_ERROR =
  'Embeddings are not returned directly. Use similar_to_article for vector search.';

/**
 * Reports whether the request explicitly asks for raw embeddings, i.e.
 * `include_embedding` is the string 'true' (case-insensitive).
 *
 * @param {object} query - Parsed query-string values.
 * @returns {boolean}
 */
function isEmbeddingRequested(query) {
  return String(query.include_embedding || '').toLowerCase() === 'true';
}

/**
 * Parses the `limit` query value for vector searches: a positive integer
 * capped at 100, falling back to 20 when missing or invalid.
 *
 * @param {*} rawLimit - Raw `limit` query value.
 * @returns {number}
 */
function parseNeighborLimit(rawLimit) {
  const limit = Number.parseInt(rawLimit, 10);
  return Number.isFinite(limit) && limit > 0 ? Math.min(limit, 100) : 20;
}

/**
 * Loads the article rows for a list of vector-search neighbors and returns
 * them in the neighbors' order, each with the neighbor's `distance` attached.
 * Neighbors whose article row is not found are dropped.
 *
 * @param {Array<{ articleId: number, distance: number }>} neighbors
 * @returns {Array<object>}
 */
function loadNeighborArticles(neighbors) {
  const ids = neighbors.map((row) => row.articleId);
  if (ids.length === 0) {
    return [];
  }

  const placeholders = ids.map(() => '?').join(', ');
  const articles = db.prepare(`
    SELECT ${ARTICLE_COLUMNS}
    FROM articles
    WHERE id IN (${placeholders})
  `).all(...ids);

  // The IN query does not preserve ranking, so re-order by walking the neighbors.
  const byId = new Map(articles.map((article) => [article.id, article]));
  return neighbors
    .map((row) => {
      const article = byId.get(row.articleId);
      return article ? { ...article, distance: row.distance } : null;
    })
    .filter(Boolean);
}

/**
 * Registers the article read routes on the given Fastify instance.
 *
 * `GET /articles` supports three modes, checked in this order:
 *   1. `semantic=<text>`: vector search using an embedding of the text.
 *   2. `similar_to_article=<id>`: vector search for neighbors of an article.
 *   3. otherwise: filtered, paginated listing via `buildArticlesQuery`.
 *
 * `GET /articles/:id` returns a single article or a 404 error object.
 *
 * Both routes answer 400 when `include_embedding=true` is requested.
 *
 * @param {object} fastify - Fastify instance to register routes on.
 * @returns {Promise<void>}
 */
async function articleRoutes(fastify) {
  fastify.get('/articles', async (request, reply) => {
    const query = request.query || {};

    // Same predicate as the detail route: only an explicit 'true' is rejected,
    // so include_embedding=false is treated as "not requested".
    if (isEmbeddingRequested(query)) {
      reply.code(400);
      return { error: EMBEDDING_NOT_EXPOSED_ERROR };
    }

    if (query.semantic !== undefined) {
      const embedding = await getOrCreateQueryEmbedding(query.semantic);
      // A falsy embedding is reported as an empty query.
      if (!embedding) {
        reply.code(400);
        return { error: 'Semantic query must not be empty' };
      }
      return loadNeighborArticles(
        findArticlesByEmbedding(embedding, parseNeighborLimit(query.limit))
      );
    }

    if (query.similar_to_article) {
      const articleId = Number.parseInt(query.similar_to_article, 10);
      // Reject unparseable ids up front instead of passing NaN to the lookups
      // below and reporting a misleading 404.
      if (!Number.isFinite(articleId)) {
        reply.code(400);
        return { error: 'similar_to_article must be an integer article id' };
      }

      const neighbors = findSimilarArticles(articleId, parseNeighborLimit(query.limit));
      // An empty result is only an error when the article has no embedding at
      // all; otherwise it legitimately has no neighbors.
      if (neighbors.length === 0 && !getEmbeddingBuffer(articleId)) {
        reply.code(404);
        return { error: 'Embedding not found for article' };
      }
      return loadNeighborArticles(neighbors);
    }

    const { sql, params } = buildArticlesQuery(query);
    return db.prepare(sql).all(...params);
  });

  fastify.get('/articles/:id', async (request, reply) => {
    if (isEmbeddingRequested(request.query || {})) {
      reply.code(400);
      return { error: EMBEDDING_NOT_EXPOSED_ERROR };
    }

    const article = db.prepare(`
      SELECT ${ARTICLE_COLUMNS}
      FROM articles
      WHERE id = ?
    `).get(request.params.id);

    if (!article) {
      reply.code(404);
      return { error: 'Article not found' };
    }
    return article;
  });
}
// Expose the route-registration function as this module's sole export.
module.exports = articleRoutes;