Enhance article query capabilities by supporting multiple keywords and dynamic ordering

This commit is contained in:
ImBenji
2026-04-21 11:42:21 +01:00
parent 8805d3a3fc
commit cb819e77ee
2 changed files with 134 additions and 240 deletions
+61 -12
View File
@@ -12,9 +12,15 @@ function buildArticlesQuery(query) {
const includeEmbedding = String(query.include_embedding || '').toLowerCase() === 'true';
if (query.keyword) {
conditions.push('(title LIKE ? OR description LIKE ? OR content LIKE ?)');
const keyword = `%${query.keyword}%`;
params.push(keyword, keyword, keyword);
const keywords = [].concat(query.keyword).map((k) => k.trim()).filter(Boolean);
const mode = String(query.keyword_mode || '').toLowerCase() === 'or' ? 'OR' : 'AND';
const clauses = keywords.map(() => '(title LIKE ? OR description LIKE ? OR content LIKE ?)');
conditions.push(`(${clauses.join(` ${mode} `)})`);
for (const kw of keywords) {
const like = `%${kw}%`;
params.push(like, like, like);
}
}
if (query.source) {
@@ -36,6 +42,14 @@ function buildArticlesQuery(query) {
conditions.push('is_index_page = 0');
conditions.push('has_embedding = 1');
const ORDERS = {
newest: 'pub_date_effective DESC, id DESC',
oldest: 'pub_date_effective ASC, id ASC',
ingested_newest: 'ingested_at DESC, id DESC',
ingested_oldest: 'ingested_at ASC, id ASC',
};
const orderBy = ORDERS[query.order] || ORDERS.newest;
const whereClause = `WHERE ${conditions.join(' AND ')}`;
const limit = Number.parseInt(query.limit, 10);
const offset = Number.parseInt(query.offset, 10);
@@ -48,7 +62,7 @@ function buildArticlesQuery(query) {
SELECT id, title, description, content, ${includeEmbedding ? 'embedding,' : ''} url, normalized_title, source, pub_date, ingested_at
FROM articles
${whereClause}
ORDER BY pub_date_effective DESC, id DESC
ORDER BY ${orderBy}
LIMIT ? OFFSET ?
`,
params,
@@ -64,23 +78,58 @@ function shouldExcludeIndexPages(query) {
return String(query.exclude_index_pages || '').toLowerCase() !== 'false';
}
function mapNeighborsToArticles(neighbors, excludeIndexPages, limit) {
function mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query = {}) {
const ids = neighbors.map((row) => row.articleId);
if (ids.length === 0) {
return [];
}
const placeholders = ids.map(() => '?').join(', ');
const conditions = [];
const params = [...ids];
conditions.push(`id IN (${placeholders})`);
conditions.push("content IS NOT NULL AND content != ''");
conditions.push('has_embedding = 1');
if (excludeIndexPages) conditions.push('is_index_page = 0');
if (query.source) {
conditions.push('source = ?');
params.push(query.source);
}
if (query.from) {
conditions.push('pub_date >= ?');
params.push(query.from);
}
if (query.to) {
conditions.push('pub_date <= ?');
params.push(query.to);
}
if (query.keyword) {
const keywords = [].concat(query.keyword).map((k) => k.trim()).filter(Boolean);
const mode = String(query.keyword_mode || '').toLowerCase() === 'or' ? 'OR' : 'AND';
const clauses = keywords.map(() => '(title LIKE ? OR description LIKE ? OR content LIKE ?)');
conditions.push(`(${clauses.join(` ${mode} `)})`);
for (const kw of keywords) {
const like = `%${kw}%`;
params.push(like, like, like);
}
}
const articles = db.prepare(`
SELECT id, title, description, content, url, normalized_title, source, pub_date, ingested_at
FROM articles
WHERE id IN (${placeholders})
AND content IS NOT NULL AND content != ''
AND has_embedding = 1
${excludeIndexPages ? 'AND is_index_page = 0' : ''}
`).all(...ids);
WHERE ${conditions.join(' AND ')}
`).all(...params);
const byId = new Map(articles.map((article) => [article.id, article]));
// preserve distance ordering from the vector search
return neighbors
.map((row) => {
const article = byId.get(row.articleId);
@@ -113,7 +162,7 @@ async function articleRoutes(fastify) {
Math.min(limit * 5, 500)
);
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit);
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query);
}
if (query.similar_to_article) {
@@ -130,7 +179,7 @@ async function articleRoutes(fastify) {
return { error: 'Embedding not found for article' };
}
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit);
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query);
}
const { sql, params } = buildArticlesQuery(query);