Enhance article query capabilities by supporting multiple keywords and dynamic ordering
This commit is contained in:
+61
-12
@@ -12,9 +12,15 @@ function buildArticlesQuery(query) {
|
||||
const includeEmbedding = String(query.include_embedding || '').toLowerCase() === 'true';
|
||||
|
||||
if (query.keyword) {
|
||||
conditions.push('(title LIKE ? OR description LIKE ? OR content LIKE ?)');
|
||||
const keyword = `%${query.keyword}%`;
|
||||
params.push(keyword, keyword, keyword);
|
||||
const keywords = [].concat(query.keyword).map((k) => k.trim()).filter(Boolean);
|
||||
const mode = String(query.keyword_mode || '').toLowerCase() === 'or' ? 'OR' : 'AND';
|
||||
const clauses = keywords.map(() => '(title LIKE ? OR description LIKE ? OR content LIKE ?)');
|
||||
|
||||
conditions.push(`(${clauses.join(` ${mode} `)})`);
|
||||
for (const kw of keywords) {
|
||||
const like = `%${kw}%`;
|
||||
params.push(like, like, like);
|
||||
}
|
||||
}
|
||||
|
||||
if (query.source) {
|
||||
@@ -36,6 +42,14 @@ function buildArticlesQuery(query) {
|
||||
conditions.push('is_index_page = 0');
|
||||
conditions.push('has_embedding = 1');
|
||||
|
||||
const ORDERS = {
|
||||
newest: 'pub_date_effective DESC, id DESC',
|
||||
oldest: 'pub_date_effective ASC, id ASC',
|
||||
ingested_newest: 'ingested_at DESC, id DESC',
|
||||
ingested_oldest: 'ingested_at ASC, id ASC',
|
||||
};
|
||||
const orderBy = ORDERS[query.order] || ORDERS.newest;
|
||||
|
||||
const whereClause = `WHERE ${conditions.join(' AND ')}`;
|
||||
const limit = Number.parseInt(query.limit, 10);
|
||||
const offset = Number.parseInt(query.offset, 10);
|
||||
@@ -48,7 +62,7 @@ function buildArticlesQuery(query) {
|
||||
SELECT id, title, description, content, ${includeEmbedding ? 'embedding,' : ''} url, normalized_title, source, pub_date, ingested_at
|
||||
FROM articles
|
||||
${whereClause}
|
||||
ORDER BY pub_date_effective DESC, id DESC
|
||||
ORDER BY ${orderBy}
|
||||
LIMIT ? OFFSET ?
|
||||
`,
|
||||
params,
|
||||
@@ -64,23 +78,58 @@ function shouldExcludeIndexPages(query) {
|
||||
return String(query.exclude_index_pages || '').toLowerCase() !== 'false';
|
||||
}
|
||||
|
||||
function mapNeighborsToArticles(neighbors, excludeIndexPages, limit) {
|
||||
function mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query = {}) {
|
||||
const ids = neighbors.map((row) => row.articleId);
|
||||
if (ids.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const placeholders = ids.map(() => '?').join(', ');
|
||||
const conditions = [];
|
||||
const params = [...ids];
|
||||
|
||||
conditions.push(`id IN (${placeholders})`);
|
||||
conditions.push("content IS NOT NULL AND content != ''");
|
||||
conditions.push('has_embedding = 1');
|
||||
|
||||
if (excludeIndexPages) conditions.push('is_index_page = 0');
|
||||
|
||||
if (query.source) {
|
||||
conditions.push('source = ?');
|
||||
params.push(query.source);
|
||||
}
|
||||
|
||||
if (query.from) {
|
||||
conditions.push('pub_date >= ?');
|
||||
params.push(query.from);
|
||||
}
|
||||
|
||||
if (query.to) {
|
||||
conditions.push('pub_date <= ?');
|
||||
params.push(query.to);
|
||||
}
|
||||
|
||||
if (query.keyword) {
|
||||
const keywords = [].concat(query.keyword).map((k) => k.trim()).filter(Boolean);
|
||||
const mode = String(query.keyword_mode || '').toLowerCase() === 'or' ? 'OR' : 'AND';
|
||||
const clauses = keywords.map(() => '(title LIKE ? OR description LIKE ? OR content LIKE ?)');
|
||||
|
||||
conditions.push(`(${clauses.join(` ${mode} `)})`);
|
||||
for (const kw of keywords) {
|
||||
const like = `%${kw}%`;
|
||||
params.push(like, like, like);
|
||||
}
|
||||
}
|
||||
|
||||
const articles = db.prepare(`
|
||||
SELECT id, title, description, content, url, normalized_title, source, pub_date, ingested_at
|
||||
FROM articles
|
||||
WHERE id IN (${placeholders})
|
||||
AND content IS NOT NULL AND content != ''
|
||||
AND has_embedding = 1
|
||||
${excludeIndexPages ? 'AND is_index_page = 0' : ''}
|
||||
`).all(...ids);
|
||||
WHERE ${conditions.join(' AND ')}
|
||||
`).all(...params);
|
||||
|
||||
const byId = new Map(articles.map((article) => [article.id, article]));
|
||||
|
||||
// preserve distance ordering from the vector search
|
||||
return neighbors
|
||||
.map((row) => {
|
||||
const article = byId.get(row.articleId);
|
||||
@@ -113,7 +162,7 @@ async function articleRoutes(fastify) {
|
||||
Math.min(limit * 5, 500)
|
||||
);
|
||||
|
||||
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit);
|
||||
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query);
|
||||
}
|
||||
|
||||
if (query.similar_to_article) {
|
||||
@@ -130,7 +179,7 @@ async function articleRoutes(fastify) {
|
||||
return { error: 'Embedding not found for article' };
|
||||
}
|
||||
|
||||
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit);
|
||||
return mapNeighborsToArticles(neighbors, excludeIndexPages, limit, query);
|
||||
}
|
||||
|
||||
const { sql, params } = buildArticlesQuery(query);
|
||||
|
||||
Reference in New Issue
Block a user