refine article filtering to ensure only usable articles are returned
This commit is contained in:
+11
-2
@@ -32,7 +32,11 @@ function buildArticlesQuery(query) {
     params.push(query.to);
   }
 
-  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
+  conditions.push('content IS NOT NULL AND content != \'\'');
+  conditions.push('is_index_page = 0');
+  conditions.push('EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = articles.id)');
+
+  const whereClause = `WHERE ${conditions.join(' AND ')}`;
   const limit = Number.parseInt(query.limit, 10);
   const offset = Number.parseInt(query.offset, 10);
 
@@ -71,7 +75,9 @@ function mapNeighborsToArticles(neighbors, excludeIndexPages, limit) {
     SELECT id, title, description, content, image, url, normalized_title, source, pub_date, ingested_at
     FROM articles
     WHERE id IN (${placeholders})
-    ${excludeIndexPages ? 'AND is_index_page = 0' : ''}
+      AND content IS NOT NULL AND content != ''
+      AND EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = articles.id)
+      ${excludeIndexPages ? 'AND is_index_page = 0' : ''}
   `).all(...ids);
   const byId = new Map(articles.map((article) => [article.id, article]));
 
@@ -141,6 +147,9 @@ async function articleRoutes(fastify) {
       SELECT id, title, description, content, image, url, normalized_title, source, pub_date, ingested_at
       FROM articles
       WHERE id = ?
+        AND content IS NOT NULL AND content != ''
+        AND is_index_page = 0
+        AND EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = articles.id)
     `).get(request.params.id);
 
     if (!article) {
|
||||
+26
-18
@@ -3,30 +3,38 @@ const { getLastIngestionBySource } = require('../state');
 
 async function statusRoutes(fastify) {
   fastify.get('/status', async () => {
-    const total = db.prepare('SELECT COUNT(*) AS count FROM articles').get().count;
-    const bySourceRows = db.prepare('SELECT source, COUNT(*) AS count FROM articles GROUP BY source ORDER BY source').all();
-    const contentCoverage = db.prepare(`
+    const bySourceRows = db.prepare(`
       SELECT
+        source,
         COUNT(*) AS total,
-        SUM(CASE WHEN content IS NOT NULL AND TRIM(content) <> '' THEN 1 ELSE 0 END) AS with_content,
-        SUM(CASE WHEN image IS NOT NULL AND TRIM(image) <> '' THEN 1 ELSE 0 END) AS with_image
+
+        SUM(CASE
+          WHEN content IS NOT NULL AND content != ''
+            AND is_index_page = 0
+            AND EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = articles.id)
+          THEN 1 ELSE 0
+        END) AS usable
       FROM articles
-    `).get();
-    const embeddingCoverage = db.prepare('SELECT COUNT(*) AS count FROM article_embeddings').get();
+      GROUP BY source
+      ORDER BY source
+    `).all();
+
+    const totals = bySourceRows.reduce(
+      (acc, row) => {
+        acc.total += row.total;
+        acc.usable += row.usable;
+        return acc;
+      },
+      { total: 0, usable: 0 }
+    );
 
     return {
-      totalArticles: total,
-      countsBySource: Object.fromEntries(bySourceRows.map((row) => [row.source, row.count])),
+      total: totals.total,
+      usable: totals.usable,
       lastIngestionBySource: getLastIngestionBySource(),
-      contentFetchCoverage: {
-        withContent: contentCoverage.with_content || 0,
-        withImage: contentCoverage.with_image || 0,
-        withEmbedding: embeddingCoverage.count || 0,
-        total: contentCoverage.total || 0,
-        contentRatio: contentCoverage.total ? Number((contentCoverage.with_content / contentCoverage.total).toFixed(4)) : 0,
-        imageRatio: contentCoverage.total ? Number((contentCoverage.with_image / contentCoverage.total).toFixed(4)) : 0,
-        embeddingRatio: contentCoverage.total ? Number((embeddingCoverage.count / contentCoverage.total).toFixed(4)) : 0,
-      },
+      bySource: Object.fromEntries(
+        bySourceRows.map((row) => [row.source, { total: row.total, usable: row.usable }])
+      ),
     };
   });
 }
|
||||
Reference in New Issue
Block a user