refactor admin data queries; consolidate multiple count queries and optimize article fetching

This commit is contained in:
ImBenji 2026-04-24 01:14:36 +01:00
parent 0c852da511
commit a48b1eef67

View file

@ -335,10 +335,14 @@ async function adminRoutes(fastify) {
if (!db) return { available: false }; if (!db) return { available: false };
const queue = db.prepare(`SELECT status, COUNT(*) as n FROM article_queue GROUP BY status`).all(); const queue = db.prepare(`SELECT status, COUNT(*) as n FROM article_queue GROUP BY status`).all();
const knowledge = db.prepare(`SELECT COUNT(*) as n FROM event_knowledge`).get().n; const icounts = db.prepare(`
const predictions = db.prepare(`SELECT COUNT(*) as n FROM event_predictions`).get().n; SELECT
const companies = db.prepare(`SELECT COUNT(*) as n FROM tracked_companies`).get().n; (SELECT COUNT(*) FROM event_knowledge) as knowledge,
const embeddings = db.prepare(`SELECT COUNT(*) as n FROM company_embeddings`).get().n; (SELECT COUNT(*) FROM event_predictions) as predictions,
(SELECT COUNT(*) FROM tracked_companies) as companies,
(SELECT COUNT(*) FROM company_embeddings) as embeddings
`).get();
const { knowledge, predictions, companies, embeddings } = icounts;
let workerRates = []; let workerRates = [];
try { try {
@ -488,20 +492,34 @@ async function adminRoutes(fastify) {
return []; return [];
} }
// pick the most recent event_date out of the predictions that fed each signal // collect all prediction ids across all signals in one pass, then
const latestEventDateStmt = db.prepare(` // fetch event_dates in a single query instead of one per signal
SELECT MAX(event_date) as latest const allPredIds = new Set();
FROM event_predictions for (const row of rows) {
WHERE id IN (SELECT value FROM json_each(?)) let ids = [];
AND event_date IS NOT NULL try { ids = JSON.parse(row.supporting_prediction_ids || '[]'); } catch (_) {}
`); for (const id of ids) allPredIds.add(id);
}
const predEventDates = new Map();
if (allPredIds.size > 0) {
const ids = [...allPredIds];
const ph = ids.map(() => '?').join(',');
const predRows = db.prepare(`
SELECT id, event_date FROM event_predictions
WHERE id IN (${ph}) AND event_date IS NOT NULL
`).all(...ids);
for (const p of predRows) predEventDates.set(p.id, p.event_date);
}
for (const row of rows) { for (const row of rows) {
let ids = [];
try { ids = JSON.parse(row.supporting_prediction_ids || '[]'); } catch (_) {}
let latest = null; let latest = null;
try { for (const id of ids) {
const res = latestEventDateStmt.get(row.supporting_prediction_ids || "[]"); const d = predEventDates.get(id);
latest = res?.latest || null; if (d && (!latest || d > latest)) latest = d;
} catch (_) {} }
row.latest_event_date = latest; row.latest_event_date = latest;
} }
@ -565,18 +583,24 @@ async function adminRoutes(fastify) {
ORDER BY created_at DESC ORDER BY created_at DESC
`).all(...eventIds); `).all(...eventIds);
const artStmt = db.prepare(` const artRows = db.prepare(`
SELECT id, title, source, pub_date, url SELECT id, title, source, pub_date, url, event_id
FROM articles FROM articles
WHERE event_id = ? WHERE event_id IN (${eventPh})
ORDER BY COALESCE(pub_date, ingested_at) DESC ORDER BY COALESCE(pub_date, ingested_at) DESC
LIMIT 5 `).all(...eventIds);
`);
const artsByEvent = new Map();
for (const a of artRows) {
if (!artsByEvent.has(a.event_id)) artsByEvent.set(a.event_id, []);
const list = artsByEvent.get(a.event_id);
if (list.length < 5) list.push(a);
}
const events = eventRows.map(ev => ({ const events = eventRows.map(ev => ({
...ev, ...ev,
event_date: eventMeta.get(ev.id) || null, event_date: eventMeta.get(ev.id) || null,
articles: artStmt.all(ev.id), articles: artsByEvent.get(ev.id) || [],
})); }));
return { events }; return { events };
@ -665,16 +689,22 @@ async function adminRoutes(fastify) {
ORDER BY created_at DESC ORDER BY created_at DESC
`).all(...ids); `).all(...ids);
const artStmt = db.prepare(` const edgeArtRows = db.prepare(`
SELECT id, title, source, pub_date, url SELECT id, title, source, pub_date, url, event_id
FROM articles FROM articles
WHERE event_id = ? WHERE event_id IN (${placeholders})
ORDER BY COALESCE(pub_date, ingested_at) DESC ORDER BY COALESCE(pub_date, ingested_at) DESC
LIMIT 8 `).all(...ids);
`);
const edgeArtsByEvent = new Map();
for (const a of edgeArtRows) {
if (!edgeArtsByEvent.has(a.event_id)) edgeArtsByEvent.set(a.event_id, []);
const list = edgeArtsByEvent.get(a.event_id);
if (list.length < 8) list.push(a);
}
for (const ev of eventRows) { for (const ev of eventRows) {
events.push({ ...ev, articles: artStmt.all(ev.id) }); events.push({ ...ev, articles: edgeArtsByEvent.get(ev.id) || [] });
} }
} }
@ -719,10 +749,15 @@ async function adminRoutes(fastify) {
fastify.get('/admin/api/stats', async (request, reply) => { fastify.get('/admin/api/stats', async (request, reply) => {
if (!checkAuth(request, reply)) return; if (!checkAuth(request, reply)) return;
const total = db.prepare(`SELECT COUNT(*) as n FROM articles`).get().n; const counts = db.prepare(`
const withContent = db.prepare(`SELECT COUNT(*) as n FROM articles WHERE content IS NOT NULL AND content != ''`).get().n; SELECT
const withEmbedding = db.prepare(`SELECT COUNT(*) as n FROM articles WHERE has_embedding = 1`).get().n; (SELECT COUNT(*) FROM articles) as total,
const eventCount = db.prepare(`SELECT COUNT(*) as n FROM events`).get().n; (SELECT COUNT(*) FROM articles WHERE content IS NOT NULL AND content != '') as withContent,
(SELECT COUNT(*) FROM articles WHERE has_embedding = 1) as withEmbedding,
(SELECT COUNT(*) FROM events) as eventCount,
(SELECT COUNT(*) FROM articles WHERE ingested_at >= datetime('now', '-1 hour')) as ingestedPerHour,
(SELECT COUNT(*) FROM articles WHERE content_attempted_at >= datetime('now', '-1 hour')) as contentPerHour
`).get();
const bySource = db.prepare(` const bySource = db.prepare(`
SELECT source, COUNT(*) as n FROM articles GROUP BY source ORDER BY n DESC SELECT source, COUNT(*) as n FROM articles GROUP BY source ORDER BY n DESC
@ -733,14 +768,6 @@ async function adminRoutes(fastify) {
FROM articles GROUP BY content_status ORDER BY n DESC FROM articles GROUP BY content_status ORDER BY n DESC
`).all(); `).all();
const ingestedPerHour = db.prepare(`
SELECT COUNT(*) as n FROM articles WHERE ingested_at >= datetime('now', '-1 hour')
`).get().n;
const contentPerHour = db.prepare(`
SELECT COUNT(*) as n FROM articles WHERE content_attempted_at >= datetime('now', '-1 hour')
`).get().n;
let embeddingsPerHour = 0; let embeddingsPerHour = 0;
try { try {
embeddingsPerHour = db.prepare(` embeddingsPerHour = db.prepare(`
@ -748,7 +775,7 @@ async function adminRoutes(fastify) {
`).get().n; `).get().n;
} catch (_) {} } catch (_) {}
return { total, withContent, withEmbedding, eventCount, bySource, byStatus, ingestedPerHour, contentPerHour, embeddingsPerHour }; return { ...counts, bySource, byStatus, embeddingsPerHour };
}); });
} }