refactor admin data queries; consolidate multiple count queries and optimize article fetching

This commit is contained in:
ImBenji 2026-04-24 01:14:36 +01:00
parent 0c852da511
commit a48b1eef67

View file

@ -335,10 +335,14 @@ async function adminRoutes(fastify) {
if (!db) return { available: false };
const queue = db.prepare(`SELECT status, COUNT(*) as n FROM article_queue GROUP BY status`).all();
const knowledge = db.prepare(`SELECT COUNT(*) as n FROM event_knowledge`).get().n;
const predictions = db.prepare(`SELECT COUNT(*) as n FROM event_predictions`).get().n;
const companies = db.prepare(`SELECT COUNT(*) as n FROM tracked_companies`).get().n;
const embeddings = db.prepare(`SELECT COUNT(*) as n FROM company_embeddings`).get().n;
// Read all four table sizes with one statement: each scalar subquery becomes
// one column of the single row returned by .get().
const icounts = db.prepare(`
SELECT
(SELECT COUNT(*) FROM event_knowledge) as knowledge,
(SELECT COUNT(*) FROM event_predictions) as predictions,
(SELECT COUNT(*) FROM tracked_companies) as companies,
(SELECT COUNT(*) FROM company_embeddings) as embeddings
`).get();
// Unpack the columns into same-named locals.
const { knowledge, predictions, companies, embeddings } = icounts;
let workerRates = [];
try {
@ -488,20 +492,34 @@ async function adminRoutes(fastify) {
return [];
}
// For every signal row, record the most recent event_date among the
// predictions that fed it as row.latest_event_date (null when none of them
// has a date). All referenced prediction ids are collected in one pass and
// resolved with batched lookups instead of one query per signal.

// Upper bound on ids bound into a single IN (...) list, kept well below
// SQLite's per-statement host-parameter limit.
const maxBoundIds = 500;

const idsByRow = new Map();
const allPredIds = new Set();
for (const row of rows) {
  let parsed = [];
  try {
    parsed = JSON.parse(row.supporting_prediction_ids || '[]');
  } catch (_) {
    // Malformed JSON: best effort, treat the signal as having no predictions.
  }
  // Valid JSON is not necessarily an array ("null", "7", "{}"), and iterating
  // such a value would throw. Only strings and numbers are kept because they
  // are the only JSON-derived values that make sense as bound id parameters.
  const ids = Array.isArray(parsed)
    ? parsed.filter((id) => typeof id === 'number' || typeof id === 'string')
    : [];
  idsByRow.set(row, ids);
  for (const id of ids) allPredIds.add(id);
}

// Keyed by String(id): Map lookups use strict key equality, so an id stored
// as "12" in the JSON would otherwise never match a row returned with id 12.
const predEventDates = new Map();
const uniqueIds = [...allPredIds];
for (let start = 0; start < uniqueIds.length; start += maxBoundIds) {
  const chunk = uniqueIds.slice(start, start + maxBoundIds);
  const ph = chunk.map(() => '?').join(',');
  const predRows = db.prepare(`
    SELECT id, event_date FROM event_predictions
    WHERE id IN (${ph}) AND event_date IS NOT NULL
  `).all(...chunk);
  for (const p of predRows) predEventDates.set(String(p.id), p.event_date);
}

for (const row of rows) {
  let latest = null;
  for (const id of idsByRow.get(row)) {
    const d = predEventDates.get(String(id));
    // Dates are compared with ">" exactly as stored.
    // NOTE(review): assumes event_date sorts chronologically as stored (e.g. ISO-8601 text) — confirm.
    if (d && (!latest || d > latest)) latest = d;
  }
  row.latest_event_date = latest;
}
@ -565,18 +583,24 @@ async function adminRoutes(fastify) {
ORDER BY created_at DESC
`).all(...eventIds);
// Load the articles of every selected event with one query, newest first, and
// keep at most `maxArticlesPerEvent` per event.
//
// The query deliberately has no LIMIT: a LIMIT on an IN (...) query caps the
// total number of rows across all events, not the rows per event, so most
// events would end up with no articles. The per-event cap is applied while
// grouping instead. If article volume per event ever makes this too heavy,
// rank rows per event in SQL (ROW_NUMBER() OVER (PARTITION BY event_id ...)).
const maxArticlesPerEvent = 5;
const artRows = db.prepare(`
  SELECT id, title, source, pub_date, url, event_id
  FROM articles
  WHERE event_id IN (${eventPh})
  ORDER BY COALESCE(pub_date, ingested_at) DESC
`).all(...eventIds);

// Group by event. Rows arrive newest first, so the first N rows seen for an
// event are its N most recent articles.
const artsByEvent = new Map();
for (const article of artRows) {
  let bucket = artsByEvent.get(article.event_id);
  if (!bucket) {
    bucket = [];
    artsByEvent.set(article.event_id, bucket);
  }
  if (bucket.length < maxArticlesPerEvent) bucket.push(article);
}

// Attach the event date and the capped article list to each event; events
// without any article get an empty list.
const events = eventRows.map((ev) => ({
  ...ev,
  event_date: eventMeta.get(ev.id) || null,
  articles: artsByEvent.get(ev.id) || [],
}));
return { events };
@ -665,16 +689,22 @@ async function adminRoutes(fastify) {
ORDER BY created_at DESC
`).all(...ids);
// Load the articles of every listed event with one query, newest first, and
// keep at most `maxEdgeArticlesPerEvent` per event.
//
// The query deliberately has no LIMIT: a LIMIT on an IN (...) query caps the
// total number of rows across all events, not the rows per event, so most
// events would end up with no articles. The per-event cap is applied while
// grouping instead.
const maxEdgeArticlesPerEvent = 8;
const edgeArtRows = db.prepare(`
  SELECT id, title, source, pub_date, url, event_id
  FROM articles
  WHERE event_id IN (${placeholders})
  ORDER BY COALESCE(pub_date, ingested_at) DESC
`).all(...ids);

// Group by event. Rows arrive newest first, so the first N rows seen for an
// event are its N most recent articles.
const edgeArtsByEvent = new Map();
for (const article of edgeArtRows) {
  let bucket = edgeArtsByEvent.get(article.event_id);
  if (!bucket) {
    bucket = [];
    edgeArtsByEvent.set(article.event_id, bucket);
  }
  if (bucket.length < maxEdgeArticlesPerEvent) bucket.push(article);
}

// Events without any article get an empty list.
for (const ev of eventRows) {
  events.push({ ...ev, articles: edgeArtsByEvent.get(ev.id) || [] });
}
}
@ -719,10 +749,15 @@ async function adminRoutes(fastify) {
fastify.get('/admin/api/stats', async (request, reply) => {
if (!checkAuth(request, reply)) return;
const total = db.prepare(`SELECT COUNT(*) as n FROM articles`).get().n;
const withContent = db.prepare(`SELECT COUNT(*) as n FROM articles WHERE content IS NOT NULL AND content != ''`).get().n;
const withEmbedding = db.prepare(`SELECT COUNT(*) as n FROM articles WHERE has_embedding = 1`).get().n;
const eventCount = db.prepare(`SELECT COUNT(*) as n FROM events`).get().n;
// Compute every scalar statistic in one statement: each scalar subquery is one
// column of the single row returned by .get(). The column aliases are spelled
// exactly as the response fields because this row is spread into the returned
// object.
const counts = db.prepare(`
SELECT
(SELECT COUNT(*) FROM articles) as total,
(SELECT COUNT(*) FROM articles WHERE content IS NOT NULL AND content != '') as withContent,
(SELECT COUNT(*) FROM articles WHERE has_embedding = 1) as withEmbedding,
(SELECT COUNT(*) FROM events) as eventCount,
(SELECT COUNT(*) FROM articles WHERE ingested_at >= datetime('now', '-1 hour')) as ingestedPerHour,
(SELECT COUNT(*) FROM articles WHERE content_attempted_at >= datetime('now', '-1 hour')) as contentPerHour
`).get();
const bySource = db.prepare(`
SELECT source, COUNT(*) as n FROM articles GROUP BY source ORDER BY n DESC
@ -733,14 +768,6 @@ async function adminRoutes(fastify) {
FROM articles GROUP BY content_status ORDER BY n DESC
`).all();
const ingestedPerHour = db.prepare(`
SELECT COUNT(*) as n FROM articles WHERE ingested_at >= datetime('now', '-1 hour')
`).get().n;
const contentPerHour = db.prepare(`
SELECT COUNT(*) as n FROM articles WHERE content_attempted_at >= datetime('now', '-1 hour')
`).get().n;
let embeddingsPerHour = 0;
try {
embeddingsPerHour = db.prepare(`
@ -748,7 +775,7 @@ async function adminRoutes(fastify) {
`).get().n;
} catch (_) {}
return { total, withContent, withEmbedding, eventCount, bySource, byStatus, ingestedPerHour, contentPerHour, embeddingsPerHour };
return { ...counts, bySource, byStatus, embeddingsPerHour };
});
}