Add a paginated list mode to GET /events and an explicit openRouter.enabled flag.

- README.md: document list mode for GET /events (limit, offset, sort_by, order)
  and the list response shape, including article_count.
- config.json: replace the "[OFF]" apiKey prefix with "enabled": false.
- src/contentValidation.js: add the CNBC paywall shell to the body prefix blocklist.
- src/embeddings.js: gate embedding work on isOpenRouterEnabled().
- src/routes/events.js: list events when `id` is absent. Sort keys are resolved
  through a Map so inherited object keys (`constructor`, `toString`, ...) can no
  longer be interpolated into ORDER BY.

NOTE(review): config.json still carries what looks like a real OpenRouter key in
plain text; confirm whether it has to be rotated and moved out of the repo.
NOTE(review): the `index` lines are kept from the original patch; the post-image
hashes do not reflect the edited README.md, embeddings.js and events.js hunks.

diff --git a/README.md b/README.md
index 49975f2..294dfbf 100644
--- a/README.md
+++ b/README.md
@@ -110,15 +110,27 @@ Returns one article by numeric ID. Same usability filter as the list endpoint
 
 ### `GET /events`
 
-Returns a single event and its articles.
+Without `id` — returns a paginated list of events. With `id` — returns a single event and its articles.
 
 #### Query params
 
 | Param | Description |
 |---|---|
-| `id` | Event ID (required) |
+| `id` | Event ID. If present, returns that event with its articles instead of the list |
+| `limit` | Rows to return (list mode only). Default `20`, max `100` |
+| `offset` | Pagination offset (list mode only). Default `0` |
+| `sort_by` | Sort key (list mode only): `created_at`, `id` or `article_count`. Default `created_at` |
+| `order` | Sort direction (list mode only): `asc` or `desc`. Default `desc` |
 
-#### Response shape
+#### List response shape
+
+```json
+[
+  { "id": 1, "title": "...", "created_at": "2025-01-01T12:35:10.000Z", "article_count": 3 }
+]
+```
+
+#### Single event response shape
 
 ```json
 {
diff --git a/config.json b/config.json
index e2f51f2..025dd19 100644
--- a/config.json
+++ b/config.json
@@ -19,7 +19,8 @@
     "tickers": []
   },
   "openRouter": {
-    "apiKey": "[OFF]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",
+    "enabled": false,
+    "apiKey": "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",
     "embeddingModel": "qwen/qwen3-embedding-8b"
   },
   "gdelt": {
diff --git a/src/contentValidation.js b/src/contentValidation.js
index d69fddd..a9541ef 100644
--- a/src/contentValidation.js
+++ b/src/contentValidation.js
@@ -72,6 +72,9 @@ const BODY_PREFIX_BLOCKLIST = [
   // yahoo finance serves its global nav when the article body is js-rendered
   // and the plain fetch only gets the static shell
   "today's news us politics world weather",
+
+  // cnbc paywall shell — no article body, just site nav
+  "subscribe to cnbc pro subscribe to investing club",
 ];
 
 
diff --git a/src/embeddings.js b/src/embeddings.js
index 238275b..4e87171 100644
--- a/src/embeddings.js
+++ b/src/embeddings.js
@@ -91,6 +91,18 @@ const upsertQueryEmbedding = db.prepare(`
 
 const VEC0_DIM = 8192;
 
+/**
+ * Tell whether OpenRouter embedding calls should be attempted.
+ *
+ * @returns {boolean} False when the `openRouter` config section is missing,
+ *   when `enabled` is explicitly `false`, or when `apiKey` is empty or blank.
+ */
+function isOpenRouterEnabled() {
+  if (!config.openRouter) return false;
+  if (config.openRouter.enabled === false) return false;
+  return Boolean(config.openRouter.apiKey && String(config.openRouter.apiKey).trim());
+}
+
 function serializeEmbedding(values) {
   return Buffer.from(new Float32Array(values).buffer);
 }
@@ -279,11 +291,7 @@ async function requestEmbedding(input) {
 }
 
 async function generateAndStoreEmbedding(id) {
-  const apiKey = config.openRouter && config.openRouter.apiKey
-    ? String(config.openRouter.apiKey).trim()
-    : '';
-
-  if (!apiKey) {
+  if (!isOpenRouterEnabled()) {
     return { stored: false, shouldPauseBatch: false };
   }
 
@@ -368,11 +376,7 @@ async function backfillMissingEmbeddings(limit = 256, batchSize = 16) {
     return { processed: 0, paused: false };
   }
 
-  const apiKey = config.openRouter && config.openRouter.apiKey
-    ? String(config.openRouter.apiKey).trim()
-    : '';
-
-  if (!apiKey) {
+  if (!isOpenRouterEnabled()) {
     return { processed: 0, paused: false };
   }
 
diff --git a/src/routes/events.js b/src/routes/events.js
index 36e9fca..574281e 100644
--- a/src/routes/events.js
+++ b/src/routes/events.js
@@ -1,36 +1,97 @@
 const db = require('../db');
 
+// Values accepted for `sort_by`, mapped to the SQL expression placed in ORDER BY.
+// A Map is used instead of an object literal so that inherited keys such as
+// `constructor` or `toString` never match and get interpolated into the query.
+const SORT_COLUMNS = new Map([
+  ['created_at', 'e.created_at'],
+  ['id', 'e.id'],
+  ['article_count', 'article_count'],
+]);
+const DEFAULT_SORT_COLUMN = 'e.created_at';
+
+/**
+ * Parse the `limit` query param used in list mode.
+ *
+ * @param {*} value - Raw query value.
+ * @returns {number} The parsed positive integer capped at 100, or 20 when the
+ *   value is missing, not numeric, or not positive.
+ */
+function parseLimit(value) {
+  const n = Number.parseInt(value, 10);
+  return Number.isFinite(n) && n > 0 ? Math.min(n, 100) : 20;
+}
+
+/**
+ * Parse the `offset` query param used in list mode.
+ *
+ * @param {*} value - Raw query value.
+ * @returns {number} The parsed non-negative integer, or 0 when the value is
+ *   missing, not numeric, or negative.
+ */
+function parseOffset(value) {
+  const n = Number.parseInt(value, 10);
+  return Number.isFinite(n) && n >= 0 ? n : 0;
+}
+
+/**
+ * Register `GET /events` on the given instance.
+ *
+ * With `id`: responds with that event plus its articles that have content and
+ * are not index pages (400 when `id` is not numeric, 404 when no event matches).
+ * Without `id`: responds with one page of events, each with an `article_count`,
+ * driven by `limit`, `offset`, `sort_by` and `order`.
+ *
+ * @param {object} fastify - Instance the route is registered on.
+ */
 async function eventRoutes(fastify) {
   fastify.get('/events', async (request, reply) => {
     const query = request.query || {};
 
-    if (!query.id) {
-      reply.code(400);
-      return { error: 'id is required' };
+    if (query.id) {
+      const id = Number.parseInt(query.id, 10);
+      if (!Number.isFinite(id)) {
+        reply.code(400);
+        return { error: 'id must be a number' };
+      }
+
+      const event = db.prepare(`SELECT id, title, created_at FROM events WHERE id = ?`).get(id);
+      if (!event) {
+        reply.code(404);
+        return { error: 'Event not found' };
+      }
+
+      const articles = db.prepare(`
+        SELECT id, title, description, content, url, normalized_title, source, pub_date, ingested_at
+        FROM articles
+        WHERE event_id = ?
+          AND content IS NOT NULL AND content != ''
+          AND is_index_page = 0
+        ORDER BY pub_date_effective DESC, id DESC
+      `).all(id);
+
+      return { ...event, articles };
     }
 
-    const id = Number.parseInt(query.id, 10);
-    if (!Number.isFinite(id)) {
-      reply.code(400);
-      return { error: 'id must be a number' };
-    }
+    const limit = parseLimit(query.limit);
+    const offset = parseOffset(query.offset);
 
-    const event = db.prepare(`SELECT id, title, created_at FROM events WHERE id = ?`).get(id);
-    if (!event) {
-      reply.code(404);
-      return { error: 'Event not found' };
-    }
-
-    const articles = db.prepare(`
-      SELECT id, title, description, content, url, normalized_title, source, pub_date, ingested_at
-      FROM articles
-      WHERE event_id = ?
-        AND content IS NOT NULL AND content != ''
-        AND is_index_page = 0
-      ORDER BY pub_date_effective DESC, id DESC
-    `).all(id);
+    // Column names and sort direction cannot be passed as bound parameters, so
+    // both are picked from fixed sets before being interpolated below.
+    const sortBy = SORT_COLUMNS.get(query.sort_by) || DEFAULT_SORT_COLUMN;
+    const order = String(query.order || '').toLowerCase() === 'asc' ? 'ASC' : 'DESC';
 
-    return { ...event, articles };
+    return db.prepare(`
+      SELECT e.id, e.title, e.created_at,
+             COUNT(a.id) AS article_count
+      FROM events e
+      LEFT JOIN articles a ON a.event_id = e.id
+        AND a.content IS NOT NULL AND a.content != ''
+        AND a.is_index_page = 0
+      GROUP BY e.id
+      ORDER BY ${sortBy} ${order}, e.id ${order}
+      LIMIT ? OFFSET ?
+    `).all(limit, offset);
   });
 }
 