const db = require("./db"); // thresholds — kept in code rather than config because tuning these without // understanding the consequences is a recipe for either a thundering herd // against blocked domains or wasted plain-fetch attempts forever const PLAIN_FAILURE_THRESHOLD = 5; const BROWSER_FAILURE_THRESHOLD = 5; const BROWSER_ONLY_TTL_MS = 7 * 24 * 60 * 60 * 1000; const BLOCKED_TTL_MS = 24 * 60 * 60 * 1000; const selectPolicy = db.prepare(` SELECT domain, policy, consecutive_plain_failures, consecutive_browser_failures, plain_success_count, browser_success_count, expires_at, updated_at FROM domain_fetch_policy WHERE domain = ? `); const upsertPolicy = db.prepare(` INSERT INTO domain_fetch_policy ( domain, policy, consecutive_plain_failures, consecutive_browser_failures, plain_success_count, browser_success_count, expires_at, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now')) ON CONFLICT(domain) DO UPDATE SET policy = excluded.policy, consecutive_plain_failures = excluded.consecutive_plain_failures, consecutive_browser_failures = excluded.consecutive_browser_failures, plain_success_count = excluded.plain_success_count, browser_success_count = excluded.browser_success_count, expires_at = excluded.expires_at, updated_at = datetime('now') `); function getDomain(url) { try { return new URL(url).hostname.toLowerCase(); } catch { return ""; } } function loadRow(domain) { if (!domain) return null; return selectPolicy.get(domain) || null; } function isExpired(row) { if (!row || !row.expires_at) return false; return new Date(row.expires_at).getTime() <= Date.now(); } // returns the effective policy for a domain right now. expired entries // silently revert to "auto" so we re-probe — we dont mutate the row here // since reads happen on every fetch and writes are expensive function getEffectivePolicy(url) { const domain = getDomain(url); const row = loadRow(domain); if (!row) { return { domain, policy: "auto" }; } if (isExpired(row)) { return { domain, policy: "auto", wasExpired: true, previous: row.policy }; } return { domain, policy: row.policy }; } function writeRow(domain, updates) { const existing = loadRow(domain) || { policy: "auto", consecutive_plain_failures: 0, consecutive_browser_failures: 0, plain_success_count: 0, browser_success_count: 0, expires_at: null, }; const merged = { policy: updates.policy ?? existing.policy, consecutive_plain_failures: updates.consecutive_plain_failures ?? existing.consecutive_plain_failures, consecutive_browser_failures: updates.consecutive_browser_failures ?? existing.consecutive_browser_failures, plain_success_count: updates.plain_success_count ?? existing.plain_success_count, browser_success_count: updates.browser_success_count ?? existing.browser_success_count, expires_at: updates.expires_at !== undefined ? updates.expires_at : existing.expires_at, }; upsertPolicy.run( domain, merged.policy, merged.consecutive_plain_failures, merged.consecutive_browser_failures, merged.plain_success_count, merged.browser_success_count, merged.expires_at ); } function recordPlainSuccess(url) { const domain = getDomain(url); if (!domain) return; const existing = loadRow(domain); writeRow(domain, { policy: "auto", consecutive_plain_failures: 0, plain_success_count: (existing?.plain_success_count || 0) + 1, expires_at: null, }); } function recordPlainFailure(url) { const domain = getDomain(url); if (!domain) return; const existing = loadRow(domain); const failures = (existing?.consecutive_plain_failures || 0) + 1; if (failures >= PLAIN_FAILURE_THRESHOLD) { writeRow(domain, { policy: "browser_only", consecutive_plain_failures: failures, expires_at: new Date(Date.now() + BROWSER_ONLY_TTL_MS).toISOString(), }); return; } writeRow(domain, { consecutive_plain_failures: failures, }); } function recordBrowserSuccess(url) { const domain = getDomain(url); if (!domain) return; const existing = loadRow(domain); // a browser success doesnt reset the plain-failure counter — plain fetch // is still broken for this domain, we just confirmed the browser path works. // policy stays browser_only until the ttl expires and we re-probe plain writeRow(domain, { consecutive_browser_failures: 0, browser_success_count: (existing?.browser_success_count || 0) + 1, }); } function recordBrowserFailure(url) { const domain = getDomain(url); if (!domain) return; const existing = loadRow(domain); const failures = (existing?.consecutive_browser_failures || 0) + 1; if (failures >= BROWSER_FAILURE_THRESHOLD) { writeRow(domain, { policy: "blocked", consecutive_browser_failures: failures, expires_at: new Date(Date.now() + BLOCKED_TTL_MS).toISOString(), }); return; } writeRow(domain, { consecutive_browser_failures: failures, }); } module.exports = { getEffectivePolicy, recordPlainSuccess, recordPlainFailure, recordBrowserSuccess, recordBrowserFailure, };