// Per-domain fetch policy store: records plain-fetch and browser-fetch outcomes
// per hostname and derives an "auto", "browser_only" or "blocked" policy.
const db = require("./db");
// Thresholds — kept in code rather than config because tuning these without
// understanding the consequences is a recipe for either a thundering herd
// against blocked domains or wasted plain-fetch attempts forever.

// Consecutive plain-fetch failures after which a domain is marked "browser_only".
const PLAIN_FAILURE_THRESHOLD = 5;
// Consecutive browser-fetch failures after which a domain is marked "blocked".
const BROWSER_FAILURE_THRESHOLD = 5;
// Lifetime of a "browser_only" verdict before it expires (7 days, in ms).
const BROWSER_ONLY_TTL_MS = 7 * 24 * 60 * 60 * 1000;
// Lifetime of a "blocked" verdict before it expires (24 hours, in ms).
const BLOCKED_TTL_MS = 24 * 60 * 60 * 1000;
// Prepared statement: fetch the policy row for one domain. The domain is the
// single positional parameter.
const selectPolicy = db.prepare(`
  SELECT domain, policy, consecutive_plain_failures, consecutive_browser_failures,
         plain_success_count, browser_success_count, expires_at, updated_at
  FROM domain_fetch_policy
  WHERE domain = ?
`);
// Prepared statement: insert a policy row, or overwrite every mutable column
// when a row for the domain already exists.
// Positional parameters, in order: domain, policy, consecutive_plain_failures,
// consecutive_browser_failures, plain_success_count, browser_success_count,
// expires_at. updated_at is always stamped by SQLite with datetime('now').
// NOTE(review): ON CONFLICT(domain) needs a uniqueness constraint on `domain`
// (presumably the primary key) — confirm against the schema.
const upsertPolicy = db.prepare(`
  INSERT INTO domain_fetch_policy (
    domain, policy, consecutive_plain_failures, consecutive_browser_failures,
    plain_success_count, browser_success_count, expires_at, updated_at
  ) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
  ON CONFLICT(domain) DO UPDATE SET
    policy = excluded.policy,
    consecutive_plain_failures = excluded.consecutive_plain_failures,
    consecutive_browser_failures = excluded.consecutive_browser_failures,
    plain_success_count = excluded.plain_success_count,
    browser_success_count = excluded.browser_success_count,
    expires_at = excluded.expires_at,
    updated_at = datetime('now')
`);
/**
 * Extracts the lower-cased hostname from a URL string.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} The hostname, or "" when `url` cannot be parsed. Callers
 *   treat "" as "no domain" and skip the lookup/write.
 */
function getDomain(url) {
  try {
    const { hostname } = new URL(url);
    return hostname.toLowerCase();
  } catch {
    // Unparseable input is expected here (best effort), so it is reported as
    // an empty domain rather than thrown.
    return "";
  }
}
/**
 * Loads the stored policy row for a domain.
 *
 * @param {string} domain - Hostname key; an empty value short-circuits.
 * @returns {object|null} The row returned by `selectPolicy`, or null when the
 *   domain is empty or no row exists.
 */
function loadRow(domain) {
  if (!domain) return null;
  // Normalise "no row" to null so callers only have one absent value to handle.
  return selectPolicy.get(domain) ?? null;
}
/**
 * Tells whether a policy row's expiry time has passed.
 *
 * @param {object|null} row - Policy row; only `expires_at` is read.
 * @returns {boolean} false when there is no row or no `expires_at`; true when
 *   `expires_at` is at or before the current time, or cannot be parsed.
 */
function isExpired(row) {
  if (!row || !row.expires_at) return false;

  const expiresAtMs = new Date(row.expires_at).getTime();
  // An unparseable timestamp yields NaN, and `NaN <= now` is always false, so
  // the row would never expire. Treat it as expired so the TTL still applies.
  if (Number.isNaN(expiresAtMs)) return true;

  return expiresAtMs <= Date.now();
}
/**
 * Returns the effective policy for a URL's domain right now.
 *
 * Expired entries silently revert to "auto" so we re-probe. The row is not
 * mutated here, since reads happen on every fetch and writes are expensive.
 *
 * @param {string} url - URL about to be fetched.
 * @returns {{domain: string, policy: string, wasExpired?: boolean, previous?: string}}
 *   `policy` is "auto" when no row exists or the row has expired; in the
 *   expired case `wasExpired` is true and `previous` holds the stored policy.
 *   Otherwise `policy` is the stored policy.
 */
function getEffectivePolicy(url) {
  const domain = getDomain(url);
  const row = loadRow(domain);

  if (!row) return { domain, policy: "auto" };

  if (isExpired(row)) {
    return { domain, policy: "auto", wasExpired: true, previous: row.policy };
  }

  return { domain, policy: row.policy };
}
/**
 * Merges `updates` over the stored row for `domain` (or over default values
 * when no row exists) and upserts the result.
 *
 * @param {string} domain - Hostname key of the row to write.
 * @param {object} updates - Partial row. For every field except `expires_at`,
 *   a null/undefined value means "keep the current value". For `expires_at`
 *   only undefined means "keep"; an explicit null clears the expiry.
 * @returns {void}
 */
function writeRow(domain, updates) {
  const current = loadRow(domain) ?? {
    policy: "auto",
    consecutive_plain_failures: 0,
    consecutive_browser_failures: 0,
    plain_success_count: 0,
    browser_success_count: 0,
    expires_at: null,
  };

  // Field-wise merge: take the update when provided, else the current value.
  const pick = (field) => updates[field] ?? current[field];

  // null is a meaningful expires_at ("no expiry"), so only undefined falls
  // back to the current value here.
  const expiresAt = updates.expires_at !== undefined ? updates.expires_at : current.expires_at;

  // Argument order must match the positional placeholders of upsertPolicy.
  upsertPolicy.run(
    domain,
    pick("policy"),
    pick("consecutive_plain_failures"),
    pick("consecutive_browser_failures"),
    pick("plain_success_count"),
    pick("browser_success_count"),
    expiresAt
  );
}
/**
 * Records a successful plain fetch for the URL's domain: the policy is set to
 * "auto", the consecutive plain-failure counter is reset, the plain success
 * counter is incremented and any expiry is cleared.
 *
 * @param {string} url - URL that was fetched. Does nothing when its domain
 *   cannot be determined.
 * @returns {void}
 */
function recordPlainSuccess(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const previousSuccesses = loadRow(domain)?.plain_success_count ?? 0;

  writeRow(domain, {
    policy: "auto",
    consecutive_plain_failures: 0,
    plain_success_count: previousSuccesses + 1,
    expires_at: null,
  });
}
/**
 * Records a failed plain fetch for the URL's domain. The consecutive
 * plain-failure counter is incremented. Once it reaches
 * PLAIN_FAILURE_THRESHOLD the domain is also marked "browser_only", with an
 * expiry BROWSER_ONLY_TTL_MS from now.
 *
 * @param {string} url - URL that failed. Does nothing when its domain cannot
 *   be determined.
 * @returns {void}
 */
function recordPlainFailure(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const failures = (loadRow(domain)?.consecutive_plain_failures ?? 0) + 1;
  const updates = { consecutive_plain_failures: failures };

  if (failures >= PLAIN_FAILURE_THRESHOLD) {
    // Every failure at or above the threshold sets a fresh expiry.
    updates.policy = "browser_only";
    updates.expires_at = new Date(Date.now() + BROWSER_ONLY_TTL_MS).toISOString();
  }

  writeRow(domain, updates);
}
/**
 * Records a successful browser fetch for the URL's domain: the consecutive
 * browser-failure counter is reset and the browser success counter is
 * incremented. Policy, expiry and the plain-failure counter are left as is.
 *
 * @param {string} url - URL that was fetched. Does nothing when its domain
 *   cannot be determined.
 * @returns {void}
 */
function recordBrowserSuccess(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const previousSuccesses = loadRow(domain)?.browser_success_count ?? 0;

  // A browser success doesn't reset the plain-failure counter — plain fetch
  // is still broken for this domain, we just confirmed the browser path works.
  // Policy stays browser_only until the TTL expires and we re-probe plain.
  writeRow(domain, {
    consecutive_browser_failures: 0,
    browser_success_count: previousSuccesses + 1,
  });
}
/**
 * Records a failed browser fetch for the URL's domain. The consecutive
 * browser-failure counter is incremented. Once it reaches
 * BROWSER_FAILURE_THRESHOLD the domain is also marked "blocked", with an
 * expiry BLOCKED_TTL_MS from now.
 *
 * @param {string} url - URL that failed. Does nothing when its domain cannot
 *   be determined.
 * @returns {void}
 */
function recordBrowserFailure(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const failures = (loadRow(domain)?.consecutive_browser_failures ?? 0) + 1;
  const updates = { consecutive_browser_failures: failures };

  if (failures >= BROWSER_FAILURE_THRESHOLD) {
    // Every failure at or above the threshold sets a fresh expiry.
    updates.policy = "blocked";
    updates.expires_at = new Date(Date.now() + BLOCKED_TTL_MS).toISOString();
  }

  writeRow(domain, updates);
}
module.exports = {
|
|
getEffectivePolicy,
|
|
recordPlainSuccess,
|
|
recordPlainFailure,
|
|
recordBrowserSuccess,
|
|
recordBrowserFailure,
|
|
};
|