// Duriin-API/src/domainPolicy.js
//
// 181 lines
// 5.1 KiB
// JavaScript

const db = require("./db");
// thresholds — kept in code rather than config because tuning these without
// understanding the consequences is a recipe for either a thundering herd
// against blocked domains or wasted plain-fetch attempts forever
const PLAIN_FAILURE_THRESHOLD = 5;
const BROWSER_FAILURE_THRESHOLD = 5;
const BROWSER_ONLY_TTL_MS = 7 * 24 * 60 * 60 * 1000;
const BLOCKED_TTL_MS = 24 * 60 * 60 * 1000;
// prepared lookup of the stored policy row for a single domain; the domain is
// bound to the lone `?` placeholder. returns every column loadRow's callers
// read: the policy, both failure streaks, both success counts and expires_at
const selectPolicy = db.prepare(`
SELECT domain, policy, consecutive_plain_failures, consecutive_browser_failures,
plain_success_count, browser_success_count, expires_at, updated_at
FROM domain_fetch_policy
WHERE domain = ?
`);
// prepared upsert keyed on domain: inserts a fresh row, or on a domain
// conflict overwrites the policy, all four counters and expires_at with the
// incoming values. takes seven bound values in column order (domain, policy,
// plain failures, browser failures, plain successes, browser successes,
// expires_at); updated_at is never passed in — sqlite stamps it with
// datetime('now') on both the insert and the update path
const upsertPolicy = db.prepare(`
INSERT INTO domain_fetch_policy (
domain, policy, consecutive_plain_failures, consecutive_browser_failures,
plain_success_count, browser_success_count, expires_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
ON CONFLICT(domain) DO UPDATE SET
policy = excluded.policy,
consecutive_plain_failures = excluded.consecutive_plain_failures,
consecutive_browser_failures = excluded.consecutive_browser_failures,
plain_success_count = excluded.plain_success_count,
browser_success_count = excluded.browser_success_count,
expires_at = excluded.expires_at,
updated_at = datetime('now')
`);
/**
 * Extracts the normalized hostname used as the key for a domain's fetch policy.
 *
 * The hostname is lowercased and a single trailing dot is dropped, so the
 * fully-qualified spelling "example.com." shares one policy row with
 * "example.com" instead of accumulating its own separate failure counters.
 *
 * @param {string} url - absolute URL to inspect
 * @returns {string} the hostname, or "" when the url cannot be parsed or has no host
 */
function getDomain(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    // unparseable input is an expected case rather than an error: the rest of
    // this module treats "" as "no domain" and skips policy tracking for it
    return "";
  }
  // "example.com." and "example.com" name the same host; key them identically
  return hostname.endsWith(".") ? hostname.slice(0, -1) : hostname;
}
/**
 * Reads the stored policy row for a domain.
 *
 * @param {string} domain - hostname key; a falsy value skips the query entirely
 * @returns {object|null} the row as returned by the select statement, or null
 *   when the domain is empty or nothing is stored for it
 */
function loadRow(domain) {
  if (domain) {
    const row = selectPolicy.get(domain);
    if (row) {
      return row;
    }
  }
  return null;
}
/**
 * Tells whether a stored policy row has outlived its TTL.
 *
 * Rows without an expires_at never expire. A row whose expires_at cannot be
 * parsed as a date is reported as expired, so a corrupted timestamp makes the
 * domain get re-probed instead of pinning its current policy forever.
 *
 * @param {{ expires_at?: string | null } | null | undefined} row - policy row, if any
 * @returns {boolean} true when the row's expiry instant is now or in the past
 */
function isExpired(row) {
  if (!row || !row.expires_at) return false;
  const expiresAtMs = new Date(row.expires_at).getTime();
  // an invalid date yields NaN, and NaN <= anything is false — without this
  // guard such a row would silently count as "never expires"
  if (Number.isNaN(expiresAtMs)) return true;
  return expiresAtMs <= Date.now();
}
/**
 * Resolves the policy that applies to a URL's domain right now.
 *
 * An expired entry is reported as "auto" so the domain gets re-probed. The
 * stored row is deliberately left untouched here: this runs on every fetch,
 * and writes are expensive.
 *
 * @param {string} url - URL about to be fetched
 * @returns {{ domain: string, policy: string, wasExpired?: boolean, previous?: string }}
 *   the domain key and its effective policy; wasExpired and previous are only
 *   present when a stored entry had expired
 */
function getEffectivePolicy(url) {
  const domain = getDomain(url);
  const stored = loadRow(domain);

  if (stored && isExpired(stored)) {
    return { domain, policy: "auto", wasExpired: true, previous: stored.policy };
  }

  // no row at all means the domain has never been classified
  return { domain, policy: stored ? stored.policy : "auto" };
}
/**
 * Persists a partial update to a domain's policy row.
 *
 * The current row (or a zeroed "auto" row when none exists) is merged with
 * `updates` and the result is upserted as a whole.
 *
 * @param {string} domain - hostname key of the row
 * @param {object} updates - columns to change. For policy and the counters a
 *   null/undefined value means "keep the stored value". For expires_at only
 *   undefined means "keep"; an explicit null clears the expiry.
 * @returns {void}
 */
function writeRow(domain, updates) {
  const current = loadRow(domain) || {
    policy: "auto",
    consecutive_plain_failures: 0,
    consecutive_browser_failures: 0,
    plain_success_count: 0,
    browser_success_count: 0,
    expires_at: null,
  };

  // listed in the positional order the upsert statement binds them
  const nullishMergedColumns = [
    "policy",
    "consecutive_plain_failures",
    "consecutive_browser_failures",
    "plain_success_count",
    "browser_success_count",
  ];
  const mergedValues = nullishMergedColumns.map(
    (column) => updates[column] ?? current[column]
  );

  // expires_at is merged separately because null is a meaningful value for it
  const expiresAt =
    updates.expires_at === undefined ? current.expires_at : updates.expires_at;

  upsertPolicy.run(domain, ...mergedValues, expiresAt);
}
/**
 * Records that a plain (non-browser) fetch of the URL succeeded.
 *
 * The domain goes back to "auto" with no expiry, its plain-failure streak is
 * cleared and its plain-success count is bumped. URLs without a usable domain
 * are ignored.
 *
 * @param {string} url - URL that was fetched
 * @returns {void}
 */
function recordPlainSuccess(url) {
  const domain = getDomain(url);
  if (domain) {
    const successesSoFar = loadRow(domain)?.plain_success_count || 0;
    writeRow(domain, {
      policy: "auto",
      consecutive_plain_failures: 0,
      plain_success_count: successesSoFar + 1,
      expires_at: null,
    });
  }
}
/**
 * Records that a plain (non-browser) fetch of the URL failed.
 *
 * The consecutive plain-failure streak grows by one. Once it reaches
 * PLAIN_FAILURE_THRESHOLD the domain is switched to "browser_only" with an
 * expiry BROWSER_ONLY_TTL_MS from now; below the threshold only the streak is
 * written. URLs without a usable domain are ignored.
 *
 * @param {string} url - URL that was fetched
 * @returns {void}
 */
function recordPlainFailure(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const streak = (loadRow(domain)?.consecutive_plain_failures || 0) + 1;
  const updates = { consecutive_plain_failures: streak };

  if (streak >= PLAIN_FAILURE_THRESHOLD) {
    updates.policy = "browser_only";
    updates.expires_at = new Date(Date.now() + BROWSER_ONLY_TTL_MS).toISOString();
  }

  writeRow(domain, updates);
}
/**
 * Records that a browser-based fetch of the URL succeeded.
 *
 * Clears the consecutive browser-failure streak and bumps the browser-success
 * count. URLs without a usable domain are ignored.
 *
 * @param {string} url - URL that was fetched
 * @returns {void}
 */
function recordBrowserSuccess(url) {
  const domain = getDomain(url);
  if (domain) {
    const successesSoFar = loadRow(domain)?.browser_success_count || 0;
    // Policy, expiry and the plain-failure streak are intentionally left out
    // of the update: a browser success only confirms the browser path works,
    // plain fetch is still broken for this domain. The policy stays
    // browser_only until its TTL expires and plain fetch is re-probed.
    writeRow(domain, {
      consecutive_browser_failures: 0,
      browser_success_count: successesSoFar + 1,
    });
  }
}
/**
 * Records that a browser-based fetch of the URL failed.
 *
 * The consecutive browser-failure streak grows by one. Once it reaches
 * BROWSER_FAILURE_THRESHOLD the domain is marked "blocked" with an expiry
 * BLOCKED_TTL_MS from now; below the threshold only the streak is written.
 * URLs without a usable domain are ignored.
 *
 * @param {string} url - URL that was fetched
 * @returns {void}
 */
function recordBrowserFailure(url) {
  const domain = getDomain(url);
  if (!domain) return;

  const streak = (loadRow(domain)?.consecutive_browser_failures || 0) + 1;
  const thresholdReached = streak >= BROWSER_FAILURE_THRESHOLD;

  writeRow(
    domain,
    thresholdReached
      ? {
          policy: "blocked",
          consecutive_browser_failures: streak,
          expires_at: new Date(Date.now() + BLOCKED_TTL_MS).toISOString(),
        }
      : { consecutive_browser_failures: streak }
  );
}
// public surface of the module: the policy lookup used before a fetch, plus
// one recorder per fetch outcome (plain/browser × success/failure).
// getDomain, loadRow, isExpired and writeRow are not exported
module.exports = {
getEffectivePolicy,
recordPlainSuccess,
recordPlainFailure,
recordBrowserSuccess,
recordBrowserFailure,
};