863 lines
24 KiB
Dart
863 lines
24 KiB
Dart
import "dart:convert";
|
|
import "dart:io";
|
|
import "dart:typed_data";
|
|
|
|
import "package:html/dom.dart" as dom;
|
|
import "package:html/parser.dart" as html_parser;
|
|
import "package:path/path.dart" as path;
|
|
|
|
import "../api/openrouter_client.dart";
|
|
import "base_tool.dart";
|
|
|
|
/// Longest URL, in characters, accepted by `_normalizeUrl`.
const int _maxUrlLength = 2000;

/// Largest response body, in bytes, that `_readResponseBytes` buffers (10 MiB).
const int _maxFetchBytes = 10 << 20;

/// Character budget `_truncateContent` applies to extracted page text.
const int _maxPromptContentChars = 100 * 1000;

/// Number of redirect hops `_fetchUrl` follows before giving up.
const int _maxRedirects = 10;

/// Age at which a cached fetch result stops being served.
const Duration _cacheTtl = Duration(minutes: 15);

/// Maximum number of results held in the fetch cache.
const int _maxCacheEntries = 64;
|
|
|
|
/// Cache keys ordered from least to most recently used.
final List<_CacheKey> _cacheOrder = [];

/// Cached fetch results, looked up by key.
final Map<_CacheKey, _CacheEntry> _cache = {};
|
|
|
|
/// Hosts whose every path `_isPreapprovedUrl` treats as pre-approved.
///
/// Matching is by exact, lower-cased host name.
const _preapprovedHosts = <String>{
  "platform.claude.com",
  "code.claude.com",
  "modelcontextprotocol.io",
  "docs.python.org",
  "en.cppreference.com",
  "docs.oracle.com",
  "learn.microsoft.com",
  "developer.mozilla.org",
  "go.dev",
  "pkg.go.dev",
  "www.php.net",
  "docs.swift.org",
  "kotlinlang.org",
  "ruby-doc.org",
  "doc.rust-lang.org",
  "www.typescriptlang.org",
  "react.dev",
  "angular.io",
  "vuejs.org",
  "nextjs.org",
  "expressjs.com",
  "nodejs.org",
  "bun.sh",
  "getbootstrap.com",
  "tailwindcss.com",
  "redux.js.org",
  "webpack.js.org",
  "jestjs.io",
  "reactrouter.com",
  "docs.djangoproject.com",
  "flask.palletsprojects.com",
  "fastapi.tiangolo.com",
  "pandas.pydata.org",
  "numpy.org",
  "www.tensorflow.org",
  "pytorch.org",
  "scikit-learn.org",
  "matplotlib.org",
  "requests.readthedocs.io",
  "jupyter.org",
  "laravel.com",
  "symfony.com",
  "wordpress.org",
  "docs.spring.io",
  "hibernate.org",
  "tomcat.apache.org",
  "gradle.org",
  "maven.apache.org",
  "asp.net",
  "dotnet.microsoft.com",
  "nuget.org",
  "reactnative.dev",
  "docs.flutter.dev",
  "developer.apple.com",
  "developer.android.com",
  "keras.io",
  "spark.apache.org",
  "huggingface.co",
  "www.kaggle.com",
  "www.mongodb.com",
  "redis.io",
  "www.postgresql.org",
  "dev.mysql.com",
  "www.sqlite.org",
  "graphql.org",
  "prisma.io",
  "docs.aws.amazon.com",
  "cloud.google.com",
  "kubernetes.io",
  "www.docker.com",
  "www.terraform.io",
  "www.ansible.com",
  "docs.netlify.com",
  "devcenter.heroku.com",
  "cypress.io",
  "selenium.dev",
  "docs.unity.com",
  "docs.unrealengine.com",
  "git-scm.com",
  "nginx.org",
  "httpd.apache.org",
};
|
|
|
|
/// Hosts that are pre-approved only beneath the listed path prefixes.
///
/// `_isPreapprovedUrl` accepts a path that equals a prefix or continues it
/// after a `/`.
const _preapprovedPathPrefixes = <String, List<String>>{
  "github.com": ["/anthropics"],
  "vercel.com": ["/docs"],
};
|
|
|
|
class WebFetchTool extends BaseTool {
|
|
/// Identifier of this tool.
@override
final String name = "WebFetch";

/// One-sentence summary of what this tool does.
@override
final String description =
    "Fetch content from a URL, convert it to readable markdown-like text, "
    "and answer a prompt about that page.";
|
|
|
|
/// Fetches the page named by `input["url"]` and answers `input["prompt"]`
/// about it.
///
/// Keys read from [input]:
/// * `url`, `prompt` - read with `requireString`, then trimmed;
/// * `_api_key`, `_model`, `_permission_mode` - optional strings, defaulting
///   to `""`, `"openrouter/auto"` and `"default"`;
/// * `_allow_rules`, `_ask_rules`, `_deny_rules` - optional string lists.
///
/// Returns the text built by `_formatOutput`: a header with URL, status, byte
/// count and duration, followed by either the model's answer or, when the
/// fetch ended in a cross-host redirect, the redirect notice.
///
/// Throws [ArgumentError] when the prompt is empty and [StateError] when no
/// API key is supplied. Errors raised by URL validation, permission checks,
/// fetching and summarising propagate unchanged.
@override
Future<String> execute(Map<String, dynamic> input) async {
  final rawUrl = requireString(input, "url").trim();
  final prompt = requireString(input, "prompt").trim();
  final apiKey = optionalString(input, "_api_key") ?? "";
  final model = optionalString(input, "_model") ?? "openrouter/auto";
  final permissionMode = optionalString(input, "_permission_mode") ?? "default";
  final allowRules = _readStringList(input["_allow_rules"]);
  final askRules = _readStringList(input["_ask_rules"]);
  final denyRules = _readStringList(input["_deny_rules"]);

  if (prompt.isEmpty) {
    throw ArgumentError("prompt must not be empty");
  }
  if (apiKey.isEmpty) {
    throw StateError("WebFetch requires an OpenRouter API key");
  }

  final url = _normalizeUrl(rawUrl);
  final uri = Uri.parse(url);
  _enforcePermissions(
    uri: uri,
    permissionMode: permissionMode,
    allowRules: allowRules,
    askRules: askRules,
    denyRules: denyRules,
  );

  // A Stopwatch is monotonic; subtracting two DateTime.now() readings can
  // give a wrong (even negative) duration if the system clock is adjusted.
  final stopwatch = Stopwatch()..start();
  final fetched = await _fetchUrl(url);
  stopwatch.stop();
  final durationMs = stopwatch.elapsedMilliseconds;

  if (fetched.isRedirectNotice) {
    return _formatOutput(
      fetched: fetched,
      durationMs: durationMs,
      result: fetched.content,
    );
  }

  // Same-host redirects are followed, so the page actually read may live
  // under a different path than the one requested. Pre-approval must hold
  // for both, otherwise a redirect could leave a pre-approved path prefix.
  final isPreapproved =
      _isPreapprovedUrl(url) && _isPreapprovedUrl(fetched.finalUrl);

  final result = await _summarizeFetchedContent(
    apiKey: apiKey,
    model: model,
    fetched: fetched,
    prompt: prompt,
    isPreapproved: isPreapproved,
  );

  return _formatOutput(
    fetched: fetched,
    durationMs: durationMs,
    result: result,
  );
}
|
|
|
|
/// Validates [rawUrl] and returns it as an https URL string.
///
/// `http` URLs are rewritten to `https`; every other scheme is rejected.
///
/// Throws [ArgumentError] when the URL is empty, longer than
/// `_maxUrlLength`, unparseable, has no scheme, carries user info, has no
/// host, uses an unsupported scheme, or names a local/private host.
String _normalizeUrl(String rawUrl) {
  final lengthOk = rawUrl.isNotEmpty && rawUrl.length <= _maxUrlLength;
  if (!lengthOk) {
    throw ArgumentError("Invalid URL");
  }

  Uri target;
  try {
    target = Uri.parse(rawUrl);
  } catch (_) {
    throw ArgumentError("Invalid URL: $rawUrl");
  }

  if (!target.hasScheme) {
    throw ArgumentError("URL must include a scheme");
  }

  final hasCredentials = target.userInfo.isNotEmpty;
  if (hasCredentials || target.host.isEmpty) {
    throw ArgumentError("Invalid URL");
  }

  switch (target.scheme) {
    case "https":
      break;
    case "http":
      // Plain http is upgraded rather than rejected.
      target = target.replace(scheme: "https");
      break;
    default:
      throw ArgumentError("Only https URLs are supported");
  }

  if (_isLocalOrPrivateHost(target.host.toLowerCase())) {
    throw ArgumentError("Fetching local or private network URLs is not allowed");
  }

  return target.toString();
}
|
|
|
|
/// Applies the deny / allow / ask rule lists to [uri].
///
/// Returns normally when access is permitted: when [permissionMode] is
/// `bypassPermissions` or `dontAsk`, when an allow rule matches, or when no
/// rule matches at all. Deny rules are consulted first, then allow rules,
/// then ask rules.
///
/// Throws [StateError] when a deny rule matches, or when an ask rule matches
/// and no allow rule does.
void _enforcePermissions({
  required Uri uri,
  required String permissionMode,
  required List<String> allowRules,
  required List<String> askRules,
  required List<String> denyRules,
}) {
  const bypassModes = <String>{"bypassPermissions", "dontAsk"};
  if (bypassModes.contains(permissionMode)) {
    return;
  }

  bool anyMatches(List<String> rules) =>
      rules.any((rule) => _matchesRule(rule, uri));

  final domainRule = "domain:${uri.host.toLowerCase()}";

  if (anyMatches(denyRules)) {
    throw StateError("WebFetch denied access to $domainRule.");
  }
  if (anyMatches(allowRules)) {
    return;
  }
  if (anyMatches(askRules)) {
    throw StateError(
      "WebFetch requires permission for $domainRule. Add an allow rule to proceed.",
    );
  }
}
|
|
|
|
/// Whether the permission rule [rawRule] applies to the host of [uri].
///
/// A rule is `domain:<pattern>`, optionally wrapped as
/// `WebFetch(domain:<pattern>)`. The pattern is compared case-insensitively
/// and may be an exact host, `*.suffix` (host ends with `.suffix`) or
/// `prefix.*` (host starts with `prefix.`). Anything else never matches.
bool _matchesRule(String rawRule, Uri uri) {
  const wrapperOpen = "WebFetch(";
  const domainTag = "domain:";

  var body = rawRule.trim();
  final isWrapped = body.startsWith(wrapperOpen) && body.endsWith(")");
  if (isWrapped) {
    body = body.substring(wrapperOpen.length, body.length - 1);
  }
  if (!body.startsWith(domainTag)) {
    return false;
  }

  final pattern = body.substring(domainTag.length).toLowerCase();
  if (pattern.isEmpty) {
    return false;
  }

  final host = uri.host.toLowerCase();
  if (host == pattern) {
    return true;
  }
  if (pattern.startsWith("*.")) {
    // Keep the leading dot so "*.a.com" does not match "nota.com".
    return host.endsWith(pattern.substring(1));
  }
  if (pattern.endsWith(".*")) {
    // Keep the trailing dot so "a.*" does not match "ab.com".
    return host.startsWith(pattern.substring(0, pattern.length - 1));
  }
  return false;
}
|
|
|
|
/// Downloads [originalUrl] and returns its extracted, readable content.
///
/// A cached result younger than `_cacheTtl` is returned without any network
/// access. Otherwise the URL is requested with GET and redirects are handled
/// by hand:
/// * a redirect to the same host (ignoring a leading `www.`) is followed, up
///   to `_maxRedirects` hops;
/// * a redirect to any other host is not followed; a notice telling the
///   caller to retry with the new URL is returned (and cached) instead.
///
/// Bodies classified as binary are additionally written to a temporary file.
///
/// Throws [StateError] after too many redirects. Network, timeout, size-limit
/// and content-extraction errors propagate unchanged.
Future<_FetchedContent> _fetchUrl(String originalUrl) async {
  _pruneCache();
  final cacheKey = _CacheKey(originalUrl);
  final cached = _cache[cacheKey];
  if (cached != null && DateTime.now().difference(cached.fetchedAt) < _cacheTtl) {
    _touchCacheEntry(cacheKey);
    return cached.content;
  }

  final httpClient = HttpClient()..connectionTimeout = const Duration(seconds: 60);
  try {
    var currentUrl = Uri.parse(originalUrl);
    final originalComparableHost = _stripWww(currentUrl.host);

    for (var redirectCount = 0; redirectCount <= _maxRedirects; redirectCount++) {
      final request = await httpClient.getUrl(currentUrl);
      // dart:io follows redirects on its own unless told otherwise, which
      // would hide every 3xx response from the host check below and let a
      // redirect silently reach a different host.
      request.followRedirects = false;
      request.headers.set("Accept", "text/markdown, text/html, text/plain, */*");
      request.headers.set("User-Agent", "clawd_code/0.1.0 (WebFetch)");

      final response = await request.close().timeout(const Duration(seconds: 60));
      final statusCode = response.statusCode;
      final statusText = response.reasonPhrase;
      final location = response.headers.value(HttpHeaders.locationHeader);

      // response.isRedirect is consulted as well so that every redirect
      // status dart:io used to follow automatically is still recognised.
      final isRedirectStatus = _isRedirect(statusCode) || response.isRedirect;

      if (isRedirectStatus && location != null) {
        // The redirect body is never used; consume it so the connection is
        // released instead of being held until the client is closed.
        await response.drain<void>().timeout(const Duration(seconds: 60));

        final redirectUrl = currentUrl.resolve(location);
        if (_stripWww(redirectUrl.host) != originalComparableHost) {
          final redirectNotice = _FetchedContent(
            finalUrl: currentUrl.toString(),
            statusCode: statusCode,
            reasonPhrase: statusText,
            bytes: 0,
            contentType: "text/plain",
            content:
                "REDIRECT DETECTED: The URL redirects to a different host.\n\n"
                "Original URL: $currentUrl\n"
                "Redirect URL: $redirectUrl\n"
                "Status: $statusCode $statusText\n\n"
                "To complete your request, use WebFetch again with these parameters:\n"
                "- url: \"$redirectUrl\"",
            isRedirectNotice: true,
          );
          _storeCacheEntry(cacheKey, redirectNotice);
          return redirectNotice;
        }

        currentUrl = redirectUrl;
        continue;
      }

      final bytes = await _readResponseBytes(response);
      final contentType =
          response.headers.contentType?.mimeType ?? "application/octet-stream";
      final isBinary = _looksBinary(contentType, bytes);
      final persistedBinary = isBinary
          ? await _persistBinaryContent(bytes, contentType)
          : null;
      final decodedText = _decodeBody(bytes, isBinary: isBinary);
      final readableContent = _extractReadableContent(
        decodedText,
        contentType: contentType,
        url: currentUrl.toString(),
      );

      final fetched = _FetchedContent(
        finalUrl: currentUrl.toString(),
        statusCode: statusCode,
        reasonPhrase: statusText,
        bytes: bytes.length,
        contentType: contentType,
        content: readableContent,
        persistedBinaryPath: persistedBinary?.path,
        persistedBinarySize: persistedBinary?.size,
      );
      _storeCacheEntry(cacheKey, fetched);
      return fetched;
    }

    throw StateError("Too many redirects");
  } finally {
    // The client is single-use and everything needed has been read (or an
    // error is propagating), so drop any connection that is still open.
    httpClient.close(force: true);
  }
}
|
|
|
|
/// Collects the whole body of [response] into one byte list.
///
/// Throws [StateError] as soon as more than `_maxFetchBytes` bytes have been
/// received.
Future<List<int>> _readResponseBytes(HttpClientResponse response) async {
  final body = BytesBuilder(copy: false);
  var received = 0;
  await for (final chunk in response) {
    body.add(chunk);
    received += chunk.length;
    if (received > _maxFetchBytes) {
      throw StateError("Response exceeded $_maxFetchBytes bytes");
    }
  }
  return body.takeBytes();
}
|
|
|
|
/// Turns the response [bytes] into a string.
///
/// Text bodies are decoded as UTF-8, falling back to Latin-1 when the bytes
/// are not valid UTF-8. Bodies flagged [isBinary] are always decoded as
/// Latin-1.
String _decodeBody(List<int> bytes, {required bool isBinary}) {
  if (!isBinary) {
    try {
      return utf8.decode(bytes);
    } on FormatException {
      // Not valid UTF-8: use the Latin-1 decoding below.
    }
  }
  return latin1.decode(bytes, allowInvalid: true);
}
|
|
|
|
/// Saves [bytes] to a new file in the system temp directory.
///
/// The file is named `webfetch-<epoch ms>-<suffix><extension>`, with the
/// extension chosen from [contentType]. Saving is best-effort: returns the
/// file's path and size, or `null` if anything goes wrong.
Future<_PersistedBinary?> _persistBinaryContent(
  List<int> bytes,
  String contentType,
) async {
  try {
    final suffix = _extensionForMimeType(contentType);
    final stamp = DateTime.now().millisecondsSinceEpoch;
    final target = File(
      path.join(
        Directory.systemTemp.path,
        "webfetch-$stamp-${_randomSuffix()}$suffix",
      ),
    );
    await target.writeAsBytes(bytes, flush: true);
    return _PersistedBinary(path: target.path, size: bytes.length);
  } catch (_) {
    return null;
  }
}
|
|
|
|
/// Converts a decoded response body into markdown-like text, truncated to
/// the prompt budget.
///
/// Bodies that are not markup (markdown, plain text, and other `text/*`,
/// JSON or JavaScript types that are neither HTML nor XML) are returned
/// trimmed but otherwise verbatim. Everything else is parsed as HTML:
/// scripts, styles and embedded frames are removed, the title and meta
/// description are emitted first, and the first of `<article>`, `<main>`,
/// `<body>` or the document element is rendered.
///
/// Throws [StateError] when an HTML document yields no text. [url] is used
/// only in that error message.
String _extractReadableContent(
  String rawContent, {
  required String contentType,
  required String url,
}) {
  final isMarkup = contentType.contains("html") || contentType.contains("xml");
  final isOtherText = !isMarkup &&
      (contentType.startsWith("text/") ||
          contentType.contains("json") ||
          contentType.contains("javascript"));
  // The HTML parser drops anything that looks like a tag and whitespace gets
  // collapsed, which corrupts JSON, JavaScript, CSS or CSV. Such bodies are
  // passed through untouched, like markdown and plain text.
  if (contentType.contains("markdown") ||
      contentType.contains("plain") ||
      isOtherText) {
    return _truncateContent(rawContent.trim());
  }

  final document = html_parser.parse(rawContent);
  for (final node in document.querySelectorAll("script,style,noscript,svg,iframe")) {
    node.remove();
  }

  final title = document.querySelector("title")?.text.trim();
  final description = document
      .querySelector('meta[name="description"], meta[property="og:description"]')
      ?.attributes["content"]
      ?.trim();

  final root =
      document.querySelector("article") ??
      document.querySelector("main") ??
      document.body ??
      document.documentElement;

  if (root == null) {
    throw StateError("No readable content found at $url");
  }

  final buffer = StringBuffer();
  if (title != null && title.isNotEmpty) {
    buffer.writeln("# $title");
    buffer.writeln();
  }
  if (description != null && description.isNotEmpty) {
    buffer.writeln(description);
    buffer.writeln();
  }

  for (final node in root.nodes) {
    _writeNode(node, buffer, listDepth: 0, inPre: false);
  }

  var result = buffer.toString();
  // Block elements each add their own blank lines; squeeze the surplus.
  result = result.replaceAll(RegExp(r"\n{3,}"), "\n\n").trim();
  result = _decodeHtmlEntities(result);
  if (result.isEmpty) {
    throw StateError("No readable content found at $url");
  }

  return _truncateContent(result);
}
|
|
|
|
/// Appends a markdown-like rendering of [node] to [buffer].
///
/// Text nodes are written with every whitespace run collapsed to one space
/// (verbatim when [inPre] is true); whitespace-only text is dropped. Elements
/// are rendered according to their tag. Tags without a dedicated rule just
/// render their children, surrounded by newlines for common container tags.
/// Nodes that are neither text nor elements are ignored.
///
/// [listDepth] is the number of spaces written before each list item marker.
void _writeNode(
  dom.Node node,
  StringBuffer buffer, {
  required int listDepth,
  required bool inPre,
}) {
  String squash(String value) => value.replaceAll(RegExp(r"\s+"), " ");

  if (node is dom.Text) {
    final text = inPre ? node.text : squash(node.text);
    if (text.trim().isNotEmpty) {
      buffer.write(text);
    }
    return;
  }
  if (node is! dom.Element) {
    return;
  }

  const headingTags = <String>{"h1", "h2", "h3", "h4", "h5", "h6"};
  const containerTags = <String>{
    "article",
    "section",
    "main",
    "div",
    "header",
    "footer",
    "nav",
    "aside",
  };

  final tag = node.localName?.toLowerCase() ?? "";

  if (headingTags.contains(tag)) {
    // The digit after "h" is the number of "#" characters.
    final level = int.tryParse(tag.substring(1)) ?? 1;
    buffer
      ..writeln()
      ..writeln("${"#" * level} ${node.text.trim()}")
      ..writeln();
  } else if (tag == "p") {
    _writeChildren(node, buffer, listDepth: listDepth, inPre: false);
    buffer.write("\n\n");
  } else if (tag == "br") {
    buffer.writeln();
  } else if (tag == "pre") {
    final code = node.text.trimRight();
    if (code.isNotEmpty) {
      buffer.write("\n```\n$code\n```\n\n");
    }
  } else if (tag == "code") {
    final code = squash(node.text).trim();
    if (code.isNotEmpty) {
      buffer.write("`$code`");
    }
  } else if (tag == "ul" || tag == "ol") {
    final ordered = tag == "ol";
    final indent = " " * listDepth;
    var number = 0;
    buffer.writeln();
    // Only direct <li> children become items; anything else is skipped.
    for (final item in node.children) {
      if (item.localName != "li") {
        continue;
      }
      number++;
      final marker = ordered ? "$number." : "-";
      buffer.write("$indent$marker ");
      _writeChildren(item, buffer, listDepth: listDepth + 1, inPre: false);
      buffer.writeln();
    }
    buffer.writeln();
  } else if (tag == "li") {
    // An <li> outside a list: render its content inline.
    _writeChildren(node, buffer, listDepth: listDepth, inPre: false);
  } else if (tag == "a") {
    final label = squash(node.text).trim();
    final href = node.attributes["href"]?.trim();
    if (label.isNotEmpty && href != null && href.isNotEmpty) {
      buffer.write("[$label]($href)");
    } else {
      _writeChildren(node, buffer, listDepth: listDepth, inPre: false);
    }
  } else if (tag == "blockquote") {
    final quote = node.text.trim();
    if (quote.isNotEmpty) {
      buffer
        ..writeln()
        ..writeln("> ${quote.replaceAll("\n", "\n> ")}")
        ..writeln();
    }
  } else if (tag == "table") {
    // Tables are flattened to a single line of text.
    final tableText = squash(node.text).trim();
    if (tableText.isNotEmpty) {
      buffer
        ..writeln()
        ..writeln(tableText)
        ..writeln();
    }
  } else if (tag == "hr") {
    buffer.write("\n---\n\n");
  } else {
    final isContainer = containerTags.contains(tag);
    if (isContainer) {
      buffer.writeln();
    }
    _writeChildren(node, buffer, listDepth: listDepth, inPre: inPre);
    if (isContainer) {
      buffer.writeln();
    }
  }
}
|
|
|
|
/// Renders every child node of [element] into [buffer], in document order,
/// passing [listDepth] and [inPre] through to `_writeNode`.
void _writeChildren(
  dom.Element element,
  StringBuffer buffer, {
  required int listDepth,
  required bool inPre,
}) {
  final children = element.nodes;
  for (var i = 0; i < children.length; i++) {
    _writeNode(children[i], buffer, listDepth: listDepth, inPre: inPre);
  }
}
|
|
|
|
/// Produces the answer to [prompt] for the page in [fetched].
///
/// When the page [isPreapproved], is served as markdown and fits within
/// `_maxPromptContentChars`, its content is returned as-is and no model is
/// called. Otherwise the page and the prompt are sent to [model] through an
/// OpenRouter client created with [apiKey], and the text blocks of the reply
/// are joined with newlines. Returns `"No response from model."` when the
/// reply contains no text.
Future<String> _summarizeFetchedContent({
  required String apiKey,
  required String model,
  required _FetchedContent fetched,
  required String prompt,
  required bool isPreapproved,
}) async {
  final canReturnVerbatim = isPreapproved &&
      fetched.contentType.contains("markdown") &&
      fetched.content.length <= _maxPromptContentChars;
  if (canReturnVerbatim) {
    return fetched.content;
  }

  final systemPrompt = isPreapproved
      ? "Provide a concise response based on the fetched content. Include relevant details and code examples when present."
      : "Provide a concise response based only on the fetched content. Use short quotes only when necessary.";
  final userPrompt = "URL: ${fetched.finalUrl}\n"
      "Content-Type: ${fetched.contentType}\n\n"
      "Web page content:\n---\n${fetched.content}\n---\n\n"
      "$prompt";

  final client = await OpenRouterClientFactory.create(apiKey: apiKey);
  try {
    final response = await client.createMessage(
      model: model,
      maxTokens: 2048,
      messages: <Map<String, dynamic>>[
        <String, dynamic>{"role": "system", "content": systemPrompt},
        <String, dynamic>{"role": "user", "content": userPrompt},
      ],
    );

    // Keep only non-empty text blocks; other block types are ignored.
    final pieces = <String>[];
    for (final block in response.content) {
      if (block is! Map<String, dynamic> || block["type"] != "text") {
        continue;
      }
      final text = block["text"];
      if (text is String && text.isNotEmpty) {
        pieces.add(text);
      }
    }

    final answer = pieces.join("\n").trim();
    if (answer.isEmpty) {
      return "No response from model.";
    }
    return answer;
  } finally {
    client.close();
  }
}
|
|
|
|
/// Builds the text returned to the caller.
///
/// The output is a header of `URL`, `Status`, `Bytes` and `Duration` lines,
/// plus a `Binary content saved` line when the body was written to disk,
/// followed by a blank line and the trimmed [result].
String _formatOutput({
  required _FetchedContent fetched,
  required int durationMs,
  required String result,
}) {
  final savedPath = fetched.persistedBinaryPath;
  final savedSize = fetched.persistedBinarySize;

  final lines = <String>[
    "URL: ${fetched.finalUrl}",
    "Status: ${fetched.statusCode} ${fetched.reasonPhrase}",
    "Bytes: ${fetched.bytes}",
    "Duration: ${_formatDuration(durationMs)}",
    if (savedPath != null && savedSize != null)
      "Binary content saved: $savedPath ($savedSize bytes)",
    "",
    result.trim(),
  ];
  return lines.join("\n");
}
|
|
|
|
/// Removes every cache entry that is at least `_cacheTtl` old, along with any
/// ordering key that no longer has an entry.
void _pruneCache() {
  // Entries fetched at or before this instant have expired.
  final cutoff = DateTime.now().subtract(_cacheTtl);
  _cacheOrder.removeWhere((key) {
    final fetchedAt = _cache[key]?.fetchedAt;
    if (fetchedAt != null && fetchedAt.isAfter(cutoff)) {
      return false;
    }
    _cache.remove(key);
    return true;
  });
}
|
|
|
|
/// Caches [content] under [key], marks it most recently used, and evicts the
/// least recently used entries while the cache holds more than
/// `_maxCacheEntries`.
void _storeCacheEntry(_CacheKey key, _FetchedContent content) {
  final entry = _CacheEntry(content: content, fetchedAt: DateTime.now());
  _cache[key] = entry;
  _touchCacheEntry(key);

  // The front of the order list is the least recently used key.
  for (var surplus = _cacheOrder.length - _maxCacheEntries; surplus > 0; surplus--) {
    _cache.remove(_cacheOrder.removeAt(0));
  }
}
|
|
|
|
/// Marks [key] as the most recently used cache key by moving it to the end of
/// the order list (adding it if it was not present).
void _touchCacheEntry(_CacheKey key) {
  _cacheOrder
    ..remove(key)
    ..add(key);
}
|
|
|
|
/// Whether [url] points at a pre-approved site.
///
/// True when the host is listed in `_preapprovedHosts`, or when it is listed
/// in `_preapprovedPathPrefixes` and the path equals one of its prefixes or
/// continues one after a `/`.
bool _isPreapprovedUrl(String url) {
  final target = Uri.parse(url);
  final host = target.host.toLowerCase();
  if (_preapprovedHosts.contains(host)) {
    return true;
  }

  final allowedPrefixes = _preapprovedPathPrefixes[host] ?? const <String>[];
  for (final prefix in allowedPrefixes) {
    // Require a "/" boundary so "/docs" does not approve "/docs-evil".
    if (target.path == prefix || target.path.startsWith("$prefix/")) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/// Whether [host] names this machine or a private network and must therefore
/// not be fetched.
///
/// Rejected (returns `true`):
/// * `localhost`, any single-label name, and names ending in `.local` or
///   `.localhost`;
/// * every IPv6 literal, including IPv4-mapped forms such as
///   `::ffff:127.0.0.1`;
/// * IPv4 literals in `0.0.0.0/8`, `10.0.0.0/8`, `127.0.0.0/8`,
///   `169.254.0.0/16`, `172.16.0.0/12` and `192.168.0.0/16`, and dotted
///   quads with an octet above 255.
///
/// Surrounding brackets and trailing dots are ignored, so `localhost.` and
/// `127.0.0.1.` are treated like their undotted forms. Only the literal host
/// text is inspected; no DNS lookup is made.
bool _isLocalOrPrivateHost(String host) {
  var name = host.trim().toLowerCase();
  if (name.startsWith("[") && name.endsWith("]")) {
    name = name.substring(1, name.length - 1);
  }
  // "localhost." and "127.0.0.1." resolve exactly like the undotted names.
  name = name.replaceFirst(RegExp(r"\.+$"), "");

  // A colon can only appear in an IPv6 literal. These are rejected wholesale
  // rather than by prefix: testing for a leading "fc"/"fd" on the raw host
  // would also hit ordinary domains such as "fcbarcelona.com", and literals
  // with an embedded dotted quad would escape the no-dot rule below.
  if (name.contains(":")) {
    return true;
  }

  if (name == "localhost" || !name.contains(".")) {
    return true;
  }
  if (name.endsWith(".local") || name.endsWith(".localhost")) {
    return true;
  }

  final dottedQuad = RegExp(r"^(\d{1,3}\.){3}\d{1,3}$");
  if (!dottedQuad.hasMatch(name)) {
    return false;
  }

  final octets = name.split(".").map(int.parse).toList();
  if (octets.any((octet) => octet > 255)) {
    // Not a valid address; refuse rather than guess how it would resolve.
    return true;
  }

  final first = octets[0];
  final second = octets[1];
  return first == 0 ||
      first == 10 ||
      first == 127 ||
      (first == 172 && second >= 16 && second <= 31) ||
      (first == 192 && second == 168) ||
      (first == 169 && second == 254);
}
|
|
|
|
/// Whether [statusCode] is an HTTP redirect that carries a `Location` to
/// follow: 301, 302, 303, 307 or 308.
///
/// 303 (See Other) is included; without it such a response would be treated
/// as the final page.
bool _isRedirect(int statusCode) {
  const redirectCodes = <int>{301, 302, 303, 307, 308};
  return redirectCodes.contains(statusCode);
}
|
|
|
|
/// Whether a body served as [contentType] should be treated as binary.
///
/// A body is text when the type starts with `text/` or mentions `json`,
/// `xml`, `javascript` or `xhtml`; every other type is binary.
///
/// [bytes] is accepted for interface compatibility but does not influence
/// the result.
// NOTE(review): this used to scan the first 256 bytes for a NUL byte, yet
// returned true whether or not one was found, so the scan had no effect and
// has been removed. If content sniffing for untyped responses was intended,
// it still needs to be designed - confirm the desired behaviour.
bool _looksBinary(String contentType, List<int> bytes) {
  const textMarkers = <String>["json", "xml", "javascript", "xhtml"];
  final isTextual =
      contentType.startsWith("text/") || textMarkers.any(contentType.contains);
  return !isTextual;
}
|
|
|
|
/// Limits [content] to `_maxPromptContentChars` UTF-16 code units.
///
/// Shorter content is returned unchanged. Longer content is cut and a
/// `[Content truncated due to length...]` marker is appended. The cut never
/// falls between the two halves of a surrogate pair, so the result may be
/// one code unit shorter than the limit.
String _truncateContent(String content) {
  if (content.length <= _maxPromptContentChars) {
    return content;
  }

  var cut = _maxPromptContentChars;
  // Cutting right after a high surrogate would leave half a character (for
  // example half an emoji), which is not valid text once encoded.
  final lastKept = content.codeUnitAt(cut - 1);
  if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
    cut--;
  }
  return "${content.substring(0, cut)}\n\n[Content truncated due to length...]";
}
|
|
|
|
/// Replaces a small set of HTML character references in [text] with the
/// characters they stand for: non-breaking space (as a plain space), `<`,
/// `>`, `"`, `'` (decimal, hexadecimal and named forms) and `&`.
///
/// The ampersand reference is decoded last, so an escaped reference is
/// unescaped one level only instead of being decoded twice.
String _decodeHtmlEntities(String text) {
  // The references are assembled from bare names at run time. Spelling them
  // out in full would let any tool that HTML-unescapes this source file turn
  // a pattern into its own replacement and silently disable the decoding.
  const replacements = <String, String>{
    "nbsp": " ",
    "lt": "<",
    "gt": ">",
    "quot": "\"",
    "#39": "'",
    "#x27": "'",
    "apos": "'",
    "amp": "&", // Must stay last; see the doc comment.
  };

  var decoded = text;
  for (final entry in replacements.entries) {
    decoded = decoded.replaceAll("&${entry.key};", entry.value);
  }
  return decoded;
}
|
|
|
|
/// Picks a file extension (with leading dot) for [contentType].
///
/// The first marker found in the content type wins, in the order pdf, zip,
/// png, jpeg, gif, webp, json. Returns `.bin` when none is present.
String _extensionForMimeType(String contentType) {
  // Insertion order is the matching priority.
  const extensionByMarker = <String, String>{
    "pdf": ".pdf",
    "zip": ".zip",
    "png": ".png",
    "jpeg": ".jpg",
    "gif": ".gif",
    "webp": ".webp",
    "json": ".json",
  };

  for (final candidate in extensionByMarker.entries) {
    if (contentType.contains(candidate.key)) {
      return candidate.value;
    }
  }
  return ".bin";
}
|
|
|
|
/// Extracts a clean list of strings from an untyped input [value].
///
/// Returns an empty list unless [value] is a [List]. Non-string elements are
/// skipped, strings are trimmed, and strings that are empty after trimming
/// are dropped. The returned list is fixed-length.
List<String> _readStringList(Object? value) {
  if (value is List) {
    final cleaned = <String>[];
    for (final item in value) {
      if (item is! String) {
        continue;
      }
      final trimmed = item.trim();
      if (trimmed.isNotEmpty) {
        cleaned.add(trimmed);
      }
    }
    return List<String>.of(cleaned, growable: false);
  }
  return const <String>[];
}
|
|
|
|
/// Returns a six-character suffix for temporary file names.
///
/// Despite the name the value is derived from the clock: it is the last six
/// base-36 digits of the current time in microseconds since the epoch.
String _randomSuffix() {
  final encoded = DateTime.now().microsecondsSinceEpoch.toRadixString(36);
  final start = encoded.length - 6;
  return encoded.substring(start);
}
|
|
|
|
/// Returns [host] without one leading `www.` label, if it has one.
String _stripWww(String host) {
  const prefix = "www.";
  return host.startsWith(prefix) ? host.substring(prefix.length) : host;
}
|
|
|
|
/// Formats [durationMs] for display: whole milliseconds below one second
/// (`250ms`), otherwise seconds with one decimal place (`1.5s`).
String _formatDuration(int durationMs) {
  final isSubSecond = durationMs < 1000;
  return isSubSecond
      ? "${durationMs}ms"
      : "${(durationMs / 1000).toStringAsFixed(1)}s";
}
|
|
}
|
|
|
|
/// Outcome of one fetch: response metadata plus the text extracted from it.
class _FetchedContent {
  /// URL of the last request made (after any followed redirects).
  final String finalUrl;

  /// HTTP status code of the response.
  final int statusCode;

  /// HTTP reason phrase of the response.
  final String reasonPhrase;

  /// Number of body bytes read; 0 for a redirect notice.
  final int bytes;

  /// MIME type of the response.
  final String contentType;

  /// Extracted readable text, or the notice text for a redirect notice.
  final String content;

  /// Where a binary body was saved, if it was.
  final String? persistedBinaryPath;

  /// Size in bytes of the saved binary body, if one was saved.
  final int? persistedBinarySize;

  /// Whether [content] is a cross-host redirect notice rather than a page.
  final bool isRedirectNotice;

  const _FetchedContent({
    required this.finalUrl,
    required this.statusCode,
    required this.reasonPhrase,
    required this.bytes,
    required this.contentType,
    required this.content,
    this.persistedBinaryPath,
    this.persistedBinarySize,
    this.isRedirectNotice = false,
  });
}
|
|
|
|
/// Location and size of a binary response body saved to disk.
class _PersistedBinary {
  /// Path of the saved file.
  final String path;

  /// Number of bytes written.
  final int size;

  const _PersistedBinary({required this.path, required this.size});
}
|
|
|
|
/// A cached fetch result together with the time it was stored.
class _CacheEntry {
  /// The cached result.
  final _FetchedContent content;

  /// When the entry was stored; compared against the cache TTL.
  final DateTime fetchedAt;

  const _CacheEntry({required this.content, required this.fetchedAt});
}
|
|
|
|
/// Key of the fetch cache; two keys are equal when their URLs are equal.
class _CacheKey {
  /// The URL this key stands for.
  final String url;

  const _CacheKey(this.url);

  @override
  int get hashCode => url.hashCode;

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) {
      return true;
    }
    return other is _CacheKey && other.url == url;
  }
}
|