Files
Auger/lib/utils/agrigator.dart

268 lines
8.9 KiB
Dart

import 'dart:convert';
import 'dart:math';

import 'package:http/http.dart' as http;
import 'package:xml/xml.dart';

import 'openai.dart';
/// Space-separated list of business/finance terms.
///
/// [generateKeywordEmbeddings] sends this text as the embedding input; the
/// resulting vector is what [isFeedItemRelevant] compares feed items against.
const String KEYWORDS = "Business news corporate earnings revenue profit stock market trading equity shares NYSE NASDAQ stock prices quarterly results annual reports CEO announcements executive leadership management changes board directors company strategy mergers acquisitions takeovers buyouts partnerships joint ventures business deals IPO initial public offerings venture capital funding investment rounds valuation startup unicorn enterprise technology product launches innovation R&D research development market expansion international business global markets trade agreements tariffs import export supply chain logistics manufacturing production operations facilities factories plants workforce hiring layoffs restructuring downsizing labor unions strikes employee relations workplace compensation benefits corporate governance shareholder activism proxy fights dividends stock buybacks analyst ratings price targets market capitalization revenue growth profit margins EBITDA cash flow debt financing credit ratings bonds corporate strategy competitive advantage market share industry trends sector analysis retail consumer goods e-commerce technology software hardware semiconductors pharmaceuticals biotech healthcare energy oil gas renewables automotive electric vehicles aerospace defense banking financial services insurance real estate construction infrastructure telecommunications media entertainment streaming gaming hospitality travel transportation logistics shipping airlines regulatory compliance antitrust competition policy lawsuits litigation settlements data breaches cybersecurity intellectual property patents trademarks brand value customer acquisition market positioning business models revenue streams profitability sustainability ESG environmental social governance";

/// Cached embedding of [KEYWORDS].
///
/// Stays `null` until [generateKeywordEmbeddings] completes successfully;
/// [isFeedItemRelevant] throws while it is still `null`.
List<double>? KEYWORD_EMBEDDINGS;
/// One entry taken from a feed: its headline, body text and link, plus an
/// optional embedding vector that can be attached after construction.
class FeedItem {
  /// Headline of the entry.
  final String title;

  /// Summary or body text of the entry.
  final String description;

  /// URL of the entry.
  final String link;

  /// Embedding vector for this entry, or `null` when none has been set.
  List<double>? embedding;

  FeedItem({
    required this.title,
    required this.description,
    required this.link,
    this.embedding,
  });

  /// Rebuilds an item from the map shape produced by [toJson].
  ///
  /// The `"embedding"` key is optional; when present it must be a list of
  /// numbers, each of which is converted to a `double`.
  FeedItem.fromJson(Map<String, dynamic> json)
      : title = json["title"],
        description = json["description"],
        link = json["link"],
        embedding = _embeddingFromJson(json["embedding"]);

  /// Converts a decoded JSON value into an embedding; `null` stays `null`.
  static List<double>? _embeddingFromJson(Object? raw) {
    if (raw == null) {
      return null;
    }
    return [for (final value in raw as List) (value as num).toDouble()];
  }

  /// Serialises the item; `"embedding"` is written only when one is set.
  Map<String, dynamic> toJson() {
    final json = <String, dynamic>{
      "title": title,
      "description": description,
      "link": link,
    };
    if (embedding != null) {
      json["embedding"] = embedding;
    }
    return json;
  }

  @override
  String toString() => "FeedItem(title: $title, link: $link)";
}
/// Parses an RSS document and returns one [FeedItem] per `<item>` element.
///
/// For each item the first direct `<title>`, `<link>` and `<description>`
/// child is read and trimmed. A missing title becomes `"Untitled"`; a missing
/// link or description becomes the empty string.
///
/// Throws whatever [XmlDocument.parse] throws when [rssXml] is not
/// well-formed XML.
List<FeedItem> parseRssFeed(String rssXml) {
  final document = XmlDocument.parse(rssXml);

  // Trimmed text of the first direct child named [tag], or null when absent.
  String? childText(XmlElement item, String tag) =>
      item.findElements(tag).firstOrNull?.innerText.trim();

  return [
    // Find the items anywhere in the RSS structure.
    for (final item in document.findAllElements("item"))
      FeedItem(
        title: childText(item, "title") ?? "Untitled",
        // The link is trimmed like the other fields: feeds frequently wrap
        // the URL in newlines/indentation, which would otherwise end up in
        // the stored link.
        link: childText(item, "link") ?? "",
        description: childText(item, "description") ?? "",
      ),
  ];
}
/// Parses an Atom document and returns one [FeedItem] per `<entry>` element.
///
/// * Title: first direct `<title>` child, trimmed; `"Untitled"` when absent.
/// * Link: the `href` of the first `<link>` whose `rel` is missing or
///   `"alternate"`; if no such link exists, the `href` of the first `<link>`
///   of any kind; the empty string when there is no usable `href`.
/// * Description: the trimmed `<summary>` when it is non-empty, otherwise the
///   trimmed `<content>`, otherwise the empty string.
///
/// Throws whatever [XmlDocument.parse] throws when [atomXml] is not
/// well-formed XML.
List<FeedItem> parseAtomFeed(String atomXml) {
  final document = XmlDocument.parse(atomXml);

  // Trimmed text of the first direct child named [tag], or null when absent.
  String? childText(XmlElement entry, String tag) =>
      entry.findElements(tag).firstOrNull?.innerText.trim();

  // Find the entries in the Atom feed.
  return [
    for (final entry in document.findAllElements("entry"))
      () {
        final links = entry.findElements("link").toList();
        // An entry may carry several links (self, replies, enclosure, ...).
        // Per the Atom spec a link without `rel` is an "alternate" link, and
        // that is the one pointing at the article itself.
        final articleLink = links.where((candidate) {
              final rel = candidate.getAttribute("rel");
              return rel == null || rel == "alternate";
            }).firstOrNull ??
            links.firstOrNull;

        final summary = childText(entry, "summary");
        final content = childText(entry, "content");
        // An empty <summary/> must not hide a populated <content>.
        final description = (summary != null && summary.isNotEmpty)
            ? summary
            : (content ?? "");

        return FeedItem(
          title: childText(entry, "title") ?? "Untitled",
          link: articleLink?.getAttribute("href")?.trim() ?? "",
          description: description,
        );
      }(),
  ];
}
/// Detects whether [feedXml] is Atom or RSS and delegates to the matching
/// parser.
///
/// A document containing a `feed` element is handed to [parseAtomFeed]; one
/// containing an `rss` or `channel` element is handed to [parseRssFeed].
/// Throws a [FormatException] when neither is found.
List<FeedItem> parseFeed(String feedXml) {
  final parsed = XmlDocument.parse(feedXml);

  // True when an element with the given name exists anywhere in the document.
  bool hasElement(String name) => parsed.findAllElements(name).isNotEmpty;

  // Atom is checked first.
  if (hasElement('feed')) {
    return parseAtomFeed(feedXml);
  }

  // Then RSS.
  final looksLikeRss = hasElement('rss') || hasElement('channel');
  if (!looksLikeRss) {
    // Neither format matched.
    throw FormatException('Unknown feed format. Expected RSS or Atom.');
  }
  return parseRssFeed(feedXml);
}
/// Downloads the feed at [feedUri] and parses it with [parseFeed].
///
/// Throws an [Exception] when the response status is not 200, and whatever
/// [parseFeed] throws when the body cannot be parsed.
Future<List<FeedItem>> fetchFeed(Uri feedUri) async {
  final response = await http.get(feedUri);
  if (response.statusCode != 200) {
    throw Exception("Failed to fetch feed: ${response.statusCode}");
  }

  // `response.body` falls back to latin1 when the Content-Type header carries
  // no charset, which garbles the non-ASCII text of feeds that are actually
  // UTF-8. So: trust an explicitly declared charset, otherwise try UTF-8
  // first and only fall back to the header-driven decoding when the bytes are
  // not valid UTF-8.
  final contentType = response.headers["content-type"] ?? "";
  final declaresCharset = contentType.toLowerCase().contains("charset=");

  String feedXml;
  if (declaresCharset) {
    feedXml = response.body;
  } else {
    try {
      feedXml = utf8.decode(response.bodyBytes);
    } on FormatException {
      feedXml = response.body;
    }
  }

  // Parse the XML response.
  return parseFeed(feedXml);
}
/// Fetches every feed in [feedUris] concurrently and returns all of their
/// items in one list, in the order of [feedUris].
///
/// A feed that fails to download or parse is reported with `print` and
/// contributes no items; the remaining feeds are unaffected.
Future<List<FeedItem>> fetchFeeds(List<Uri> feedUris) async {
  // Fetches a single feed, turning any failure into an empty result.
  Future<List<FeedItem>> fetchOrEmpty(Uri uri) async {
    try {
      return await fetchFeed(uri);
    } catch (e) {
      print("Error fetching feed $uri: $e");
      return <FeedItem>[];
    }
  }

  final perFeed = await Future.wait(feedUris.map(fetchOrEmpty));
  return [for (final feedItems in perFeed) ...feedItems];
}
/// Requests an embedding for [item] and stores it in `item.embedding`.
///
/// The text sent is the item's title and description joined by a space. A
/// fresh `OpenAI` client is created with [apiKey] for the call and disposed
/// afterwards. Any error is printed and swallowed, in which case the item's
/// embedding is left as it was.
Future<void> generateEmbedding(FeedItem item, String apiKey) async {
  final client = OpenAI(apiKey: apiKey);
  try {
    final result = await client.embeddings.create(
      model: "text-embedding-3-small",
      // Combine title and description.
      input: "${item.title} ${item.description}",
    );
    final vectors = result.data;
    if (vectors.isNotEmpty) {
      item.embedding = vectors.first.embedding;
    }
  } catch (e) {
    print("Error generatng embedding: $e");
  } finally {
    // Release the client whether or not the request succeeded.
    client.dispose();
  }
}
/// Generates embeddings for every item in [items] concurrently.
///
/// Each item is handed to [generateEmbedding]; the returned future completes
/// once all of those calls have completed.
Future<void> generateEmbeddings(List<FeedItem> items, String apiKey) async {
  final pending = <Future<void>>[
    for (final item in items) generateEmbedding(item, apiKey),
  ];
  await Future.wait(pending);
}
/// Populates [KEYWORD_EMBEDDINGS] with the embedding of [KEYWORDS].
///
/// Does nothing when the embedding has already been generated. Otherwise a
/// fresh `OpenAI` client is created with [apiKey], used for one request and
/// disposed. Any error is printed and swallowed, leaving
/// [KEYWORD_EMBEDDINGS] unset.
Future<void> generateKeywordEmbeddings(String apiKey) async {
  final alreadyGenerated = KEYWORD_EMBEDDINGS != null;
  if (alreadyGenerated) {
    return;
  }

  final client = OpenAI(apiKey: apiKey);
  try {
    final result = await client.embeddings.create(
      model: "text-embedding-3-small",
      input: KEYWORDS,
    );
    final vectors = result.data;
    if (vectors.isNotEmpty) {
      KEYWORD_EMBEDDINGS = vectors.first.embedding;
    }
  } catch (e) {
    print("Error generating keyword embeddings: $e");
  } finally {
    // Release the client whether or not the request succeeded.
    client.dispose();
  }
}
/// Whether [item] is similar enough to the keyword embedding.
///
/// Returns `true` when the cosine similarity between the item's embedding and
/// [KEYWORD_EMBEDDINGS] is at least [threshold]. Throws an [Exception] when
/// either embedding is missing.
bool isFeedItemRelevant(FeedItem item, [double threshold = 0.25]) {
  // Copy to locals so the null checks promote both values.
  final itemVector = item.embedding;
  final keywordVector = KEYWORD_EMBEDDINGS;
  if (itemVector == null || keywordVector == null) {
    throw Exception("Embeddings not available for comparison.");
  }
  return cosineSimilarity(itemVector, keywordVector) >= threshold;
}
/// Returns the cosine similarity of [vecA] and [vecB].
///
/// Returns `0.0` when either vector has zero magnitude (which includes two
/// empty vectors). Throws an [ArgumentError] when the lengths differ.
double cosineSimilarity(List<double> vecA, List<double> vecB) {
  final length = vecA.length;
  if (length != vecB.length) {
    throw ArgumentError("Vectors must be of the same length");
  }

  var dot = 0.0;
  var sumSquaresA = 0.0;
  var sumSquaresB = 0.0;
  for (var index = 0; index < length; index++) {
    final a = vecA[index];
    final b = vecB[index];
    dot += a * b;
    sumSquaresA += a * a;
    sumSquaresB += b * b;
  }

  // A zero vector has no direction; report "no similarity" instead of
  // dividing by zero.
  final hasZeroVector = sumSquaresA == 0 || sumSquaresB == 0;
  return hasZeroVector ? 0.0 : dot / (sqrt(sumSquaresA) * sqrt(sumSquaresB));
}
/// Partitions [items] into groups of items whose embeddings are similar.
///
/// Items are visited in order. Each item that has not yet been placed in a
/// group becomes the anchor of a new group. Every later item whose cosine
/// similarity to the anchor is at least [similarityThreshold] then
///
/// * joins the anchor's group if it is not in any group yet, or
/// * moves over from its current group if it is strictly more similar to this
///   anchor than to the anchor of the group it is currently in.
///
/// Every item ends up in exactly one group. Groups are returned in the order
/// their anchors appear in [items], with the anchor first.
///
/// Throws when an item taking part in a comparison has a `null` embedding,
/// and propagates the [ArgumentError] from [cosineSimilarity] when two
/// embeddings differ in length.
List<List<FeedItem>> groupFeedItemsByEvent(List<FeedItem> items, [double similarityThreshold = 0.7]) {
  // For every placed item (keyed by its index in `items`): the group it sits
  // in and its similarity to that group's anchor.
  final placement = <int, ({int groupIndex, double similarity})>{};
  final groups = <List<FeedItem>>[];

  for (var i = 0; i < items.length; i++) {
    // An item that already joined an earlier group must not also anchor a
    // group of its own; doing so would overwrite its placement and leave it
    // in two groups at once.
    if (placement.containsKey(i)) {
      continue;
    }

    // Start a new group with item i as the anchor (similarity 1.0 to itself).
    final groupIndex = groups.length;
    final group = <FeedItem>[items[i]];
    groups.add(group);
    placement[i] = (groupIndex: groupIndex, similarity: 1.0);

    // Compare the anchor against all later items.
    for (var j = i + 1; j < items.length; j++) {
      final similarity = cosineSimilarity(
        items[i].embedding!,
        items[j].embedding!,
      );
      if (similarity < similarityThreshold) {
        continue;
      }

      final current = placement[j];
      if (current != null) {
        if (similarity <= current.similarity) {
          // j is already in a group that fits at least as well; leave it.
          continue;
        }
        // This anchor is a better match: take j out of its old group.
        groups[current.groupIndex].remove(items[j]);
      }

      group.add(items[j]);
      placement[j] = (groupIndex: groupIndex, similarity: similarity);
    }
  }

  // Defensive: drop any group that ended up empty.
  return groups.where((candidate) => candidate.isNotEmpty).toList();
}