Refactor project structure and enhance stock watchlist functionality
This commit is contained in:
+36
-244
@@ -1,268 +1,60 @@
|
||||
import 'dart:math';
|
||||
|
||||
import 'package:http/http.dart' as http;
|
||||
import 'package:xml/xml.dart';
|
||||
import 'openai.dart';
|
||||
|
||||
const String KEYWORDS = "Business news corporate earnings revenue profit stock market trading equity shares NYSE NASDAQ stock prices quarterly results annual reports CEO announcements executive leadership management changes board directors company strategy mergers acquisitions takeovers buyouts partnerships joint ventures business deals IPO initial public offerings venture capital funding investment rounds valuation startup unicorn enterprise technology product launches innovation R&D research development market expansion international business global markets trade agreements tariffs import export supply chain logistics manufacturing production operations facilities factories plants workforce hiring layoffs restructuring downsizing labor unions strikes employee relations workplace compensation benefits corporate governance shareholder activism proxy fights dividends stock buybacks analyst ratings price targets market capitalization revenue growth profit margins EBITDA cash flow debt financing credit ratings bonds corporate strategy competitive advantage market share industry trends sector analysis retail consumer goods e-commerce technology software hardware semiconductors pharmaceuticals biotech healthcare energy oil gas renewables automotive electric vehicles aerospace defense banking financial services insurance real estate construction infrastructure telecommunications media entertainment streaming gaming hospitality travel transportation logistics shipping airlines regulatory compliance antitrust competition policy lawsuits litigation settlements data breaches cybersecurity intellectual property patents trademarks brand value customer acquisition market positioning business models revenue streams profitability sustainability ESG environmental social governance";
|
||||
List<double>? KEYWORD_EMBEDDINGS;
|
||||
|
||||
class FeedItem {
|
||||
final int? id;
|
||||
final String title;
|
||||
final String description;
|
||||
|
||||
// the extracted article body — may be empty for older rows or sources
|
||||
// that havent been backfilled yet.
|
||||
final String content;
|
||||
|
||||
final String link;
|
||||
List<double>? embedding;
|
||||
final String? source;
|
||||
final DateTime? pubDate;
|
||||
|
||||
FeedItem({
|
||||
this.id,
|
||||
required this.title,
|
||||
required this.description,
|
||||
this.content = "",
|
||||
required this.link,
|
||||
this.embedding,
|
||||
this.source,
|
||||
this.pubDate,
|
||||
});
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
return "FeedItem(title: $title, link: $link)";
|
||||
}
|
||||
String toString() => 'FeedItem(title: $title, link: $link)';
|
||||
|
||||
FeedItem.fromJson(Map<String, dynamic> json)
|
||||
: title = json["title"],
|
||||
description = json["description"],
|
||||
link = json["link"],
|
||||
embedding = json["embedding"] != null
|
||||
? (json["embedding"] as List).map<double>((e) => (e as num).toDouble()).toList()
|
||||
: null;
|
||||
: id = json['id'] is int ? json['id'] as int : int.tryParse('${json['id']}'),
|
||||
title = json['title'] ?? '',
|
||||
description = json['description'] ?? '',
|
||||
content = (json['content'] ?? '').toString(),
|
||||
link = json['link'] ?? '',
|
||||
source = json['source'],
|
||||
pubDate = json['pub_date'] != null ? DateTime.tryParse(json['pub_date']) : null;
|
||||
|
||||
Map<String, dynamic> toJson() {
|
||||
return {
|
||||
"title": title,
|
||||
"description": description,
|
||||
"link": link,
|
||||
if (embedding != null) "embedding": embedding,
|
||||
if (id != null) 'id': id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
if (content.isNotEmpty) 'content': content,
|
||||
'link': link,
|
||||
if (source != null) 'source': source,
|
||||
if (pubDate != null) 'pub_date': pubDate!.toIso8601String(),
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
List<FeedItem> parseRssFeed(String rssXml) {
|
||||
final document = XmlDocument.parse(rssXml);
|
||||
|
||||
// find items in the RSS structre
|
||||
final items = document.findAllElements("item");
|
||||
|
||||
return items.map((item) {
|
||||
final title = item.findElements("title").firstOrNull?.innerText.trim() ?? "Untitled";
|
||||
final link = item.findElements("link").firstOrNull?.innerText ?? "";
|
||||
final description = item.findElements("description").firstOrNull?.innerText.trim() ?? "";
|
||||
|
||||
|
||||
return FeedItem(
|
||||
title: title,
|
||||
link: link,
|
||||
description: description,
|
||||
);
|
||||
}).toList();
|
||||
}
|
||||
|
||||
List<FeedItem> parseAtomFeed(String atomXml) {
|
||||
final document = XmlDocument.parse(atomXml);
|
||||
|
||||
// find entrys in atom feed
|
||||
final entries = document.findAllElements("entry");
|
||||
|
||||
return entries.map((entry) {
|
||||
final title = entry.findElements("title").firstOrNull?.innerText.trim() ?? "Untitled";
|
||||
final linkElement = entry.findElements("link").firstOrNull;
|
||||
final link = linkElement?.getAttribute("href") ?? "";
|
||||
final summary = entry.findElements("summary").firstOrNull?.innerText.trim();
|
||||
final content = entry.findElements("content").firstOrNull?.innerText.trim();
|
||||
|
||||
final description = (summary ?? content ?? "").trim();
|
||||
|
||||
return FeedItem(
|
||||
title: title,
|
||||
link: link,
|
||||
description: description,
|
||||
);
|
||||
}).toList();
|
||||
}
|
||||
|
||||
List<FeedItem> parseFeed(String feedXml) {
|
||||
final document = XmlDocument.parse(feedXml);
|
||||
|
||||
// Check if it's an Atom feed
|
||||
if (document.findAllElements('feed').isNotEmpty) {
|
||||
return parseAtomFeed(feedXml);
|
||||
}
|
||||
|
||||
// Check if it's an RSS feed
|
||||
if (document.findAllElements('rss').isNotEmpty ||
|
||||
document.findAllElements('channel').isNotEmpty) {
|
||||
return parseRssFeed(feedXml);
|
||||
}
|
||||
|
||||
// Unknown feed format
|
||||
throw FormatException('Unknown feed format. Expected RSS or Atom.');
|
||||
}
|
||||
|
||||
Future<List<FeedItem>> fetchFeed(Uri feedUri) async {
|
||||
final response = await http.get(feedUri);
|
||||
|
||||
|
||||
if (response.statusCode != 200) {
|
||||
throw Exception("Failed to fetch feed: ${response.statusCode}");
|
||||
}
|
||||
|
||||
// parse the XML response
|
||||
return parseFeed(response.body);
|
||||
}
|
||||
|
||||
Future<List<FeedItem>> fetchFeeds(List<Uri> feedUris) async {
|
||||
List<FeedItem> allItems = [];
|
||||
|
||||
final results = await Future.wait(
|
||||
feedUris.map((uri) => fetchFeed(uri).catchError((e) {
|
||||
print("Error fetching feed $uri: $e");
|
||||
return <FeedItem>[];
|
||||
}))
|
||||
);
|
||||
|
||||
for (final items in results) {
|
||||
allItems.addAll(items);
|
||||
}
|
||||
|
||||
return allItems;
|
||||
}
|
||||
|
||||
|
||||
// generete embeddng for a feed item
|
||||
Future<void> generateEmbedding(FeedItem item, String apiKey) async {
|
||||
final openai = OpenAI(apiKey: apiKey);
|
||||
// median pub date of a cluster — falls back to now if none have dates
|
||||
DateTime medianPubDate(List<FeedItem> articles) {
|
||||
final dates = articles
|
||||
.where((a) => a.pubDate != null)
|
||||
.map((a) => a.pubDate!)
|
||||
.toList()
|
||||
..sort();
|
||||
|
||||
// combine tittle and descriptin
|
||||
final textToEmbed = "${item.title} ${item.description}";
|
||||
|
||||
try {
|
||||
final response = await openai.embeddings.create(
|
||||
model: "text-embedding-3-small",
|
||||
input: textToEmbed,
|
||||
);
|
||||
|
||||
if (response.data.isNotEmpty) {
|
||||
item.embedding = response.data.first.embedding;
|
||||
}
|
||||
} catch (e) {
|
||||
print("Error generatng embedding: $e");
|
||||
} finally {
|
||||
openai.dispose();
|
||||
}
|
||||
if (dates.isEmpty) return DateTime.now();
|
||||
return dates[dates.length ~/ 2];
|
||||
}
|
||||
|
||||
// generate embedings for multiple feed items
|
||||
Future<void> generateEmbeddings(List<FeedItem> items, String apiKey) async {
|
||||
await Future.wait(
|
||||
items.map((item) => generateEmbedding(item, apiKey))
|
||||
);
|
||||
}
|
||||
|
||||
Future<void> generateKeywordEmbeddings(String apiKey) async {
|
||||
|
||||
if (KEYWORD_EMBEDDINGS != null) {
|
||||
return; // already generated
|
||||
}
|
||||
|
||||
final openai = OpenAI(apiKey: apiKey);
|
||||
|
||||
try {
|
||||
final response = await openai.embeddings.create(
|
||||
model: "text-embedding-3-small",
|
||||
input: KEYWORDS,
|
||||
);
|
||||
|
||||
if (response.data.isNotEmpty) {
|
||||
KEYWORD_EMBEDDINGS = response.data.first.embedding;
|
||||
}
|
||||
} catch (e) {
|
||||
print("Error generating keyword embeddings: $e");
|
||||
} finally {
|
||||
openai.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
bool isFeedItemRelevant(FeedItem item, [double threshold = 0.25]) {
|
||||
if (item.embedding == null || KEYWORD_EMBEDDINGS == null) {
|
||||
throw Exception("Embeddings not available for comparison.");
|
||||
}
|
||||
|
||||
double similarity = cosineSimilarity(item.embedding!, KEYWORD_EMBEDDINGS!);
|
||||
|
||||
return similarity >= threshold;
|
||||
}
|
||||
|
||||
double cosineSimilarity(List<double> vecA, List<double> vecB) {
|
||||
if (vecA.length != vecB.length) {
|
||||
throw ArgumentError("Vectors must be of the same length");
|
||||
}
|
||||
|
||||
double dotProduct = 0.0;
|
||||
double magnitudeA = 0.0;
|
||||
double magnitudeB = 0.0;
|
||||
|
||||
for (int i = 0; i < vecA.length; i++) {
|
||||
dotProduct += vecA[i] * vecB[i];
|
||||
magnitudeA += vecA[i] * vecA[i];
|
||||
magnitudeB += vecB[i] * vecB[i];
|
||||
}
|
||||
|
||||
if (magnitudeA == 0 || magnitudeB == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return dotProduct / (sqrt(magnitudeA) * sqrt(magnitudeB));
|
||||
}
|
||||
|
||||
List<List<FeedItem>> groupFeedItemsByEvent(List<FeedItem> items, [double similarityThreshold = 0.7]) {
|
||||
// Track which group each item belongs to and with what similarity
|
||||
Map<int, ({int groupIndex, double similarity})> itemGrouping = {};
|
||||
List<List<FeedItem>> groupedItems = [];
|
||||
|
||||
for (int i = 0; i < items.length; i++) {
|
||||
// Create a new group with item i as the anchor
|
||||
List<FeedItem> currentGroup = [items[i]];
|
||||
int currentGroupIndex = groupedItems.length;
|
||||
|
||||
// item i belongs to its own group with similarity 1.0
|
||||
itemGrouping[i] = (groupIndex: currentGroupIndex, similarity: 1.0);
|
||||
|
||||
// Check all later items
|
||||
for (int j = i + 1; j < items.length; j++) {
|
||||
double similarity = cosineSimilarity(
|
||||
items[i].embedding!,
|
||||
items[j].embedding!,
|
||||
);
|
||||
|
||||
if (similarity >= similarityThreshold) {
|
||||
// Check if j should join this group
|
||||
if (!itemGrouping.containsKey(j)) {
|
||||
// j hasn't been grouped yet, add it
|
||||
currentGroup.add(items[j]);
|
||||
itemGrouping[j] = (groupIndex: currentGroupIndex, similarity: similarity);
|
||||
} else if (similarity > itemGrouping[j]!.similarity) {
|
||||
// j is in another group but this is a better match
|
||||
// Remove from old group
|
||||
int oldGroupIndex = itemGrouping[j]!.groupIndex;
|
||||
groupedItems[oldGroupIndex].remove(items[j]);
|
||||
|
||||
// Add to this group
|
||||
currentGroup.add(items[j]);
|
||||
itemGrouping[j] = (groupIndex: currentGroupIndex, similarity: similarity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
groupedItems.add(currentGroup);
|
||||
}
|
||||
|
||||
// Filter out empty groups (items may have been moved out)
|
||||
return groupedItems.where((group) => group.isNotEmpty).toList();
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
import "package:capstone_project/services/duriin_service.dart";
|
||||
import "package:capstone_project/utils/agrigator.dart";
|
||||
|
||||
|
||||
// one event cluster built from a seed article + its semantic neighbours.
|
||||
// distances are measured from the seed; the seed itself has distance 0.
|
||||
class EventCluster {
|
||||
final FeedItem seed;
|
||||
final List<FeedItem> articles;
|
||||
|
||||
// article.id -> distance from seed. missing id / missing distance means
|
||||
// we dont have a number for it (falls back to nulls in the stats).
|
||||
final Map<int, double> distancesFromSeed;
|
||||
|
||||
EventCluster({
|
||||
required this.seed,
|
||||
required this.articles,
|
||||
required this.distancesFromSeed,
|
||||
});
|
||||
|
||||
|
||||
// convenience: summary stats over the (known) distances in this cluster.
|
||||
// returns nulls if we have no distances to report (eg a singleton cluster).
|
||||
({double? min, double? avg, double? max}) distanceStats() {
|
||||
final vals = distancesFromSeed.values.toList();
|
||||
if (vals.isEmpty) return (min: null, avg: null, max: null);
|
||||
|
||||
double lo = vals.first;
|
||||
double hi = vals.first;
|
||||
double sum = 0.0;
|
||||
for (final v in vals) {
|
||||
if (v < lo) lo = v;
|
||||
if (v > hi) hi = v;
|
||||
sum += v;
|
||||
}
|
||||
|
||||
return (min: lo, avg: sum / vals.length, max: hi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class EventClusterer {
|
||||
final DuriinService _duriin;
|
||||
|
||||
|
||||
// neighbours whose distance from the seed is strictly greater than this
|
||||
// are dropped as off-topic. calibrated against observed api distances:
|
||||
// genuinely-same-event pairs land around 0.58–0.62, different-event-same-
|
||||
// topic pairs start around 0.70+. tighten if clusters start merging distinct
|
||||
// events, loosen if obvious same-event stories end up as singletons.
|
||||
final double distanceThreshold;
|
||||
|
||||
// hard cap on articles per cluster — keeps prompt size predictable
|
||||
final int maxClusterSize;
|
||||
|
||||
// how many neighbours to ask the api for per seed
|
||||
final int neighbourFetchLimit;
|
||||
|
||||
EventClusterer({
|
||||
DuriinService? duriin,
|
||||
this.distanceThreshold = 0.60,
|
||||
this.maxClusterSize = 10,
|
||||
this.neighbourFetchLimit = 25,
|
||||
}) : _duriin = duriin ?? DuriinService();
|
||||
|
||||
|
||||
Future<List<EventCluster>> cluster(List<FeedItem> articles) async {
|
||||
if (articles.isEmpty) return [];
|
||||
|
||||
// index by id for fast membership checks when neighbours come back
|
||||
final byId = <int, FeedItem>{};
|
||||
final withoutId = <FeedItem>[];
|
||||
|
||||
for (final a in articles) {
|
||||
if (a.id != null) {
|
||||
byId[a.id!] = a;
|
||||
} else {
|
||||
withoutId.add(a);
|
||||
}
|
||||
}
|
||||
|
||||
// work through newest first so the first signal surfaced is the freshest
|
||||
final queue = byId.values.toList()
|
||||
..sort((a, b) {
|
||||
final da = a.pubDate ?? DateTime.fromMillisecondsSinceEpoch(0);
|
||||
final db = b.pubDate ?? DateTime.fromMillisecondsSinceEpoch(0);
|
||||
return db.compareTo(da);
|
||||
});
|
||||
|
||||
final clustered = <int>{};
|
||||
final clusters = <EventCluster>[];
|
||||
|
||||
for (final seed in queue) {
|
||||
if (clustered.contains(seed.id)) continue;
|
||||
|
||||
final neighbours = await _duriin.findSimilar(
|
||||
seed.id!,
|
||||
limit: neighbourFetchLimit,
|
||||
);
|
||||
|
||||
|
||||
// keep only neighbours we actually fetched (same ticker / window)
|
||||
// and that are close enough to count as the same event.
|
||||
final members = <FeedItem>[seed];
|
||||
final distances = <int, double>{};
|
||||
|
||||
// dedupe just in case the api returns the seed in its own neighbour list
|
||||
final memberIds = <int>{seed.id!};
|
||||
|
||||
for (final hit in neighbours) {
|
||||
final nid = hit.item.id;
|
||||
if (nid == null) continue;
|
||||
if (nid == seed.id) continue;
|
||||
if (memberIds.contains(nid)) continue;
|
||||
|
||||
final inWindow = byId[nid];
|
||||
if (inWindow == null) continue;
|
||||
|
||||
final d = hit.distance;
|
||||
if (d == null) continue;
|
||||
if (d > distanceThreshold) continue;
|
||||
|
||||
members.add(inWindow);
|
||||
distances[nid] = d;
|
||||
memberIds.add(nid);
|
||||
|
||||
if (members.length >= maxClusterSize) break;
|
||||
}
|
||||
|
||||
for (final id in memberIds) {
|
||||
clustered.add(id);
|
||||
}
|
||||
|
||||
clusters.add(EventCluster(
|
||||
seed: seed,
|
||||
articles: members,
|
||||
distancesFromSeed: distances,
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
// articles with no id (shouldnt happen post-api-update, but just in case)
|
||||
// each becomes its own singleton cluster so we never silently drop them.
|
||||
for (final orphan in withoutId) {
|
||||
clusters.add(EventCluster(
|
||||
seed: orphan,
|
||||
articles: [orphan],
|
||||
distancesFromSeed: const {},
|
||||
));
|
||||
}
|
||||
|
||||
return clusters;
|
||||
}
|
||||
}
|
||||
@@ -1,23 +1,19 @@
|
||||
import 'dart:async';
|
||||
import 'dart:convert';
|
||||
import 'package:http/http.dart' as http;
|
||||
|
||||
/// OpenAI API client for Dart
|
||||
class OpenAI {
|
||||
/// OpenRouter API client for Dart
|
||||
class OpenRouter {
|
||||
final String apiKey;
|
||||
final String baseUrl;
|
||||
final http.Client _client;
|
||||
|
||||
OpenAI({
|
||||
OpenRouter({
|
||||
required this.apiKey,
|
||||
this.baseUrl = 'https://api.openai.com/v1',
|
||||
this.baseUrl = 'https://openrouter.ai/api/v1',
|
||||
http.Client? client,
|
||||
}) : _client = client ?? http.Client();
|
||||
|
||||
/// Access to chat completions API
|
||||
ChatCompletions get chat => ChatCompletions(this);
|
||||
|
||||
/// Access to embeddings API
|
||||
Embeddings get embeddings => Embeddings(this);
|
||||
|
||||
void dispose() {
|
||||
@@ -25,26 +21,19 @@ class OpenAI {
|
||||
}
|
||||
}
|
||||
|
||||
/// Chat completions API
|
||||
class ChatCompletions {
|
||||
final OpenAI _openai;
|
||||
final OpenRouter _openRouter;
|
||||
|
||||
ChatCompletions(this._openai);
|
||||
ChatCompletions(this._openRouter);
|
||||
|
||||
/// Access to completions endpoint
|
||||
Completions get completions => Completions(_openai);
|
||||
Completions get completions => Completions(_openRouter);
|
||||
}
|
||||
|
||||
/// Completions endpoint
|
||||
class Completions {
|
||||
final OpenAI _openai;
|
||||
final OpenRouter _openRouter;
|
||||
|
||||
Completions(this._openai);
|
||||
Completions(this._openRouter);
|
||||
|
||||
/// Create a chat completion
|
||||
///
|
||||
/// If [stream] is true, returns a Stream of ChatCompletionChunk
|
||||
/// If [stream] is false, returns a single ChatCompletion
|
||||
Future<dynamic> create({
|
||||
required String model,
|
||||
required List<dynamic> messages,
|
||||
@@ -84,17 +73,17 @@ class Completions {
|
||||
}
|
||||
|
||||
Future<ChatCompletion> _createCompletion(Map<String, dynamic> body) async {
|
||||
final response = await _openai._client.post(
|
||||
Uri.parse('${_openai.baseUrl}/chat/completions'),
|
||||
final response = await _openRouter._client.post(
|
||||
Uri.parse('${_openRouter.baseUrl}/chat/completions'),
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer ${_openai.apiKey}',
|
||||
'Authorization': 'Bearer ${_openRouter.apiKey}',
|
||||
},
|
||||
body: jsonEncode(body),
|
||||
);
|
||||
|
||||
if (response.statusCode != 200) {
|
||||
throw OpenAIException(
|
||||
throw OpenRouterException(
|
||||
statusCode: response.statusCode,
|
||||
message: response.body,
|
||||
);
|
||||
@@ -107,21 +96,21 @@ class Completions {
|
||||
Map<String, dynamic> body) async* {
|
||||
final request = http.Request(
|
||||
'POST',
|
||||
Uri.parse('${_openai.baseUrl}/chat/completions'),
|
||||
Uri.parse('${_openRouter.baseUrl}/chat/completions'),
|
||||
);
|
||||
|
||||
request.headers.addAll({
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer ${_openai.apiKey}',
|
||||
'Authorization': 'Bearer ${_openRouter.apiKey}',
|
||||
});
|
||||
|
||||
request.body = jsonEncode(body);
|
||||
|
||||
final streamedResponse = await _openai._client.send(request);
|
||||
final streamedResponse = await _openRouter._client.send(request);
|
||||
|
||||
if (streamedResponse.statusCode != 200) {
|
||||
final body = await streamedResponse.stream.bytesToString();
|
||||
throw OpenAIException(
|
||||
throw OpenRouterException(
|
||||
statusCode: streamedResponse.statusCode,
|
||||
message: body,
|
||||
);
|
||||
@@ -133,10 +122,10 @@ class Completions {
|
||||
|
||||
await for (final line in stream) {
|
||||
if (line.isEmpty) continue;
|
||||
if (line.startsWith(':')) continue; // Skip comments
|
||||
if (line.startsWith(':')) continue;
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
|
||||
final data = line.substring(6); // Remove 'data: ' prefix
|
||||
final data = line.substring(6);
|
||||
|
||||
if (data == '[DONE]') {
|
||||
break;
|
||||
@@ -146,14 +135,12 @@ class Completions {
|
||||
final json = jsonDecode(data);
|
||||
yield ChatCompletionChunk.fromJson(json);
|
||||
} catch (e) {
|
||||
// Skip malformed chunks
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Chat message
|
||||
class ChatMessage {
|
||||
final String role;
|
||||
final String content;
|
||||
@@ -183,7 +170,6 @@ class ChatMessage {
|
||||
ChatMessage(role: 'assistant', content: content);
|
||||
}
|
||||
|
||||
/// Stream options
|
||||
class StreamOptions {
|
||||
final bool includeUsage;
|
||||
|
||||
@@ -194,7 +180,6 @@ class StreamOptions {
|
||||
};
|
||||
}
|
||||
|
||||
/// Chat completion response (non-streaming)
|
||||
class ChatCompletion {
|
||||
final String id;
|
||||
final String object;
|
||||
@@ -224,7 +209,6 @@ class ChatCompletion {
|
||||
);
|
||||
}
|
||||
|
||||
/// Chat completion chunk (streaming)
|
||||
class ChatCompletionChunk {
|
||||
final String id;
|
||||
final String object;
|
||||
@@ -255,7 +239,6 @@ class ChatCompletionChunk {
|
||||
);
|
||||
}
|
||||
|
||||
/// Choice in non-streaming response
|
||||
class Choice {
|
||||
final int index;
|
||||
final ChatMessage message;
|
||||
@@ -274,7 +257,6 @@ class Choice {
|
||||
);
|
||||
}
|
||||
|
||||
/// Choice in streaming response
|
||||
class ChunkChoice {
|
||||
final int index;
|
||||
final Delta? delta;
|
||||
@@ -293,7 +275,6 @@ class ChunkChoice {
|
||||
);
|
||||
}
|
||||
|
||||
/// Delta content in streaming chunks
|
||||
class Delta {
|
||||
final String? role;
|
||||
final String? content;
|
||||
@@ -309,7 +290,6 @@ class Delta {
|
||||
);
|
||||
}
|
||||
|
||||
/// Token usage information
|
||||
class Usage {
|
||||
final int? promptTokens;
|
||||
final int? completionTokens;
|
||||
@@ -334,32 +314,29 @@ class Usage {
|
||||
};
|
||||
}
|
||||
|
||||
/// OpenAI API exception
|
||||
class OpenAIException implements Exception {
|
||||
class OpenRouterException implements Exception {
|
||||
final int statusCode;
|
||||
final String message;
|
||||
|
||||
OpenAIException({
|
||||
OpenRouterException({
|
||||
required this.statusCode,
|
||||
required this.message,
|
||||
});
|
||||
|
||||
@override
|
||||
String toString() => 'OpenAIException($statusCode): $message';
|
||||
String toString() => 'OpenRouterException($statusCode): $message';
|
||||
}
|
||||
|
||||
/// Embeddings API
|
||||
class Embeddings {
|
||||
final OpenAI _openai;
|
||||
final OpenRouter _openRouter;
|
||||
|
||||
Embeddings(this._openai);
|
||||
Embeddings(this._openRouter);
|
||||
|
||||
/// Create embeddings for input text
|
||||
Future<EmbeddingResponse> create({
|
||||
required String model,
|
||||
required dynamic input, // String or List<String>
|
||||
required dynamic input,
|
||||
String? user,
|
||||
String? encodingFormat, // 'float' or 'base64'
|
||||
String? encodingFormat,
|
||||
int? dimensions,
|
||||
}) async {
|
||||
final body = {
|
||||
@@ -370,17 +347,17 @@ class Embeddings {
|
||||
if (dimensions != null) 'dimensions': dimensions,
|
||||
};
|
||||
|
||||
final response = await _openai._client.post(
|
||||
Uri.parse('${_openai.baseUrl}/embeddings'),
|
||||
final response = await _openRouter._client.post(
|
||||
Uri.parse('${_openRouter.baseUrl}/embeddings'),
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer ${_openai.apiKey}',
|
||||
'Authorization': 'Bearer ${_openRouter.apiKey}',
|
||||
},
|
||||
body: jsonEncode(body),
|
||||
);
|
||||
|
||||
if (response.statusCode != 200) {
|
||||
throw OpenAIException(
|
||||
throw OpenRouterException(
|
||||
statusCode: response.statusCode,
|
||||
message: response.body,
|
||||
);
|
||||
@@ -390,7 +367,6 @@ class Embeddings {
|
||||
}
|
||||
}
|
||||
|
||||
/// Embedding response
|
||||
class EmbeddingResponse {
|
||||
final String object;
|
||||
final List<Embedding> data;
|
||||
@@ -406,14 +382,13 @@ class EmbeddingResponse {
|
||||
|
||||
factory EmbeddingResponse.fromJson(Map<String, dynamic> json) =>
|
||||
EmbeddingResponse(
|
||||
object: json['object'],
|
||||
data: (json['data'] as List).map((e) => Embedding.fromJson(e)).toList(),
|
||||
model: json['model'],
|
||||
object: (json['object'] ?? '').toString(),
|
||||
data: (json['data'] as List? ?? []).map((e) => Embedding.fromJson(e)).toList(),
|
||||
model: (json['model'] ?? '').toString(),
|
||||
usage: json['usage'] != null ? Usage.fromJson(json['usage']) : null,
|
||||
);
|
||||
}
|
||||
|
||||
/// Individual embedding
|
||||
class Embedding {
|
||||
final String object;
|
||||
final int index;
|
||||
@@ -426,9 +401,10 @@ class Embedding {
|
||||
});
|
||||
|
||||
factory Embedding.fromJson(Map<String, dynamic> json) => Embedding(
|
||||
object: json['object'],
|
||||
index: json['index'],
|
||||
embedding: (json['embedding'] as List).map<double>((e) => (e as num).toDouble()).toList(),
|
||||
);
|
||||
|
||||
}
|
||||
object: (json['object'] ?? '').toString(),
|
||||
index: (json['index'] ?? 0) as int,
|
||||
embedding: (json['embedding'] as List? ?? [])
|
||||
.map<double>((e) => (e as num).toDouble())
|
||||
.toList(),
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// bucket 0..1 signal scores (probability, impact) into human labels. the
|
||||
// llm outputs a continuous number but the ui shouldnt pretend that number
|
||||
// means much beyond a rough band — "12.3%" reads as false precision.
|
||||
//
|
||||
// thresholds are deliberately a bit generous at the bottom so that the
|
||||
// failure-fallback path (0.0) lands clearly in NONE.
|
||||
|
||||
enum SignalBucket { none, low, medium, high }
|
||||
|
||||
|
||||
SignalBucket bucketFor(double score) {
|
||||
if (score < 0.15) return SignalBucket.none;
|
||||
if (score < 0.40) return SignalBucket.low;
|
||||
if (score < 0.70) return SignalBucket.medium;
|
||||
return SignalBucket.high;
|
||||
}
|
||||
|
||||
|
||||
String bucketLabel(double score) {
|
||||
switch (bucketFor(score)) {
|
||||
case SignalBucket.none: return "None";
|
||||
case SignalBucket.low: return "Low";
|
||||
case SignalBucket.medium: return "Medium";
|
||||
case SignalBucket.high: return "High";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,235 @@
|
||||
import "dart:convert";
|
||||
|
||||
import "package:capstone_project/models/event_signal.dart";
|
||||
import "package:capstone_project/utils/agrigator.dart";
|
||||
import "package:capstone_project/utils/event_clusterer.dart";
|
||||
import "package:capstone_project/utils/openrouter.dart";
|
||||
|
||||
|
||||
// per-article content cap in the prompt. higher = more context, more tokens.
|
||||
// tuned so an 8-article cluster stays well under 20k input tokens.
|
||||
const int _kContentCharCap = 1500;
|
||||
|
||||
|
||||
class SignalGenerator {
|
||||
final String apiKey;
|
||||
|
||||
SignalGenerator({required this.apiKey});
|
||||
|
||||
Future<List<EventSignal>> generateSignals(
|
||||
List<EventCluster> clusters, {
|
||||
required String ticker,
|
||||
required String companyName,
|
||||
}) async {
|
||||
final filtered = clusters.where((c) => c.articles.isNotEmpty).toList();
|
||||
|
||||
final results = await Future.wait(
|
||||
filtered.map(
|
||||
(c) => generateSignal(c, ticker: ticker, companyName: companyName),
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
// sort by impact descending — impact is "how much should i care about this"
|
||||
// probability is the credibility gate, not the headline number
|
||||
results.sort((a, b) => b.impact.compareTo(a.impact));
|
||||
return results;
|
||||
}
|
||||
|
||||
Future<EventSignal> generateSignal(
|
||||
EventCluster cluster, {
|
||||
required String ticker,
|
||||
required String companyName,
|
||||
}) async {
|
||||
final openRouter = OpenRouter(apiKey: apiKey);
|
||||
final articles = cluster.articles;
|
||||
final eventId = _eventIdFor(articles);
|
||||
|
||||
try {
|
||||
final response = await openRouter.chat.completions.create(
|
||||
model: "openai/gpt-4.1-mini",
|
||||
messages: [
|
||||
ChatMessage.system(_systemPrompt).toJson(),
|
||||
ChatMessage.user(_buildPrompt(cluster, ticker: ticker, companyName: companyName)).toJson(),
|
||||
],
|
||||
temperature: 0.2,
|
||||
) as ChatCompletion;
|
||||
|
||||
final rawContent = response.choices.first.message.content.trim();
|
||||
final parsed = jsonDecode(_extractJson(rawContent)) as Map<String, dynamic>;
|
||||
final signal = EventSignal.fromJson(parsed, articles, eventIdOverride: eventId);
|
||||
|
||||
// override createdAt with median pub date from the article cluster
|
||||
return EventSignal(
|
||||
eventId: signal.eventId,
|
||||
eventSummary: signal.eventSummary,
|
||||
direction: signal.direction,
|
||||
nature: signal.nature,
|
||||
probability: signal.probability,
|
||||
impact: signal.impact,
|
||||
rationale: signal.rationale,
|
||||
articles: signal.articles,
|
||||
createdAt: medianPubDate(articles),
|
||||
);
|
||||
} catch (e) {
|
||||
print("Error generating signal: $e");
|
||||
return EventSignal(
|
||||
eventId: eventId,
|
||||
eventSummary: _fallbackSummary(articles),
|
||||
direction: "neutral",
|
||||
probability: 0.0,
|
||||
impact: 0.0,
|
||||
rationale: "Signal generation failed.",
|
||||
articles: articles,
|
||||
createdAt: medianPubDate(articles),
|
||||
);
|
||||
} finally {
|
||||
openRouter.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
static const String _systemPrompt = """
|
||||
You analyze clusters of business and finance news articles that plausibly concern a single asset (a stock, commodity, etc). Each cluster has been assembled via semantic similarity search — the articles are believed to be reporting on the same underlying event, not merely from the same time window. For each cluster you estimate how likely the event is real, how much the asset's price is likely to move in the short term, and the direction of that move.
|
||||
|
||||
Return valid JSON only, with exactly these keys:
|
||||
event_summary, direction, nature, probability, impact, rationale
|
||||
|
||||
Field definitions:
|
||||
|
||||
- probability (number, 0.0–1.0): likelihood that the underlying event is real / actually happening as reported. Grounded primarily in coverage — number of articles, number of distinct publishers, reputability of those publishers, and how tightly the cluster hangs together semantically (lower avg distance = stronger corroboration). This is NOT the probability that the price moves, and it is NOT conditional on direction. Anchors:
|
||||
* 0.1 — single article, unknown blog, speculative framing, no corroboration
|
||||
* 0.5 — several articles from mid-tier outlets, or mixed/conflicting accounts across publishers
|
||||
* 0.9 — wide coverage: many articles across multiple major wire services or papers of record (Reuters, Bloomberg, AP, WSJ, FT, NYT, etc.) converging on the same core facts
|
||||
|
||||
- impact (number, 0.0–1.0): expected magnitude of the asset's immediate price reaction over roughly the next few trading days, ASSUMING the event is real. Reasoned from what the event actually is, applied to this specific asset. Coverage volume is only a weak prior here — loud news is not the same as impactful news, and long-term significance is not the same as short-term reaction. Anchors (illustrative, oil-related asset):
|
||||
* 0.1 — an OPEC minister makes a vague forward-looking comment about prices
|
||||
* 0.5 — a refinery outage in a secondary producing region; a mid-sized earnings beat
|
||||
* 0.9 — Strait of Hormuz closure; a surprise OPEC+ production cut of material size; a major sanctions announcement hitting the asset's supply or demand
|
||||
|
||||
- direction (string, enum): "positive" | "negative" | "neutral" — expected directional bias of the immediate price reaction for this specific asset. Kept separate from impact so a large negative and a large positive are both high-impact.
|
||||
|
||||
- nature (string, enum): "forecasting" if the cluster is predicting or anticipating a future event, "reactive" if it is reporting on something that already happened.
|
||||
|
||||
- event_summary (string): one neutral sentence describing the event itself. No hedging, no direction words.
|
||||
|
||||
- rationale (string): a short paragraph (2–4 sentences) covering two things, in this order:
|
||||
1. What's actually happening in the cluster — more substantive than event_summary. Pull out the concrete facts: who did what, specific numbers, quoted figures, timelines, named actors. This is the reader learning what the news IS.
|
||||
2. The causal chain from event → price reaction for THIS asset. Why does this move the asset in the chosen direction, and why by the chosen magnitude? Reference the mechanism (supply, demand, competition, margins, guidance, sentiment, regulatory exposure, etc).
|
||||
DO NOT restate probability, the nature label, publisher count, source reputability, corroboration strength, or semantic tightness. All of that is shown in the UI alongside the rationale — repeating it wastes the only place the reader learns anything new. Focus on event substance and causal reasoning, not meta-commentary about the input data.
|
||||
|
||||
Important:
|
||||
* probability is about the event being real, not about price movement.
|
||||
* impact is about magnitude of short-term reaction, not long-term significance.
|
||||
* direction is separate from impact.
|
||||
* Return only the JSON object, no prose, no code fences.
|
||||
""";
|
||||
|
||||
String _buildPrompt(
|
||||
EventCluster cluster, {
|
||||
required String ticker,
|
||||
required String companyName,
|
||||
}) {
|
||||
final articles = cluster.articles;
|
||||
|
||||
final publishers = articles
|
||||
.map((a) => (a.source ?? "").trim())
|
||||
.where((s) => s.isNotEmpty)
|
||||
.toSet()
|
||||
.toList();
|
||||
|
||||
final buffer = StringBuffer();
|
||||
buffer.writeln("Asset: $companyName ($ticker)");
|
||||
buffer.writeln();
|
||||
|
||||
buffer.writeln("Coverage stats (computed, do not recount):");
|
||||
buffer.writeln(" Articles: ${articles.length}");
|
||||
buffer.writeln(" Distinct publishers: ${publishers.length}");
|
||||
if (publishers.isNotEmpty) {
|
||||
buffer.writeln(" Publishers: ${publishers.join(", ")}");
|
||||
} else {
|
||||
buffer.writeln(" Publishers: (none identified)");
|
||||
}
|
||||
|
||||
final stats = cluster.distanceStats();
|
||||
if (stats.min != null) {
|
||||
buffer.writeln(
|
||||
" Semantic tightness (distance from seed, 0=identical): "
|
||||
"min ${stats.min!.toStringAsFixed(3)}, "
|
||||
"avg ${stats.avg!.toStringAsFixed(3)}, "
|
||||
"max ${stats.max!.toStringAsFixed(3)}",
|
||||
);
|
||||
} else {
|
||||
buffer.writeln(" Semantic tightness: singleton cluster (no neighbours)");
|
||||
}
|
||||
|
||||
buffer.writeln();
|
||||
|
||||
buffer.writeln("Articles:");
|
||||
for (int i = 0; i < articles.length; i++) {
|
||||
final article = articles[i];
|
||||
buffer.writeln("${i + 1}. Title: ${article.title}");
|
||||
if ((article.source ?? "").trim().isNotEmpty) {
|
||||
buffer.writeln(" Publisher: ${article.source}");
|
||||
}
|
||||
|
||||
final desc = article.description.trim();
|
||||
if (desc.isNotEmpty) {
|
||||
buffer.writeln(" Description: $desc");
|
||||
}
|
||||
|
||||
final body = _clipContent(article.content);
|
||||
if (body.isNotEmpty) {
|
||||
buffer.writeln(" Content: $body");
|
||||
}
|
||||
|
||||
buffer.writeln(" Link: ${article.link}");
|
||||
}
|
||||
|
||||
buffer.writeln();
|
||||
buffer.writeln("Return a single JSON object with keys: event_summary, direction, nature, probability, impact, rationale.");
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
|
||||
// trim + truncate article body to the char cap. returns empty string if
|
||||
// theres nothing useful to include.
|
||||
String _clipContent(String content) {
|
||||
final trimmed = content.trim();
|
||||
if (trimmed.isEmpty) return "";
|
||||
|
||||
if (trimmed.length <= _kContentCharCap) {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
return "${trimmed.substring(0, _kContentCharCap)}...";
|
||||
}
|
||||
|
||||
String _extractJson(String content) {
|
||||
final start = content.indexOf("{");
|
||||
final end = content.lastIndexOf("}");
|
||||
|
||||
if (start == -1 || end == -1 || end < start) {
|
||||
throw const FormatException("No JSON object found in model response.");
|
||||
}
|
||||
|
||||
return content.substring(start, end + 1);
|
||||
}
|
||||
|
||||
String _fallbackSummary(List<FeedItem> articles) {
|
||||
if (articles.isEmpty) {
|
||||
return "Unknown event";
|
||||
}
|
||||
|
||||
return articles.first.title;
|
||||
}
|
||||
|
||||
// deterministic id from the sorted link set — same cluster re-run produces
|
||||
// the same id, which is handy for dedupe later.
|
||||
String _eventIdFor(List<FeedItem> articles) {
|
||||
final links = articles.map((a) => a.link).toList()..sort();
|
||||
final joined = links.join("|");
|
||||
final h = joined.hashCode & 0x7FFFFFFF;
|
||||
return "evt_${h.toRadixString(36)}";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user