Add version files and update imports for trip model; enhance error handling

This commit is contained in:
ImBenji
2026-03-27 21:17:56 +00:00
parent e41e14e252
commit 427bcadc77
89 changed files with 9455 additions and 395 deletions
+106 -44
View File
@@ -1,6 +1,6 @@
import "dart:typed_data";
import "package:docx_to_text/docx_to_text.dart";
import "../models/trip.dart";
import "../models/operations/trip.dart";
import "../exceptions/schedule_parse_exception.dart";
import "schedule_parser.dart";
@@ -50,7 +50,9 @@ class ArrivaScheduleParser implements ScheduleParser {
print("=== FOUND ${documentSections.length} SECTIONS ===");
for (var section in documentSections) {
print("Section: ${section.direction}, ${section.stations.length} stations, ${section.tripLines.length} trips");
print(
"Section: ${section.direction}, ${section.stations.length} stations, ${section.tripLines.length} trips",
);
}
if (documentSections.isEmpty) {
@@ -62,7 +64,9 @@ class ArrivaScheduleParser implements ScheduleParser {
for (var section in documentSections) {
final sectionTrips = _parseSectionTrips(section);
trips.addAll(sectionTrips);
print("✓ Parsed ${sectionTrips.length} trips from ${section.direction} section");
print(
"✓ Parsed ${sectionTrips.length} trips from ${section.direction} section",
);
}
// Step 4: Sort by scheduled time
@@ -74,12 +78,13 @@ class ArrivaScheduleParser implements ScheduleParser {
/// Extracts the plain text of a DOCX document supplied as raw [bytes].
///
/// Delegates the conversion to `docxToText`. If the conversion throws, the
/// error and its stack trace are printed and a [ScheduleParseException]
/// carrying the original error text is thrown instead.
String _extractTextFromDocx(Uint8List bytes) {
  try {
    return docxToText(bytes);
  } catch (e, stackTrace) {
    // This must be the only catch clause: an untyped catch placed ahead of it
    // would swallow the failure and leave this handler unreachable.
    print("Arriva parser document read failed: $e");
    print(stackTrace);
    throw ScheduleParseException("Failed to read document: $e");
  }
}
String _formatTime(String rawTime) {
if (rawTime.length != 4) {
throw FormatException("Invalid time format: $rawTime");
@@ -109,14 +114,17 @@ class ArrivaScheduleParser implements ScheduleParser {
tripLines: [],
);
print("Found station header at line $i with ${stations.length} stations");
print(" Stations: ${stations.take(3).join(", ")} ... ${stations.skip(stations.length - 2).join(", ")}");
print(
"Found station header at line $i with ${stations.length} stations",
);
print(
" Stations: ${stations.take(3).join(", ")} ... ${stations.skip(stations.length - 2).join(", ")}",
);
continue;
}
// Check if this is a trip line
if (currentSection != null && _isTripLine(line)) {
// Infer direction from first trip line if not yet set
if (currentSection.direction == "unknown") {
currentSection.direction = _inferDirectionFromTripLine(line);
@@ -147,22 +155,68 @@ class ArrivaScheduleParser implements ScheduleParser {
// Split by whitespace and filter for potential station codes (3-8 uppercase letters)
final parts = line.split(RegExp(r"\s+"));
final potentialStations = parts
.where((part) => part.length >= 3 &&
part.length <= 8 &&
RegExp(r"^[A-Z]+$").hasMatch(part))
.where(
(part) =>
part.length >= 3 &&
part.length <= 8 &&
RegExp(r"^[A-Z]+$").hasMatch(part),
)
.toList();
if (potentialStations.length < 8) return null;
// Filter out common metadata words that appear in headers
const nonStationWords = {
"TRP", "DUTY", "BUS", "START", "END", "GAR", "DEP", "ARR",
"DENOTES", "FINISHES", "RELIEF", "TRIP", "NEXT", "NO", "AT",
"SPELL", "HOURS", "TOTAL", "LAYOVER", "MILES", "LIVE", "DEAD",
"MILEAGE", "TIME", "SIGN", "FORM", "NXT", "THIS", "HAS", "OR",
"FOR", "CHANGE", "SERVICE", "POINT", "LSN", "MAN", "RUI", "SN",
"ROUTE", "RUNNING", "PREV", "FIN", "ENTOD", "SOALL", "USHRS",
"ADDTL", "CASH", "TODAYS", "REL", "IEF",
"TRP",
"DUTY",
"BUS",
"START",
"END",
"GAR",
"DEP",
"ARR",
"DENOTES",
"FINISHES",
"RELIEF",
"TRIP",
"NEXT",
"NO",
"AT",
"SPELL",
"HOURS",
"TOTAL",
"LAYOVER",
"MILES",
"LIVE",
"DEAD",
"MILEAGE",
"TIME",
"SIGN",
"FORM",
"NXT",
"THIS",
"HAS",
"OR",
"FOR",
"CHANGE",
"SERVICE",
"POINT",
"LSN",
"MAN",
"RUI",
"SN",
"ROUTE",
"RUNNING",
"PREV",
"FIN",
"ENTOD",
"SOALL",
"USHRS",
"ADDTL",
"CASH",
"TODAYS",
"REL",
"IEF",
};
final stations = potentialStations
@@ -199,7 +253,9 @@ class ArrivaScheduleParser implements ScheduleParser {
// Note: running number might be "N503 EC" (with spaces) or "N 503 EC" or just "503 EC"
// Outbound: starts with HHMM_HHMM
final isOutboundFormat = RegExp(r"^\d{4}_\d{4}").hasMatch(line);
final isInboundFormat = RegExp(r"^\d+\s+\d+\s+(?:[NRF]\d+\s+|[NRF]\s+\d+\s+|F\s+|\d+\s+)EC").hasMatch(line);
final isInboundFormat = RegExp(
r"^\d+\s+\d+\s+(?:[NRF]\d+\s+|[NRF]\s+\d+\s+|F\s+|\d+\s+)EC",
).hasMatch(line);
if (isOutboundFormat) {
trip = _parseOutboundTrip(line, section.stations);
@@ -210,25 +266,28 @@ class ArrivaScheduleParser implements ScheduleParser {
if (trip != null) {
trips.add(trip);
} else {
final format = isOutboundFormat ? "outbound" : isInboundFormat ? "inbound" : "unknown";
print("Failed to parse $format line: ${line.substring(0, line.length > 80 ? 80 : line.length)}...");
final format = isOutboundFormat
? "outbound"
: isInboundFormat
? "inbound"
: "unknown";
print(
"Failed to parse $format line: ${line.substring(0, line.length > 80 ? 80 : line.length)}...",
);
}
}
return trips;
}
Trip? _parseInboundTrip(
String line,
List<String> stations,
) {
Trip? _parseInboundTrip(String line, List<String> stations) {
// INBOUND: trip duty running EC### ... HHMM_HHMM times...
var match = _inboundPattern.firstMatch(line);
if (match != null) {
final tripNumber = match.group(1)!;
final dutyNumber = match.group(2)!;
final tripType = match.group(3) ?? "";
final runningNumber = match.group(4)!;
final busWorkNumber = match.group(4)!;
final firstTime = match.group(6)!;
final secondTime = match.group(7)!;
final timesString = match.group(8) ?? "";
@@ -244,13 +303,15 @@ class ArrivaScheduleParser implements ScheduleParser {
return Trip(
tripNumber: tripNumber,
dutyNumber: dutyNumber,
runningNumber: runningNumber,
busWorkNumber: busWorkNumber,
scheduledTime: scheduledTime,
tripType: tripType,
isFinishing: false,
stationTimes: stationTimes,
stationOrder: stations,
direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
direction: (int.tryParse(tripNumber) ?? 0).isOdd
? "inbound"
: "outbound",
);
}
@@ -273,23 +334,22 @@ class ArrivaScheduleParser implements ScheduleParser {
return Trip(
tripNumber: tripNumber,
dutyNumber: dutyNumber,
runningNumber: dutyNumber,
busWorkNumber: dutyNumber,
scheduledTime: scheduledTime,
tripType: "F",
isFinishing: true,
stationTimes: stationTimes,
stationOrder: stations,
direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
direction: (int.tryParse(tripNumber) ?? 0).isOdd
? "inbound"
: "outbound",
);
}
return null;
}
Trip? _parseOutboundTrip(
String line,
List<String> stations,
) {
Trip? _parseOutboundTrip(String line, List<String> stations) {
// OUTBOUND: HHMM_HHMM times... EC### duty running trip
var match = _outboundPattern.firstMatch(line);
if (match != null) {
@@ -298,7 +358,7 @@ class ArrivaScheduleParser implements ScheduleParser {
final timesString = match.group(3) ?? "";
final dutyNumber = match.group(5)!;
final tripType = match.group(6) ?? "";
final runningNumber = match.group(7)!;
final busWorkNumber = match.group(7)!;
final tripNumber = match.group(8)!;
// Build complete time array: first_time, second_time, then remaining times
@@ -311,13 +371,15 @@ class ArrivaScheduleParser implements ScheduleParser {
return Trip(
tripNumber: tripNumber,
dutyNumber: dutyNumber,
runningNumber: runningNumber,
busWorkNumber: busWorkNumber,
scheduledTime: scheduledTime,
tripType: tripType,
isFinishing: false,
stationTimes: stationTimes,
stationOrder: stations,
direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
direction: (int.tryParse(tripNumber) ?? 0).isOdd
? "inbound"
: "outbound",
);
}
@@ -336,15 +398,18 @@ class ArrivaScheduleParser implements ScheduleParser {
final scheduledTime = _formatTime(firstTime);
return Trip(
tripNumber: dutyNumber, // Finishing trips may not have separate trip number
tripNumber:
dutyNumber, // Finishing trips may not have separate trip number
dutyNumber: dutyNumber,
runningNumber: dutyNumber,
busWorkNumber: dutyNumber,
scheduledTime: scheduledTime,
tripType: "F",
isFinishing: true,
stationTimes: stationTimes,
stationOrder: stations,
direction: (int.tryParse(dutyNumber) ?? 0).isOdd ? "inbound" : "outbound",
direction: (int.tryParse(dutyNumber) ?? 0).isOdd
? "inbound"
: "outbound",
);
}
@@ -354,10 +419,7 @@ class ArrivaScheduleParser implements ScheduleParser {
/// Returns every standalone four-digit token found in [timesString], in the
/// order in which the tokens appear.
///
/// A token only counts when it has a word boundary on both sides. Digits that
/// are part of a longer run of word characters are therefore skipped: `12345`
/// yields nothing, and neither does `0715_0730`, because `_` is a word
/// character. Returns an empty list when nothing matches.
List<String> _extractTimesFromString(String timesString) {
  final pattern = RegExp(r"\b(\d{4})\b");
  return pattern.allMatches(timesString).map((m) => m.group(1)!).toList();
}
List<String> _extractAllTimes(String line) {
+1 -1
View File
@@ -1,5 +1,5 @@
import "dart:typed_data";
import "../models/trip.dart";
import "../models/operations/trip.dart";
abstract class ScheduleParser {
Future<List<Trip>> parseBytes(Uint8List bytes);
+104 -34
View File
@@ -1,6 +1,6 @@
import "dart:typed_data";
import "package:syncfusion_flutter_pdf/pdf.dart";
import "../models/trip.dart";
import "../models/operations/trip.dart";
import "../exceptions/schedule_parse_exception.dart";
import "schedule_parser.dart";
@@ -20,14 +20,24 @@ class StagecoachScheduleParser implements ScheduleParser {
// Syncfusion splits text with newlines everywhere.
// Collapse each page into a single line and normalize all whitespace runs to a single space.
final pages = rawPages.map((p) =>
p.replaceAll("\n", " ").replaceAll(RegExp(r"\s+"), " ").trim()
).toList();
final pages = rawPages
.map(
(p) => p.replaceAll("\n", " ").replaceAll(RegExp(r"\s+"), " ").trim(),
)
.toList();
print("=== STAGECOACH: ${pages.length} pages ===");
for (var i = 0; i < rawPages.length; i++) {
print("=== RAW PDF PAGE ${i + 1} TEXT START ===");
print(rawPages[i]);
print("=== RAW PDF PAGE ${i + 1} TEXT END ===");
}
// Extract route name — "ROUTE 099CSATURDAY..." or "ROUTE CRL SATURDAY..."
final routeMatch = RegExp(r"ROUTE\s+(\w+?)(?:SATURDAY|SUNDAY|MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)", caseSensitive: false).firstMatch(pages.first);
final routeMatch = RegExp(
r"ROUTE\s+(\w+?)(?:SATURDAY|SUNDAY|MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)",
caseSensitive: false,
).firstMatch(pages.first);
if (routeMatch != null) {
parsedRouteName = routeMatch.group(1);
} else {
@@ -57,7 +67,9 @@ class StagecoachScheduleParser implements ScheduleParser {
if (seenKeys.contains(key)) continue;
seenKeys.add(key);
allTrips.add(t);
print(" TRIP: ${t.tripNumber} | ${t.scheduledTime} | ${t.direction} | duty=${t.dutyNumber} run=${t.runningNumber} | stations=${t.stationTimes}");
print(
" TRIP: ${t.tripNumber} | ${t.scheduledTime} | ${t.direction} | duty=${t.dutyNumber} busWork=${t.busWorkNumber} | stations=${t.stationTimes}",
);
}
}
@@ -75,13 +87,15 @@ class StagecoachScheduleParser implements ScheduleParser {
final doc = PdfDocument(inputBytes: bytes);
final pages = <String>[];
for (int i = 0; i < doc.pages.count; i++) {
pages.add(PdfTextExtractor(doc).extractText(
startPageIndex: i, endPageIndex: i,
));
pages.add(
PdfTextExtractor(doc).extractText(startPageIndex: i, endPageIndex: i),
);
}
doc.dispose();
return pages;
} catch (e) {
} catch (e, stackTrace) {
print("Stagecoach parser PDF read failed: $e");
print(stackTrace);
throw ScheduleParseException("Failed to read PDF: $e");
}
}
@@ -99,7 +113,9 @@ class StagecoachScheduleParser implements ScheduleParser {
if (isOutbound) {
// Between "DEPT GAR" and "RLF DEPT TIME" (or just before the first duty ID)
final match = RegExp(r"DEPT GAR (.+?) RLF DEPT TIME").firstMatch(pageText);
final match = RegExp(
r"DEPT GAR (.+?) RLF DEPT TIME",
).firstMatch(pageText);
if (match != null) stationBlock = match.group(1);
} else {
// Between "RLF STS:" and "ARR GAR"
@@ -122,8 +138,16 @@ class StagecoachScheduleParser implements ScheduleParser {
var block = match.group(1)!;
// Remove known non-station words
for (final word in ["DEPT GAR", "ARR GAR", "RLF DEPT TIME", "DUTY END",
"NEXT BUS", "NEXT TRIP", "DRV SPELL", "RLF"]) {
for (final word in [
"DEPT GAR",
"ARR GAR",
"RLF DEPT TIME",
"DUTY END",
"NEXT BUS",
"NEXT TRIP",
"DRV SPELL",
"RLF",
]) {
block = block.replaceAll(word, " ");
}
return _pairStationTokens(block.trim());
@@ -163,13 +187,18 @@ class StagecoachScheduleParser implements ScheduleParser {
return stations;
}
List<Trip> _parseTripsFromPage(String pageText, String direction, List<String> stations) {
List<Trip> _parseTripsFromPage(
String pageText,
String direction,
List<String> stations,
) {
final trips = <Trip>[];
// Find all trip-start positions: DUTY_ID followed by RUN(digits) and BUS(3 digits)
// This pattern marks the beginning of a real trip entry
final tripStartPattern = RegExp(r"\b((\d/)?\d[A-Z]\d{3})\s+(\d+)\s+(\d{3})\b");
final tripStartPattern = RegExp(
r"\b((\d/)?\d[A-Z]\d{3})\s+(\d+)\s+(\d{3})\b",
);
final starts = tripStartPattern.allMatches(pageText).toList();
print(" Trip-start matches: ${starts.length}");
@@ -180,20 +209,31 @@ class StagecoachScheduleParser implements ScheduleParser {
for (var i = 0; i < starts.length; i++) {
final segStart = starts[i].start;
// segment ends where the next trip starts, or at end of text
final segEnd = (i + 1 < starts.length) ? starts[i + 1].start : pageText.length;
final segEnd = (i + 1 < starts.length)
? starts[i + 1].start
: pageText.length;
final segment = pageText.substring(segStart, segEnd).trim();
final dutyId = starts[i].group(1)!;
final runNumber = starts[i].group(3)!;
final busWorkingNo = starts[i].group(4)!;
final trip = _parseTripFromSegment(segment, dutyId, runNumber, busWorkingNo, direction, stations);
final trip = _parseTripFromSegment(
segment,
dutyId,
runNumber,
busWorkingNo,
direction,
stations,
);
if (trip != null) {
trips.add(trip);
parsed++;
} else {
failed++;
final preview = segment.length > 120 ? segment.substring(0, 120) : segment;
final preview = segment.length > 120
? segment.substring(0, 120)
: segment;
print(" Skip: $preview");
}
}
@@ -204,20 +244,32 @@ class StagecoachScheduleParser implements ScheduleParser {
}
Trip? _parseTripFromSegment(
String segment, String dutyId, String runNumber, String busWorkingNo,
String direction, List<String> stations,
String segment,
String dutyId,
String runNumber,
String busWorkingNo,
String direction,
List<String> stations,
) {
// Skip header junk
if (segment.contains("TRIP NO") ||
segment.contains("DUTY START") || segment.contains("SCHEDULE DATE")) {
segment.contains("DUTY START") ||
segment.contains("SCHEDULE DATE")) {
return null;
}
// Skip deadhead/light runs — but only if LIGHT appears early in the segment
// (before station times), not in trailing text from the next concatenated entry
final lightMatch = RegExp(r"LIGHT", caseSensitive: false).firstMatch(segment);
final lightMatch = RegExp(
r"LIGHT",
caseSensitive: false,
).firstMatch(segment);
if (lightMatch != null) {
final headerEnd = RegExp(RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}").firstMatch(segment)?.end ?? 0;
final headerEnd =
RegExp(
RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}",
).firstMatch(segment)?.end ??
0;
// if LIGHT is within 40 chars of the header, it's a genuine deadhead
if (lightMatch.start - headerEnd < 40) {
return null;
@@ -225,14 +277,21 @@ class StagecoachScheduleParser implements ScheduleParser {
}
// Strip the leading "DUTY RUN BUS" we already extracted
final afterHeader = segment.substring(
RegExp(RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}").firstMatch(segment)!.end
).trim();
final afterHeader = segment
.substring(
RegExp(
RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}",
).firstMatch(segment)!.end,
)
.trim();
// Strip relief/garage prefix to get to the times
var timesSection = afterHeader;
// remove leading relief markers and garage name
timesSection = timesSection.replaceFirst(RegExp(r"^(RR|FR|FRX|FX|R)\s*"), "");
timesSection = timesSection.replaceFirst(
RegExp(r"^(RR|FR|FRX|FX|R)\s*"),
"",
);
timesSection = timesSection.replaceFirst(RegExp(r"^HomeGara\s*"), "");
// Also remove leading text from inbound like just "R " or "F "
timesSection = timesSection.replaceFirst(RegExp(r"^(F|R)\s+"), "");
@@ -249,8 +308,18 @@ class StagecoachScheduleParser implements ScheduleParser {
if (times.isEmpty) return null;
// For garage trips, first time is the garage departure — skip it
final isFromGarage = afterHeader.contains("HomeGara");
// For garage-prefixed trips, first time is often a non-route garage departure.
// Stagecoach extracts vary ("HomeGara", "DEPT GAR", etc), so use multiple hints.
var isFromGarage = RegExp(
r"\b(HomeGara|DEPT\s*GAR|ARR\s*GAR)\b",
caseSensitive: false,
).hasMatch(afterHeader);
// Practical fallback: outbound rows commonly include one leading garage time.
if (!isFromGarage &&
direction == "outbound" &&
times.length > stations.length) {
isFromGarage = true;
}
int firstStationIdx = 0;
if (isFromGarage && times.length > 1) {
firstStationIdx = 1;
@@ -271,7 +340,8 @@ class StagecoachScheduleParser implements ScheduleParser {
final scheduledTime = _formatTime(times[firstStationIdx]);
String tripType = "";
if (afterHeader.startsWith("RR") || afterHeader.startsWith("R")) tripType = "R";
if (afterHeader.startsWith("RR") || afterHeader.startsWith("R"))
tripType = "R";
final actualDirection = dutyId.startsWith("2/") ? "inbound" : direction;
@@ -283,9 +353,9 @@ class StagecoachScheduleParser implements ScheduleParser {
return Trip(
scheduledTime: scheduledTime,
tripNumber: dutyId,
dutyNumber: busWorkingNo,
runningNumber: runNumber,
tripNumber: runNumber,
dutyNumber: dutyId,
busWorkNumber: busWorkingNo,
tripType: tripType,
isFinishing: segment.contains("BUS FIN"),
stationTimes: stationTimes,