Add version files and update imports for trip model; enhance error handling

2026-03-27 21:17:56 +00:00
parent e41e14e252
commit 427bcadc77
89 changed files with 9455 additions and 395 deletions
@@ -1,6 +1,6 @@
 import "dart:typed_data";
 import "package:docx_to_text/docx_to_text.dart";
-import "../models/trip.dart";
+import "../models/operations/trip.dart";
 import "../exceptions/schedule_parse_exception.dart";
 import "schedule_parser.dart";

@@ -50,7 +50,9 @@ class ArrivaScheduleParser implements ScheduleParser {

    print("=== FOUND ${documentSections.length} SECTIONS ===");
    for (var section in documentSections) {
-      print("Section: ${section.direction}, ${section.stations.length} stations, ${section.tripLines.length} trips");
+      print(
+        "Section: ${section.direction}, ${section.stations.length} stations, ${section.tripLines.length} trips",
+      );
    }

    if (documentSections.isEmpty) {
@@ -62,7 +64,9 @@ class ArrivaScheduleParser implements ScheduleParser {
    for (var section in documentSections) {
      final sectionTrips = _parseSectionTrips(section);
      trips.addAll(sectionTrips);
-      print("✓ Parsed ${sectionTrips.length} trips from ${section.direction} section");
+      print(
+        "✓ Parsed ${sectionTrips.length} trips from ${section.direction} section",
+      );
    }

    // Step 4: Sort by scheduled time
@@ -74,12 +78,13 @@ class ArrivaScheduleParser implements ScheduleParser {
  String _extractTextFromDocx(Uint8List bytes) {
    try {
      return docxToText(bytes);
-    } catch (e) {
+    } catch (e, stackTrace) {
+      print("Arriva parser document read failed: $e");
+      print(stackTrace);
      throw ScheduleParseException("Failed to read document: $e");
    }
  }

-
  String _formatTime(String rawTime) {
    if (rawTime.length != 4) {
      throw FormatException("Invalid time format: $rawTime");
@@ -109,14 +114,17 @@ class ArrivaScheduleParser implements ScheduleParser {
          tripLines: [],
        );

-        print("Found station header at line $i with ${stations.length} stations");
-        print("  Stations: ${stations.take(3).join(", ")} ... ${stations.skip(stations.length - 2).join(", ")}");
+        print(
+          "Found station header at line $i with ${stations.length} stations",
+        );
+        print(
+          "  Stations: ${stations.take(3).join(", ")} ... ${stations.skip(stations.length - 2).join(", ")}",
+        );
        continue;
      }

      // Check if this is a trip line
      if (currentSection != null && _isTripLine(line)) {
-
        // Infer direction from first trip line if not yet set
        if (currentSection.direction == "unknown") {
          currentSection.direction = _inferDirectionFromTripLine(line);
@@ -147,22 +155,68 @@ class ArrivaScheduleParser implements ScheduleParser {
    // Split by whitespace and filter for potential station codes (3-8 uppercase letters)
    final parts = line.split(RegExp(r"\s+"));
    final potentialStations = parts
-        .where((part) => part.length >= 3 &&
-                        part.length <= 8 &&
-                        RegExp(r"^[A-Z]+$").hasMatch(part))
+        .where(
+          (part) =>
+              part.length >= 3 &&
+              part.length <= 8 &&
+              RegExp(r"^[A-Z]+$").hasMatch(part),
+        )
        .toList();

    if (potentialStations.length < 8) return null;

    // Filter out common metadata words that appear in headers
    const nonStationWords = {
-      "TRP", "DUTY", "BUS", "START", "END", "GAR", "DEP", "ARR",
-      "DENOTES", "FINISHES", "RELIEF", "TRIP", "NEXT", "NO", "AT",
-      "SPELL", "HOURS", "TOTAL", "LAYOVER", "MILES", "LIVE", "DEAD",
-      "MILEAGE", "TIME", "SIGN", "FORM", "NXT", "THIS", "HAS", "OR",
-      "FOR", "CHANGE", "SERVICE", "POINT", "LSN", "MAN", "RUI", "SN",
-      "ROUTE", "RUNNING", "PREV", "FIN", "ENTOD", "SOALL", "USHRS",
-      "ADDTL", "CASH", "TODAYS", "REL", "IEF",
+      "TRP",
+      "DUTY",
+      "BUS",
+      "START",
+      "END",
+      "GAR",
+      "DEP",
+      "ARR",
+      "DENOTES",
+      "FINISHES",
+      "RELIEF",
+      "TRIP",
+      "NEXT",
+      "NO",
+      "AT",
+      "SPELL",
+      "HOURS",
+      "TOTAL",
+      "LAYOVER",
+      "MILES",
+      "LIVE",
+      "DEAD",
+      "MILEAGE",
+      "TIME",
+      "SIGN",
+      "FORM",
+      "NXT",
+      "THIS",
+      "HAS",
+      "OR",
+      "FOR",
+      "CHANGE",
+      "SERVICE",
+      "POINT",
+      "LSN",
+      "MAN",
+      "RUI",
+      "SN",
+      "ROUTE",
+      "RUNNING",
+      "PREV",
+      "FIN",
+      "ENTOD",
+      "SOALL",
+      "USHRS",
+      "ADDTL",
+      "CASH",
+      "TODAYS",
+      "REL",
+      "IEF",
    };

    final stations = potentialStations
@@ -199,7 +253,9 @@ class ArrivaScheduleParser implements ScheduleParser {
      // Note: running number might be "N503  EC" (with spaces) or "N 503 EC" or just "503 EC"
      // Outbound: starts with HHMM_HHMM
      final isOutboundFormat = RegExp(r"^\d{4}_\d{4}").hasMatch(line);
-      final isInboundFormat = RegExp(r"^\d+\s+\d+\s+(?:[NRF]\d+\s+|[NRF]\s+\d+\s+|F\s+|\d+\s+)EC").hasMatch(line);
+      final isInboundFormat = RegExp(
+        r"^\d+\s+\d+\s+(?:[NRF]\d+\s+|[NRF]\s+\d+\s+|F\s+|\d+\s+)EC",
+      ).hasMatch(line);

      if (isOutboundFormat) {
        trip = _parseOutboundTrip(line, section.stations);
@@ -210,25 +266,28 @@ class ArrivaScheduleParser implements ScheduleParser {
      if (trip != null) {
        trips.add(trip);
      } else {
-        final format = isOutboundFormat ? "outbound" : isInboundFormat ? "inbound" : "unknown";
-        print("Failed to parse $format line: ${line.substring(0, line.length > 80 ? 80 : line.length)}...");
+        final format = isOutboundFormat
+            ? "outbound"
+            : isInboundFormat
+            ? "inbound"
+            : "unknown";
+        print(
+          "Failed to parse $format line: ${line.substring(0, line.length > 80 ? 80 : line.length)}...",
+        );
      }
    }

    return trips;
  }

-  Trip? _parseInboundTrip(
-    String line,
-    List<String> stations,
-  ) {
+  Trip? _parseInboundTrip(String line, List<String> stations) {
    // INBOUND: trip duty running EC### ... HHMM_HHMM times...
    var match = _inboundPattern.firstMatch(line);
    if (match != null) {
      final tripNumber = match.group(1)!;
      final dutyNumber = match.group(2)!;
      final tripType = match.group(3) ?? "";
-      final runningNumber = match.group(4)!;
+      final busWorkNumber = match.group(4)!;
      final firstTime = match.group(6)!;
      final secondTime = match.group(7)!;
      final timesString = match.group(8) ?? "";
@@ -244,13 +303,15 @@ class ArrivaScheduleParser implements ScheduleParser {
      return Trip(
        tripNumber: tripNumber,
        dutyNumber: dutyNumber,
-        runningNumber: runningNumber,
+        busWorkNumber: busWorkNumber,
        scheduledTime: scheduledTime,
        tripType: tripType,
        isFinishing: false,
        stationTimes: stationTimes,
        stationOrder: stations,
-        direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
+        direction: (int.tryParse(tripNumber) ?? 0).isOdd
+            ? "inbound"
+            : "outbound",
      );
    }

@@ -273,23 +334,22 @@ class ArrivaScheduleParser implements ScheduleParser {
      return Trip(
        tripNumber: tripNumber,
        dutyNumber: dutyNumber,
-        runningNumber: dutyNumber,
+        busWorkNumber: dutyNumber,
        scheduledTime: scheduledTime,
        tripType: "F",
        isFinishing: true,
        stationTimes: stationTimes,
        stationOrder: stations,
-        direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
+        direction: (int.tryParse(tripNumber) ?? 0).isOdd
+            ? "inbound"
+            : "outbound",
      );
    }

    return null;
  }

-  Trip? _parseOutboundTrip(
-    String line,
-    List<String> stations,
-  ) {
+  Trip? _parseOutboundTrip(String line, List<String> stations) {
    // OUTBOUND: HHMM_HHMM times... EC### duty running trip
    var match = _outboundPattern.firstMatch(line);
    if (match != null) {
@@ -298,7 +358,7 @@ class ArrivaScheduleParser implements ScheduleParser {
      final timesString = match.group(3) ?? "";
      final dutyNumber = match.group(5)!;
      final tripType = match.group(6) ?? "";
-      final runningNumber = match.group(7)!;
+      final busWorkNumber = match.group(7)!;
      final tripNumber = match.group(8)!;

      // Build complete time array: first_time, second_time, then remaining times
@@ -311,13 +371,15 @@ class ArrivaScheduleParser implements ScheduleParser {
      return Trip(
        tripNumber: tripNumber,
        dutyNumber: dutyNumber,
-        runningNumber: runningNumber,
+        busWorkNumber: busWorkNumber,
        scheduledTime: scheduledTime,
        tripType: tripType,
        isFinishing: false,
        stationTimes: stationTimes,
        stationOrder: stations,
-        direction: (int.tryParse(tripNumber) ?? 0).isOdd ? "inbound" : "outbound",
+        direction: (int.tryParse(tripNumber) ?? 0).isOdd
+            ? "inbound"
+            : "outbound",
      );
    }

@@ -336,15 +398,18 @@ class ArrivaScheduleParser implements ScheduleParser {
      final scheduledTime = _formatTime(firstTime);

      return Trip(
-        tripNumber: dutyNumber, // Finishing trips may not have separate trip number
+        tripNumber:
+            dutyNumber, // Finishing trips may not have separate trip number
        dutyNumber: dutyNumber,
-        runningNumber: dutyNumber,
+        busWorkNumber: dutyNumber,
        scheduledTime: scheduledTime,
        tripType: "F",
        isFinishing: true,
        stationTimes: stationTimes,
        stationOrder: stations,
-        direction: (int.tryParse(dutyNumber) ?? 0).isOdd ? "inbound" : "outbound",
+        direction: (int.tryParse(dutyNumber) ?? 0).isOdd
+            ? "inbound"
+            : "outbound",
      );
    }

@@ -354,10 +419,7 @@ class ArrivaScheduleParser implements ScheduleParser {
  List<String> _extractTimesFromString(String timesString) {
    // Extract all 4-digit times from the string
    final pattern = RegExp(r"\b(\d{4})\b");
-    return pattern
-        .allMatches(timesString)
-        .map((m) => m.group(1)!)
-        .toList();
+    return pattern.allMatches(timesString).map((m) => m.group(1)!).toList();
  }

  List<String> _extractAllTimes(String line) {
@@ -1,5 +1,5 @@
 import "dart:typed_data";
-import "../models/trip.dart";
+import "../models/operations/trip.dart";

 abstract class ScheduleParser {
  Future<List<Trip>> parseBytes(Uint8List bytes);
@@ -1,6 +1,6 @@
 import "dart:typed_data";
 import "package:syncfusion_flutter_pdf/pdf.dart";
-import "../models/trip.dart";
+import "../models/operations/trip.dart";
 import "../exceptions/schedule_parse_exception.dart";
 import "schedule_parser.dart";

@@ -20,14 +20,24 @@ class StagecoachScheduleParser implements ScheduleParser {

    // Syncfusion splits text with newlines everywhere.
    // Collapse each page into single line, normalize all whitespace runs to single space
-    final pages = rawPages.map((p) =>
-      p.replaceAll("\n", " ").replaceAll(RegExp(r"\s+"), " ").trim()
-    ).toList();
+    final pages = rawPages
+        .map(
+          (p) => p.replaceAll("\n", " ").replaceAll(RegExp(r"\s+"), " ").trim(),
+        )
+        .toList();

    print("=== STAGECOACH: ${pages.length} pages ===");
+    for (var i = 0; i < rawPages.length; i++) {
+      print("=== RAW PDF PAGE ${i + 1} TEXT START ===");
+      print(rawPages[i]);
+      print("=== RAW PDF PAGE ${i + 1} TEXT END ===");
+    }

    // Extract route name — "ROUTE 099CSATURDAY..." or "ROUTE CRL SATURDAY..."
-    final routeMatch = RegExp(r"ROUTE\s+(\w+?)(?:SATURDAY|SUNDAY|MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)", caseSensitive: false).firstMatch(pages.first);
+    final routeMatch = RegExp(
+      r"ROUTE\s+(\w+?)(?:SATURDAY|SUNDAY|MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)",
+      caseSensitive: false,
+    ).firstMatch(pages.first);
    if (routeMatch != null) {
      parsedRouteName = routeMatch.group(1);
    } else {
@@ -57,7 +67,9 @@ class StagecoachScheduleParser implements ScheduleParser {
        if (seenKeys.contains(key)) continue;
        seenKeys.add(key);
        allTrips.add(t);
-        print("  TRIP: ${t.tripNumber} | ${t.scheduledTime} | ${t.direction} | duty=${t.dutyNumber} run=${t.runningNumber} | stations=${t.stationTimes}");
+        print(
+          "  TRIP: ${t.tripNumber} | ${t.scheduledTime} | ${t.direction} | duty=${t.dutyNumber} busWork=${t.busWorkNumber} | stations=${t.stationTimes}",
+        );
      }
    }

@@ -75,13 +87,15 @@ class StagecoachScheduleParser implements ScheduleParser {
      final doc = PdfDocument(inputBytes: bytes);
      final pages = <String>[];
      for (int i = 0; i < doc.pages.count; i++) {
-        pages.add(PdfTextExtractor(doc).extractText(
-          startPageIndex: i, endPageIndex: i,
-        ));
+        pages.add(
+          PdfTextExtractor(doc).extractText(startPageIndex: i, endPageIndex: i),
+        );
      }
      doc.dispose();
      return pages;
-    } catch (e) {
+    } catch (e, stackTrace) {
+      print("Stagecoach parser PDF read failed: $e");
+      print(stackTrace);
      throw ScheduleParseException("Failed to read PDF: $e");
    }
  }
@@ -99,7 +113,9 @@ class StagecoachScheduleParser implements ScheduleParser {

    if (isOutbound) {
      // Between "DEPT GAR" and "RLF DEPT TIME" (or just before the first duty ID)
-      final match = RegExp(r"DEPT GAR (.+?) RLF DEPT TIME").firstMatch(pageText);
+      final match = RegExp(
+        r"DEPT GAR (.+?) RLF DEPT TIME",
+      ).firstMatch(pageText);
      if (match != null) stationBlock = match.group(1);
    } else {
      // Between "RLF STS:" and "ARR GAR"
@@ -122,8 +138,16 @@ class StagecoachScheduleParser implements ScheduleParser {

    var block = match.group(1)!;
    // Remove known non-station words
-    for (final word in ["DEPT GAR", "ARR GAR", "RLF DEPT TIME", "DUTY END",
-        "NEXT BUS", "NEXT TRIP", "DRV SPELL", "RLF"]) {
+    for (final word in [
+      "DEPT GAR",
+      "ARR GAR",
+      "RLF DEPT TIME",
+      "DUTY END",
+      "NEXT BUS",
+      "NEXT TRIP",
+      "DRV SPELL",
+      "RLF",
+    ]) {
      block = block.replaceAll(word, " ");
    }
    return _pairStationTokens(block.trim());
@@ -163,13 +187,18 @@ class StagecoachScheduleParser implements ScheduleParser {
    return stations;
  }

-
-  List<Trip> _parseTripsFromPage(String pageText, String direction, List<String> stations) {
+  List<Trip> _parseTripsFromPage(
+    String pageText,
+    String direction,
+    List<String> stations,
+  ) {
    final trips = <Trip>[];

    // Find all trip-start positions: DUTY_ID followed by RUN(digits) and BUS(3 digits)
    // This pattern marks the beginning of a real trip entry
-    final tripStartPattern = RegExp(r"\b((\d/)?\d[A-Z]\d{3})\s+(\d+)\s+(\d{3})\b");
+    final tripStartPattern = RegExp(
+      r"\b((\d/)?\d[A-Z]\d{3})\s+(\d+)\s+(\d{3})\b",
+    );
    final starts = tripStartPattern.allMatches(pageText).toList();

    print("  Trip-start matches: ${starts.length}");
@@ -180,20 +209,31 @@ class StagecoachScheduleParser implements ScheduleParser {
    for (var i = 0; i < starts.length; i++) {
      final segStart = starts[i].start;
      // segment ends where the next trip starts, or at end of text
-      final segEnd = (i + 1 < starts.length) ? starts[i + 1].start : pageText.length;
+      final segEnd = (i + 1 < starts.length)
+          ? starts[i + 1].start
+          : pageText.length;
      final segment = pageText.substring(segStart, segEnd).trim();

      final dutyId = starts[i].group(1)!;
      final runNumber = starts[i].group(3)!;
      final busWorkingNo = starts[i].group(4)!;

-      final trip = _parseTripFromSegment(segment, dutyId, runNumber, busWorkingNo, direction, stations);
+      final trip = _parseTripFromSegment(
+        segment,
+        dutyId,
+        runNumber,
+        busWorkingNo,
+        direction,
+        stations,
+      );
      if (trip != null) {
        trips.add(trip);
        parsed++;
      } else {
        failed++;
-        final preview = segment.length > 120 ? segment.substring(0, 120) : segment;
+        final preview = segment.length > 120
+            ? segment.substring(0, 120)
+            : segment;
        print("    Skip: $preview");
      }
    }
@@ -204,20 +244,32 @@ class StagecoachScheduleParser implements ScheduleParser {
  }

  Trip? _parseTripFromSegment(
-    String segment, String dutyId, String runNumber, String busWorkingNo,
-    String direction, List<String> stations,
+    String segment,
+    String dutyId,
+    String runNumber,
+    String busWorkingNo,
+    String direction,
+    List<String> stations,
  ) {
    // Skip header junk
    if (segment.contains("TRIP NO") ||
-        segment.contains("DUTY START") || segment.contains("SCHEDULE DATE")) {
+        segment.contains("DUTY START") ||
+        segment.contains("SCHEDULE DATE")) {
      return null;
    }

    // Skip deadhead/light runs — but only if LIGHT appears early in the segment
    // (before station times), not in trailing text from the next concatenated entry
-    final lightMatch = RegExp(r"LIGHT", caseSensitive: false).firstMatch(segment);
+    final lightMatch = RegExp(
+      r"LIGHT",
+      caseSensitive: false,
+    ).firstMatch(segment);
    if (lightMatch != null) {
-      final headerEnd = RegExp(RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}").firstMatch(segment)?.end ?? 0;
+      final headerEnd =
+          RegExp(
+            RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}",
+          ).firstMatch(segment)?.end ??
+          0;
      // if LIGHT is within 40 chars of the header, its a genuine deadhead
      if (lightMatch.start - headerEnd < 40) {
        return null;
@@ -225,14 +277,21 @@ class StagecoachScheduleParser implements ScheduleParser {
    }

    // Strip the leading "DUTY RUN BUS" we already extracted
-    final afterHeader = segment.substring(
-      RegExp(RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}").firstMatch(segment)!.end
-    ).trim();
+    final afterHeader = segment
+        .substring(
+          RegExp(
+            RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}",
+          ).firstMatch(segment)!.end,
+        )
+        .trim();

    // Strip relief/garage prefix to get to the times
    var timesSection = afterHeader;
    // remove leading relief markers and garage name
-    timesSection = timesSection.replaceFirst(RegExp(r"^(RR|FR|FRX|FX|R)\s*"), "");
+    timesSection = timesSection.replaceFirst(
+      RegExp(r"^(RR|FR|FRX|FX|R)\s*"),
+      "",
+    );
    timesSection = timesSection.replaceFirst(RegExp(r"^HomeGara\s*"), "");
    // Also remove leading text from inbound like just "R " or "F "
    timesSection = timesSection.replaceFirst(RegExp(r"^(F|R)\s+"), "");
@@ -249,8 +308,18 @@ class StagecoachScheduleParser implements ScheduleParser {

    if (times.isEmpty) return null;

-    // For garage trips, first time is the garage departure — skip it
-    final isFromGarage = afterHeader.contains("HomeGara");
+    // For garage-prefixed trips, first time is often a non-route garage departure.
+    // Stagecoach extracts vary ("HomeGara", "DEPT GAR", etc), so use multiple hints.
+    var isFromGarage = RegExp(
+      r"\b(HomeGara|DEPT\s*GAR|ARR\s*GAR)\b",
+      caseSensitive: false,
+    ).hasMatch(afterHeader);
+    // Practical fallback: outbound rows commonly include one leading garage time.
+    if (!isFromGarage &&
+        direction == "outbound" &&
+        times.length > stations.length) {
+      isFromGarage = true;
+    }
    int firstStationIdx = 0;
    if (isFromGarage && times.length > 1) {
      firstStationIdx = 1;
@@ -271,7 +340,8 @@ class StagecoachScheduleParser implements ScheduleParser {
    final scheduledTime = _formatTime(times[firstStationIdx]);

    String tripType = "";
-    if (afterHeader.startsWith("RR") || afterHeader.startsWith("R")) tripType = "R";
+    if (afterHeader.startsWith("RR") || afterHeader.startsWith("R"))
+      tripType = "R";

    final actualDirection = dutyId.startsWith("2/") ? "inbound" : direction;

@@ -283,9 +353,9 @@ class StagecoachScheduleParser implements ScheduleParser {

    return Trip(
      scheduledTime: scheduledTime,
-      tripNumber: dutyId,
-      dutyNumber: busWorkingNo,
-      runningNumber: runNumber,
+      tripNumber: runNumber,
+      dutyNumber: dutyId,
+      busWorkNumber: busWorkingNo,
      tripType: tripType,
      isFinishing: segment.contains("BUS FIN"),
      stationTimes: stationTimes,