Add initial project files and configurations for bus_running_record app
This commit is contained in:
@@ -0,0 +1,409 @@
|
||||
import "dart:typed_data";
|
||||
import "package:docx_to_text/docx_to_text.dart";
|
||||
import "../models/trip.dart";
|
||||
import "../exceptions/schedule_parse_exception.dart";
|
||||
import "schedule_parser.dart";
|
||||
|
||||
class ArrivaScheduleParser implements ScheduleParser {
|
||||
/// Outbound row: `HHMM_HHMM times... EC### duty running trip`.
///
/// Times appear on the LEFT, trip info on the RIGHT. Capture groups:
/// 1-2 the leading time pair, 3 the remaining text before the EC code,
/// 4 the EC code, 5 duty, 6 optional N/R/F prefix, 7 running number,
/// 8 trip number.
static final _outboundPattern = RegExp(
  r"^(\d{4})_(\d{4})\s+(.*?)\s+(EC\d+)\s+(\d+)\s+([NRF]?)(\d+)\s+(\d+)",
);

/// Outbound finishing row: same left-hand side as [_outboundPattern], but the
/// duty number is followed by a bare `F` instead of running/trip numbers.
static final _outboundFinishingPattern = RegExp(
  r"^(\d{4})_(\d{4})\s+(.*?)\s+(EC\d+)\s+(\d+)\s+F",
);

/// Inbound row: `trip duty running EC### ... HHMM_HHMM times...`.
///
/// Trip info appears on the LEFT, times on the RIGHT. Capture groups:
/// 1 trip, 2 duty, 3 optional N/R/F prefix, 4 running number, 5 EC code,
/// 6-7 the leading time pair, 8 the remaining digit/space run.
///
/// The running number may be written with an attached prefix ("N503") or a
/// detached one ("N 503"); the `\s*` between groups 3 and 4 accepts both so
/// that every row admitted by the inbound pre-check in `_parseSectionTrips`
/// can actually be parsed.
static final _inboundPattern = RegExp(
  r"^(\d+)\s+(\d+)\s+([NRF]?)\s*(\d+)\s+(EC\d+)\s+.*?(\d{4})_(\d{4})\s+([\d\s]+)",
);

/// Inbound finishing row: `trip duty F EC### ... HHMM_HHMM rest`.
///
/// Capture groups: 1 trip, 2 duty, 3 EC code, 4-5 the leading time pair,
/// 6 everything after it.
static final _inboundFinishingPattern = RegExp(
  r"^(\d+)\s+(\d+)\s+F\s+(EC\d+)\s+.*?(\d{4})_(\d{4})(.*)",
);
|
||||
|
||||
/// Whether [content] looks like an Arriva schedule.
///
/// All three markers must be present: the text "EC5", the text
/// "FINISH AT POINT", and at least one `HHMM_HHMM` time pair.
@override
bool canParse(String content) {
  if (!content.contains("EC5")) return false;
  if (!content.contains("FINISH AT POINT")) return false;

  final timePair = RegExp(r"\d{4}_\d{4}");
  return timePair.hasMatch(content);
}
|
||||
|
||||
/// Parses a DOCX schedule into trips ordered by scheduled time.
///
/// Steps: extract the document text, split it into header-delimited
/// sections, parse every section's trip lines, then sort the combined list
/// by `scheduledTime`. Progress is written to stdout via `print`.
///
/// Throws [ScheduleParseException] when the document cannot be read or when
/// no section containing trip lines is found.
@override
Future<List<Trip>> parseBytes(Uint8List bytes) async {
  final extracted = _extractTextFromDocx(bytes);

  // DEBUG: dump the raw extracted text.
  print("=== EXTRACTED TEXT START ===");
  print(extracted);
  print("=== EXTRACTED TEXT END ===");

  final sections = _parseDocumentSections(extracted.split("\n"));

  print("=== FOUND ${sections.length} SECTIONS ===");
  for (final s in sections) {
    print("Section: ${s.direction}, ${s.stations.length} stations, ${s.tripLines.length} trips");
  }

  if (sections.isEmpty) {
    throw ScheduleParseException("No trip data found in schedule");
  }

  final allTrips = <Trip>[];
  for (final s in sections) {
    final parsed = _parseSectionTrips(s);
    allTrips.addAll(parsed);
    print("✓ Parsed ${parsed.length} trips from ${s.direction} section");
  }

  allTrips.sort((x, y) => x.scheduledTime.compareTo(y.scheduledTime));
  return allTrips;
}
|
||||
|
||||
/// Returns the plain text of the DOCX held in [bytes].
///
/// Any failure raised by `docxToText` is rethrown as a
/// [ScheduleParseException] carrying the original error's text.
String _extractTextFromDocx(Uint8List bytes) {
  final String text;
  try {
    text = docxToText(bytes);
  } catch (error) {
    throw ScheduleParseException("Failed to read document: $error");
  }
  return text;
}
|
||||
|
||||
|
||||
/// Converts a four-character time such as "0512" into "05:12".
///
/// Throws a [FormatException] when [rawTime] is not exactly four characters
/// long. The characters themselves are not validated.
String _formatTime(String rawTime) {
  if (rawTime.length == 4) {
    final hours = rawTime.substring(0, 2);
    final minutes = rawTime.substring(2, 4);
    return "$hours:$minutes";
  }
  throw FormatException("Invalid time format: $rawTime");
}
|
||||
|
||||
/// Groups [lines] into sections, each opened by a station header line.
///
/// A line recognised by [_extractStationHeader] starts a new section; every
/// following line accepted by [_isTripLine] is appended to it. A section's
/// direction starts as "unknown" and is filled in from its first trip line.
/// Sections that never collect a trip line are discarded, and trip lines
/// seen before the first header are ignored.
List<_DocumentSection> _parseDocumentSections(List<String> lines) {
  final completed = <_DocumentSection>[];
  _DocumentSection? open;

  // Keeps the currently open section only if it gathered at least one trip.
  void keepOpenSectionIfUseful() {
    final candidate = open;
    if (candidate != null && candidate.tripLines.isNotEmpty) {
      completed.add(candidate);
    }
  }

  for (var index = 0; index < lines.length; index++) {
    final line = lines[index].trim();

    final header = _extractStationHeader(line);
    if (header != null && header.isNotEmpty) {
      keepOpenSectionIfUseful();
      open = _DocumentSection(
        stations: header,
        direction: "unknown",
        tripLines: [],
      );
      print("Found station header at line $index with ${header.length} stations");
      print(" Stations: ${header.take(3).join(", ")} ... ${header.skip(header.length - 2).join(", ")}");
      continue;
    }

    final section = open;
    if (section == null || !_isTripLine(line)) continue;

    if (section.direction == "unknown") {
      section.direction = _inferDirectionFromTripLine(line);
      print(" Direction inferred: ${section.direction}");
    }
    section.tripLines.add(line);
  }

  // The last section has no following header to close it.
  keepOpenSectionIfUseful();
  return completed;
}
|
||||
|
||||
/// Returns the station codes in [line] if it looks like a station header,
/// otherwise `null`.
///
/// A header line has no digits, no "EC", no underscores, and is at least
/// ten characters long. It must contain at least eight tokens made of 3-8
/// uppercase letters, of which at least five remain once known metadata
/// words are removed.
List<String>? _extractStationHeader(String line) {
  final looksLikeData = line.contains(RegExp(r"\d")) ||
      line.contains("EC") ||
      line.contains("_");
  if (looksLikeData || line.length < 10) return null;

  final upperOnly = RegExp(r"^[A-Z]+$");
  final candidates = <String>[
    for (final word in line.split(RegExp(r"\s+")))
      if (word.length >= 3 && word.length <= 8 && upperOnly.hasMatch(word))
        word,
  ];
  if (candidates.length < 8) return null;

  // Common metadata words that appear in headers but are not stations.
  const nonStationWords = {
    "TRP", "DUTY", "BUS", "START", "END", "GAR", "DEP", "ARR",
    "DENOTES", "FINISHES", "RELIEF", "TRIP", "NEXT", "NO", "AT",
    "SPELL", "HOURS", "TOTAL", "LAYOVER", "MILES", "LIVE", "DEAD",
    "MILEAGE", "TIME", "SIGN", "FORM", "NXT", "THIS", "HAS", "OR",
    "FOR", "CHANGE", "SERVICE", "POINT", "LSN", "MAN", "RUI", "SN",
    "ROUTE", "RUNNING", "PREV", "FIN", "ENTOD", "SOALL", "USHRS",
    "ADDTL", "CASH", "TODAYS", "REL", "IEF",
  };

  final stations = <String>[
    for (final code in candidates)
      if (!nonStationWords.contains(code)) code,
  ];

  // It is the density of code-like tokens, not a known station list, that
  // marks the line as a header.
  return stations.length < 5 ? null : stations;
}
|
||||
|
||||
/// Labels a section from the shape of its first trip [line].
///
/// Returns "inbound" when the line starts with an `HHMM_HHMM` pair or with
/// underscores followed by a four-digit time; every other line, including
/// one that starts with two whitespace-separated numbers, is "outbound".
///
/// NOTE(review): the format comments on the class patterns describe rows
/// that start with `HHMM_HHMM` as OUTBOUND, the opposite of the label
/// returned here. The label appears to be used only in debug output —
/// confirm which convention is intended.
String _inferDirectionFromTripLine(String line) {
  final startsWithTimePair = RegExp(r"^\d{4}_\d{4}").hasMatch(line);
  final startsWithBlankThenTime = RegExp(r"^_+\d{4}").hasMatch(line);
  return (startsWithTimePair || startsWithBlankThenTime)
      ? "inbound"
      : "outbound";
}
|
||||
|
||||
/// Whether [line] is a trip row: it contains "EC" and at least one
/// `HHMM_HHMM` time pair.
bool _isTripLine(String line) {
  if (!line.contains("EC")) return false;
  return RegExp(r"\d{4}_\d{4}").hasMatch(line);
}
|
||||
|
||||
/// Parses every trip line of [section] into a [Trip].
///
/// Each line's layout is detected from its own structure: a line starting
/// with `HHMM_HHMM` is handed to [_parseOutboundTrip]; a line starting with
/// `trip duty running EC` (running number optionally prefixed by N/R/F,
/// attached or detached, or replaced by a bare `F`) is handed to
/// [_parseInboundTrip]. Lines that match neither layout, or that the chosen
/// parser rejects, are reported via `print` and skipped.
List<Trip> _parseSectionTrips(_DocumentSection section) {
  final outboundStart = RegExp(r"^\d{4}_\d{4}");
  final inboundStart = RegExp(r"^\d+\s+\d+\s+(?:[NRF]\d+\s+|[NRF]\s+\d+\s+|F\s+|\d+\s+)EC");

  final parsed = <Trip>[];
  for (final line in section.tripLines) {
    final String format;
    final Trip? trip;
    if (outboundStart.hasMatch(line)) {
      format = "outbound";
      trip = _parseOutboundTrip(line, section.stations);
    } else if (inboundStart.hasMatch(line)) {
      format = "inbound";
      trip = _parseInboundTrip(line, section.stations);
    } else {
      format = "unknown";
      trip = null;
    }

    if (trip == null) {
      // Log at most the first 80 characters of the offending line.
      final previewLength = line.length > 80 ? 80 : line.length;
      print("Failed to parse $format line: ${line.substring(0, previewLength)}...");
      continue;
    }
    parsed.add(trip);
  }
  return parsed;
}
|
||||
|
||||
/// Parses an inbound-layout row (`trip duty running EC### ... HHMM_HHMM
/// times...`) into a [Trip], or returns `null` if neither the regular nor
/// the finishing inbound pattern matches.
///
/// For finishing rows (`trip duty F EC### ...`) the duty number doubles as
/// the running number and the trip type is "F". The trip's direction is
/// derived from the trip number: odd is "inbound", even (or non-numeric) is
/// "outbound".
Trip? _parseInboundTrip(
  String line,
  List<String> stations,
) {
  // Shared tail of both branches: collect the times and build the Trip.
  Trip assemble({
    required String trip,
    required String duty,
    required String running,
    required String type,
    required bool finishing,
    required String first,
    required String second,
    required String rest,
  }) {
    // Complete time list: the leading pair, then whatever follows it.
    final allTimes = <String>[first, second, ..._extractTimesFromString(rest)];
    final tripParity = int.tryParse(trip) ?? 0;
    return Trip(
      tripNumber: trip,
      dutyNumber: duty,
      runningNumber: running,
      scheduledTime: _formatTime(first),
      tripType: type,
      isFinishing: finishing,
      stationTimes: _mapStationsToTimes(stations, allTimes),
      stationOrder: stations,
      direction: tripParity.isOdd ? "inbound" : "outbound",
    );
  }

  final regular = _inboundPattern.firstMatch(line);
  if (regular != null) {
    return assemble(
      trip: regular.group(1)!,
      duty: regular.group(2)!,
      running: regular.group(4)!,
      type: regular.group(3) ?? "",
      finishing: false,
      first: regular.group(6)!,
      second: regular.group(7)!,
      rest: regular.group(8) ?? "",
    );
  }

  final finishing = _inboundFinishingPattern.firstMatch(line);
  if (finishing == null) return null;

  final duty = finishing.group(2)!;
  return assemble(
    trip: finishing.group(1)!,
    duty: duty,
    running: duty,
    type: "F",
    finishing: true,
    first: finishing.group(4)!,
    second: finishing.group(5)!,
    rest: finishing.group(6) ?? "",
  );
}
|
||||
|
||||
/// Parses an outbound-layout row (`HHMM_HHMM times... EC### duty running
/// trip`) into a [Trip], or returns `null` if neither the regular nor the
/// finishing outbound pattern matches.
///
/// Finishing rows (`... EC### duty F`) carry no separate trip or running
/// number, so the duty number is used for both and the trip type is "F".
/// The trip's direction is derived from the trip number: odd is "inbound",
/// even (or non-numeric) is "outbound".
Trip? _parseOutboundTrip(
  String line,
  List<String> stations,
) {
  // Shared tail of both branches: collect the times and build the Trip.
  Trip assemble({
    required String trip,
    required String duty,
    required String running,
    required String type,
    required bool finishing,
    required String first,
    required String second,
    required String rest,
  }) {
    // Complete time list: the leading pair, then whatever follows it.
    final allTimes = <String>[first, second, ..._extractTimesFromString(rest)];
    final tripParity = int.tryParse(trip) ?? 0;
    return Trip(
      tripNumber: trip,
      dutyNumber: duty,
      runningNumber: running,
      scheduledTime: _formatTime(first),
      tripType: type,
      isFinishing: finishing,
      stationTimes: _mapStationsToTimes(stations, allTimes),
      stationOrder: stations,
      direction: tripParity.isOdd ? "inbound" : "outbound",
    );
  }

  final regular = _outboundPattern.firstMatch(line);
  if (regular != null) {
    return assemble(
      trip: regular.group(8)!,
      duty: regular.group(5)!,
      running: regular.group(7)!,
      type: regular.group(6) ?? "",
      finishing: false,
      first: regular.group(1)!,
      second: regular.group(2)!,
      rest: regular.group(3) ?? "",
    );
  }

  final finishing = _outboundFinishingPattern.firstMatch(line);
  if (finishing == null) return null;

  final duty = finishing.group(5)!;
  return assemble(
    trip: duty,
    duty: duty,
    running: duty,
    type: "F",
    finishing: true,
    first: finishing.group(1)!,
    second: finishing.group(2)!,
    rest: finishing.group(3) ?? "",
  );
}
|
||||
|
||||
/// Returns every standalone four-digit number in [timesString], in order.
///
/// "Standalone" means delimited by word boundaries, so digits inside longer
/// numbers or joined by an underscore (`0512_0515`) are not returned.
List<String> _extractTimesFromString(String timesString) {
  final found = <String>[];
  for (final hit in RegExp(r"\b(\d{4})\b").allMatches(timesString)) {
    found.add(hit.group(1)!);
  }
  return found;
}
|
||||
|
||||
/// Returns every four-digit time in [line], including both halves of an
/// `HHMM_HHMM` pair, in order of appearance.
List<String> _extractAllTimes(String line) {
  final timePattern = RegExp(r"\b(\d{4})(?:_(\d{4}))?\b");
  return timePattern.allMatches(line).expand((hit) {
    final pairedTime = hit.group(2);
    return <String>[
      hit.group(1)!,
      // Present only when the match was an HHMM_HHMM pair.
      if (pairedTime != null) pairedTime,
    ];
  }).toList();
}
|
||||
|
||||
/// Pairs [stations] with [times] by position and returns station → "HH:MM".
///
/// Pairing stops at the shorter of the two lists. Entries whose time is not
/// exactly four digits are left out of the map.
Map<String, String> _mapStationsToTimes(
  List<String> stations,
  List<String> times,
) {
  final pairCount =
      stations.length < times.length ? stations.length : times.length;
  final fourDigits = RegExp(r"^\d{4}$");

  final result = <String, String>{};
  for (var idx = 0; idx < pairCount; idx++) {
    final raw = times[idx];
    if (!fourDigits.hasMatch(raw)) continue;
    result[stations[idx]] = _formatTime(raw);
  }
  return result;
}
|
||||
}
|
||||
|
||||
/// One header-delimited block of a schedule document: the station codes
/// from its header line plus the raw trip lines that follow it.
class _DocumentSection {
  _DocumentSection({
    required this.stations,
    required this.direction,
    required this.tripLines,
  });

  /// Station codes taken from the section's header line.
  final List<String> stations;

  /// Raw trip rows collected for this section.
  final List<String> tripLines;

  /// Direction label; mutable because it is filled in after construction.
  String direction;
}
|
||||
@@ -0,0 +1,7 @@
|
||||
import "dart:typed_data";
|
||||
import "../models/trip.dart";
|
||||
|
||||
/// Contract implemented by every operator-specific schedule parser.
abstract class ScheduleParser {
  /// Whether [content] looks like a schedule this parser can handle.
  bool canParse(String content);

  /// Parses the raw file [bytes] into the trips they describe.
  Future<List<Trip>> parseBytes(Uint8List bytes);
}
|
||||
@@ -0,0 +1,301 @@
|
||||
import "dart:typed_data";
|
||||
import "package:syncfusion_flutter_pdf/pdf.dart";
|
||||
import "../models/trip.dart";
|
||||
import "../exceptions/schedule_parse_exception.dart";
|
||||
import "schedule_parser.dart";
|
||||
|
||||
class StagecoachScheduleParser implements ScheduleParser {
|
||||
/// Route name captured from the first page by the most recent [parseBytes]
/// call; `null` when no "ROUTE ..." text matched or nothing was parsed yet.
String? parsedRouteName;
|
||||
|
||||
/// Whether [content] looks like a Stagecoach schedule.
///
/// With newlines flattened to spaces, the text must contain "ROUTE" and at
/// least one token shaped like digit, `D`, three digits (e.g. `1D123`).
@override
bool canParse(String content) {
  final flat = content.replaceAll("\n", " ");
  if (!flat.contains("ROUTE")) return false;
  return RegExp(r"\dD\d{3}").hasMatch(flat);
}
|
||||
|
||||
/// Parses a Stagecoach PDF schedule into trips, in page order.
///
/// Each page's text is collapsed to a single line with single spaces. The
/// route name is read from the first page into [parsedRouteName]. A page is
/// treated as outbound when its text contains "DEPT GAR", otherwise as
/// inbound. Trips are de-duplicated on trip number, scheduled time and
/// direction, and the original parse order is kept (no sorting). Progress
/// is written to stdout via `print`.
///
/// Throws [ScheduleParseException] when the PDF cannot be read, has no
/// pages, or yields no trips.
@override
Future<List<Trip>> parseBytes(Uint8List bytes) async {
  final rawPages = _extractPages(bytes);

  // Syncfusion splits text with newlines everywhere, so collapse each page
  // into one line and normalise every whitespace run to a single space.
  final pages = rawPages
      .map((p) => p.replaceAll("\n", " ").replaceAll(RegExp(r"\s+"), " ").trim())
      .toList();

  print("=== STAGECOACH: ${pages.length} pages ===");

  // A PDF without pages cannot contain trips; fail with the parser's own
  // exception type instead of letting `pages.first` throw a StateError.
  if (pages.isEmpty) {
    throw ScheduleParseException("No trips found in stagecoach schedule");
  }
  final firstPage = pages.first;

  // Route name — "ROUTE 099CSATURDAY..." or "ROUTE CRL SATURDAY...".
  final routeMatch = RegExp(r"ROUTE\s+(\w+?)(?:SATURDAY|SUNDAY|MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)", caseSensitive: false).firstMatch(firstPage);
  if (routeMatch != null) {
    parsedRouteName = routeMatch.group(1);
  } else {
    // No weekday glued to the route code: take the first word after ROUTE.
    parsedRouteName = RegExp(r"ROUTE\s+(\w+)").firstMatch(firstPage)?.group(1);
  }
  print("Route: $parsedRouteName");

  final allTrips = <Trip>[];
  final seenKeys = <String>{};

  for (var pi = 0; pi < pages.length; pi++) {
    final page = pages[pi];

    // Direction: outbound pages have "DEPT GAR" in the header.
    final isOutbound = RegExp(r"DEPT\s+GAR").hasMatch(page);
    final direction = isOutbound ? "outbound" : "inbound";

    final stations = _extractStations(page, isOutbound);
    print("Page ${pi + 1}: $direction, stations: $stations");

    final trips = _parseTripsFromPage(page, direction, stations);
    print(" Trips found: ${trips.length}");

    for (final t in trips) {
      final key = "${t.tripNumber}_${t.scheduledTime}_${t.direction}";
      // Set.add returns false when the key was already present.
      if (!seenKeys.add(key)) continue;
      allTrips.add(t);
      print(" TRIP: ${t.tripNumber} | ${t.scheduledTime} | ${t.direction} | duty=${t.dutyNumber} run=${t.runningNumber} | stations=${t.stationTimes}");
    }
  }

  if (allTrips.isEmpty) {
    throw ScheduleParseException("No trips found in stagecoach schedule");
  }

  // Keep original parse order for stagecoach.
  print("Total unique trips: ${allTrips.length}");
  return allTrips;
}
|
||||
|
||||
/// Returns the extracted text of every page of the PDF in [bytes], one
/// string per page, in page order.
///
/// The document is disposed in a `finally` block so that it is released
/// even when opening or text extraction fails part-way through.
///
/// Throws [ScheduleParseException] when the PDF cannot be opened or read.
List<String> _extractPages(Uint8List bytes) {
  PdfDocument? doc;
  try {
    doc = PdfDocument(inputBytes: bytes);
    final pages = <String>[];
    for (var i = 0; i < doc.pages.count; i++) {
      // Extract exactly one page per call so page boundaries are kept.
      pages.add(PdfTextExtractor(doc).extractText(
        startPageIndex: i,
        endPageIndex: i,
      ));
    }
    return pages;
  } catch (e) {
    throw ScheduleParseException("Failed to read PDF: $e");
  } finally {
    doc?.dispose();
  }
}
|
||||
|
||||
/// Extracts the station names from a normalised page header.
///
/// After whitespace normalisation the header looks like
/// "...RLF STS: DEPT GAR STFC BS T STFC BS FGAT PA ... RLF DEPT TIME..."
/// on outbound pages and
/// "...RLF STS: ROMF SN T ROMF SN ... STFC BS T ARR GAR RLF DEPT TIME..."
/// on inbound pages. The station block is the text between "DEPT GAR" and
/// "RLF DEPT TIME" (outbound) or between "RLF STS:" and "ARR GAR"
/// (inbound). When that block cannot be found, [_extractStationsFallback]
/// is used instead.
List<String> _extractStations(String pageText, bool isOutbound) {
  final headerPattern = isOutbound
      ? RegExp(r"DEPT GAR (.+?) RLF DEPT TIME")
      : RegExp(r"RLF STS: (.+?) ARR GAR");

  final stationBlock = headerPattern.firstMatch(pageText)?.group(1);
  if (stationBlock == null) {
    print(" No station header match");
    return _extractStationsFallback(pageText);
  }
  return _pairStationTokens(stationBlock.trim());
}
|
||||
|
||||
/// Broader station extraction used when the direction-specific header
/// pattern fails.
///
/// Takes the text between "STS:" and the first duty-ID-shaped token (digit,
/// uppercase letter, three digits), blanks out known non-station phrases,
/// and pairs the remaining tokens into station names. Returns an empty list
/// when no such span exists.
List<String> _extractStationsFallback(String pageText) {
  final captured =
      RegExp(r"STS: (.+?) \d[A-Z]\d{3}").firstMatch(pageText)?.group(1);
  if (captured == null) return [];

  // Phrases are removed in this order; each is replaced by a single space.
  const nonStationPhrases = [
    "DEPT GAR", "ARR GAR", "RLF DEPT TIME", "DUTY END",
    "NEXT BUS", "NEXT TRIP", "DRV SPELL", "RLF",
  ];
  final cleaned = nonStationPhrases.fold<String>(
    captured,
    (text, phrase) => text.replaceAll(phrase, " "),
  );
  return _pairStationTokens(cleaned.trim());
}
|
||||
|
||||
/// Groups the space-separated tokens of [block] into station names.
///
/// A token of three or four uppercase letters is a station code and starts
/// a new name; every other token is a qualifier appended to the name in
/// progress. For example "STFC BS T STFC BS FGAT PA" becomes
/// ["STFC BS T", "STFC BS", "FGAT PA"]. Qualifiers that appear before the
/// first code form a name of their own.
List<String> _pairStationTokens(String block) {
  final codePattern = RegExp(r"^[A-Z]{3,4}$");
  final names = <String>[];
  final parts = <String>[];

  // Emits the name being built, if any, and starts a fresh one.
  void flush() {
    if (parts.isEmpty) return;
    names.add(parts.join(" "));
    parts.clear();
  }

  for (final token in block.split(" ")) {
    if (token.isEmpty) continue;
    if (codePattern.hasMatch(token)) flush();
    parts.add(token);
  }
  flush();

  return names;
}
|
||||
|
||||
|
||||
/// Splits a normalised page into trip segments and parses each one.
///
/// A trip starts wherever a duty ID (optionally prefixed by `digit/`) is
/// followed by a run number and a three-digit bus number. Each segment runs
/// from one such start to the next, or to the end of the page. Segments
/// that [_parseTripFromSegment] rejects are logged via `print` and skipped.
List<Trip> _parseTripsFromPage(String pageText, String direction, List<String> stations) {
  final headers = RegExp(r"\b((\d/)?\d[A-Z]\d{3})\s+(\d+)\s+(\d{3})\b")
      .allMatches(pageText)
      .toList();
  print(" Trip-start matches: ${headers.length}");

  final result = <Trip>[];
  var parsedCount = 0;
  var skippedCount = 0;

  for (var idx = 0; idx < headers.length; idx++) {
    final header = headers[idx];
    final isLast = idx == headers.length - 1;
    final segmentEnd = isLast ? pageText.length : headers[idx + 1].start;
    final segment = pageText.substring(header.start, segmentEnd).trim();

    final trip = _parseTripFromSegment(
      segment,
      header.group(1)!,
      header.group(3)!,
      header.group(4)!,
      direction,
      stations,
    );

    if (trip == null) {
      skippedCount++;
      // Log at most the first 120 characters of the rejected segment.
      final preview = segment.length > 120 ? segment.substring(0, 120) : segment;
      print(" Skip: $preview");
      continue;
    }
    result.add(trip);
    parsedCount++;
  }

  print(" Parsed: $parsedCount, skipped: $skippedCount");
  return result;
}
|
||||
|
||||
/// Builds a [Trip] from one page [segment], or returns `null` when the
/// segment should be skipped.
///
/// [dutyId], [runNumber] and [busWorkingNo] are the three header values
/// already captured by the caller; [direction] is the page's direction and
/// [stations] its station list in page order.
///
/// A segment is skipped when it contains header text ("TRIP NO",
/// "DUTY START", "SCHEDULE DATE"), when "LIGHT" appears within 40
/// characters of the header (a deadhead/light run), when its
/// `duty run bus` header cannot be located, or when it yields no usable
/// times.
///
/// A duty ID starting with "2/" forces the trip's direction to "inbound";
/// if that differs from the page direction the returned station order is
/// reversed so the terminus is correct.
Trip? _parseTripFromSegment(
  String segment, String dutyId, String runNumber, String busWorkingNo,
  String direction, List<String> stations,
) {
  // Skip header junk.
  if (segment.contains("TRIP NO") ||
      segment.contains("DUTY START") ||
      segment.contains("SCHEDULE DATE")) {
    return null;
  }

  // Locate the "DUTY RUN BUS" header once; everything below is relative to
  // it. A segment without the header cannot be parsed, so skip it rather
  // than crash on a null assertion.
  final header =
      RegExp(RegExp.escape(dutyId) + r"\s+\d+\s+\d{3}").firstMatch(segment);
  if (header == null) return null;

  // Skip deadhead/light runs — but only if LIGHT appears early in the
  // segment (before station times), not in trailing text from the next
  // concatenated entry.
  final lightMatch = RegExp(r"LIGHT", caseSensitive: false).firstMatch(segment);
  if (lightMatch != null && lightMatch.start - header.end < 40) {
    return null;
  }

  final afterHeader = segment.substring(header.end).trim();

  // Strip relief markers and the garage name to get to the times. Longer
  // markers are listed before their prefixes ("FRX" before "FR") so the
  // whole marker is consumed.
  final timesSection = afterHeader
      .replaceFirst(RegExp(r"^(RR|FRX|FR|FX|R)\s*"), "")
      .replaceFirst(RegExp(r"^HomeGara\s*"), "")
      // Inbound rows may carry just "R " or "F ".
      .replaceFirst(RegExp(r"^(F|R)\s+"), "");

  // Every standalone four-digit value whose hour part is below 30.
  final times = RegExp(r"\b(\d{4})\b")
      .allMatches(timesSection)
      .map((m) => m.group(1)!)
      .where((t) => (int.tryParse(t.substring(0, 2)) ?? 99) < 30)
      .toList();
  if (times.isEmpty) return null;

  // For garage trips the first time is the garage departure — skip it,
  // unless it is the only time available.
  final isFromGarage = afterHeader.contains("HomeGara");
  final firstStationIdx = (isFromGarage && times.length > 1) ? 1 : 0;

  // Only the first N times (N = station count) are used, to avoid grabbing
  // times from the trailing metadata (spell, next bus etc).
  final stationTimes = <String, String>{};
  for (var i = 0; i < stations.length; i++) {
    final timeIdx = firstStationIdx + i;
    if (timeIdx >= times.length) break;
    stationTimes[stations[i]] = _formatTime(times[timeIdx]);
  }
  if (stationTimes.isEmpty) return null;

  final scheduledTime = _formatTime(times[firstStationIdx]);

  // Any leading "R" (which includes "RR") marks the trip type "R".
  final tripType = afterHeader.startsWith("R") ? "R" : "";

  final actualDirection = dutyId.startsWith("2/") ? "inbound" : direction;

  final tripStations = actualDirection != direction
      ? stations.reversed.toList()
      : List<String>.from(stations);

  return Trip(
    scheduledTime: scheduledTime,
    tripNumber: dutyId,
    dutyNumber: busWorkingNo,
    runningNumber: runNumber,
    tripType: tripType,
    isFinishing: segment.contains("BUS FIN"),
    stationTimes: stationTimes,
    stationOrder: tripStations,
    direction: actualDirection,
  );
}
|
||||
|
||||
/// Converts a four-character time such as "0512" into "05:12".
///
/// Input of any other length is returned unchanged.
String _formatTime(String rawTime) {
  if (rawTime.length == 4) {
    final hours = rawTime.substring(0, 2);
    final minutes = rawTime.substring(2, 4);
    return "$hours:$minutes";
  }
  return rawTime;
}
|
||||
}
|
||||
Reference in New Issue
Block a user