Refactor BinaryTable file handling to use C-style file operations and improve memory management

This commit is contained in:
ImBenji
2025-10-12 16:04:23 +01:00
parent b15d11a5a4
commit 75d682a41d
6 changed files with 421 additions and 626 deletions

View File

@@ -467,33 +467,32 @@ template class BT_UniformArray<float>;
// BinaryTable implementation
// Opens the backing file at `path` for binary read/write access and creates
// it when it cannot be opened.
//
// path: location of the table file; a copy is kept in filePath_.
// Throws std::runtime_error when the file can neither be opened nor created,
// so that no later I/O helper is ever handed a null FILE*.
BinaryTable::BinaryTable(const std::string& path)
    : filePath_(path), freeListLifted_(false) {
    // "r+b" preserves existing contents but fails when the file is missing.
    file_ = fopen(path.c_str(), "r+b");
    if (!file_) {
        // File doesn't exist, create it
        file_ = fopen(path.c_str(), "w+b");
    }
    if (!file_) {
        throw std::runtime_error("Failed to open or create file: " + path);
    }
}
// Closes the backing file if it is open. fclose also flushes any buffered
// writes; the handle is cleared afterwards so it cannot be reused.
BinaryTable::~BinaryTable() {
    if (file_) {
        fclose(file_);
        file_ = nullptr;
    }
}
// Writes the file header: a null address-table pointer followed by an empty
// free-list entry count, then flushes the stream.
//
// Throws std::runtime_error if the flush fails.
void BinaryTable::initialize() {
    // Both helpers seek to their own offset, so no explicit seek is needed.
    writeInt64(0, BT_Null.address()); // Address table pointer (8 bytes)
    writeInt32(8, 0); // Free list entry count (4 bytes)
    if (fflush(file_) != 0) {
        throw std::runtime_error("Failed to flush file header");
    }
}
// File I/O helper implementations
int32_t BinaryTable::readInt32(int64_t position) {
file_.seekg(position);
fseek(file_, position, SEEK_SET);
uint8_t bytes[4];
file_.read(reinterpret_cast<char*>(bytes), 4);
fread(bytes, 1, 4, file_);
return static_cast<int32_t>(bytes[0]) |
(static_cast<int32_t>(bytes[1]) << 8) |
@@ -502,9 +501,9 @@ int32_t BinaryTable::readInt32(int64_t position) {
}
float BinaryTable::readFloat32(int64_t position) {
file_.seekg(position);
fseek(file_, position, SEEK_SET);
uint8_t bytes[4];
file_.read(reinterpret_cast<char*>(bytes), 4);
fread(bytes, 1, 4, file_);
uint32_t floatBits = static_cast<uint32_t>(bytes[0]) |
(static_cast<uint32_t>(bytes[1]) << 8) |
@@ -517,9 +516,9 @@ float BinaryTable::readFloat32(int64_t position) {
}
int64_t BinaryTable::readInt64(int64_t position) {
file_.seekg(position);
fseek(file_, position, SEEK_SET);
uint8_t bytes[8];
file_.read(reinterpret_cast<char*>(bytes), 8);
fread(bytes, 1, 8, file_);
int64_t result = 0;
for (int i = 0; i < 8; i++) {
@@ -530,32 +529,32 @@ int64_t BinaryTable::readInt64(int64_t position) {
}
// Reads one byte at the absolute file offset `position`.
//
// Returns the byte read, or 0 when the seek fails or the read comes up short
// (for example past the end of the file). The zero result matches readBytes,
// whose buffer is zero-filled before reading.
// NOTE(review): fseek takes a `long` offset; where long is 32 bits, offsets
// beyond 2 GiB are not representable.
uint8_t BinaryTable::readByte(int64_t position) {
    uint8_t byte = 0;
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0) {
        return 0;
    }
    if (fread(&byte, 1, 1, file_) != 1) {
        return 0;
    }
    return byte;
}
// Reads `count` bytes starting at the absolute file offset `position`.
//
// Returns a vector of exactly `count` bytes. Bytes that could not be read
// (failed seek, short read at end of file) are left as zero.
// Throws std::length_error for a negative count instead of letting it wrap
// into an enormous allocation request.
std::vector<uint8_t> BinaryTable::readBytes(int64_t position, int32_t count) {
    if (count < 0) {
        throw std::length_error("readBytes: negative byte count");
    }
    std::vector<uint8_t> bytes(static_cast<size_t>(count));
    const bool positioned = fseek(file_, static_cast<long>(position), SEEK_SET) == 0;
    if (positioned && !bytes.empty()) {
        // A short read leaves the tail of the zero-initialized buffer untouched.
        const size_t got = fread(bytes.data(), 1, bytes.size(), file_);
        (void)got;
    }
    return bytes;
}
// Writes `value` as 4 little-endian bytes at the absolute offset `position`.
//
// Throws std::runtime_error when the seek fails or fewer than 4 bytes are
// accepted by the stream.
void BinaryTable::writeInt32(int64_t position, int32_t value) {
    // Work on the unsigned bit pattern so the shifts are well defined for
    // negative values.
    const uint32_t bits = static_cast<uint32_t>(value);
    const uint8_t bytes[4] = {
        static_cast<uint8_t>(bits & 0xFF),
        static_cast<uint8_t>((bits >> 8) & 0xFF),
        static_cast<uint8_t>((bits >> 16) & 0xFF),
        static_cast<uint8_t>((bits >> 24) & 0xFF)
    };
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0 ||
        fwrite(bytes, 1, sizeof(bytes), file_) != sizeof(bytes)) {
        throw std::runtime_error("Failed to write 32-bit integer");
    }
}
void BinaryTable::writeFloat32(int64_t position, float value) {
file_.seekp(position);
fseek(file_, position, SEEK_SET);
uint32_t floatBits;
std::memcpy(&floatBits, &value, sizeof(float));
@@ -565,73 +564,132 @@ void BinaryTable::writeFloat32(int64_t position, float value) {
static_cast<uint8_t>((floatBits >> 16) & 0xFF),
static_cast<uint8_t>((floatBits >> 24) & 0xFF)
};
file_.write(reinterpret_cast<const char*>(bytes), 4);
fwrite(bytes, 1, 4, file_);
}
// Writes `value` as 8 little-endian bytes at the absolute offset `position`.
//
// Throws std::runtime_error when the seek fails or fewer than 8 bytes are
// accepted by the stream.
void BinaryTable::writeInt64(int64_t position, int64_t value) {
    // Shift the unsigned bit pattern; the emitted bytes are the same as for
    // the signed value.
    const uint64_t bits = static_cast<uint64_t>(value);
    uint8_t bytes[8];
    for (int i = 0; i < 8; i++) {
        bytes[i] = static_cast<uint8_t>((bits >> (i * 8)) & 0xFF);
    }
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0 ||
        fwrite(bytes, 1, sizeof(bytes), file_) != sizeof(bytes)) {
        throw std::runtime_error("Failed to write 64-bit integer");
    }
}
// Writes the single byte `value` at the absolute offset `position`.
//
// Throws std::runtime_error when the seek or the write fails.
void BinaryTable::writeByte(int64_t position, uint8_t value) {
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0 ||
        fwrite(&value, 1, 1, file_) != 1) {
        throw std::runtime_error("Failed to write byte");
    }
}
// Writes every byte of `data` starting at the absolute offset `position`.
//
// An empty `data` only repositions the stream.
// Throws std::runtime_error when the seek fails or the write is incomplete.
void BinaryTable::writeBytes(int64_t position, const std::vector<uint8_t>& data) {
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0) {
        throw std::runtime_error("Failed to seek before writing byte buffer");
    }
    if (data.empty()) {
        return;
    }
    if (fwrite(data.data(), 1, data.size(), file_) != data.size()) {
        throw std::runtime_error("Failed to write byte buffer");
    }
}
// Returns the current length of the backing file in bytes.
//
// The stream position is restored afterwards when it could be determined.
// Throws std::runtime_error if the end of the file cannot be located, rather
// than returning -1, which callers would treat as an offset.
int64_t BinaryTable::getFileLength() {
    const long saved = ftell(file_);
    if (fseek(file_, 0, SEEK_END) != 0) {
        throw std::runtime_error("Failed to seek to end of file");
    }
    const long length = ftell(file_);
    if (saved >= 0) {
        fseek(file_, saved, SEEK_SET); // Restore position
    }
    if (length < 0) {
        throw std::runtime_error("Failed to determine file length");
    }
    return length;
}
// Moves the stream to the absolute offset `position`; a single FILE* position
// serves both reads and writes.
//
// Throws std::runtime_error if the seek fails, so that a following raw write
// cannot land at an unintended offset.
void BinaryTable::setFilePosition(int64_t position) {
    if (fseek(file_, static_cast<long>(position), SEEK_SET) != 0) {
        throw std::runtime_error("Failed to set file position");
    }
}
// Address table management
std::unordered_map<int64_t, BT_Pointer> BinaryTable::getAddressTable() {
file_.seekg(0);
int64_t tableAddress = readInt64(0);
DEBUG_PRINTLN("DEBUG: getAddressTable reading from address " << tableAddress);
if (tableAddress == -1) { // Null pointer
return {};
}
// Validate table address is within file bounds
int64_t fileLength = getFileLength();
if (tableAddress < 0 || tableAddress >= fileLength) {
DEBUG_PRINTLN("DEBUG: Address table pointer is out of bounds: " << tableAddress << " (file length: " << fileLength << ")");
throw std::runtime_error("Address table pointer is corrupted - out of bounds");
}
try {
uint8_t typeId = readByte(tableAddress);
if (static_cast<BT_Type>(typeId) != BT_Type::ADDRESS_TABLE) {
DEBUG_PRINTLN("DEBUG: Invalid type ID at address table location: " << (int)typeId);
// Address table might not be valid yet, return empty
return {};
}
int32_t tableCount = readInt32(tableAddress + 1);
// Validate table count is reasonable
if (tableCount < 0 || tableCount > 1000000) { // Arbitrary but reasonable limit
DEBUG_PRINTLN("DEBUG: Suspicious address table count: " << tableCount);
throw std::runtime_error("Address table appears corrupted - invalid entry count");
}
// Validate the entire table fits within file bounds
int64_t requiredSize = 1 + 4 + tableCount * (8 + 8); // Type + count + entries
if (tableAddress + requiredSize > fileLength) {
DEBUG_PRINTLN("DEBUG: Address table extends beyond file bounds");
throw std::runtime_error("Address table appears corrupted - extends beyond file");
}
std::unordered_map<int64_t, BT_Pointer> addressTable;
for (int32_t i = 0; i < tableCount; i++) {
int64_t offset = tableAddress + 1 + 4 + i * (8 + 8);
int64_t keyHash = readInt64(offset);
int64_t valueAddress = readInt64(offset + 8);
// Validate each value address is within bounds (or null)
if (valueAddress != -1 && (valueAddress < 0 || valueAddress >= fileLength)) {
DEBUG_PRINTLN("DEBUG: Invalid value address in entry " << i << ": " << valueAddress);
throw std::runtime_error("Address table entry contains invalid pointer");
}
DEBUG_PRINTLN(" Reading entry " << i << ": hash " << keyHash << " -> address " << valueAddress);
addressTable[keyHash] = BT_Pointer(valueAddress);
}
return addressTable;
} catch (const std::runtime_error& e) {
// Re-throw runtime errors (our validation failures)
throw;
} catch (...) {
// If we can't read the address table, return empty
// If we can't read the address table for other reasons, return empty
DEBUG_PRINTLN("DEBUG: Failed to read address table due to I/O error");
return {};
}
}
void BinaryTable::setAddressTable(const std::unordered_map<int64_t, BT_Pointer>& table) {
DEBUG_PRINTLN("DEBUG: setAddressTable called! This should NOT happen during get operations!");
DEBUG_PRINTLN("DEBUG: setAddressTable writing " << table.size() << " entries");
for (const auto& [key, value] : table) {
DEBUG_PRINTLN(" Writing hash " << key << " -> address " << value.address());
}
// Read old table pointer FIRST to ensure we can clean it up later
int64_t oldTablePointerAddress = readInt64(0);
BT_Pointer oldTablePtr(oldTablePointerAddress);
int32_t oldTableSize = 0;
// Calculate old table size if it exists
if (!oldTablePtr.isNull()) {
try {
BT_Reference oldTableRef(this, oldTablePtr);
oldTableSize = oldTableRef.size();
} catch (...) {
// If we can't read the old table, we can't free it safely
DEBUG_PRINTLN("DEBUG: WARNING - Cannot read old table for cleanup");
oldTablePtr = BT_Null;
}
}
// Build buffer manually (matching Dart implementation exactly)
std::vector<uint8_t> buffer;
@@ -657,25 +715,29 @@ void BinaryTable::setAddressTable(const std::unordered_map<int64_t, BT_Pointer>&
}
}
// Write new address table at end of file
BT_Pointer tableAddress = alloc(static_cast<int32_t>(buffer.size()));
file_.seekp(tableAddress.address());
file_.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
// Allocate and write new address table
BT_Pointer newTableAddress = alloc(static_cast<int32_t>(buffer.size()));
setFilePosition(newTableAddress.address());
size_t written = fwrite(buffer.data(), 1, buffer.size(), file_);
// Read old table pointer before updating
file_.seekg(0);
int64_t oldTablePointerAddress = readInt64(0);
BT_Pointer oldTablePtr(oldTablePointerAddress);
if (written != buffer.size()) {
throw std::runtime_error("Failed to write complete address table");
}
// Update header to point to new table
file_.seekp(0);
writeInt64(0, tableAddress.address());
file_.flush();
// Ensure new table is written to disk before updating header
fflush(file_);
// Now free the old table if it exists and is not the same as the new one
if (!oldTablePtr.isNull() && oldTablePtr != tableAddress) {
BT_Reference oldTableRef(this, oldTablePtr);
free(oldTablePtr, oldTableRef.size());
// Atomically update header to point to new table
writeInt64(0, newTableAddress.address());
fflush(file_);
// Only free old table after new one is successfully committed
DEBUG_PRINTLN("DEBUG: oldTablePtr.isNull()=" << oldTablePtr.isNull() << ", oldTablePtr.address()=" << oldTablePtr.address() << ", newTableAddress=" << newTableAddress.address());
if (!oldTablePtr.isNull() && oldTablePtr != newTableAddress) {
DEBUG_PRINTLN("DEBUG: Calling free() for old table");
free(oldTablePtr, oldTableSize);
} else {
DEBUG_PRINTLN("DEBUG: NOT calling free() - condition not met");
}
}
@@ -711,33 +773,47 @@ std::vector<BT_FreeListEntry> BinaryTable::getFreeList() {
}
void BinaryTable::setFreeList(const std::vector<BT_FreeListEntry>& list) {
DEBUG_PRINTLN("DEBUG: setFreeList called with freeListLifted_=" << freeListLifted_ << ", list.size()=" << list.size());
if (freeListLifted_) {
freeListCache_ = list;
DEBUG_PRINTLN("DEBUG: setFreeList early return - just updating cache");
return;
}
std::cout << "DEBUG: setFreeList called with " << list.size() << " entries" << std::endl;
// Read old entry count from last 4 bytes (matching Dart exactly)
// Always remove old free list first (matching Dart behavior)
int64_t fileLength = getFileLength();
std::cout << "DEBUG: File length: " << fileLength << std::endl;
DEBUG_PRINTLN("DEBUG: setFreeList fileLength=" << fileLength);
file_.seekg(fileLength - 4);
int32_t oldEntryCount = readInt32(fileLength - 4);
int32_t oldListSize = (oldEntryCount * (8 + 4)) + 4; // Entries + Count
std::cout << "DEBUG: Old entry count: " << oldEntryCount << ", old list size: " << oldListSize << std::endl;
// Calculate old free list size to remove
int32_t oldEntryCount = 0;
if (fileLength >= 4) {
oldEntryCount = readInt32(fileLength - 4);
}
DEBUG_PRINTLN("DEBUG: setFreeList oldEntryCount=" << oldEntryCount);
// Truncate file to remove old free list (Dart does _file.truncateSync)
int64_t newFileLength = fileLength - oldListSize;
std::cout << "DEBUG: New file length after truncation: " << newFileLength << std::endl;
// Skip actual truncation for now, just use logical position
// Remove old free list (matching Dart: always truncate first)
if (oldEntryCount > 0) {
int32_t oldListSize = (oldEntryCount * (8 + 4)) + 4; // Entries + Count
int64_t newFileLength = fileLength - oldListSize;
DEBUG_PRINTLN("DEBUG: setFreeList - removing old free list, oldListSize=" << oldListSize << ", truncating to: " << newFileLength);
truncateFile(newFileLength);
fileLength = newFileLength; // Update file length
}
// Encode new free list (matching Dart bt_encode exactly)
// If the new free list is empty, we're done (old list already removed)
if (list.empty()) {
DEBUG_PRINTLN("DEBUG: setFreeList - empty list, old list removed, done");
return;
}
// Write new free list at end of file
int64_t newLogicalEnd = fileLength;
// Encode new free list
std::vector<uint8_t> buffer;
// Entries
for (const auto& entry : list) {
std::cout << "DEBUG: Encoding entry - address: " << entry.pointer.address() << ", size: " << entry.size << std::endl;
// Pointer (8 bytes, little endian)
int64_t addr = entry.pointer.address();
for (int i = 0; i < 8; i++) {
@@ -756,81 +832,92 @@ void BinaryTable::setFreeList(const std::vector<BT_FreeListEntry>& list) {
buffer.push_back(static_cast<uint8_t>((count >> (i * 8)) & 0xFF));
}
std::cout << "DEBUG: Buffer size: " << buffer.size() << " bytes" << std::endl;
std::cout << "DEBUG: Writing free list at position: " << newFileLength << std::endl;
// Write at the logical end position
fseek(file_, newLogicalEnd, SEEK_SET);
fwrite(buffer.data(), 1, buffer.size(), file_);
fflush(file_);
// Write at end of (truncated) file - seek to end of logical file, not physical file
file_.seekp(0, std::ios::end);
int64_t actualFileLength = file_.tellp();
std::cout << "DEBUG: Actual file length: " << actualFileLength << std::endl;
// Write at the calculated position (after logical truncation)
file_.seekp(newFileLength);
file_.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
file_.flush();
std::cout << "DEBUG: setFreeList completed" << std::endl;
// Update logical file length
// File will be extended automatically by write operations
}
// Resizes the backing file to `newSize` bytes (matching Dart behavior).
//
// The stream is closed first, which also flushes buffered writes, then the
// file is resized on disk and reopened in "r+b" mode. The reopen is attempted
// even when the resize fails so the object keeps a usable handle if possible.
// Throws std::runtime_error for a negative size, a failed resize, or a failed
// reopen; callers must not continue as if the file had been truncated.
void BinaryTable::truncateFile(int64_t newSize) {
    DEBUG_PRINTLN("DEBUG: truncateFile - truncating to " << newSize);
    if (newSize < 0) {
        // resize_file takes an unsigned size; a negative value would wrap.
        throw std::runtime_error("Cannot truncate file to a negative size");
    }

    if (file_) {
        fclose(file_);
        file_ = nullptr;
    }

    bool resized = true;
    std::string resizeError;
    try {
        std::filesystem::resize_file(filePath_, static_cast<std::uintmax_t>(newSize));
        DEBUG_PRINTLN("DEBUG: truncateFile - resize successful");
    } catch (const std::exception& e) {
        resized = false;
        resizeError = e.what();
        DEBUG_PRINTLN("DEBUG: truncateFile - resize failed: " << e.what());
    }

    file_ = fopen(filePath_.c_str(), "r+b");
    DEBUG_PRINTLN("DEBUG: truncateFile - reopen: success=" << (file_ != nullptr));

    if (!file_) {
        throw std::runtime_error("Failed to reopen file after truncation: " + filePath_);
    }
    if (!resized) {
        throw std::runtime_error("Failed to truncate file: " + resizeError);
    }
}
// Loads the free list into freeListCache_ and removes its on-disk copy from
// the end of the file, then marks the list as lifted.
//
// The trailing 4 bytes of the file are read as the entry count; each entry
// occupies 8 + 4 bytes. Nothing is truncated when that count is not positive.
// Throws std::runtime_error if the list is already lifted, or if the recorded
// count describes a list larger than the file (corrupt trailer).
void BinaryTable::liftFreeList() {
    DEBUG_PRINTLN("DEBUG: liftFreeList() called - this truncates the file!");
    if (freeListLifted_) {
        throw std::runtime_error("Free list is already lifted");
    }

    freeListCache_ = getFreeList();

    // Remove free list from end of file
    const int64_t fileLength = getFileLength();
    const int32_t oldEntryCount = (fileLength >= 4) ? readInt32(fileLength - 4) : 0;
    if (oldEntryCount > 0) {
        // 64-bit arithmetic: a bogus count must not overflow the size or
        // produce a negative truncation target.
        constexpr int64_t kEntrySize = 8 + 4;
        const int64_t oldFreeListSize = static_cast<int64_t>(oldEntryCount) * kEntrySize + 4;
        if (oldFreeListSize > fileLength) {
            throw std::runtime_error("Free list appears corrupted - extends beyond file");
        }
        const int64_t newFileLength = fileLength - oldFreeListSize;

        // Store current file position to restore later if needed
        const long currentPos = ftell(file_);

        // Properly truncate the file
        truncateFile(newFileLength);

        // Restore file position if it's still valid
        if (file_ && currentPos >= 0 && currentPos < newFileLength) {
            fseek(file_, currentPos, SEEK_SET);
        }
    }

    freeListLifted_ = true;
}
// Writes the cached free list back to the file and leaves the lifted state.
//
// Throws std::runtime_error if the free list is not currently lifted.
void BinaryTable::dropFreeList() {
    DEBUG_PRINTLN("DEBUG: dropFreeList() called - this writes data back to file!");
    if (!freeListLifted_) {
        throw std::runtime_error("Free list is not lifted");
    }

    // Clear the flag first: while it is set, setFreeList only updates the
    // cache and returns without touching the file.
    freeListLifted_ = false;
    DEBUG_PRINTLN("DEBUG: About to call setFreeList - this might corrupt the address table!");
    setFreeList(freeListCache_);
    DEBUG_PRINTLN("DEBUG: setFreeList completed");
    freeListCache_.clear();
}
void BinaryTable::antiFreeListScope(std::function<void()> fn) {
std::cout << "DEBUG: antiFreeListScope START" << std::endl;
liftFreeList();
std::cout << "DEBUG: After liftFreeList" << std::endl;
try {
fn();
std::cout << "DEBUG: After fn() execution" << std::endl;
} catch (...) {
std::cout << "DEBUG: Exception caught, dropping free list" << std::endl;
dropFreeList();
throw;
}
std::cout << "DEBUG: About to dropFreeList" << std::endl;
dropFreeList();
std::cout << "DEBUG: antiFreeListScope END" << std::endl;
}
// Memory management
void BinaryTable::free(BT_Pointer pointer, int32_t size) {
DEBUG_PRINTLN("DEBUG: free() called with freeListLifted_=" << freeListLifted_);
if (!freeListLifted_) {
DEBUG_PRINTLN("DEBUG: free() THROWING EXCEPTION - free list not lifted!");
throw std::runtime_error("Free list must be lifted before freeing memory");
}
@@ -897,7 +984,8 @@ BT_Pointer BinaryTable::alloc(int32_t size) {
if (it == freeListCache_.end()) {
// No suitable block, allocate at end of file
return BT_Pointer(getFileLength());
int64_t allocPos = getFileLength();
return BT_Pointer(allocPos);
}
BT_Pointer result = it->pointer;
@@ -969,49 +1057,48 @@ void BinaryTable::truncate() {
freeList.pop_back();
setFreeList(freeList);
// Truncate file
file_.close();
file_.open(filePath_, std::ios::binary | std::ios::in | std::ios::out);
// Actually truncate file (matching Dart behavior)
truncateFile(lastEntry.pointer.address());
}
});
}
// Debug methods
// Dumps the address table through the DEBUG_PRINT / DEBUG_PRINTLN macros.
//
// context: optional label appended to the banner line when non-empty.
// For each non-null entry the type byte at the target address is printed.
// Type 2 (INTEGER) is followed by its 32-bit value; any other type is
// followed by up to 8 raw bytes in hex, limited to the end of the file.
// Exceptions derived from std::exception raised while inspecting an entry are
// reported and do not abort the dump.
void BinaryTable::debugAddressTable(const std::string& context) {
    DEBUG_PRINT("\n=== DEBUG ADDRESS TABLE");
    if (!context.empty()) {
        DEBUG_PRINT(" (" << context << ")");
    }
    DEBUG_PRINTLN(" ===");

    const auto addressTable = getAddressTable();
    DEBUG_PRINTLN("Address table has " << addressTable.size() << " entries");

    for (const auto& [hash, pointer] : addressTable) {
        DEBUG_PRINTLN(" Hash " << hash << " -> Address " << pointer.address());
        if (pointer.isNull()) {
            continue;
        }

        try {
            const uint8_t typeByte = readByte(pointer.address());
            DEBUG_PRINTLN(" Type byte: " << static_cast<int>(typeByte));

            if (typeByte == 2) { // INTEGER
                const int32_t value = readInt32(pointer.address() + 1);
                DEBUG_PRINTLN(" Value: " << value);
            } else {
                // Never read beyond the end of the file while dumping.
                const int64_t fileLength = getFileLength();
                const int64_t wantedEnd = pointer.address() + 8;
                const int64_t dumpEnd = (wantedEnd < fileLength) ? wantedEnd : fileLength;

                DEBUG_PRINT(" Raw bytes: ");
                for (int64_t at = pointer.address(); at < dumpEnd; ++at) {
                    const uint8_t byte = readByte(at);
                    DEBUG_PRINT(std::hex << static_cast<int>(byte) << " ");
                }
                DEBUG_PRINTLN(std::dec);
            }
        } catch (const std::exception& e) {
            DEBUG_PRINTLN(" Error reading data: " << e.what());
        }
    }
    DEBUG_PRINTLN("=========================");
}
} // namespace bt