// File: SweepStore/cpp/binary_table.cpp
// 1104 lines, 36 KiB, C++

#include "binary_table.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <system_error>
namespace bt {
// FNV-1a hash implementation
//
// Hashes the raw bytes of `str` with 64-bit FNV-1a and returns the digest
// reinterpreted as a signed 64-bit integer.
int64_t BinaryTable::hashString(const std::string& str) const {
    constexpr uint64_t kFnvOffsetBasis = 0xcbf29ce484222325ULL;
    constexpr uint64_t kFnvPrime = 0x100000001b3ULL;

    uint64_t digest = kFnvOffsetBasis;
    for (const char ch : str) {
        // XOR the byte in first, then multiply: that order is what makes this FNV-1a.
        digest = (digest ^ static_cast<uint8_t>(ch)) * kFnvPrime;
    }
    return static_cast<int64_t>(digest);
}
// Value encoding implementations

// Encodes a 32-bit integer as a 5-byte record: the BT_Type::INTEGER tag byte
// followed by the value in little-endian byte order.
std::vector<uint8_t> encodeValue(const int32_t& value) {
    // Work on an unsigned copy: right-shifting a negative signed value is
    // implementation-defined before C++20, whereas unsigned shifts are not.
    const uint32_t bits = static_cast<uint32_t>(value);

    std::vector<uint8_t> buffer;
    buffer.reserve(1 + sizeof(bits));
    buffer.push_back(static_cast<uint8_t>(BT_Type::INTEGER));
    for (unsigned shift = 0; shift < 32; shift += 8) {
        buffer.push_back(static_cast<uint8_t>((bits >> shift) & 0xFFu));
    }
    return buffer;
}
// Encodes a float as a 5-byte record: the BT_Type::FLOAT tag byte followed by
// the value's 32-bit object representation in little-endian byte order.
std::vector<uint8_t> encodeValue(const float& value) {
    // memcpy is the portable way to obtain the float's bit pattern.
    uint32_t rawBits;
    std::memcpy(&rawBits, &value, sizeof(float));

    std::vector<uint8_t> encoded;
    encoded.push_back(static_cast<uint8_t>(BT_Type::FLOAT));
    for (int byteIndex = 0; byteIndex < 4; ++byteIndex) {
        encoded.push_back(static_cast<uint8_t>((rawBits >> (byteIndex * 8)) & 0xFF));
    }
    return encoded;
}
// Encodes a string as: BT_Type::STRING tag byte, 4-byte little-endian byte
// count, then the string's bytes verbatim.
std::vector<uint8_t> encodeValue(const std::string& value) {
    const int32_t byteCount = static_cast<int32_t>(value.length());

    std::vector<uint8_t> encoded;
    encoded.push_back(static_cast<uint8_t>(BT_Type::STRING));

    // Byte count, least significant byte first.
    for (int shift = 0; shift < 32; shift += 8) {
        encoded.push_back(static_cast<uint8_t>((byteCount >> shift) & 0xFF));
    }

    // Payload: each char is stored as its unsigned byte value.
    encoded.insert(encoded.end(), value.begin(), value.end());
    return encoded;
}
std::vector<uint8_t> encodeValue(const std::vector<int32_t>& value) {
std::vector<uint8_t> buffer;
buffer.push_back(static_cast<uint8_t>(BT_Type::INTEGER_ARRAY));
// Array length (little endian)
int32_t length = static_cast<int32_t>(value.size());
buffer.push_back(length & 0xFF);
buffer.push_back((length >> 8) & 0xFF);
buffer.push_back((length >> 16) & 0xFF);
buffer.push_back((length >> 24) & 0xFF);
// Array elements
for (const auto& item : value) {
auto itemBuffer = encodeValue(item);
buffer.insert(buffer.end(), itemBuffer.begin(), itemBuffer.end());
}
return buffer;
}
// Encodes a float array as: BT_Type::FLOAT_ARRAY tag byte, 4-byte
// little-endian element count, then every element as its own tagged record
// (as produced by the float overload of encodeValue).
std::vector<uint8_t> encodeValue(const std::vector<float>& value) {
    const int32_t elementCount = static_cast<int32_t>(value.size());

    std::vector<uint8_t> encoded;
    encoded.push_back(static_cast<uint8_t>(BT_Type::FLOAT_ARRAY));
    for (int shift = 0; shift < 32; shift += 8) {
        encoded.push_back(static_cast<uint8_t>((elementCount >> shift) & 0xFF));
    }

    for (const float& element : value) {
        const std::vector<uint8_t> record = encodeValue(element);
        encoded.insert(encoded.end(), record.begin(), record.end());
    }
    return encoded;
}
// BT_Reference implementation

// Binds a reference to `table` and the location `pointer` inside it. Nothing
// is read from the file at construction time.
BT_Reference::BT_Reference(BinaryTable* table, BT_Pointer pointer)
    : table_(table),
      pointer_(pointer) {
}
// Decodes the INTEGER record this reference points at.
// Throws std::runtime_error when the pointer is null or the stored type tag
// is not BT_Type::INTEGER.
template<>
int32_t BT_Reference::decodeValue<int32_t>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }

    const auto recordStart = pointer_.address();
    table_->setFilePosition(recordStart);

    const BT_Type storedType = static_cast<BT_Type>(table_->readByte(recordStart));
    if (storedType != BT_Type::INTEGER) {
        throw std::runtime_error("Type mismatch");
    }

    // The 4-byte payload follows the one-byte type tag.
    return table_->readInt32(recordStart + 1);
}
// Decodes the FLOAT record this reference points at.
// Throws std::runtime_error when the pointer is null or the stored type tag
// is not BT_Type::FLOAT.
template<>
float BT_Reference::decodeValue<float>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }

    const auto recordStart = pointer_.address();
    table_->setFilePosition(recordStart);

    const BT_Type storedType = static_cast<BT_Type>(table_->readByte(recordStart));
    if (storedType != BT_Type::FLOAT) {
        throw std::runtime_error("Type mismatch");
    }

    // The 4-byte payload follows the one-byte type tag.
    return table_->readFloat32(recordStart + 1);
}
// Decodes the STRING record this reference points at.
// Layout: type tag (1 byte), little-endian byte count (4 bytes), payload.
// Throws std::runtime_error when the pointer is null, the stored type tag is
// not BT_Type::STRING, or the stored byte count is negative.
template<>
std::string BT_Reference::decodeValue<std::string>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }

    const auto recordStart = pointer_.address();
    table_->setFilePosition(recordStart);

    const uint8_t typeId = table_->readByte(recordStart);
    if (static_cast<BT_Type>(typeId) != BT_Type::STRING) {
        throw std::runtime_error("Type mismatch");
    }

    const int32_t length = table_->readInt32(recordStart + 1);
    // The count comes straight from the file; a negative value would be
    // converted to an enormous buffer size by readBytes, so reject it here.
    if (length < 0) {
        throw std::runtime_error("Corrupted string record - negative length");
    }

    const auto bytes = table_->readBytes(recordStart + 5, length);
    return std::string(bytes.begin(), bytes.end());
}
// Produces an integer-array view over the same table and pointer; no data is
// read from the file here.
template<>
BT_UniformArray<int32_t> BT_Reference::decodeValue<BT_UniformArray<int32_t>>() {
    BT_UniformArray<int32_t> arrayView(table_, pointer_);
    return arrayView;
}
// Produces a float-array view over the same table and pointer; no data is
// read from the file here.
template<>
BT_UniformArray<float> BT_Reference::decodeValue<BT_UniformArray<float>>() {
    BT_UniformArray<float> arrayView(table_, pointer_);
    return arrayView;
}
// Reads a whole INTEGER_ARRAY record into memory.
// Layout: type tag (1 byte), little-endian element count (4 bytes), then one
// 5-byte record per element (type tag + int32 payload).
// Returns an empty vector for a null pointer. Throws std::runtime_error when
// the stored type tag is not INTEGER_ARRAY or the stored count is negative.
template<>
std::vector<int32_t> BT_Reference::decodeValue<std::vector<int32_t>>() {
    if (pointer_.isNull()) {
        return {};
    }

    const auto arrayStart = pointer_.address();
    const BT_Type type = static_cast<BT_Type>(table_->readByte(arrayStart));
    if (type != BT_Type::INTEGER_ARRAY) {
        throw std::runtime_error("Type mismatch - expected integer array");
    }

    const int32_t length = table_->readInt32(arrayStart + 1);
    // The count comes from the file; a negative value would turn into a huge
    // request once passed to reserve(), so report it as corruption instead.
    if (length < 0) {
        throw std::runtime_error("Corrupted integer array - negative length");
    }

    std::vector<int32_t> result;
    result.reserve(static_cast<std::vector<int32_t>::size_type>(length));

    // Each element is: type byte (1) + int32 data (4) = 5 bytes
    int64_t elementPos = arrayStart + 1 + 4;  // Skip array tag and count
    for (int32_t i = 0; i < length; ++i) {
        // Skip the element's own type byte and read its payload.
        result.push_back(table_->readInt32(elementPos + 1));
        elementPos += 5;
    }
    return result;
}
// Reads a whole FLOAT_ARRAY record into memory.
// Layout: type tag (1 byte), little-endian element count (4 bytes), then one
// 5-byte record per element (type tag + float payload).
// Returns an empty vector for a null pointer. Throws std::runtime_error when
// the stored type tag is not FLOAT_ARRAY or the stored count is negative.
template<>
std::vector<float> BT_Reference::decodeValue<std::vector<float>>() {
    if (pointer_.isNull()) {
        return {};
    }

    const auto arrayStart = pointer_.address();
    const BT_Type type = static_cast<BT_Type>(table_->readByte(arrayStart));
    if (type != BT_Type::FLOAT_ARRAY) {
        throw std::runtime_error("Type mismatch - expected float array");
    }

    const int32_t length = table_->readInt32(arrayStart + 1);
    // The count comes from the file; a negative value would turn into a huge
    // request once passed to reserve(), so report it as corruption instead.
    if (length < 0) {
        throw std::runtime_error("Corrupted float array - negative length");
    }

    std::vector<float> result;
    result.reserve(static_cast<std::vector<float>::size_type>(length));

    // Each element is: type byte (1) + float data (4) = 5 bytes
    int64_t elementPos = arrayStart + 1 + 4;  // Skip array tag and count
    for (int32_t i = 0; i < length; ++i) {
        // Skip the element's own type byte and read its payload.
        result.push_back(table_->readFloat32(elementPos + 1));
        elementPos += 5;
    }
    return result;
}
int32_t BT_Reference::size() const {
if (pointer_.isNull()) {
return 0;
}
uint8_t typeId = table_->readByte(pointer_.address());
BT_Type type = static_cast<BT_Type>(typeId);
switch (type) {
case BT_Type::POINTER:
return 1 + 8; // Type byte + pointer
case BT_Type::INTEGER:
case BT_Type::FLOAT:
return 1 + 4; // Type byte + data
case BT_Type::STRING: {
int32_t length = table_->readInt32(pointer_.address() + 1);
return 1 + 4 + length; // Type + length + string bytes
}
case BT_Type::ADDRESS_TABLE: {
int32_t count = table_->readInt32(pointer_.address() + 1);
return 1 + 4 + count * (8 + 8); // Type + count + entries
}
case BT_Type::INTEGER_ARRAY:
case BT_Type::FLOAT_ARRAY: {
int32_t length = table_->readInt32(pointer_.address() + 1);
int32_t elementSize = (type == BT_Type::INTEGER_ARRAY) ? (1 + 4) : (1 + 4);
return 1 + 4 + length * elementSize;
}
}
return 0;
}
// Returns the type tag stored in the first byte of the referenced record.
// Throws std::runtime_error for a null pointer.
BT_Type BT_Reference::getType() const {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const uint8_t rawTag = table_->readByte(pointer_.address());
    return static_cast<BT_Type>(rawTag);
}
// BT_UniformArray template implementations

// Returns the element count stored in the array header. A null pointer, a
// record whose type tag is not an array type, and any exception raised while
// reading are all reported as length 0 rather than as an error.
template<typename T>
int32_t BT_UniformArray<T>::length() const {
    if (this->pointer_.isNull()) {
        return 0;
    }
    try {
        const auto arrayStart = this->pointer_.address();
        const BT_Type storedType =
            static_cast<BT_Type>(this->table_->readByte(arrayStart));
        // Treat non-array as empty array instead of throwing
        return isArrayType(storedType) ? this->table_->readInt32(arrayStart + 1) : 0;
    } catch (...) {
        return 0;  // If we can't read, treat as empty
    }
}
// Returns the element at `index`, decoded as T.
// Throws std::runtime_error for a null pointer or when the stored element
// type has no fixed size, and std::out_of_range when `index` is outside
// [0, length()).
template<typename T>
T BT_UniformArray<T>::operator[](int32_t index) const {
    if (this->pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const int32_t len = length();
    if (index < 0 || index >= len) {
        throw std::out_of_range("Index out of range");
    }

    // Elements start after the array tag (1 byte) and the count (4 bytes);
    // the first element's tag tells us the stride.
    const int64_t firstElement = this->pointer_.address() + 1 + 4;
    const BT_Type elementType =
        static_cast<BT_Type>(this->table_->readByte(firstElement));

    const int32_t payloadSize = getTypeSize(elementType);
    // Same guard as fetchSublist: a size of -1 would give a stride of 0 and
    // every index would silently resolve to the first element.
    if (payloadSize == -1) {
        throw std::runtime_error("Variable size types not supported in uniform arrays");
    }

    // Compute the offset in 64-bit so large indices cannot overflow int32.
    const int64_t stride = 1 + static_cast<int64_t>(payloadSize);
    const int64_t itemAddress = firstElement + index * stride;

    BT_Reference itemRef(this->table_, BT_Pointer(itemAddress));
    return itemRef.decodeValue<T>();
}
// Overwrites the element at `index` in place with `value`.
// Throws std::runtime_error for a null pointer or when T's type tag differs
// from the tag of the first stored element, and std::out_of_range when
// `index` is outside [0, length()).
template<typename T>
void BT_UniformArray<T>::set(int32_t index, const T& value) {
    if (this->pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const int32_t len = length();
    if (index < 0 || index >= len) {
        throw std::out_of_range("Index out of range");
    }

    // Validate type compatibility against the first stored element.
    const int64_t firstElement = this->pointer_.address() + 1 + 4;
    const BT_Type expectedType = getTypeFromValue<T>();
    const BT_Type elementType =
        static_cast<BT_Type>(this->table_->readByte(firstElement));
    if (expectedType != elementType) {
        throw std::runtime_error("Type mismatch");
    }

    // Encode and write the value. The offset is computed in 64-bit so large
    // indices cannot overflow int32 before being added to the base address.
    const auto valueBuffer = encodeValue(value);
    const int64_t stride = 1 + static_cast<int64_t>(getTypeSize(elementType));
    const int64_t itemAddress = firstElement + index * stride;
    this->table_->writeBytes(itemAddress, valueBuffer);
}
// Appends a single element by delegating to addAll with a one-item list.
template<typename T>
void BT_UniformArray<T>::add(const T& value) {
    const std::vector<T> single{value};
    addAll(single);
}
// Appends `values` to the array by rebuilding it: the current contents are
// read, the new elements are appended in memory, the old block is freed, a
// new block is allocated and written, and every address-table entry that
// pointed at the old block is redirected to the new one. Runs inside
// antiFreeListScope so that free()/alloc() are permitted.
// Throws std::runtime_error on an element type mismatch or when T's encoded
// size is not fixed.
template<typename T>
void BT_UniformArray<T>::addAll(const std::vector<T>& values) {
    this->table_->antiFreeListScope([&]() {
        const BT_Type elementType = getTypeFromValue<T>();
        const int32_t currentLength = length();

        // A non-empty array must already hold elements of the same type.
        if (currentLength > 0) {
            const uint8_t existingTypeId =
                this->table_->readByte(this->pointer_.address() + 1 + 4);
            if (static_cast<BT_Type>(existingTypeId) != elementType) {
                throw std::runtime_error("Type mismatch");
            }
        }

        // Every value has static type T, so one size check covers them all.
        const int32_t payloadSize = getTypeSize(elementType);
        if (payloadSize == -1) {
            throw std::runtime_error("Variable size types not supported in uniform arrays");
        }

        const int32_t elementSize = 1 + payloadSize;
        const int32_t currentBufferSize = 1 + 4 + currentLength * elementSize;

        // Start from the existing record, or from a fresh header if empty.
        std::vector<uint8_t> fullBuffer;
        if (currentLength > 0) {
            fullBuffer = this->table_->readBytes(this->pointer_.address(), currentBufferSize);
        } else {
            fullBuffer.push_back(static_cast<uint8_t>(
                elementType == BT_Type::INTEGER ? BT_Type::INTEGER_ARRAY : BT_Type::FLOAT_ARRAY));
            fullBuffer.insert(fullBuffer.end(), 4, 0);  // Length is patched below
        }

        // Append the encoded new elements.
        fullBuffer.reserve(fullBuffer.size() + values.size() * static_cast<std::size_t>(elementSize));
        for (const auto& value : values) {
            const auto valueBuffer = encodeValue(value);
            fullBuffer.insert(fullBuffer.end(), valueBuffer.begin(), valueBuffer.end());
        }

        // Patch the little-endian element count in the header.
        const int32_t newLength = currentLength + static_cast<int32_t>(values.size());
        fullBuffer[1] = newLength & 0xFF;
        fullBuffer[2] = (newLength >> 8) & 0xFF;
        fullBuffer[3] = (newLength >> 16) & 0xFF;
        fullBuffer[4] = (newLength >> 24) & 0xFF;

        // Free old array if it exists
        if (!this->pointer_.isNull()) {
            this->table_->free(this->pointer_, currentBufferSize);
        }

        // Allocate new space and write the array immediately. alloc() hands
        // out the current end of file without extending it, so the data must
        // be on disk before setAddressTable() performs its own allocation;
        // otherwise both allocations can receive the same address and the
        // array would overwrite the freshly written address table.
        BT_Pointer newPointer = this->table_->alloc(static_cast<int32_t>(fullBuffer.size()));
        this->table_->writeBytes(newPointer.address(), fullBuffer);

        // Redirect every address-table entry that referenced the old block.
        auto addressTable = this->table_->getAddressTable();
        for (auto& [key, value] : addressTable) {
            if (value == this->pointer_) {
                value = newPointer;
            }
        }
        this->table_->setAddressTable(addressTable);
        this->pointer_ = newPointer;
    });
}
// Decodes the elements in [start, end) into a vector. `end == -1` means
// "through the last element". An empty array yields an empty vector without
// validating the range.
// Throws std::out_of_range for an invalid range and std::runtime_error when
// the stored element type has no fixed size.
template<typename T>
std::vector<T> BT_UniformArray<T>::fetchSublist(int32_t start, int32_t end) {
    const int32_t len = length();
    if (len == 0) {
        return {};
    }
    if (end == -1) {
        end = len;
    }
    if (start < 0 || start >= len || end < start || end > len) {
        throw std::out_of_range("Invalid range");
    }

    // Elements start after the array tag (1 byte) and the count (4 bytes).
    const int64_t firstElement = this->pointer_.address() + 1 + 4;
    const BT_Type elementType =
        static_cast<BT_Type>(this->table_->readByte(firstElement));

    // Validate the size before deriving a stride from it.
    const int32_t payloadSize = getTypeSize(elementType);
    if (payloadSize == -1) {
        throw std::runtime_error("Variable size types not supported in uniform arrays");
    }
    // 64-bit stride so that i * stride cannot overflow int32.
    const int64_t stride = 1 + static_cast<int64_t>(payloadSize);

    std::vector<T> result;
    result.reserve(static_cast<typename std::vector<T>::size_type>(end - start));
    for (int32_t i = start; i < end; ++i) {
        BT_Reference itemRef(this->table_, BT_Pointer(firstElement + i * stride));
        result.push_back(itemRef.decodeValue<T>());
    }
    return result;
}
// Explicit template instantiations
// The member templates above are defined in this translation unit, so the
// two element types instantiated here are the only ones whose definitions
// this file emits.
template class BT_UniformArray<int32_t>;
template class BT_UniformArray<float>;
// BinaryTable implementation

// Opens the table file at `path` for binary read/write, creating it when it
// cannot be opened as an existing file. A newly created file is left empty
// by this constructor.
// Throws std::runtime_error when the file can neither be opened nor created,
// since every later operation would otherwise use a null FILE*.
BinaryTable::BinaryTable(const std::string& path)
    : filePath_(path), freeListLifted_(false) {
    file_ = fopen(path.c_str(), "r+b");
    if (!file_) {
        // File doesn't exist, create it
        file_ = fopen(path.c_str(), "w+b");
    }
    if (!file_) {
        throw std::runtime_error("Failed to open or create file: " + path);
    }
}
// Closes the underlying file handle, if one is open.
BinaryTable::~BinaryTable() {
    if (file_ == nullptr) {
        return;
    }
    fclose(file_);
}
// Writes the initial 12-byte file header: a null address-table pointer at
// offset 0 followed by a zero free-list entry count at offset 8, then
// flushes the stream.
void BinaryTable::initialize() {
    constexpr int64_t kAddressTableSlot = 0;   // Address table pointer (8 bytes)
    constexpr int64_t kFreeListCountSlot = 8;  // Free list entry count (4 bytes)

    fseek(file_, 0, SEEK_SET);
    writeInt64(kAddressTableSlot, BT_Null.address());
    writeInt32(kFreeListCountSlot, 0);
    fflush(file_);
}
// File I/O helper implementations

// Reads a little-endian 32-bit signed integer stored at `position`.
// The fread result is not checked; bytes that could not be read are treated
// as zero instead of being left indeterminate.
// NOTE(review): consider reporting short reads to the caller.
int32_t BinaryTable::readInt32(int64_t position) {
    fseek(file_, position, SEEK_SET);
    uint8_t bytes[4] = {};
    fread(bytes, 1, 4, file_);

    // Assemble in unsigned arithmetic: shifting a signed value into the sign
    // bit (bytes[3] >= 0x80) is undefined behaviour before C++20.
    uint32_t bits = 0;
    for (int i = 0; i < 4; ++i) {
        bits |= static_cast<uint32_t>(bytes[i]) << (i * 8);
    }

    int32_t result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
// Reads a 32-bit float whose bit pattern is stored little-endian at
// `position`. The fread result is not checked; bytes that could not be read
// are treated as zero instead of being left indeterminate.
// NOTE(review): consider reporting short reads to the caller.
float BinaryTable::readFloat32(int64_t position) {
    fseek(file_, position, SEEK_SET);
    uint8_t bytes[4] = {};
    fread(bytes, 1, 4, file_);

    uint32_t floatBits = 0;
    for (int i = 0; i < 4; ++i) {
        floatBits |= static_cast<uint32_t>(bytes[i]) << (i * 8);
    }

    // memcpy converts the bit pattern back to a float without aliasing issues.
    float result;
    std::memcpy(&result, &floatBits, sizeof(float));
    return result;
}
// Reads a little-endian 64-bit signed integer stored at `position`.
// The fread result is not checked; bytes that could not be read are treated
// as zero instead of being left indeterminate.
// NOTE(review): consider reporting short reads to the caller.
int64_t BinaryTable::readInt64(int64_t position) {
    fseek(file_, position, SEEK_SET);
    uint8_t bytes[8] = {};
    fread(bytes, 1, 8, file_);

    // Assemble in unsigned arithmetic: shifting a signed value into the sign
    // bit (bytes[7] >= 0x80) is undefined behaviour before C++20.
    uint64_t bits = 0;
    for (int i = 0; i < 8; ++i) {
        bits |= static_cast<uint64_t>(bytes[i]) << (i * 8);
    }

    int64_t result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
// Reads the single byte stored at `position`. The fread result is not
// checked; if nothing could be read the function returns 0 instead of an
// indeterminate value.
// NOTE(review): consider reporting short reads to the caller.
uint8_t BinaryTable::readByte(int64_t position) {
    fseek(file_, position, SEEK_SET);
    uint8_t byte = 0;
    fread(&byte, 1, 1, file_);
    return byte;
}
// Reads `count` bytes starting at `position`. The returned vector always has
// `count` elements; the fread result is not checked, so positions that could
// not be read keep their zero initial value.
std::vector<uint8_t> BinaryTable::readBytes(int64_t position, int32_t count) {
    std::vector<uint8_t> chunk(count);
    fseek(file_, position, SEEK_SET);
    fread(chunk.data(), 1, count, file_);
    return chunk;
}
// Writes `value` at `position` as four little-endian bytes. The fwrite
// result is not checked.
void BinaryTable::writeInt32(int64_t position, int32_t value) {
    uint8_t encoded[4];
    for (int i = 0; i < 4; ++i) {
        encoded[i] = static_cast<uint8_t>((value >> (i * 8)) & 0xFF);
    }
    fseek(file_, position, SEEK_SET);
    fwrite(encoded, 1, 4, file_);
}
// Writes the 32-bit pattern of `value` at `position` as four little-endian
// bytes. The fwrite result is not checked.
void BinaryTable::writeFloat32(int64_t position, float value) {
    // memcpy yields the float's bit pattern without aliasing issues.
    uint32_t rawBits;
    std::memcpy(&rawBits, &value, sizeof(float));

    uint8_t encoded[4];
    for (int i = 0; i < 4; ++i) {
        encoded[i] = static_cast<uint8_t>((rawBits >> (i * 8)) & 0xFF);
    }
    fseek(file_, position, SEEK_SET);
    fwrite(encoded, 1, 4, file_);
}
// Writes `value` at `position` as eight little-endian bytes. The fwrite
// result is not checked.
void BinaryTable::writeInt64(int64_t position, int64_t value) {
    uint8_t encoded[8];
    int shift = 0;
    for (uint8_t& slot : encoded) {
        slot = static_cast<uint8_t>((value >> shift) & 0xFF);
        shift += 8;
    }
    fseek(file_, position, SEEK_SET);
    fwrite(encoded, 1, 8, file_);
}
// Writes the single byte `value` at `position`. The fwrite result is not
// checked.
void BinaryTable::writeByte(int64_t position, uint8_t value) {
    const uint8_t* const source = &value;
    fseek(file_, position, SEEK_SET);
    fwrite(source, 1, 1, file_);
}
// Writes the whole of `data` starting at `position`. The fwrite result is
// not checked.
void BinaryTable::writeBytes(int64_t position, const std::vector<uint8_t>& data) {
    const auto byteCount = data.size();
    fseek(file_, position, SEEK_SET);
    fwrite(data.data(), 1, byteCount, file_);
}
// Returns the current length of the file by seeking to its end, then puts
// the stream position back where it was.
int64_t BinaryTable::getFileLength() {
    const long savedPosition = ftell(file_);

    fseek(file_, 0, SEEK_END);
    const long endPosition = ftell(file_);

    fseek(file_, savedPosition, SEEK_SET);  // Restore position
    return endPosition;
}
// Moves the stream position to the absolute offset `position`.
void BinaryTable::setFilePosition(int64_t position) {
    const int64_t absoluteOffset = position;
    fseek(file_, absoluteOffset, SEEK_SET);
}
// Address table management
std::unordered_map<int64_t, BT_Pointer> BinaryTable::getAddressTable() {
int64_t tableAddress = readInt64(0);
DEBUG_PRINTLN("DEBUG: getAddressTable reading from address " << tableAddress);
if (tableAddress == -1) { // Null pointer
return {};
}
// Validate table address is within file bounds
int64_t fileLength = getFileLength();
if (tableAddress < 0 || tableAddress >= fileLength) {
DEBUG_PRINTLN("DEBUG: Address table pointer is out of bounds: " << tableAddress << " (file length: " << fileLength << ")");
throw std::runtime_error("Address table pointer is corrupted - out of bounds");
}
try {
uint8_t typeId = readByte(tableAddress);
if (static_cast<BT_Type>(typeId) != BT_Type::ADDRESS_TABLE) {
DEBUG_PRINTLN("DEBUG: Invalid type ID at address table location: " << (int)typeId);
// Address table might not be valid yet, return empty
return {};
}
int32_t tableCount = readInt32(tableAddress + 1);
// Validate table count is reasonable
if (tableCount < 0 || tableCount > 1000000) { // Arbitrary but reasonable limit
DEBUG_PRINTLN("DEBUG: Suspicious address table count: " << tableCount);
throw std::runtime_error("Address table appears corrupted - invalid entry count");
}
// Validate the entire table fits within file bounds
int64_t requiredSize = 1 + 4 + tableCount * (8 + 8); // Type + count + entries
if (tableAddress + requiredSize > fileLength) {
DEBUG_PRINTLN("DEBUG: Address table extends beyond file bounds");
throw std::runtime_error("Address table appears corrupted - extends beyond file");
}
std::unordered_map<int64_t, BT_Pointer> addressTable;
for (int32_t i = 0; i < tableCount; i++) {
int64_t offset = tableAddress + 1 + 4 + i * (8 + 8);
int64_t keyHash = readInt64(offset);
int64_t valueAddress = readInt64(offset + 8);
// Validate each value address is within bounds (or null)
if (valueAddress != -1 && (valueAddress < 0 || valueAddress >= fileLength)) {
DEBUG_PRINTLN("DEBUG: Invalid value address in entry " << i << ": " << valueAddress);
throw std::runtime_error("Address table entry contains invalid pointer");
}
DEBUG_PRINTLN(" Reading entry " << i << ": hash " << keyHash << " -> address " << valueAddress);
addressTable[keyHash] = BT_Pointer(valueAddress);
}
return addressTable;
} catch (const std::runtime_error& e) {
// Re-throw runtime errors (our validation failures)
throw;
} catch (...) {
// If we can't read the address table for other reasons, return empty
DEBUG_PRINTLN("DEBUG: Failed to read address table due to I/O error");
return {};
}
}
// Serializes `table` and makes it the file's current address table.
//
// Sequence: remember the old table's pointer and size, encode the new table
// (type byte, 4-byte count, then 8-byte key hash + 8-byte value address per
// entry, all little-endian), allocate space, write and flush it, repoint the
// header slot at offset 0, flush again, and only then free the old table.
//
// Calls alloc() unconditionally (and free() when an old table exists); both
// throw std::runtime_error unless the free list is currently lifted.
// Throws std::runtime_error if fwrite reports a short write of the table.
void BinaryTable::setAddressTable(const std::unordered_map<int64_t, BT_Pointer>& table) {
DEBUG_PRINTLN("DEBUG: setAddressTable called! This should NOT happen during get operations!");
DEBUG_PRINTLN("DEBUG: setAddressTable writing " << table.size() << " entries");
for (const auto& [key, value] : table) {
DEBUG_PRINTLN(" Writing hash " << key << " -> address " << value.address());
}
// Read old table pointer FIRST to ensure we can clean it up later
int64_t oldTablePointerAddress = readInt64(0);
BT_Pointer oldTablePtr(oldTablePointerAddress);
int32_t oldTableSize = 0;
// Calculate old table size if it exists
if (!oldTablePtr.isNull()) {
try {
BT_Reference oldTableRef(this, oldTablePtr);
oldTableSize = oldTableRef.size();
} catch (...) {
// If we can't read the old table, we can't free it safely
DEBUG_PRINTLN("DEBUG: WARNING - Cannot read old table for cleanup");
// Marking the pointer null makes the free step at the end a no-op.
oldTablePtr = BT_Null;
}
}
// Build buffer manually (matching Dart implementation exactly)
std::vector<uint8_t> buffer;
// Type byte
buffer.push_back(static_cast<uint8_t>(BT_Type::ADDRESS_TABLE));
// Table count (little endian, 4 bytes)
int32_t count = static_cast<int32_t>(table.size());
for (int i = 0; i < 4; i++) {
buffer.push_back(static_cast<uint8_t>((count >> (i * 8)) & 0xFF));
}
// Table entries (written in the map's iteration order)
for (const auto& [key, value] : table) {
// Key hash (little endian, 8 bytes)
for (int i = 0; i < 8; i++) {
buffer.push_back(static_cast<uint8_t>((key >> (i * 8)) & 0xFF));
}
// Value address (little endian, 8 bytes)
int64_t addr = value.address();
for (int i = 0; i < 8; i++) {
buffer.push_back(static_cast<uint8_t>((addr >> (i * 8)) & 0xFF));
}
}
// Allocate and write new address table
BT_Pointer newTableAddress = alloc(static_cast<int32_t>(buffer.size()));
setFilePosition(newTableAddress.address());
size_t written = fwrite(buffer.data(), 1, buffer.size(), file_);
if (written != buffer.size()) {
throw std::runtime_error("Failed to write complete address table");
}
// Ensure new table is written to disk before updating header
fflush(file_);
// Atomically update header to point to new table
// NOTE(review): this is a plain 8-byte write followed by fflush; whether it
// is atomic with respect to crashes depends on the platform - confirm.
writeInt64(0, newTableAddress.address());
fflush(file_);
// Only free old table after new one is successfully committed
DEBUG_PRINTLN("DEBUG: oldTablePtr.isNull()=" << oldTablePtr.isNull() << ", oldTablePtr.address()=" << oldTablePtr.address() << ", newTableAddress=" << newTableAddress.address());
if (!oldTablePtr.isNull() && oldTablePtr != newTableAddress) {
DEBUG_PRINTLN("DEBUG: Calling free() for old table");
free(oldTablePtr, oldTableSize);
} else {
DEBUG_PRINTLN("DEBUG: NOT calling free() - condition not met");
}
}
// Free list management
std::vector<BT_FreeListEntry> BinaryTable::getFreeList() {
if (freeListLifted_) {
return freeListCache_;
}
int64_t fileLength = getFileLength();
if (fileLength < 4) {
return {};
}
int32_t entryCount = readInt32(fileLength - 4);
if (entryCount == 0) {
return {};
}
int32_t entrySize = 8 + 4; // Pointer + Size
int32_t freeListSize = entryCount * entrySize;
int64_t freeListStart = fileLength - 4 - freeListSize;
std::vector<BT_FreeListEntry> freeList;
for (int32_t i = 0; i < entryCount; i++) {
int64_t offset = freeListStart + i * entrySize;
int64_t pointerAddress = readInt64(offset);
int32_t size = readInt32(offset + 8);
freeList.emplace_back(BT_Pointer(pointerAddress), size);
}
return freeList;
}
// Replaces the stored free list with `list`.
//
// While the free list is lifted only the in-memory cache is updated.
// Otherwise the existing trailer (entries + 4-byte count) is truncated off
// the file when its count is positive, and - if `list` is non-empty - the
// new entries (8-byte pointer + 4-byte size each) followed by the 4-byte
// entry count are appended at the end of the file, all little-endian.
//
// NOTE(review): for an empty `list` nothing is appended, so a later read of
// the last four bytes as an entry count will see whatever data ends the
// file - confirm this matches the intended on-disk format.
void BinaryTable::setFreeList(const std::vector<BT_FreeListEntry>& list) {
DEBUG_PRINTLN("DEBUG: setFreeList called with freeListLifted_=" << freeListLifted_ << ", list.size()=" << list.size());
if (freeListLifted_) {
freeListCache_ = list;
DEBUG_PRINTLN("DEBUG: setFreeList early return - just updating cache");
return;
}
// Always remove old free list first (matching Dart behavior)
int64_t fileLength = getFileLength();
DEBUG_PRINTLN("DEBUG: setFreeList fileLength=" << fileLength);
// Calculate old free list size to remove
int32_t oldEntryCount = 0;
if (fileLength >= 4) {
oldEntryCount = readInt32(fileLength - 4);
}
DEBUG_PRINTLN("DEBUG: setFreeList oldEntryCount=" << oldEntryCount);
// Remove old free list (matching Dart: always truncate first)
if (oldEntryCount > 0) {
// The count is not validated against the file size before truncating.
int32_t oldListSize = (oldEntryCount * (8 + 4)) + 4; // Entries + Count
int64_t newFileLength = fileLength - oldListSize;
DEBUG_PRINTLN("DEBUG: setFreeList - removing old free list, oldListSize=" << oldListSize << ", truncating to: " << newFileLength);
truncateFile(newFileLength);
fileLength = newFileLength; // Update file length
}
// If the new free list is empty, we're done (old list already removed)
if (list.empty()) {
DEBUG_PRINTLN("DEBUG: setFreeList - empty list, old list removed, done");
return;
}
// Write new free list at end of file
int64_t newLogicalEnd = fileLength;
// Encode new free list
std::vector<uint8_t> buffer;
// Entries
for (const auto& entry : list) {
// Pointer (8 bytes, little endian)
int64_t addr = entry.pointer.address();
for (int i = 0; i < 8; i++) {
buffer.push_back(static_cast<uint8_t>((addr >> (i * 8)) & 0xFF));
}
// Size (4 bytes, little endian)
int32_t size = entry.size;
for (int i = 0; i < 4; i++) {
buffer.push_back(static_cast<uint8_t>((size >> (i * 8)) & 0xFF));
}
}
// Entry count (4 bytes, little endian) goes last so it ends the file
int32_t count = static_cast<int32_t>(list.size());
for (int i = 0; i < 4; i++) {
buffer.push_back(static_cast<uint8_t>((count >> (i * 8)) & 0xFF));
}
// Write at the logical end position
fseek(file_, newLogicalEnd, SEEK_SET);
fwrite(buffer.data(), 1, buffer.size(), file_);
fflush(file_);
// Update logical file length
// File will be extended automatically by write operations
}
void BinaryTable::truncateFile(int64_t newSize) {
// Actually truncate the file (matching Dart behavior)
DEBUG_PRINTLN("DEBUG: truncateFile - truncating to " << newSize);
fclose(file_);
try {
std::filesystem::resize_file(filePath_, newSize);
DEBUG_PRINTLN("DEBUG: truncateFile - resize successful");
} catch (const std::exception& e) {
DEBUG_PRINTLN("DEBUG: truncateFile - resize failed: " << e.what());
}
file_ = fopen(filePath_.c_str(), "r+b");
DEBUG_PRINTLN("DEBUG: truncateFile - reopen: success=" << (file_ != nullptr));
}
// Moves the free list from the file into the in-memory cache: the list is
// loaded, its on-disk trailer (entries + 4-byte count) is truncated off when
// the stored count is positive, and the table is marked as lifted.
// Throws std::runtime_error if the free list is already lifted.
void BinaryTable::liftFreeList() {
    DEBUG_PRINTLN("DEBUG: liftFreeList() called - this truncates the file!");
    if (freeListLifted_) {
        throw std::runtime_error("Free list is already lifted");
    }
    freeListCache_ = getFreeList();

    // Remove free list from end of file
    const int64_t fileLength = getFileLength();
    int32_t trailerEntries = 0;
    if (fileLength >= 4) {
        trailerEntries = readInt32(fileLength - 4);
    }

    if (trailerEntries > 0) {
        constexpr int32_t kEntryBytes = 8 + 4;
        const int32_t trailerBytes = trailerEntries * kEntryBytes + 4;
        const int64_t shortenedLength = fileLength - trailerBytes;

        // truncateFile reopens the stream, so remember where we were.
        const long savedPosition = ftell(file_);
        truncateFile(shortenedLength);

        // Only go back if that position still exists in the shorter file.
        const bool positionStillValid = savedPosition >= 0 && savedPosition < shortenedLength;
        if (positionStillValid) {
            fseek(file_, savedPosition, SEEK_SET);
        }
    }
    freeListLifted_ = true;
}
// Ends a lifted section: clears the lifted flag, writes the cached free list
// back through setFreeList, and empties the cache.
// Throws std::runtime_error if the free list is not currently lifted.
void BinaryTable::dropFreeList() {
    DEBUG_PRINTLN("DEBUG: dropFreeList() called - this writes data back to file!");
    if (!freeListLifted_) {
        throw std::runtime_error("Free list is not lifted");
    }

    // The flag must be cleared first; while it is set, setFreeList only
    // updates the cache instead of writing to the file.
    freeListLifted_ = false;

    DEBUG_PRINTLN("DEBUG: About to call setFreeList - this might corrupt the address table!");
    setFreeList(freeListCache_);
    DEBUG_PRINTLN("DEBUG: setFreeList completed");

    freeListCache_.clear();
}
// Runs `fn` with the free list lifted. The free list is dropped again
// afterwards whether `fn` returns normally or throws; an exception from `fn`
// is rethrown once the free list has been dropped.
void BinaryTable::antiFreeListScope(std::function<void()> fn) {
    liftFreeList();

    try {
        fn();
    } catch (...) {
        // Put the free list back before letting the failure escape.
        dropFreeList();
        throw;
    }

    // Normal completion path.
    dropFreeList();
}
// Memory management

// Records the `size` bytes at `pointer` as free in the cached free list and
// merges blocks that end exactly where the next one begins.
// Throws std::runtime_error unless the free list is lifted, and
// std::invalid_argument for a null pointer or a non-positive size.
void BinaryTable::free(BT_Pointer pointer, int32_t size) {
    DEBUG_PRINTLN("DEBUG: free() called with freeListLifted_=" << freeListLifted_);
    if (!freeListLifted_) {
        DEBUG_PRINTLN("DEBUG: free() THROWING EXCEPTION - free list not lifted!");
        throw std::runtime_error("Free list must be lifted before freeing memory");
    }
    if (pointer.isNull() || size <= 0) {
        throw std::invalid_argument("Cannot free null pointer or zero size");
    }

    // Work on a copy of the cached list with the new block added.
    std::vector<BT_FreeListEntry> blocks = freeListCache_;
    blocks.emplace_back(pointer, size);

    // Contiguous blocks become neighbours once sorted by address.
    std::sort(blocks.begin(), blocks.end(),
              [](const BT_FreeListEntry& lhs, const BT_FreeListEntry& rhs) {
                  return lhs.pointer.address() < rhs.pointer.address();
              });

    std::vector<BT_FreeListEntry> coalesced;
    for (const auto& block : blocks) {
        const bool touchesPrevious =
            !coalesced.empty() &&
            coalesced.back().pointer.address() + coalesced.back().size == block.pointer.address();
        if (touchesPrevious) {
            // Merge: extend the size of the last entry
            coalesced.back().size += block.size;
        } else {
            // First entry, or not contiguous: keep as separate entry
            coalesced.emplace_back(block.pointer, block.size);
        }
    }

    // Update free list
    freeListCache_ = coalesced;
}
// Returns a pointer to `size` bytes of storage.
//
// The first cached free block that is large enough is used: an exact fit is
// removed from the list, a larger block is shrunk from its front. When no
// block fits, the current end of the file is returned. The file is not
// extended here, so a second end-of-file allocation made before the first
// one has been written receives the same address.
// Throws std::runtime_error unless the free list is lifted.
BT_Pointer BinaryTable::alloc(int32_t size) {
    if (!freeListLifted_) {
        throw std::runtime_error("Free list must be lifted before allocation");
    }

    for (auto block = freeListCache_.begin(); block != freeListCache_.end(); ++block) {
        if (block->size < size) {
            continue;  // Too small, keep looking
        }

        BT_Pointer reused = block->pointer;
        if (block->size == size) {
            // Exact fit, remove block
            freeListCache_.erase(block);
        } else {
            // Split block: the remainder starts right after the allocation
            block->pointer = BT_Pointer(block->pointer.address() + size);
            block->size -= size;
        }
        return reused;
    }

    // No suitable block, allocate at end of file
    return BT_Pointer(getFileLength());
}
// Data operations

// Looks up `key` (by its hash) in the address table and returns a reference
// to the stored value.
// Throws std::runtime_error when the key is not present.
BT_Reference BinaryTable::getReference(const std::string& key) {
    const auto entries = getAddressTable();
    const int64_t keyHash = hashString(key);

    const auto match = entries.find(keyHash);
    if (match == entries.end()) {
        throw std::runtime_error("Key does not exist");
    }
    return BT_Reference(this, match->second);
}
void BinaryTable::remove(const std::string& key) {
antiFreeListScope([&]() {
auto addressTable = getAddressTable();
int64_t keyHash = hashString(key);
auto it = addressTable.find(keyHash);
if (it == addressTable.end()) {
throw std::runtime_error("Key does not exist");
}
BT_Reference valueRef(this, it->second);
free(it->second, valueRef.size());
addressTable.erase(it);
setAddressTable(addressTable);
});
}
// Shrinks the file when its tail is unused: the address table is rewritten
// first, and if the free block with the highest address ends exactly at the
// end of the file, that block is removed from the free list and the file is
// cut at the block's start. Runs with the free list lifted.
void BinaryTable::truncate() {
    antiFreeListScope([&]() {
        // Relocate address table
        setAddressTable(getAddressTable());

        // Check if last free block is at end of file
        auto freeList = getFreeList();
        if (freeList.empty()) {
            return;
        }
        std::sort(freeList.begin(), freeList.end(),
                  [](const BT_FreeListEntry& a, const BT_FreeListEntry& b) {
                      return a.pointer.address() < b.pointer.address();
                  });

        // Copy what we need out of the last entry now: a reference into the
        // vector would dangle after pop_back() below.
        const int64_t tailStart = freeList.back().pointer.address();
        const int64_t tailEnd = tailStart + freeList.back().size;

        const int64_t fileEnd = getFileLength();
        if (tailEnd == fileEnd) {
            freeList.pop_back();
            setFreeList(freeList);
            // Actually truncate file (matching Dart behavior)
            truncateFile(tailStart);
        }
    });
}
// Debug methods

// Dumps the address table through the DEBUG_PRINT / DEBUG_PRINTLN macros.
// For every non-null entry the type byte is shown, followed by the int32
// payload when the type byte is 2, or by the first 8 raw bytes otherwise.
// `context`, when non-empty, is included in the banner line.
void BinaryTable::debugAddressTable(const std::string& context) {
    DEBUG_PRINT("\n=== DEBUG ADDRESS TABLE");
    if (!context.empty()) {
        DEBUG_PRINT(" (" << context << ")");
    }
    DEBUG_PRINTLN(" ===");

    const auto entries = getAddressTable();
    DEBUG_PRINTLN("Address table has " << entries.size() << " entries");

    for (const auto& [hash, pointer] : entries) {
        DEBUG_PRINTLN(" Hash " << hash << " -> Address " << pointer.address());
        if (pointer.isNull()) {
            continue;  // Nothing stored to inspect
        }
        try {
            const uint8_t typeByte = readByte(pointer.address());
            DEBUG_PRINTLN(" Type byte: " << (int)typeByte);
            if (typeByte == 2) {  // INTEGER
                const int32_t value = readInt32(pointer.address() + 1);
                DEBUG_PRINTLN(" Value: " << value);
            } else {
                DEBUG_PRINT(" Raw bytes: ");
                for (int offset = 0; offset < 8; ++offset) {
                    const uint8_t byte = readByte(pointer.address() + offset);
                    DEBUG_PRINT(std::hex << (int)byte << " ");
                }
                DEBUG_PRINTLN(std::dec);
            }
        } catch (const std::exception& e) {
            DEBUG_PRINTLN(" Error reading data: " << e.what());
        }
    }
    DEBUG_PRINTLN("=========================");
}
} // namespace bt