Files
SweepStore/cpp/binary_table.cpp

1017 lines
33 KiB
C++

#include "binary_table.h"
#include <algorithm>
#include <cstring>
#include <functional>
#include <filesystem>
#include <iostream>
namespace bt {
// FNV-1a hash implementation
//
// Folds every byte of `str` into a 64-bit FNV-1a digest (XOR the byte in,
// then multiply by the FNV prime) and returns the digest reinterpreted as a
// signed 64-bit integer.
int64_t BinaryTable::hashString(const std::string& str) const {
    constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ULL;
    constexpr uint64_t kPrime = 0x100000001b3ULL;

    uint64_t digest = kOffsetBasis;
    for (const char ch : str) {
        digest = (digest ^ static_cast<uint8_t>(ch)) * kPrime;
    }
    return static_cast<int64_t>(digest);
}
// Value encoding implementations

// Serializes a 32-bit integer as one BT_Type::INTEGER tag byte followed by
// the value's four bytes, least-significant byte first.
std::vector<uint8_t> encodeValue(const int32_t& value) {
    const uint32_t bits = static_cast<uint32_t>(value);

    std::vector<uint8_t> encoded;
    encoded.reserve(1 + 4);
    encoded.push_back(static_cast<uint8_t>(BT_Type::INTEGER));
    for (unsigned shift = 0; shift < 32; shift += 8) {
        encoded.push_back(static_cast<uint8_t>((bits >> shift) & 0xFFu));
    }
    return encoded;
}
// Serializes a float as one BT_Type::FLOAT tag byte followed by the four
// bytes of its in-memory bit pattern, least-significant byte first.
std::vector<uint8_t> encodeValue(const float& value) {
    // memcpy is used to obtain the bit pattern without aliasing violations.
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(float));

    std::vector<uint8_t> encoded;
    encoded.reserve(1 + 4);
    encoded.push_back(static_cast<uint8_t>(BT_Type::FLOAT));
    for (unsigned shift = 0; shift < 32; shift += 8) {
        encoded.push_back(static_cast<uint8_t>((bits >> shift) & 0xFFu));
    }
    return encoded;
}
// Serializes a string as: BT_Type::STRING tag byte, 4-byte little-endian
// byte count, then the raw bytes of the string.
std::vector<uint8_t> encodeValue(const std::string& value) {
    const int32_t byteCount = static_cast<int32_t>(value.length());
    const uint32_t countBits = static_cast<uint32_t>(byteCount);

    std::vector<uint8_t> encoded;
    encoded.reserve(1 + 4 + value.size());
    encoded.push_back(static_cast<uint8_t>(BT_Type::STRING));
    for (unsigned shift = 0; shift < 32; shift += 8) {
        encoded.push_back(static_cast<uint8_t>((countBits >> shift) & 0xFFu));
    }
    // Each char is converted to its unsigned byte value on insertion.
    encoded.insert(encoded.end(), value.begin(), value.end());
    return encoded;
}
std::vector<uint8_t> encodeValue(const std::vector<int32_t>& value) {
std::vector<uint8_t> buffer;
buffer.push_back(static_cast<uint8_t>(BT_Type::INTEGER_ARRAY));
// Array length (little endian)
int32_t length = static_cast<int32_t>(value.size());
buffer.push_back(length & 0xFF);
buffer.push_back((length >> 8) & 0xFF);
buffer.push_back((length >> 16) & 0xFF);
buffer.push_back((length >> 24) & 0xFF);
// Array elements
for (const auto& item : value) {
auto itemBuffer = encodeValue(item);
buffer.insert(buffer.end(), itemBuffer.begin(), itemBuffer.end());
}
return buffer;
}
std::vector<uint8_t> encodeValue(const std::vector<float>& value) {
std::vector<uint8_t> buffer;
buffer.push_back(static_cast<uint8_t>(BT_Type::FLOAT_ARRAY));
// Array length (little endian)
int32_t length = static_cast<int32_t>(value.size());
buffer.push_back(length & 0xFF);
buffer.push_back((length >> 8) & 0xFF);
buffer.push_back((length >> 16) & 0xFF);
buffer.push_back((length >> 24) & 0xFF);
// Array elements
for (const auto& item : value) {
auto itemBuffer = encodeValue(item);
buffer.insert(buffer.end(), itemBuffer.begin(), itemBuffer.end());
}
return buffer;
}
// BT_Reference implementation
BT_Reference::BT_Reference(BinaryTable* table, BT_Pointer pointer)
: table_(table), pointer_(pointer) {}
// Decodes the referenced value as a 32-bit integer.
// Throws std::runtime_error if the pointer is null or the stored tag byte is
// not BT_Type::INTEGER.
template<>
int32_t BT_Reference::decodeValue<int32_t>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const auto base = pointer_.address();
    table_->setFilePosition(base);
    const BT_Type storedType = static_cast<BT_Type>(table_->readByte(base));
    if (storedType != BT_Type::INTEGER) {
        throw std::runtime_error("Type mismatch");
    }
    // The payload follows the one-byte tag.
    return table_->readInt32(base + 1);
}
// Decodes the referenced value as a 32-bit float.
// Throws std::runtime_error if the pointer is null or the stored tag byte is
// not BT_Type::FLOAT.
template<>
float BT_Reference::decodeValue<float>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const auto base = pointer_.address();
    table_->setFilePosition(base);
    const BT_Type storedType = static_cast<BT_Type>(table_->readByte(base));
    if (storedType != BT_Type::FLOAT) {
        throw std::runtime_error("Type mismatch");
    }
    // The payload follows the one-byte tag.
    return table_->readFloat32(base + 1);
}
// Decodes the referenced value as a string.
//
// Layout read: tag byte, 4-byte length, then `length` raw bytes.
// Throws std::runtime_error if the pointer is null, the stored tag byte is
// not BT_Type::STRING, or the stored length is negative (corrupt data).
template<>
std::string BT_Reference::decodeValue<std::string>() {
    if (pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const auto base = pointer_.address();
    table_->setFilePosition(base);
    const uint8_t typeId = table_->readByte(base);
    if (static_cast<BT_Type>(typeId) != BT_Type::STRING) {
        throw std::runtime_error("Type mismatch");
    }
    const int32_t length = table_->readInt32(base + 1);
    // A negative length would otherwise be used to size the byte buffer,
    // where it turns into an enormous unsigned allocation request.
    if (length < 0) {
        throw std::runtime_error("Corrupt string length");
    }
    const auto bytes = table_->readBytes(base + 5, length);
    return std::string(bytes.begin(), bytes.end());
}
// Wraps this reference's table and pointer in an integer array view; nothing
// is read from the file here.
template<>
BT_UniformArray<int32_t> BT_Reference::decodeValue<BT_UniformArray<int32_t>>() {
    using IntArray = BT_UniformArray<int32_t>;
    return IntArray(table_, pointer_);
}
// Wraps this reference's table and pointer in a float array view; nothing is
// read from the file here.
template<>
BT_UniformArray<float> BT_Reference::decodeValue<BT_UniformArray<float>>() {
    using FloatArray = BT_UniformArray<float>;
    return FloatArray(table_, pointer_);
}
// Decodes the referenced integer array into a vector.
//
// Layout read: tag byte, 4-byte element count, then `count` elements of
// 5 bytes each (1 tag byte + 4 payload bytes). Element tag bytes are skipped
// without being checked.
// Returns an empty vector for a null pointer.
// Throws std::runtime_error if the stored tag is not BT_Type::INTEGER_ARRAY
// or the stored element count is negative (corrupt data).
template<>
std::vector<int32_t> BT_Reference::decodeValue<std::vector<int32_t>>() {
    if (pointer_.isNull()) {
        return {};
    }
    const auto base = pointer_.address();
    const BT_Type type = static_cast<BT_Type>(table_->readByte(base));
    if (type != BT_Type::INTEGER_ARRAY) {
        throw std::runtime_error("Type mismatch - expected integer array");
    }
    const int32_t length = table_->readInt32(base + 1);
    // reserve() takes an unsigned size; a negative count would become a huge
    // request, so reject it explicitly with a meaningful error.
    if (length < 0) {
        throw std::runtime_error("Corrupt array length");
    }
    std::vector<int32_t> result;
    result.reserve(static_cast<std::size_t>(length));
    int64_t elementPos = base + 1 + 4;  // Skip array tag and element count
    for (int32_t i = 0; i < length; ++i) {
        // +1 skips the element's own tag byte.
        result.push_back(table_->readInt32(elementPos + 1));
        elementPos += 5;
    }
    return result;
}
// Decodes the referenced float array into a vector.
//
// Layout read: tag byte, 4-byte element count, then `count` elements of
// 5 bytes each (1 tag byte + 4 payload bytes). Element tag bytes are skipped
// without being checked.
// Returns an empty vector for a null pointer.
// Throws std::runtime_error if the stored tag is not BT_Type::FLOAT_ARRAY or
// the stored element count is negative (corrupt data).
template<>
std::vector<float> BT_Reference::decodeValue<std::vector<float>>() {
    if (pointer_.isNull()) {
        return {};
    }
    const auto base = pointer_.address();
    const BT_Type type = static_cast<BT_Type>(table_->readByte(base));
    if (type != BT_Type::FLOAT_ARRAY) {
        throw std::runtime_error("Type mismatch - expected float array");
    }
    const int32_t length = table_->readInt32(base + 1);
    // reserve() takes an unsigned size; a negative count would become a huge
    // request, so reject it explicitly with a meaningful error.
    if (length < 0) {
        throw std::runtime_error("Corrupt array length");
    }
    std::vector<float> result;
    result.reserve(static_cast<std::size_t>(length));
    int64_t elementPos = base + 1 + 4;  // Skip array tag and element count
    for (int32_t i = 0; i < length; ++i) {
        // +1 skips the element's own tag byte.
        result.push_back(table_->readFloat32(elementPos + 1));
        elementPos += 5;
    }
    return result;
}
int32_t BT_Reference::size() const {
if (pointer_.isNull()) {
return 0;
}
uint8_t typeId = table_->readByte(pointer_.address());
BT_Type type = static_cast<BT_Type>(typeId);
switch (type) {
case BT_Type::POINTER:
return 1 + 8; // Type byte + pointer
case BT_Type::INTEGER:
case BT_Type::FLOAT:
return 1 + 4; // Type byte + data
case BT_Type::STRING: {
int32_t length = table_->readInt32(pointer_.address() + 1);
return 1 + 4 + length; // Type + length + string bytes
}
case BT_Type::ADDRESS_TABLE: {
int32_t count = table_->readInt32(pointer_.address() + 1);
return 1 + 4 + count * (8 + 8); // Type + count + entries
}
case BT_Type::INTEGER_ARRAY:
case BT_Type::FLOAT_ARRAY: {
int32_t length = table_->readInt32(pointer_.address() + 1);
int32_t elementSize = (type == BT_Type::INTEGER_ARRAY) ? (1 + 4) : (1 + 4);
return 1 + 4 + length * elementSize;
}
}
return 0;
}
// Reads the tag byte at the referenced address and returns it as a BT_Type.
// Throws std::runtime_error when the pointer is null.
BT_Type BT_Reference::getType() const {
    if (!pointer_.isNull()) {
        return static_cast<BT_Type>(table_->readByte(pointer_.address()));
    }
    throw std::runtime_error("Null pointer");
}
// BT_UniformArray template implementations

// Returns the element count stored after the array's tag byte.
// Deliberately lenient: a null pointer, a non-array tag, or any exception
// raised while reading all yield 0 rather than an error.
template<typename T>
int32_t BT_UniformArray<T>::length() const {
    if (this->pointer_.isNull()) {
        return 0;
    }
    try {
        const auto base = this->pointer_.address();
        const BT_Type storedType = static_cast<BT_Type>(this->table_->readByte(base));
        return isArrayType(storedType) ? this->table_->readInt32(base + 1) : 0;
    } catch (...) {
        return 0;  // Unreadable data is treated as an empty array
    }
}
// Returns the element at `index`.
//
// The element stride is derived from the tag byte of the first element
// (1 tag byte + getTypeSize of that type).
// Throws std::runtime_error for a null pointer and std::out_of_range when
// `index` is outside [0, length()).
template<typename T>
T BT_UniformArray<T>::operator[](int32_t index) const {
    if (this->pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const int32_t len = length();
    if (index < 0 || index >= len) {
        throw std::out_of_range("Index out of range");
    }
    const int64_t firstElement = this->pointer_.address() + 1 + 4;  // Skip tag and count
    const BT_Type elementType = static_cast<BT_Type>(this->table_->readByte(firstElement));
    const int32_t elementSize = 1 + getTypeSize(elementType);
    // Widen before multiplying: index * elementSize can exceed 32 bits for
    // large arrays even though the file address itself is 64-bit.
    const int64_t itemAddress = firstElement + static_cast<int64_t>(index) * elementSize;
    BT_Reference itemRef(this->table_, BT_Pointer(itemAddress));
    return itemRef.decodeValue<T>();
}
// Overwrites the element at `index` in place with `value`.
//
// Throws std::runtime_error for a null pointer or when the array's stored
// element type differs from the type associated with T, and
// std::out_of_range when `index` is outside [0, length()).
template<typename T>
void BT_UniformArray<T>::set(int32_t index, const T& value) {
    if (this->pointer_.isNull()) {
        throw std::runtime_error("Null pointer");
    }
    const int32_t len = length();
    if (index < 0 || index >= len) {
        throw std::out_of_range("Index out of range");
    }
    // The first element's tag byte identifies the array's element type.
    const int64_t firstElement = this->pointer_.address() + 1 + 4;  // Skip tag and count
    const BT_Type expectedType = getTypeFromValue<T>();
    const BT_Type elementType = static_cast<BT_Type>(this->table_->readByte(firstElement));
    if (expectedType != elementType) {
        throw std::runtime_error("Type mismatch");
    }
    const auto valueBuffer = encodeValue(value);
    const int32_t elementSize = 1 + getTypeSize(elementType);
    // Widen before multiplying: index * elementSize can exceed 32 bits for
    // large arrays even though the file address itself is 64-bit.
    const int64_t itemAddress = firstElement + static_cast<int64_t>(index) * elementSize;
    this->table_->writeBytes(itemAddress, valueBuffer);
}
// Appends a single element by delegating to addAll with a one-element vector.
template<typename T>
void BT_UniformArray<T>::add(const T& value) {
    std::vector<T> single;
    single.push_back(value);
    addAll(single);
}
// Appends `values` to the array by rewriting the whole array at a new
// location.
//
// Runs inside the table's antiFreeListScope. Steps: read the current encoded
// array (or synthesize an empty header), append the encoded new elements,
// patch the element count, free the old block, allocate a new block, write
// the new bytes, then repoint every address-table entry that referenced the
// old pointer and update this array's own pointer.
//
// Throws std::runtime_error if the stored element type differs from the type
// associated with T, or if that type has no fixed size.
template<typename T>
void BT_UniformArray<T>::addAll(const std::vector<T>& values) {
    this->table_->antiFreeListScope([&]() {
        const BT_Type elementType = getTypeFromValue<T>();
        const int32_t currentLength = length();

        // A non-empty array must already hold elements of the same type.
        if (currentLength > 0) {
            const BT_Type existingType = static_cast<BT_Type>(
                this->table_->readByte(this->pointer_.address() + 1 + 4));
            if (existingType != elementType) {
                throw std::runtime_error("Type mismatch");
            }
        }

        // Every value has static type T, so the element type and its size are
        // loop-invariant; validate them once instead of once per value.
        const int32_t payloadSize = getTypeSize(elementType);
        if (payloadSize == -1) {
            throw std::runtime_error("Variable size types not supported in uniform arrays");
        }

        const int32_t elementSize = 1 + payloadSize;
        const int32_t currentBufferSize = 1 + 4 + currentLength * elementSize;

        // Start from the existing encoded array, or from a fresh header.
        std::vector<uint8_t> fullBuffer;
        if (currentLength > 0) {
            fullBuffer = this->table_->readBytes(this->pointer_.address(), currentBufferSize);
        } else {
            fullBuffer.push_back(static_cast<uint8_t>(
                elementType == BT_Type::INTEGER ? BT_Type::INTEGER_ARRAY : BT_Type::FLOAT_ARRAY));
            fullBuffer.insert(fullBuffer.end(), 4, 0);  // Count, patched below
        }

        for (const auto& value : values) {
            const auto valueBuffer = encodeValue(value);
            fullBuffer.insert(fullBuffer.end(), valueBuffer.begin(), valueBuffer.end());
        }

        // Patch the little-endian element count in the header.
        const int32_t newLength = currentLength + static_cast<int32_t>(values.size());
        fullBuffer[1] = newLength & 0xFF;
        fullBuffer[2] = (newLength >> 8) & 0xFF;
        fullBuffer[3] = (newLength >> 16) & 0xFF;
        fullBuffer[4] = (newLength >> 24) & 0xFF;

        BT_Pointer oldPointer = this->pointer_;
        if (!oldPointer.isNull()) {
            this->table_->free(oldPointer, currentBufferSize);
        }

        BT_Pointer newPointer = this->table_->alloc(static_cast<int32_t>(fullBuffer.size()));

        // Write the array BEFORE touching the address table. alloc() hands out
        // the current end of file without reserving it, and setAddressTable()
        // allocates too; writing first grows the file so the table cannot be
        // given the same address and then be overwritten by this array.
        this->table_->writeBytes(newPointer.address(), fullBuffer);

        // Repoint every address-table entry that referenced the old location.
        auto addressTable = this->table_->getAddressTable();
        for (auto& entry : addressTable) {
            if (entry.second == oldPointer) {
                entry.second = newPointer;
            }
        }
        this->table_->setAddressTable(addressTable);
        this->pointer_ = newPointer;
    });
}
// Returns the elements in [start, end) as a vector.
//
// `end == -1` means "through the last element". An empty array yields an
// empty vector regardless of the requested range.
// Throws std::out_of_range for an invalid range and std::runtime_error when
// the stored element type has no fixed size.
template<typename T>
std::vector<T> BT_UniformArray<T>::fetchSublist(int32_t start, int32_t end) {
    const int32_t len = length();
    if (len == 0) {
        return {};
    }
    if (end == -1) {
        end = len;
    }
    if (start < 0 || start >= len || end < start || end > len) {
        throw std::out_of_range("Invalid range");
    }

    // The first element's tag byte identifies the array's element type.
    const int64_t firstElement = this->pointer_.address() + 1 + 4;  // Skip tag and count
    const BT_Type elementType = static_cast<BT_Type>(this->table_->readByte(firstElement));
    const int32_t payloadSize = getTypeSize(elementType);
    if (payloadSize == -1) {
        throw std::runtime_error("Variable size types not supported in uniform arrays");
    }
    const int64_t elementSize = 1 + static_cast<int64_t>(payloadSize);

    std::vector<T> result;
    result.reserve(static_cast<std::size_t>(end - start));
    for (int32_t i = start; i < end; ++i) {
        // 64-bit arithmetic: i * elementSize can exceed 32 bits for large arrays.
        const int64_t itemAddress = firstElement + i * elementSize;
        BT_Reference itemRef(this->table_, BT_Pointer(itemAddress));
        result.push_back(itemRef.decodeValue<T>());
    }
    return result;
}
// Explicit template instantiations
// The BT_UniformArray member templates are defined in this translation unit,
// so the two element types used by the codec above (int32_t and float) are
// instantiated here explicitly.
template class BT_UniformArray<int32_t>;
template class BT_UniformArray<float>;
// BinaryTable implementation

// Opens the file at `path` for binary read/write. If that first open fails,
// the file is created by opening it write-only, closed, and then reopened
// for read/write. No error is reported if the file still cannot be opened.
BinaryTable::BinaryTable(const std::string& path)
    : filePath_(path), freeListLifted_(false) {
    const std::ios::openmode readWrite = std::ios::binary | std::ios::in | std::ios::out;

    file_.open(path, readWrite);
    if (file_.fail()) {
        // in|out does not create a missing file; an out-only open does.
        file_.open(path, std::ios::binary | std::ios::out);
        file_.close();
        file_.open(path, readWrite);
    }
}
// Closes the backing file if it is still open.
BinaryTable::~BinaryTable() {
    if (!file_.is_open()) {
        return;
    }
    file_.close();
}
// Writes the initial file contents: an 8-byte address-table pointer set to
// the null pointer's address, followed by a 4-byte free-list entry count of
// zero, then flushes the stream.
void BinaryTable::initialize() {
    constexpr int64_t kAddressTableSlot = 0;   // 8 bytes
    constexpr int64_t kFreeListCountSlot = 8;  // 4 bytes

    file_.seekp(kAddressTableSlot);
    writeInt64(kAddressTableSlot, BT_Null.address());
    writeInt32(kFreeListCountSlot, 0);
    file_.flush();
}
// File I/O helper implementations

// Reads a little-endian 32-bit integer stored at `position`.
//
// Throws std::runtime_error if the seek or read fails (for example when the
// range lies past the end of the file). The stream's error state is cleared
// before throwing so that later I/O on the table is not silently disabled by
// a sticky failbit.
int32_t BinaryTable::readInt32(int64_t position) {
    uint8_t bytes[4] = {};
    file_.seekg(position);
    file_.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
    if (!file_) {
        file_.clear();
        throw std::runtime_error("Failed to read int32 from file");
    }
    // Assemble in unsigned arithmetic so the top byte never shifts into the
    // sign bit of a signed int.
    uint32_t bits = 0;
    for (int i = 0; i < 4; ++i) {
        bits |= static_cast<uint32_t>(bytes[i]) << (i * 8);
    }
    return static_cast<int32_t>(bits);
}
// Reads a 32-bit float whose bit pattern is stored little-endian at
// `position`.
//
// Throws std::runtime_error if the seek or read fails. The stream's error
// state is cleared before throwing so that later I/O on the table is not
// silently disabled by a sticky failbit.
float BinaryTable::readFloat32(int64_t position) {
    uint8_t bytes[4] = {};
    file_.seekg(position);
    file_.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
    if (!file_) {
        file_.clear();
        throw std::runtime_error("Failed to read float32 from file");
    }
    uint32_t floatBits = 0;
    for (int i = 0; i < 4; ++i) {
        floatBits |= static_cast<uint32_t>(bytes[i]) << (i * 8);
    }
    // memcpy converts the bit pattern to a float without aliasing violations.
    float result;
    std::memcpy(&result, &floatBits, sizeof(float));
    return result;
}
// Reads a little-endian 64-bit integer stored at `position`.
//
// Throws std::runtime_error if the seek or read fails. The stream's error
// state is cleared before throwing so that later I/O on the table is not
// silently disabled by a sticky failbit.
int64_t BinaryTable::readInt64(int64_t position) {
    uint8_t bytes[8] = {};
    file_.seekg(position);
    file_.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
    if (!file_) {
        file_.clear();
        throw std::runtime_error("Failed to read int64 from file");
    }
    // Assemble in unsigned arithmetic so the top byte never shifts into the
    // sign bit of a signed 64-bit value.
    uint64_t bits = 0;
    for (int i = 0; i < 8; ++i) {
        bits |= static_cast<uint64_t>(bytes[i]) << (i * 8);
    }
    return static_cast<int64_t>(bits);
}
// Reads the single byte stored at `position`.
//
// Throws std::runtime_error if the seek or read fails. The stream's error
// state is cleared before throwing so that later I/O on the table is not
// silently disabled by a sticky failbit.
uint8_t BinaryTable::readByte(int64_t position) {
    uint8_t byte = 0;
    file_.seekg(position);
    file_.read(reinterpret_cast<char*>(&byte), 1);
    if (!file_) {
        file_.clear();
        throw std::runtime_error("Failed to read byte from file");
    }
    return byte;
}
// Reads `count` bytes starting at `position` and returns them.
//
// Throws std::runtime_error if the seek or read fails (for example when
// fewer than `count` bytes are available). The stream's error state is
// cleared before throwing so that later I/O on the table is not silently
// disabled by a sticky failbit.
std::vector<uint8_t> BinaryTable::readBytes(int64_t position, int32_t count) {
    file_.seekg(position);
    std::vector<uint8_t> bytes(count);
    file_.read(reinterpret_cast<char*>(bytes.data()), count);
    if (!file_) {
        file_.clear();
        throw std::runtime_error("Failed to read bytes from file");
    }
    return bytes;
}
// Writes `value` at `position` as four bytes, least-significant byte first.
void BinaryTable::writeInt32(int64_t position, int32_t value) {
    const uint32_t bits = static_cast<uint32_t>(value);
    uint8_t encoded[4];
    for (int i = 0; i < 4; ++i) {
        encoded[i] = static_cast<uint8_t>((bits >> (i * 8)) & 0xFFu);
    }
    file_.seekp(position);
    file_.write(reinterpret_cast<const char*>(encoded), 4);
}
// Writes the bit pattern of `value` at `position` as four bytes,
// least-significant byte first.
void BinaryTable::writeFloat32(int64_t position, float value) {
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(float));

    uint8_t encoded[4];
    for (int i = 0; i < 4; ++i) {
        encoded[i] = static_cast<uint8_t>((bits >> (i * 8)) & 0xFFu);
    }
    file_.seekp(position);
    file_.write(reinterpret_cast<const char*>(encoded), 4);
}
// Writes `value` at `position` as eight bytes, least-significant byte first.
void BinaryTable::writeInt64(int64_t position, int64_t value) {
    const uint64_t bits = static_cast<uint64_t>(value);
    uint8_t encoded[8];
    int index = 0;
    for (unsigned shift = 0; shift < 64; shift += 8) {
        encoded[index++] = static_cast<uint8_t>((bits >> shift) & 0xFFu);
    }
    file_.seekp(position);
    file_.write(reinterpret_cast<const char*>(encoded), 8);
}
// Writes the single byte `value` at `position`.
void BinaryTable::writeByte(int64_t position, uint8_t value) {
    const uint8_t* const source = &value;
    file_.seekp(position);
    file_.write(reinterpret_cast<const char*>(source), 1);
}
// Writes all bytes of `data` to the file starting at `position`.
void BinaryTable::writeBytes(int64_t position, const std::vector<uint8_t>& data) {
    const char* const raw = reinterpret_cast<const char*>(data.data());
    file_.seekp(position);
    file_.write(raw, data.size());
}
// Returns the file length by seeking the read position to the end of the
// file and reporting that position. The read position is left at the end.
int64_t BinaryTable::getFileLength() {
    file_.seekg(0, std::ios::end);
    const auto endPosition = file_.tellg();
    return endPosition;
}
// Moves both the read position and the write position of the backing file
// to `position`.
void BinaryTable::setFilePosition(int64_t position) {
    const int64_t target = position;
    file_.seekg(target);  // read position
    file_.seekp(target);  // write position
}
// Address table management
std::unordered_map<int64_t, BT_Pointer> BinaryTable::getAddressTable() {
file_.seekg(0);
int64_t tableAddress = readInt64(0);
if (tableAddress == -1) { // Null pointer
return {};
}
try {
uint8_t typeId = readByte(tableAddress);
if (static_cast<BT_Type>(typeId) != BT_Type::ADDRESS_TABLE) {
// Address table might not be valid yet, return empty
return {};
}
int32_t tableCount = readInt32(tableAddress + 1);
std::unordered_map<int64_t, BT_Pointer> addressTable;
for (int32_t i = 0; i < tableCount; i++) {
int64_t offset = tableAddress + 1 + 4 + i * (8 + 8);
int64_t keyHash = readInt64(offset);
int64_t valueAddress = readInt64(offset + 8);
addressTable[keyHash] = BT_Pointer(valueAddress);
}
return addressTable;
} catch (...) {
// If we can't read the address table, return empty
return {};
}
}
void BinaryTable::setAddressTable(const std::unordered_map<int64_t, BT_Pointer>& table) {
// Build buffer manually (matching Dart implementation exactly)
std::vector<uint8_t> buffer;
// Type byte
buffer.push_back(static_cast<uint8_t>(BT_Type::ADDRESS_TABLE));
// Table count (little endian, 4 bytes)
int32_t count = static_cast<int32_t>(table.size());
for (int i = 0; i < 4; i++) {
buffer.push_back(static_cast<uint8_t>((count >> (i * 8)) & 0xFF));
}
// Table entries
for (const auto& [key, value] : table) {
// Key hash (little endian, 8 bytes)
for (int i = 0; i < 8; i++) {
buffer.push_back(static_cast<uint8_t>((key >> (i * 8)) & 0xFF));
}
// Value address (little endian, 8 bytes)
int64_t addr = value.address();
for (int i = 0; i < 8; i++) {
buffer.push_back(static_cast<uint8_t>((addr >> (i * 8)) & 0xFF));
}
}
// Write new address table at end of file
BT_Pointer tableAddress = alloc(static_cast<int32_t>(buffer.size()));
file_.seekp(tableAddress.address());
file_.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
// Read old table pointer before updating
file_.seekg(0);
int64_t oldTablePointerAddress = readInt64(0);
BT_Pointer oldTablePtr(oldTablePointerAddress);
// Update header to point to new table
file_.seekp(0);
writeInt64(0, tableAddress.address());
file_.flush();
// Now free the old table if it exists and is not the same as the new one
if (!oldTablePtr.isNull() && oldTablePtr != tableAddress) {
BT_Reference oldTableRef(this, oldTablePtr);
free(oldTablePtr, oldTableRef.size());
}
}
// Free list management
std::vector<BT_FreeListEntry> BinaryTable::getFreeList() {
if (freeListLifted_) {
return freeListCache_;
}
int64_t fileLength = getFileLength();
if (fileLength < 4) {
return {};
}
int32_t entryCount = readInt32(fileLength - 4);
if (entryCount == 0) {
return {};
}
int32_t entrySize = 8 + 4; // Pointer + Size
int32_t freeListSize = entryCount * entrySize;
int64_t freeListStart = fileLength - 4 - freeListSize;
std::vector<BT_FreeListEntry> freeList;
for (int32_t i = 0; i < entryCount; i++) {
int64_t offset = freeListStart + i * entrySize;
int64_t pointerAddress = readInt64(offset);
int32_t size = readInt32(offset + 8);
freeList.emplace_back(BT_Pointer(pointerAddress), size);
}
return freeList;
}
// Replaces the free list with `list`.
//
// While the free list is lifted only the in-memory cache is replaced.
// Otherwise the last 4 bytes of the file are interpreted as the entry count
// of the list currently stored there, and the new list (12-byte entries
// followed by a 4-byte count, all little-endian) is written starting where
// that old list began. The file itself is not truncated here, so bytes of a
// longer old list would remain after a shorter new one. Progress is traced
// to std::cout.
void BinaryTable::setFreeList(const std::vector<BT_FreeListEntry>& list) {
    if (freeListLifted_) {
        freeListCache_ = list;
        return;
    }
    std::cout << "DEBUG: setFreeList called with " << list.size() << " entries" << std::endl;

    // Locate the start of the list currently stored at the end of the file.
    const int64_t fileLength = getFileLength();
    std::cout << "DEBUG: File length: " << fileLength << std::endl;
    file_.seekg(fileLength - 4);
    const int32_t oldEntryCount = readInt32(fileLength - 4);
    const int32_t oldListSize = (oldEntryCount * (8 + 4)) + 4;  // Entries + count
    std::cout << "DEBUG: Old entry count: " << oldEntryCount << ", old list size: " << oldListSize << std::endl;
    const int64_t newFileLength = fileLength - oldListSize;
    std::cout << "DEBUG: New file length after truncation: " << newFileLength << std::endl;

    // Encode the replacement list.
    std::vector<uint8_t> encoded;
    const auto appendLittleEndian = [&encoded](uint64_t bits, int byteCount) {
        for (int i = 0; i < byteCount; ++i) {
            encoded.push_back(static_cast<uint8_t>((bits >> (i * 8)) & 0xFF));
        }
    };
    for (const auto& entry : list) {
        std::cout << "DEBUG: Encoding entry - address: " << entry.pointer.address() << ", size: " << entry.size << std::endl;
        appendLittleEndian(static_cast<uint64_t>(entry.pointer.address()), 8);
        appendLittleEndian(static_cast<uint32_t>(static_cast<int32_t>(entry.size)), 4);
    }
    const int32_t entryCount = static_cast<int32_t>(list.size());
    appendLittleEndian(static_cast<uint32_t>(entryCount), 4);
    std::cout << "DEBUG: Buffer size: " << encoded.size() << " bytes" << std::endl;
    std::cout << "DEBUG: Writing free list at position: " << newFileLength << std::endl;

    // Report the physical end of file, then write at the logical end.
    file_.seekp(0, std::ios::end);
    const int64_t actualFileLength = file_.tellp();
    std::cout << "DEBUG: Actual file length: " << actualFileLength << std::endl;
    file_.seekp(newFileLength);
    file_.write(reinterpret_cast<const char*>(encoded.data()), encoded.size());
    file_.flush();
    std::cout << "DEBUG: setFreeList completed" << std::endl;
}
// Resizes the backing file to `newSize` bytes. The stream is closed before
// the resize and reopened for binary read/write afterwards.
void BinaryTable::truncateFile(int64_t newSize) {
    const std::ios::openmode readWrite = std::ios::binary | std::ios::in | std::ios::out;

    file_.close();
    std::filesystem::resize_file(filePath_, newSize);
    file_.open(filePath_, readWrite);
}
// Moves the free list from the end of the file into the in-memory cache.
//
// The stored list is read into the cache and then cut off the end of the
// file. A file too short to contain the 4-byte entry count has no stored
// list, so nothing is cut off.
// Throws std::runtime_error if the free list is already lifted, or if the
// stored entry count is negative or describes a list larger than the file.
void BinaryTable::liftFreeList() {
    if (freeListLifted_) {
        throw std::runtime_error("Free list is already lifted");
    }
    freeListCache_ = getFreeList();

    const int64_t fileLength = getFileLength();
    if (fileLength >= 4) {
        const int32_t oldEntryCount = readInt32(fileLength - 4);
        // 64-bit arithmetic so a garbage count cannot overflow the size.
        const int64_t oldFreeListSize = static_cast<int64_t>(oldEntryCount) * (8 + 4) + 4;
        // Refuse to resize based on an impossible count; doing so would
        // either grow the file or cut into (or past) the stored data.
        if (oldEntryCount < 0 || oldFreeListSize > fileLength) {
            throw std::runtime_error("Corrupt free list");
        }
        truncateFile(fileLength - oldFreeListSize);
    }
    freeListLifted_ = true;
}
void BinaryTable::dropFreeList() {
if (!freeListLifted_) {
throw std::runtime_error("Free list is not lifted");
}
std::cout << "DEBUG: dropFreeList - seeking to end" << std::endl;
file_.seekp(0, std::ios::end);
std::cout << "DEBUG: dropFreeList - about to call setFreeList with " << freeListCache_.size() << " entries" << std::endl;
freeListLifted_ = false;
setFreeList(freeListCache_);
std::cout << "DEBUG: dropFreeList - setFreeList completed" << std::endl;
freeListCache_.clear();
}
// Runs `fn` with the free list lifted.
//
// The free list is lifted before `fn` is called and dropped afterwards. If
// `fn` throws, the free list is dropped and the exception is rethrown.
// Each step is traced to std::cout.
void BinaryTable::antiFreeListScope(std::function<void()> fn) {
    const auto trace = [](const char* message) {
        std::cout << message << std::endl;
    };

    trace("DEBUG: antiFreeListScope START");
    liftFreeList();
    trace("DEBUG: After liftFreeList");
    try {
        fn();
        trace("DEBUG: After fn() execution");
    } catch (...) {
        trace("DEBUG: Exception caught, dropping free list");
        dropFreeList();
        throw;
    }
    trace("DEBUG: About to dropFreeList");
    dropFreeList();
    trace("DEBUG: antiFreeListScope END");
}
// Memory management

// Records the block [pointer, pointer + size) as free in the cached free
// list.
//
// The cache is re-sorted by address and any entry that begins exactly where
// the previous one ends is merged into it.
// Throws std::runtime_error if the free list is not lifted and
// std::invalid_argument for a null pointer or a non-positive size.
void BinaryTable::free(BT_Pointer pointer, int32_t size) {
    if (!freeListLifted_) {
        throw std::runtime_error("Free list must be lifted before freeing memory");
    }
    if (pointer.isNull() || size <= 0) {
        throw std::invalid_argument("Cannot free null pointer or zero size");
    }

    // Work on a copy so the cache is only replaced once merging is done.
    std::vector<BT_FreeListEntry> pending = freeListCache_;
    pending.emplace_back(pointer, size);
    std::sort(pending.begin(), pending.end(),
              [](const BT_FreeListEntry& lhs, const BT_FreeListEntry& rhs) {
                  return lhs.pointer.address() < rhs.pointer.address();
              });

    std::vector<BT_FreeListEntry> coalesced;
    for (const auto& block : pending) {
        const bool touchesPrevious =
            !coalesced.empty() &&
            coalesced.back().pointer.address() + coalesced.back().size == block.pointer.address();
        if (touchesPrevious) {
            coalesced.back().size += block.size;  // Extend the previous block
        } else {
            coalesced.emplace_back(block.pointer, block.size);
        }
    }
    freeListCache_ = coalesced;
}
// Returns a pointer to a block of at least `size` bytes.
//
// The first cached free block that is large enough is used: an exact fit is
// removed from the cache, a larger block is shrunk from its front. When no
// block fits, the current end of the file is returned. Nothing is written in
// that case, so the file does not grow until the caller writes there; a
// second end-of-file allocation made before that write gets the same address.
// Throws std::runtime_error if the free list is not lifted.
BT_Pointer BinaryTable::alloc(int32_t size) {
    if (!freeListLifted_) {
        throw std::runtime_error("Free list must be lifted before allocation");
    }
    for (auto block = freeListCache_.begin(); block != freeListCache_.end(); ++block) {
        if (block->size < size) {
            continue;
        }
        const BT_Pointer claimed = block->pointer;
        if (block->size == size) {
            freeListCache_.erase(block);
        } else {
            // Keep the tail of the block on the free list.
            block->pointer = BT_Pointer(block->pointer.address() + size);
            block->size -= size;
        }
        return claimed;
    }
    return BT_Pointer(getFileLength());
}
// Data operations

// Looks up `key` (by its hash) in the address table and returns a reference
// to the stored value.
// Throws std::runtime_error if the key is not present.
BT_Reference BinaryTable::getReference(const std::string& key) {
    const auto entries = getAddressTable();
    const auto found = entries.find(hashString(key));
    if (found != entries.end()) {
        return BT_Reference(this, found->second);
    }
    throw std::runtime_error("Key does not exist");
}
void BinaryTable::remove(const std::string& key) {
antiFreeListScope([&]() {
auto addressTable = getAddressTable();
int64_t keyHash = hashString(key);
auto it = addressTable.find(keyHash);
if (it == addressTable.end()) {
throw std::runtime_error("Key does not exist");
}
BT_Reference valueRef(this, it->second);
free(it->second, valueRef.size());
addressTable.erase(it);
setAddressTable(addressTable);
});
}
void BinaryTable::truncate() {
antiFreeListScope([&]() {
// Relocate address table
setAddressTable(getAddressTable());
// Check if last free block is at end of file
auto freeList = getFreeList();
if (freeList.empty()) {
return;
}
std::sort(freeList.begin(), freeList.end(),
[](const BT_FreeListEntry& a, const BT_FreeListEntry& b) {
return a.pointer.address() < b.pointer.address();
});
const auto& lastEntry = freeList.back();
int64_t fileEnd = getFileLength();
int64_t expectedEnd = lastEntry.pointer.address() + lastEntry.size;
if (expectedEnd == fileEnd) {
freeList.pop_back();
setFreeList(freeList);
// Truncate file
file_.close();
file_.open(filePath_, std::ios::binary | std::ios::in | std::ios::out);
}
});
}
// Debug methods
void BinaryTable::debugAddressTable(const std::string& context) {
std::cout << "\n=== DEBUG ADDRESS TABLE";
if (!context.empty()) {
std::cout << " (" << context << ")";
}
std::cout << " ===" << std::endl;
auto addressTable = getAddressTable();
std::cout << "Address table has " << addressTable.size() << " entries" << std::endl;
for (const auto& [hash, pointer] : addressTable) {
std::cout << " Hash " << hash << " -> Address " << pointer.address() << std::endl;
if (!pointer.isNull()) {
try {
uint8_t typeByte = readByte(pointer.address());
std::cout << " Type byte: " << (int)typeByte << std::endl;
if (typeByte == 2) { // INTEGER
int32_t value = readInt32(pointer.address() + 1);
std::cout << " Value: " << value << std::endl;
} else {
std::cout << " Raw bytes: ";
for (int i = 0; i < 8; i++) {
uint8_t byte = readByte(pointer.address() + i);
std::cout << std::hex << (int)byte << " ";
}
std::cout << std::dec << std::endl;
}
} catch (const std::exception& e) {
std::cout << " Error reading data: " << e.what() << std::endl;
}
}
}
std::cout << "=========================" << std::endl;
}
} // namespace bt