Files
SweepStore/cpp/binary_table.h

626 lines
22 KiB
C++

#define BINARY_TABLE_MAIN
#pragma once
#include <cstdint>
#include <vector>
#include <string>
#include <fstream>
#include <map>
#include <variant>
#include <stdexcept>
#include <cstring>
#include <algorithm>
#include <sstream>
#include <iomanip>
// --- BT_Type Enum ---
// Type tag identifying how an encoded value is laid out. The numeric ids are
// the values accepted by BT_Type_from_id() and must stay stable.
enum class BT_Type : int {
    POINTER = 0,
    ADDRESS_TABLE = 1,
    INTEGER = 2,
    FLOAT = 3,
    STRING = 4,
    INTEGER_ARRAY = 5,
    FLOAT_ARRAY = 6
};

// Returns the fixed payload size in bytes for `t`, or -1 when the type has no
// fixed size (address tables, strings and arrays).
// Throws std::invalid_argument for a value outside the enum.
inline int BT_Type_size(BT_Type t) {
    if (t == BT_Type::POINTER) {
        return 8;
    }
    if (t == BT_Type::INTEGER || t == BT_Type::FLOAT) {
        return 4;
    }
    const bool variableLength = t == BT_Type::ADDRESS_TABLE || t == BT_Type::STRING ||
                                t == BT_Type::INTEGER_ARRAY || t == BT_Type::FLOAT_ARRAY;
    if (variableLength) {
        return -1;
    }
    throw std::invalid_argument("Invalid BT_Type");
}

// True only for the two uniform array tags.
inline bool BT_Type_is_array(BT_Type t) {
    switch (t) {
        case BT_Type::INTEGER_ARRAY:
        case BT_Type::FLOAT_ARRAY:
            return true;
        default:
            return false;
    }
}

// Converts a raw numeric id into a BT_Type.
// Throws std::invalid_argument when `id` is not in the range 0..6.
inline BT_Type BT_Type_from_id(int id) {
    const bool known = id >= 0 && id <= 6;
    if (!known) {
        throw std::invalid_argument("Invalid BT_Type id");
    }
    return static_cast<BT_Type>(id);
}
// --- FNV-1a Hash ---
// 64-bit FNV-1a hash of `str`, processed byte by byte.
// The unsigned 64-bit digest is returned reinterpreted as int64_t, so the
// result may be negative.
inline int64_t bt_hash(const std::string& str) {
    constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ULL;
    constexpr uint64_t kPrime = 0x100000001b3ULL;

    uint64_t digest = kOffsetBasis;
    for (std::size_t pos = 0; pos < str.size(); ++pos) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        const uint64_t byte = static_cast<unsigned char>(str[pos]);
        digest = (digest ^ byte) * kPrime;
    }
    return static_cast<int64_t>(digest);
}
// --- BT_Pointer ---
// A 64-bit address wrapper. The address -1 is the null sentinel.
struct BT_Pointer {
    int64_t address;

    // Default-constructs the null pointer (address == -1).
    BT_Pointer(int64_t addr = -1) : address(addr) {}

    bool is_null() const { return address == -1; }

    bool operator==(const BT_Pointer& other) const { return address == other.address; }
    bool operator!=(const BT_Pointer& other) const { return address != other.address; }

    // Formats the address as "0x<hex> (<decimal>)".
    std::string to_string() const {
        std::ostringstream text;
        text << "0x" << std::hex << address;
        text << " (" << std::dec << address << ")";
        return text.str();
    }
};

// Shared null pointer constant.
const BT_Pointer BT_Null(-1);
// --- BT_Value Type ---
using BT_Value = std::variant<int, double, std::string, std::vector<int>, std::vector<double>>;
// --- encodeValue ---
// Serializes `value` into its byte representation:
//   int            -> [INTEGER tag][4-byte little-endian value]
//   double         -> [FLOAT tag][4 bytes: the value narrowed to float, host byte order]
//   std::string    -> [STRING tag][4-byte LE length][raw bytes]
//   vector<int>    -> [INTEGER_ARRAY tag][4-byte LE count] then each element
//                     encoded like a standalone int (tag included)
//   vector<double> -> [FLOAT_ARRAY tag][4-byte LE count] then each element
//                     encoded like a standalone double (tag included)
// Throws std::invalid_argument if the variant holds none of these.
inline std::vector<uint8_t> encodeValue(const BT_Value& value) {
    std::vector<uint8_t> out;

    const auto putTag = [&out](BT_Type tag) {
        out.push_back(static_cast<uint8_t>(tag));
    };
    const auto putInt32 = [&out](int word) {
        for (int shift = 0; shift < 32; shift += 8) {
            out.push_back((word >> shift) & 0xFF);
        }
    };
    const auto putFloat32 = [&out](double wide) {
        const float narrow = static_cast<float>(wide);
        uint8_t raw[4];
        std::memcpy(raw, &narrow, 4);
        out.insert(out.end(), raw, raw + 4);
    };

    if (const int* asInt = std::get_if<int>(&value)) {
        putTag(BT_Type::INTEGER);
        putInt32(*asInt);
    } else if (const double* asDouble = std::get_if<double>(&value)) {
        putTag(BT_Type::FLOAT);
        putFloat32(*asDouble);
    } else if (const std::string* asText = std::get_if<std::string>(&value)) {
        putTag(BT_Type::STRING);
        putInt32(static_cast<int>(asText->size()));
        out.insert(out.end(), asText->begin(), asText->end());
    } else if (const std::vector<int>* asInts = std::get_if<std::vector<int>>(&value)) {
        putTag(BT_Type::INTEGER_ARRAY);
        putInt32(static_cast<int>(asInts->size()));
        for (const int item : *asInts) {
            putTag(BT_Type::INTEGER);
            putInt32(item);
        }
    } else if (const std::vector<double>* asDoubles = std::get_if<std::vector<double>>(&value)) {
        putTag(BT_Type::FLOAT_ARRAY);
        putInt32(static_cast<int>(asDoubles->size()));
        for (const double item : *asDoubles) {
            putTag(BT_Type::FLOAT);
            putFloat32(item);
        }
    } else {
        throw std::invalid_argument("Unsupported BT_Value type");
    }
    return out;
}
// --- BT_FreeListEntry ---
struct BT_FreeListEntry {
BT_Pointer pointer;
int size;
BT_FreeListEntry(BT_Pointer p, int s) : pointer(p), size(s) {}
};
// --- File I/O Helpers ---
// Thin wrapper around a binary std::fstream with helpers for the fixed-width
// little-endian integers and pointers used by the table format.
//
// Integers and pointers are (de)serialized byte by byte in little-endian
// order. Floats are copied with memcpy and therefore use the host byte order.
//
// All read helpers throw std::runtime_error when fewer bytes than requested
// are available; the stream's error flags are cleared first so the object
// stays usable afterwards.
class BT_File {
public:
    std::fstream file;

    // Opens `path` for binary read/write, creating the file when it does not
    // exist yet. Throws std::runtime_error if the file cannot be opened.
    BT_File(const std::string& path) {
        file.open(path, std::ios::in | std::ios::out | std::ios::binary);
        if (!file.is_open()) {
            // in|out does not create a missing file: create it with an
            // output-only open, then reopen for read/write.
            file.open(path, std::ios::out | std::ios::binary);
            file.close();
            file.open(path, std::ios::in | std::ios::out | std::ios::binary);
        }
        if (!file.is_open()) throw std::runtime_error("Failed to open file");
    }

    // Moves both the write and the read position to `pos`.
    void setPosition(int64_t pos) {
        file.seekp(pos);
        file.seekg(pos);
    }

    // Returns the file length in bytes and restores the previous position.
    int64_t length() {
        auto cur = file.tellg();
        file.seekg(0, std::ios::end);
        int64_t len = file.tellg();
        file.seekg(cur);
        file.seekp(cur);
        return len;
    }

    // Reads exactly `size` bytes from the current position.
    // Throws std::invalid_argument for a negative size and
    // std::runtime_error on a short read.
    std::vector<uint8_t> read(int size) {
        if (size < 0) throw std::invalid_argument("Negative read size");
        std::vector<uint8_t> buf(static_cast<std::size_t>(size));
        readExact(reinterpret_cast<char*>(buf.data()), size);
        return buf;
    }

    // Writes all bytes of `buf` at the current position.
    void write(const std::vector<uint8_t>& buf) {
        file.write(reinterpret_cast<const char*>(buf.data()),
                   static_cast<std::streamsize>(buf.size()));
    }

    // Reads a little-endian two's-complement integer of `size` bytes (1..4)
    // and sign-extends it to int.
    // Throws std::invalid_argument when `size` is outside 1..4.
    int readInt(int size = 4) {
        if (size < 1 || size > 4) throw std::invalid_argument("readInt size must be 1..4");
        const std::vector<uint8_t> buf = read(size);
        // Assemble in an unsigned type: shifting a signed value into or past
        // the sign bit is not portable.
        uint32_t raw = 0;
        for (int i = size - 1; i >= 0; --i) {
            raw = (raw << 8) | buf[i];
        }
        // Sign-extend in 64 bits so that 1 << 32 (size == 4) is a valid shift.
        const int bits = size * 8;
        int64_t value = static_cast<int64_t>(raw);
        if (raw & (uint32_t{1} << (bits - 1))) {
            value -= (int64_t{1} << bits);
        }
        return static_cast<int>(value);
    }

    // Writes `value` as a little-endian two's-complement integer of `size`
    // bytes (1..8). Sizes above 4 are filled by sign extension; sizes below 4
    // keep only the low bytes.
    // Throws std::invalid_argument when `size` is outside 1..8.
    void writeInt(int value, int size = 4) {
        if (size < 1 || size > 8) throw std::invalid_argument("writeInt size must be 1..8");
        // Widen first so that shifts of 32..56 bits are well defined.
        const uint64_t raw = static_cast<uint64_t>(static_cast<int64_t>(value));
        std::vector<uint8_t> buf(static_cast<std::size_t>(size));
        for (int i = 0; i < size; ++i) {
            buf[i] = static_cast<uint8_t>((raw >> (i * 8)) & 0xFF);
        }
        write(buf);
    }

    // Reads an 8-byte little-endian address.
    BT_Pointer readPointer() {
        const std::vector<uint8_t> buf = read(8);
        uint64_t raw = 0;
        for (int i = 7; i >= 0; --i) {
            raw = (raw << 8) | buf[i];
        }
        return BT_Pointer(static_cast<int64_t>(raw));
    }

    // Writes the pointer's address as 8 little-endian bytes.
    void writePointer(const BT_Pointer& ptr) {
        const uint64_t raw = static_cast<uint64_t>(ptr.address);
        std::vector<uint8_t> buf(8);
        for (int i = 0; i < 8; ++i) {
            buf[i] = static_cast<uint8_t>((raw >> (i * 8)) & 0xFF);
        }
        write(buf);
    }

    // Reads 4 bytes and reinterprets them as a float (host byte order).
    float readFloat32() {
        std::vector<uint8_t> buf = read(4);
        float val;
        std::memcpy(&val, buf.data(), 4);
        return val;
    }

    // Writes the 4 bytes of `value` (host byte order).
    void writeFloat32(float value) {
        uint8_t buf[4];
        std::memcpy(buf, &value, 4);
        write(std::vector<uint8_t>(buf, buf + 4));
    }

    // Reads 8 bytes and reinterprets them as a double (host byte order).
    double readFloat64() {
        std::vector<uint8_t> buf = read(8);
        double val;
        std::memcpy(&val, buf.data(), 8);
        return val;
    }

    // Writes the 8 bytes of `value` (host byte order).
    void writeFloat64(double value) {
        uint8_t buf[8];
        std::memcpy(buf, &value, 8);
        write(std::vector<uint8_t>(buf, buf + 8));
    }

    // Reads a single byte. Throws std::runtime_error at end of file.
    uint8_t readByte() {
        char c = 0;
        readExact(&c, 1);
        return static_cast<uint8_t>(c);
    }

    // Writes a single byte.
    void writeByte(uint8_t b) {
        char c = static_cast<char>(b);
        file.write(&c, 1);
    }

private:
    // Reads exactly `count` bytes into `dst`. On a short read the stream
    // flags are cleared (a lingering failbit would make every later seek,
    // read and write fail silently) and std::runtime_error is thrown.
    void readExact(char* dst, std::streamsize count) {
        file.read(dst, count);
        if (!file) {
            file.clear();
            throw std::runtime_error("Unexpected end of file while reading");
        }
    }
};
// --- BT_Reference ---
class BinaryTable; // Forward declaration

// A handle to a value stored in a BinaryTable: the owning table plus the
// address of the value's type tag. The table pointer is non-owning.
class BT_Reference {
public:
    BinaryTable* _table;
    BT_Pointer _pointer;

    BT_Reference(BinaryTable* table, BT_Pointer pointer)
        : _table(table), _pointer(pointer) {}

    // The class has virtual functions and is derived from, so it needs a
    // virtual destructor to be safely deletable through a base pointer.
    // Copy operations are defaulted explicitly because declaring the
    // destructor would otherwise leave them only implicitly generated.
    BT_Reference(const BT_Reference&) = default;
    BT_Reference& operator=(const BT_Reference&) = default;
    virtual ~BT_Reference() = default;

    // Decodes the referenced value. Supports integers, floats and strings;
    // throws std::runtime_error for a null pointer, for array types (use
    // BT_UniformArray for those) and for any other type.
    virtual BT_Value decodeValue();

    // Encoded size in bytes of the referenced value (0 for a null pointer).
    virtual int size();

    std::string to_string() const { return _pointer.to_string(); }
};
// --- BT_UniformArray ---
class BT_UniformArray : public BT_Reference {
public:
using BT_Reference::BT_Reference;
int length();
BT_Value operator[](int index);
void set(int index, const BT_Value& value);
void add(const BT_Value& value);
void addAll(const std::vector<BT_Value>& values);
int size();
BT_Type elementType();
std::string to_string(bool readValues = false);
};
// --- binaryDump utility ---
// Renders `data` as a dump with 16 bytes per row. Each row has four columns
// separated by " | ": the row offset (hex and decimal), the bytes in hex, the
// bytes as zero-padded decimals, and the bytes as printable ASCII ('.' for
// anything outside 32..126). Rows are separated by '\n'; there is no trailing
// newline. Returns an empty string for empty input.
//
// Cells for bytes missing from the final row are padded to the full cell
// width (3 characters for hex, 4 for decimal) so the column separators line
// up with the rows above. The ASCII column is not padded.
inline std::string binaryDump(const std::vector<uint8_t>& data) {
    constexpr std::size_t kRowWidth = 16;
    std::ostringstream buffer;
    for (std::size_t i = 0; i < data.size(); i += kRowWidth) {
        // Address
        buffer << "0x" << std::setw(4) << std::setfill('0') << std::hex << std::uppercase << i;
        buffer << " (" << std::dec << std::setw(4) << i << ") | ";
        // Hex bytes: "XX " per cell
        for (std::size_t j = 0; j < kRowWidth; ++j) {
            if (i + j < data.size()) {
                buffer << std::setw(2) << std::setfill('0') << std::hex << std::uppercase
                       << static_cast<int>(data[i + j]) << " ";
            } else {
                buffer << "   ";
            }
        }
        buffer << " | ";
        // Integer representation: "NNN " per cell ('0' fill is still active)
        for (std::size_t j = 0; j < kRowWidth; ++j) {
            if (i + j < data.size()) {
                buffer << std::dec << std::setw(3) << static_cast<int>(data[i + j]) << " ";
            } else {
                buffer << "    ";
            }
        }
        buffer << " | ";
        // ASCII representation
        for (std::size_t j = 0; j < kRowWidth && i + j < data.size(); ++j) {
            const int byte = data[i + j];
            const bool printable = byte >= 32 && byte <= 126;
            buffer << (printable ? static_cast<char>(byte) : '.');
        }
        buffer << " | ";
        if (i + kRowWidth < data.size()) buffer << '\n';
    }
    return buffer.str();
}
// --- BinaryTable (defined before the BT_Reference / BT_UniformArray method bodies below, which dereference it) ---
#include <memory>
class BinaryTable {
public:
std::unique_ptr<BT_File> _file;
std::map<int64_t, BT_Pointer> _addressTable;
BinaryTable(const std::string& path) : _file(std::make_unique<BT_File>(path)) {}
// ...other members will be added later...
// Set a value for a key
void set(const std::string& key, const BT_Value& value) {
int64_t keyHash = bt_hash(key);
std::vector<uint8_t> valueBuffer = encodeValue(value);
// Append value to end of file
_file->setPosition(_file->length());
int64_t valueAddress = _file->length();
_file->write(valueBuffer);
_addressTable[keyHash] = BT_Pointer(valueAddress);
}
// Retrieve the pointer for a given key
BT_Pointer getPointer(const std::string& key) {
int64_t keyHash = bt_hash(key);
auto it = _addressTable.find(keyHash);
if (it == _addressTable.end()) {
throw std::runtime_error("Key not found in address table: " + key);
}
return it->second;
}
};
// Reads the type tag at the referenced address and decodes the value that
// follows it:
//   INTEGER -> int, FLOAT -> double (widened from a 32-bit float),
//   STRING  -> std::string (4-byte length prefix, then the bytes).
// Throws std::runtime_error for a null pointer, for array types and for any
// other type tag.
inline BT_Value BT_Reference::decodeValue() {
    if (_pointer.is_null()) {
        throw std::runtime_error("Null pointer");
    }

    BT_File& io = *_table->_file;
    io.setPosition(_pointer.address);

    switch (BT_Type_from_id(io.readByte())) {
        case BT_Type::INTEGER:
            return io.readInt(4);
        case BT_Type::FLOAT:
            return static_cast<double>(io.readFloat32());
        case BT_Type::STRING: {
            const int byteCount = io.readInt(4);
            const std::vector<uint8_t> raw = io.read(byteCount);
            return std::string(raw.begin(), raw.end());
        }
        case BT_Type::INTEGER_ARRAY:
        case BT_Type::FLOAT_ARRAY:
            throw std::runtime_error("decodeValue() called on array type; use BT_UniformArray instead");
        default:
            throw std::runtime_error("Unsupported or unimplemented BT_Type in decodeValue");
    }
}
inline int BT_Reference::size() {
if (_pointer.is_null()) return 0;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (type == BT_Type::INTEGER || type == BT_Type::FLOAT) {
return 1 + 4;
} else if (type == BT_Type::STRING) {
int length = _table->_file->readInt(4);
return 1 + 4 + length;
} else if (type == BT_Type::ADDRESS_TABLE) {
int count = _table->_file->readInt(4);
return 1 + 4 + count * (8 + BT_Type_size(BT_Type::POINTER));
} else {
throw std::runtime_error("Unsupported BT_Type for size()");
}
}
// --- BT_UniformArray Implementation ---
inline int BT_UniformArray::length() {
if (_pointer.is_null()) return 0;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (!BT_Type_is_array(type)) throw std::runtime_error("Not an array");
return _table->_file->readInt(4);
}
// Decodes the element at `index`.
//
// The element stride is derived from the type tag of the first element; all
// elements are assumed to share that type.
//
// Throws std::runtime_error for a null pointer, a non-array target, or when
// the element type has no fixed size (the stride would be 0 and every index
// would alias element 0), and std::out_of_range for an invalid index.
inline BT_Value BT_UniformArray::operator[](int index) {
    if (_pointer.is_null()) throw std::runtime_error("Null pointer");
    const int len = length();
    if (index < 0 || index >= len) throw std::out_of_range("Index out of range");

    // Elements start after the array's 1-byte tag and 4-byte length.
    const int64_t firstElement = _pointer.address + 1 + 4;
    _table->_file->setPosition(firstElement);
    const BT_Type type = BT_Type_from_id(_table->_file->readByte());

    const int payloadSize = BT_Type_size(type);
    if (payloadSize == -1) {
        throw std::runtime_error("Variable size types not supported in uniform arrays");
    }

    // 64-bit arithmetic: index * stride can exceed the range of int.
    const int64_t itemOffset = static_cast<int64_t>(index) * (1 + payloadSize);
    BT_Reference itemRef(_table, BT_Pointer(firstElement + itemOffset));
    return itemRef.decodeValue();
}
inline void BT_UniformArray::set(int index, const BT_Value& value) {
if (_pointer.is_null()) throw std::runtime_error("Null pointer");
int len = length();
if (index < 0 || index >= len) throw std::out_of_range("Index out of range");
_table->_file->setPosition(_pointer.address + 1 + 4);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (BT_Type_size(type) == -1) throw std::runtime_error("Variable size types not supported in uniform arrays");
// Type check omitted for brevity
int itemOffset = index * (1 + BT_Type_size(type));
BT_Pointer itemPointer((_pointer.address + 1 + 4) + itemOffset);
std::vector<uint8_t> valueBuffer = encodeValue(value);
_table->_file->setPosition(itemPointer.address);
_table->_file->write(valueBuffer);
}
inline void BT_UniformArray::add(const BT_Value& value) {
addAll(std::vector<BT_Value>{value});
}
inline void BT_UniformArray::addAll(const std::vector<BT_Value>& values) {
// Read current array type and length
int oldLen = length();
BT_Type type = elementType();
if (values.empty()) return;
// Validate all new values are of the correct type
for (size_t i = 0; i < values.size(); i++) {
BT_Type newValueType;
if (std::holds_alternative<int>(values[i])) newValueType = BT_Type::INTEGER;
else if (std::holds_alternative<double>(values[i])) newValueType = BT_Type::FLOAT;
else throw std::runtime_error("Type mismatch or unsupported type in addAll");
if (newValueType != type) {
throw std::runtime_error("Type mismatch in addAll: expected " + std::to_string((int)type) + ", got " + std::to_string((int)newValueType));
}
}
// Read the full array buffer
int elemSize = 1 + BT_Type_size(type);
int oldBufferSize = 1 + 4 + oldLen * elemSize;
_table->_file->setPosition(_pointer.address);
std::vector<uint8_t> fullBuffer = _table->_file->read(oldBufferSize);
// Encode new values and append
for (const auto& v : values) {
std::vector<uint8_t> enc = encodeValue(v);
fullBuffer.insert(fullBuffer.end(), enc.begin(), enc.end());
}
// Update length in buffer
int newLen = oldLen + (int)values.size();
for (int i = 0; i < 4; ++i) fullBuffer[1 + i] = (newLen >> (i * 8)) & 0xFF;
// Append new buffer to file (simulate alloc)
_table->_file->setPosition(_table->_file->length());
int64_t newAddress = _table->_file->length();
_table->_file->write(fullBuffer);
// Update address table (in-memory only)
for (auto& kv : _table->_addressTable) {
if (kv.second == _pointer) {
kv.second = BT_Pointer(newAddress);
}
}
_pointer = BT_Pointer(newAddress);
}
inline int BT_UniformArray::size() {
int len = length();
if (len == 0) return 1 + 4;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (BT_Type_is_array(type)) {
return 1 + 4 + len * (1 + BT_Type_size(elementType()));
}
return BT_Reference::size();
}
// Type tag of the array's elements.
//
// For a non-empty array this is the tag stored with the first element. An
// empty array has no element to inspect, so the type is derived from the
// array's own tag: FLOAT for FLOAT_ARRAY, INTEGER for INTEGER_ARRAY. Without
// that, an empty float array would report INTEGER and reject every double
// passed to addAll(). A null pointer reports INTEGER.
//
// Throws std::runtime_error (from length()) if the target is not an array.
inline BT_Type BT_UniformArray::elementType() {
    if (length() == 0) {
        if (_pointer.is_null()) return BT_Type::INTEGER; // Nothing stored to inspect
        _table->_file->setPosition(_pointer.address);
        const BT_Type arrayType = BT_Type_from_id(_table->_file->readByte());
        return arrayType == BT_Type::FLOAT_ARRAY ? BT_Type::FLOAT : BT_Type::INTEGER;
    }
    // Skip the array's 1-byte tag and 4-byte length to reach the first element.
    _table->_file->setPosition(_pointer.address + 1 + 4);
    const int typeId = _table->_file->readByte();
    return BT_Type_from_id(typeId);
}
inline std::string BT_UniformArray::to_string(bool readValues) {
std::ostringstream oss;
int len = length();
if (!readValues) {
oss << "Uniform Array of length " << len;
return oss.str();
}
oss << "Uniform Array: [";
for (int i = 0; i < len; ++i) {
if (i > 0) oss << ", ";
BT_Value v = (*this)[i];
if (std::holds_alternative<int>(v)) oss << std::get<int>(v);
else if (std::holds_alternative<double>(v)) oss << std::get<double>(v);
else if (std::holds_alternative<std::string>(v)) oss << '"' << std::get<std::string>(v) << '"';
else oss << "?";
}
oss << "]";
return oss.str();
}
// --- Free List Encoding/Decoding ---
inline std::vector<uint8_t> encodeFreeList(const std::vector<BT_FreeListEntry>& freeList) {
std::vector<uint8_t> buffer;
for (const auto& entry : freeList) {
// Pointer (8 bytes, little-endian)
int64_t addr = entry.pointer.address;
for (int i = 0; i < 8; ++i) buffer.push_back((addr >> (i * 8)) & 0xFF);
// Size (4 bytes, little-endian)
int size = entry.size;
for (int i = 0; i < 4; ++i) buffer.push_back((size >> (i * 8)) & 0xFF);
}
// Entry count (4 bytes, little-endian)
int count = static_cast<int>(freeList.size());
for (int i = 0; i < 4; ++i) buffer.push_back((count >> (i * 8)) & 0xFF);
return buffer;
}
inline std::vector<BT_FreeListEntry> decodeFreeList(const std::vector<uint8_t>& buffer) {
std::vector<BT_FreeListEntry> freeList;
if (buffer.size() < 4) return freeList;
int count = 0;
for (int i = 0; i < 4; ++i) count |= (buffer[buffer.size() - 4 + i] << (i * 8));
if (count == 0) return freeList;
int entrySize = 8 + 4;
int freeListSize = count * entrySize;
if (buffer.size() < static_cast<size_t>(freeListSize + 4)) return freeList;
for (int i = 0; i < count; ++i) {
int offset = i * entrySize;
int64_t addr = 0;
for (int j = 0; j < 8; ++j) addr |= (static_cast<int64_t>(buffer[offset + j]) << (j * 8));
int size = 0;
for (int j = 0; j < 4; ++j) size |= (buffer[offset + 8 + j] << (j * 8));
freeList.emplace_back(BT_Pointer(addr), size);
}
return freeList;
}
// Helper to print BT_Value variant
inline std::string printBTValue(const BT_Value& v) {
if (std::holds_alternative<int>(v)) return std::to_string(std::get<int>(v));
if (std::holds_alternative<double>(v)) return std::to_string(std::get<double>(v));
if (std::holds_alternative<std::string>(v)) return '"' + std::get<std::string>(v) + '"';
if (std::holds_alternative<std::vector<int>>(v)) {
const auto& arr = std::get<std::vector<int>>(v);
std::ostringstream oss; oss << "[";
for (size_t i = 0; i < arr.size(); ++i) { if (i) oss << ", "; oss << arr[i]; }
oss << "]"; return oss.str();
}
if (std::holds_alternative<std::vector<double>>(v)) {
const auto& arr = std::get<std::vector<double>>(v);
std::ostringstream oss; oss << "[";
for (size_t i = 0; i < arr.size(); ++i) { if (i) oss << ", "; oss << arr[i]; }
oss << "]"; return oss.str();
}
return "<unknown>";
}
// --- Main function for testing ---
#ifdef BINARY_TABLE_MAIN
#include <iostream>
#include <fstream>
// Helper to get type from pointer
// Reads the type tag stored at `ptr` in `table`'s file.
// Throws std::runtime_error for a null pointer and std::invalid_argument
// (from BT_Type_from_id) for an unknown tag.
BT_Type get_type(BinaryTable& table, const BT_Pointer& ptr) {
    if (ptr.is_null()) {
        throw std::runtime_error("Null pointer");
    }
    BT_File& io = *table._file;
    io.setPosition(ptr.address);
    const int tag = io.readByte();
    return BT_Type_from_id(tag);
}
int main() {
const std::string filename = "example.bin";
std::remove(filename.c_str());
std::ofstream(filename).close();
BinaryTable table(filename);
std::cout << "File dump:" << std::endl;
{
std::ifstream f(filename, std::ios::binary);
std::vector<uint8_t> data((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
std::cout << binaryDump(data) << std::endl;
std::cout << "File size: " << data.size() << " bytes\n" << std::endl;
}
table.set("int_array", std::vector<int>{6, 3, 9, 2, 5});
table.set("float_array", std::vector<double>{1.5, 2.5, 3.5});
table.set("empty", std::vector<int>{});
// Modify arrays
auto int_ptr = table.getPointer("int_array");
auto float_ptr = table.getPointer("float_array");
auto empty_ptr = table.getPointer("empty");
BT_Type int_type = get_type(table, int_ptr);
BT_Type float_type = get_type(table, float_ptr);
BT_Type empty_type = get_type(table, empty_ptr);
if (BT_Type_is_array(int_type)) {
BT_UniformArray intArr(&table, int_ptr);
intArr.set(0, 1);
intArr.add(10);
intArr.addAll({420, 69, 1337, 1738});
std::cout << "int_array pointer: " << intArr._pointer.to_string() << std::endl;
std::cout << "Readback1: " << intArr.to_string(true) << std::endl;
} else {
std::cout << "int_array is not a BT_UniformArray!\n";
}
if (BT_Type_is_array(float_type)) {
BT_UniformArray floatArr(&table, float_ptr);
floatArr.set(1, 4.5);
floatArr.add(5.5);
floatArr.addAll({6.5, 7.5, 8.5});
std::cout << "float_array pointer: " << floatArr._pointer.to_string() << std::endl;
std::cout << "Readback2: " << floatArr.to_string(true) << std::endl;
} else {
std::cout << "float_array is not a BT_UniformArray!\n";
}
if (BT_Type_is_array(empty_type)) {
BT_UniformArray emptyArr(&table, empty_ptr);
std::cout << "Readback3: " << emptyArr.to_string(true) << std::endl;
} else {
std::cout << "empty is not a BT_UniformArray!\n";
}
std::cout << "\nFile dump:" << std::endl;
{
std::ifstream f(filename, std::ios::binary);
std::vector<uint8_t> data((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
std::cout << binaryDump(data) << std::endl;
std::cout << "File size: " << data.size() << " bytes" << std::endl;
}
return 0;
}
#endif