// Files
// SweepStore/cpp/binary_table.h
//
// 504 lines
// 18 KiB
// C++

#pragma once
#include <cstdint>
#include <vector>
#include <string>
#include <fstream>
#include <map>
#include <variant>
#include <stdexcept>
#include <cstring>
#include <algorithm>
#include <sstream>
#include <iomanip>
// --- BT_Type Enum ---
/// Type tag written as the first byte of every encoded value (see encodeValue)
/// and validated on read by BT_Type_from_id. The numeric ids are part of the
/// stored encoding, which is why each one is spelled out explicitly.
enum class BT_Type : int {
POINTER = 0, // fixed size: BT_Type_size reports 8
ADDRESS_TABLE = 1, // variable size: BT_Type_size reports -1
INTEGER = 2, // fixed size: BT_Type_size reports 4
FLOAT = 3, // fixed size: BT_Type_size reports 4
STRING = 4, // variable size: BT_Type_size reports -1
INTEGER_ARRAY = 5, // variable size: BT_Type_size reports -1
FLOAT_ARRAY = 6 // variable size: BT_Type_size reports -1
};
/// Reports the fixed size, in bytes, associated with a type tag.
///
/// @param t the type to query.
/// @return 8 for POINTER, 4 for INTEGER and FLOAT, and -1 for every type whose
///         size is not fixed (ADDRESS_TABLE, STRING and both array types).
/// @throws std::invalid_argument if `t` is not one of the known enumerators.
inline int BT_Type_size(BT_Type t) {
    switch (t) {
        case BT_Type::POINTER:
            return 8;

        case BT_Type::INTEGER:
        case BT_Type::FLOAT:
            return 4;

        case BT_Type::ADDRESS_TABLE:
        case BT_Type::STRING:
        case BT_Type::INTEGER_ARRAY:
        case BT_Type::FLOAT_ARRAY:
            return -1;
    }
    // Reached only for values that are not named enumerators.
    throw std::invalid_argument("Invalid BT_Type");
}
/// Tells whether a type tag denotes one of the two array types.
///
/// @param t the type to classify.
/// @return true for INTEGER_ARRAY and FLOAT_ARRAY, false for everything else.
inline bool BT_Type_is_array(BT_Type t) {
    switch (t) {
        case BT_Type::INTEGER_ARRAY:
        case BT_Type::FLOAT_ARRAY:
            return true;
        default:
            return false;
    }
}
/// Converts a raw numeric id into its BT_Type enumerator.
///
/// @param id the numeric id, e.g. a tag byte read from a file.
/// @return the enumerator whose value equals `id`.
/// @throws std::invalid_argument if `id` lies outside the range of known ids.
inline BT_Type BT_Type_from_id(int id) {
    const int lowestId = static_cast<int>(BT_Type::POINTER);
    const int highestId = static_cast<int>(BT_Type::FLOAT_ARRAY);
    const bool known = (lowestId <= id) && (id <= highestId);
    if (!known) {
        throw std::invalid_argument("Invalid BT_Type id");
    }
    return static_cast<BT_Type>(id);
}
// --- FNV-1a Hash ---
/// Computes the 64-bit FNV-1a hash of a string.
///
/// Every byte is XORed into the running state, which is then multiplied by
/// the FNV prime (unsigned arithmetic, so the product wraps modulo 2^64).
///
/// @param str the bytes to hash.
/// @return the 64-bit hash reinterpreted as a signed integer.
inline int64_t bt_hash(const std::string& str) {
    constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ULL;
    constexpr uint64_t kPrime = 0x100000001b3ULL;

    uint64_t state = kOffsetBasis;
    for (std::size_t i = 0; i < str.size(); ++i) {
        const auto byte = static_cast<unsigned char>(str[i]);
        state = (state ^ byte) * kPrime;
    }
    return static_cast<int64_t>(state);
}
// --- BT_Pointer ---
/// A 64-bit address wrapper. The address -1 is the null value.
struct BT_Pointer {
    int64_t address;

    /// Wraps `addr`; a default-constructed pointer is null (-1).
    BT_Pointer(int64_t addr = -1) : address{addr} {}

    /// True when this pointer holds the null address (-1).
    bool is_null() const { return address == -1; }

    /// Pointers compare by address only.
    bool operator==(const BT_Pointer& other) const { return other.address == address; }
    bool operator!=(const BT_Pointer& other) const { return address != other.address; }

    /// Formats the address as "0x<hex> (<decimal>)".
    std::string to_string() const {
        std::ostringstream text;
        text << "0x" << std::hex << address;
        text << " (" << std::dec << address << ")";
        return text.str();
    }
};

/// Shared null pointer constant.
const BT_Pointer BT_Null{-1};
// --- BT_Value Type ---
// Value type accepted by encodeValue: an int, a double, a string, or a
// vector of ints / doubles. A default-constructed BT_Value holds int 0.
// NOTE(review): BT_Reference::decodeValue returns a BT_UniformArray and
// printBTValue / main test for std::holds_alternative<BT_UniformArray>, but
// BT_UniformArray is not an alternative of this variant — confirm whether it
// is meant to be added here (it would have to be a complete type first).
using BT_Value = std::variant<int, double, std::string, std::vector<int>, std::vector<double>>;
// --- encodeValue ---
inline std::vector<uint8_t> encodeValue(const BT_Value& value) {
std::vector<uint8_t> buffer;
if (std::holds_alternative<int>(value)) {
buffer.push_back(static_cast<uint8_t>(BT_Type::INTEGER));
int v = std::get<int>(value);
for (int i = 0; i < 4; ++i) buffer.push_back((v >> (i * 8)) & 0xFF);
} else if (std::holds_alternative<double>(value)) {
buffer.push_back(static_cast<uint8_t>(BT_Type::FLOAT));
double v = std::get<double>(value);
uint32_t asInt;
std::memcpy(&asInt, &v, 4); // Only use 4 bytes (float32)
for (int i = 0; i < 4; ++i) buffer.push_back((asInt >> (i * 8)) & 0xFF);
} else if (std::holds_alternative<std::string>(value)) {
buffer.push_back(static_cast<uint8_t>(BT_Type::STRING));
const std::string& str = std::get<std::string>(value);
int len = static_cast<int>(str.size());
for (int i = 0; i < 4; ++i) buffer.push_back((len >> (i * 8)) & 0xFF);
buffer.insert(buffer.end(), str.begin(), str.end());
} else if (std::holds_alternative<std::vector<int>>(value)) {
buffer.push_back(static_cast<uint8_t>(BT_Type::INTEGER_ARRAY));
const auto& arr = std::get<std::vector<int>>(value);
int len = static_cast<int>(arr.size());
for (int i = 0; i < 4; ++i) buffer.push_back((len >> (i * 8)) & 0xFF);
for (int v : arr) {
auto enc = encodeValue(v);
buffer.insert(buffer.end(), enc.begin(), enc.end());
}
} else if (std::holds_alternative<std::vector<double>>(value)) {
buffer.push_back(static_cast<uint8_t>(BT_Type::FLOAT_ARRAY));
const auto& arr = std::get<std::vector<double>>(value);
int len = static_cast<int>(arr.size());
for (int i = 0; i < 4; ++i) buffer.push_back((len >> (i * 8)) & 0xFF);
for (double v : arr) {
auto enc = encodeValue(v);
buffer.insert(buffer.end(), enc.begin(), enc.end());
}
} else {
throw std::invalid_argument("Unsupported BT_Value type");
}
return buffer;
}
// --- BT_FreeListEntry ---
struct BT_FreeListEntry {
BT_Pointer pointer;
int size;
BT_FreeListEntry(BT_Pointer p, int s) : pointer(p), size(s) {}
};
// --- File I/O Helpers ---
/// Binary read/write helper around a std::fstream.
///
/// Integers, pointers and floats are stored least-significant byte first.
/// Reads that run past the end of the file do not throw: the bytes that could
/// not be read are returned as zero. setPosition() and length() reset the
/// stream's error state so that such a short read does not make every later
/// seek, read and write silently fail.
class BT_File {
public:
    std::fstream file;

    /// Opens `path` for binary reading and writing. If that fails (for
    /// example because the file does not exist yet) an empty file is created
    /// first and the open is retried.
    /// @throws std::runtime_error if the file still cannot be opened.
    BT_File(const std::string& path) {
        const auto readWrite = std::ios::in | std::ios::out | std::ios::binary;
        file.open(path, readWrite);
        if (!file.is_open()) {
            // Try to create the file if it doesn't exist
            file.open(path, std::ios::out | std::ios::binary);
            file.close();
            file.open(path, readWrite);
        }
        if (!file.is_open()) throw std::runtime_error("Failed to open file");
    }

    /// Moves both the read and the write position to `pos`.
    void setPosition(int64_t pos) {
        // A read that hit end-of-file leaves failbit set, and seeking is a
        // no-op on a failed stream; reset the state before seeking.
        file.clear();
        file.seekp(pos);
        file.seekg(pos);
    }

    /// Returns the file length in bytes and restores the previous position.
    int64_t length() {
        file.clear();  // tellg() reports -1 while the stream is in a failed state
        const auto cur = file.tellg();
        file.seekg(0, std::ios::end);
        const int64_t len = file.tellg();
        file.seekg(cur);
        file.seekp(cur);
        return len;
    }

    /// Reads `size` bytes from the current position. `size` must not be
    /// negative. Bytes beyond the end of the file are returned as zero.
    std::vector<uint8_t> read(int size) {
        std::vector<uint8_t> buf(size);
        file.read(reinterpret_cast<char*>(buf.data()), size);
        return buf;
    }

    /// Writes all bytes of `buf` at the current position.
    void write(const std::vector<uint8_t>& buf) {
        file.write(reinterpret_cast<const char*>(buf.data()),
                   static_cast<std::streamsize>(buf.size()));
    }

    /// Reads a `size`-byte little-endian two's-complement integer.
    ///
    /// All `size` bytes are consumed; when `size` exceeds 4 only the low four
    /// bytes contribute to the result. The arithmetic is done in 64 bits: the
    /// previous `1 << (size * 8)` was an out-of-range shift for the default
    /// size of 4 and could mis-sign-extend negative values.
    int readInt(int size = 4) {
        const std::vector<uint8_t> buf = read(size);
        const int used = size < 4 ? size : 4;
        int64_t value = static_cast<int64_t>(fromLittleEndian(buf, used));
        if (used > 0) {
            // Sign extend if MSB is set
            const int64_t signBit = int64_t{1} << (used * 8 - 1);
            if (value & signBit) {
                value -= int64_t{1} << (used * 8);
            }
        }
        return static_cast<int>(value);
    }

    /// Writes the low `size` bytes of `value`, least-significant byte first.
    /// Bytes beyond the fourth are sign extension.
    void writeInt(int value, int size = 4) {
        const auto extended = static_cast<uint64_t>(static_cast<int64_t>(value));
        write(toLittleEndian(extended, size));
    }

    /// Reads an 8-byte little-endian address.
    BT_Pointer readPointer() {
        const std::vector<uint8_t> buf = read(8);
        // Assemble unsigned: shifting set bits into the sign bit of a signed
        // value is not portable before C++20.
        return BT_Pointer(static_cast<int64_t>(fromLittleEndian(buf, 8)));
    }

    /// Writes the pointer's address as 8 little-endian bytes.
    void writePointer(const BT_Pointer& ptr) {
        write(toLittleEndian(static_cast<uint64_t>(ptr.address), 8));
    }

    /// Reads a 4-byte float stored as little-endian bits (the layout
    /// encodeValue produces for FLOAT values).
    float readFloat32() {
        static_assert(sizeof(float) == 4, "readFloat32 requires a 4-byte float");
        const auto bits = static_cast<uint32_t>(fromLittleEndian(read(4), 4));
        float val;
        std::memcpy(&val, &bits, sizeof(val));
        return val;
    }

    /// Writes a 4-byte float as little-endian bits.
    void writeFloat32(float value) {
        static_assert(sizeof(float) == 4, "writeFloat32 requires a 4-byte float");
        uint32_t bits = 0;
        std::memcpy(&bits, &value, sizeof(bits));
        write(toLittleEndian(bits, 4));
    }

    /// Reads an 8-byte double stored as little-endian bits.
    double readFloat64() {
        static_assert(sizeof(double) == 8, "readFloat64 requires an 8-byte double");
        const uint64_t bits = fromLittleEndian(read(8), 8);
        double val;
        std::memcpy(&val, &bits, sizeof(val));
        return val;
    }

    /// Writes an 8-byte double as little-endian bits.
    void writeFloat64(double value) {
        static_assert(sizeof(double) == 8, "writeFloat64 requires an 8-byte double");
        uint64_t bits = 0;
        std::memcpy(&bits, &value, sizeof(bits));
        write(toLittleEndian(bits, 8));
    }

    /// Reads one byte; yields 0 when nothing could be read.
    uint8_t readByte() {
        char c = 0;  // initialised so a failed read does not return an indeterminate value
        file.read(&c, 1);
        return static_cast<uint8_t>(c);
    }

    /// Writes one byte at the current position.
    void writeByte(uint8_t b) {
        const char c = static_cast<char>(b);
        file.write(&c, 1);
    }

private:
    /// Combines the first `count` bytes (at most 8) into an unsigned integer,
    /// treating bytes[0] as the least significant byte.
    static uint64_t fromLittleEndian(const std::vector<uint8_t>& bytes, int count) {
        uint64_t bits = 0;
        for (int i = count - 1; i >= 0; --i) {
            bits = (bits << 8) | bytes[i];
        }
        return bits;
    }

    /// Splits `bits` into `count` bytes, least significant first. Positions
    /// past the eighth byte repeat the most significant byte of `bits`.
    static std::vector<uint8_t> toLittleEndian(uint64_t bits, int count) {
        std::vector<uint8_t> bytes(count);
        for (int i = 0; i < count; ++i) {
            const int shift = i < 8 ? i * 8 : 56;
            bytes[i] = static_cast<uint8_t>((bits >> shift) & 0xFF);
        }
        return bytes;
    }
};
// --- BT_Reference ---
class BinaryTable; // Forward declaration

/// Handle to an encoded value: a table plus the address of the value's first
/// byte inside that table's file. The handle does not own the table.
class BT_Reference {
public:
    BinaryTable* _table;
    BT_Pointer _pointer;

    BT_Reference(BinaryTable* table, BT_Pointer pointer)
        : _table(table), _pointer(pointer) {}

    // BT_UniformArray derives from this class and declares `size() override`,
    // which only compiles when size() is virtual here. A polymorphic base also
    // needs a virtual destructor; copying stays enabled because references
    // are passed around by value.
    virtual ~BT_Reference() = default;
    BT_Reference(const BT_Reference&) = default;
    BT_Reference& operator=(const BT_Reference&) = default;

    /// Reads and decodes the value this reference points at.
    BT_Value decodeValue();

    /// Encoded size in bytes of the referenced value, including its tag byte.
    virtual int size();

    /// Textual form of the underlying pointer.
    std::string to_string() const { return _pointer.to_string(); }
};
// --- BT_UniformArray ---
/// Reference to an encoded INTEGER_ARRAY / FLOAT_ARRAY.
/// As read by the member functions below, the stored layout is:
/// [1-byte array tag][4-byte element count][elements], where every element is
/// a 1-byte type tag followed by a fixed-size payload.
class BT_UniformArray : public BT_Reference {
public:
using BT_Reference::BT_Reference;
int length(); // element count from the array header; 0 for a null pointer
BT_Value operator[](int index); // decodes the element at `index`; throws std::out_of_range outside [0, length())
void set(int index, const BT_Value& value); // overwrites the element at `index` in place
void add(const BT_Value& value); // declared only; no definition in the visible part of this header
void addAll(const std::vector<BT_Value>& values); // declared only; no definition in the visible part of this header
int size() override; // NOTE(review): `override` only compiles if BT_Reference::size() is declared virtual
BT_Type elementType(); // tag of the first element; BT_Type::INTEGER when the array is empty
std::string to_string(bool readValues = false); // length summary, or the full contents when readValues is true
};
// --- BT_Reference Implementation ---
#include <memory>
class BinaryTable {
public:
std::unique_ptr<BT_File> _file;
BinaryTable(const std::string& path) : _file(std::make_unique<BT_File>(path)) {}
// ...other members will be added later...
};
/// Reads the value this reference points at and decodes it.
///
/// A null reference yields a default-constructed BT_Value. Otherwise the tag
/// byte at the pointer selects the decoding: INTEGER reads 4 bytes, FLOAT
/// reads a 4-byte float widened to double, STRING reads a 4-byte length
/// followed by that many bytes, and array tags yield a BT_UniformArray that
/// refers to the same address.
///
/// NOTE(review): BT_UniformArray is not an alternative of BT_Value as declared
/// in this header, so the array branch needs the variant extended — confirm.
///
/// @throws std::invalid_argument (from BT_Type_from_id) for an unknown tag.
/// @throws std::runtime_error for a known tag this function does not decode.
inline BT_Value BT_Reference::decodeValue() {
    if (_pointer.is_null()) {
        return BT_Value{};
    }

    auto& io = *_table->_file;
    io.setPosition(_pointer.address);
    const BT_Type tag = BT_Type_from_id(io.readByte());

    switch (tag) {
        case BT_Type::INTEGER:
            return io.readInt(4);

        case BT_Type::FLOAT:
            return static_cast<double>(io.readFloat32());

        case BT_Type::STRING: {
            const int byteCount = io.readInt(4);
            const std::vector<uint8_t> raw = io.read(byteCount);
            return std::string(raw.begin(), raw.end());
        }

        case BT_Type::INTEGER_ARRAY:
        case BT_Type::FLOAT_ARRAY:
            // Return a BT_UniformArray wrapper
            return BT_UniformArray(_table, _pointer);

        default:
            throw std::runtime_error("Unsupported or unimplemented BT_Type in decodeValue");
    }
}
inline int BT_Reference::size() {
if (_pointer.is_null()) return 0;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (type == BT_Type::INTEGER || type == BT_Type::FLOAT) {
return 1 + 4;
} else if (type == BT_Type::STRING) {
int length = _table->_file->readInt(4);
return 1 + 4 + length;
} else if (type == BT_Type::ADDRESS_TABLE) {
int count = _table->_file->readInt(4);
return 1 + 4 + count * (8 + BT_Type_size(BT_Type::POINTER));
} else {
throw std::runtime_error("Unsupported BT_Type for size()");
}
}
// --- BT_UniformArray Implementation ---
inline int BT_UniformArray::length() {
if (_pointer.is_null()) return 0;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (!BT_Type_is_array(type)) throw std::runtime_error("Not an array");
return _table->_file->readInt(4);
}
/// Decodes the element at `index`.
///
/// Elements start right after the 1-byte tag and 4-byte count and are all the
/// same size (1 tag byte + fixed payload), so the element's address is
/// computed from the first element's type.
///
/// @param index zero-based element index.
/// @return the decoded element.
/// @throws std::runtime_error if the reference is null, or if the stored
///         element type has no fixed size. Such a type would give a stride of
///         zero and silently return element 0 for every index; set() rejects
///         the same case.
/// @throws std::out_of_range if `index` is outside [0, length()).
inline BT_Value BT_UniformArray::operator[](int index) {
    if (_pointer.is_null()) throw std::runtime_error("Null pointer");
    const int len = length();
    if (index < 0 || index >= len) throw std::out_of_range("Index out of range");

    const int64_t firstItem = _pointer.address + 1 + 4;
    _table->_file->setPosition(firstItem);
    const BT_Type type = BT_Type_from_id(_table->_file->readByte());
    const int payloadSize = BT_Type_size(type);
    if (payloadSize == -1) {
        throw std::runtime_error("Variable size types not supported in uniform arrays");
    }

    // 64-bit arithmetic: index * stride can exceed the range of int.
    const int64_t itemOffset = static_cast<int64_t>(index) * (1 + payloadSize);
    BT_Reference itemRef(_table, BT_Pointer(firstItem + itemOffset));
    return itemRef.decodeValue();
}
inline void BT_UniformArray::set(int index, const BT_Value& value) {
if (_pointer.is_null()) throw std::runtime_error("Null pointer");
int len = length();
if (index < 0 || index >= len) throw std::out_of_range("Index out of range");
_table->_file->setPosition(_pointer.address + 1 + 4);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (BT_Type_size(type) == -1) throw std::runtime_error("Variable size types not supported in uniform arrays");
// Type check omitted for brevity
int itemOffset = index * (1 + BT_Type_size(type));
BT_Pointer itemPointer((_pointer.address + 1 + 4) + itemOffset);
std::vector<uint8_t> valueBuffer = encodeValue(value);
_table->_file->setPosition(itemPointer.address);
_table->_file->write(valueBuffer);
}
inline int BT_UniformArray::size() {
int len = length();
if (len == 0) return 1 + 4;
_table->_file->setPosition(_pointer.address);
int typeId = _table->_file->readByte();
BT_Type type = BT_Type_from_id(typeId);
if (BT_Type_is_array(type)) {
return 1 + 4 + len * (1 + BT_Type_size(elementType()));
}
return BT_Reference::size();
}
/// Reports the type of the array's elements, taken from the first element's
/// tag byte (located after the 1-byte array tag and the 4-byte count).
///
/// @return the first element's type, or BT_Type::INTEGER as a fallback when
///         the array is empty.
/// @throws std::invalid_argument (from BT_Type_from_id) for an unknown tag.
inline BT_Type BT_UniformArray::elementType() {
    if (length() != 0) {
        auto& io = *_table->_file;
        io.setPosition(_pointer.address + 1 + 4);
        return BT_Type_from_id(io.readByte());
    }
    return BT_Type::INTEGER; // Default/fallback
}
inline std::string BT_UniformArray::to_string(bool readValues) {
std::ostringstream oss;
int len = length();
if (!readValues) {
oss << "Uniform Array of length " << len;
return oss.str();
}
oss << "Uniform Array: [";
for (int i = 0; i < len; ++i) {
if (i > 0) oss << ", ";
BT_Value v = (*this)[i];
if (std::holds_alternative<int>(v)) oss << std::get<int>(v);
else if (std::holds_alternative<double>(v)) oss << std::get<double>(v);
else if (std::holds_alternative<std::string>(v)) oss << '"' << std::get<std::string>(v) << '"';
else oss << "?";
}
oss << "]";
return oss.str();
}
// --- Free List Encoding/Decoding ---
inline std::vector<uint8_t> encodeFreeList(const std::vector<BT_FreeListEntry>& freeList) {
std::vector<uint8_t> buffer;
for (const auto& entry : freeList) {
// Pointer (8 bytes, little-endian)
int64_t addr = entry.pointer.address;
for (int i = 0; i < 8; ++i) buffer.push_back((addr >> (i * 8)) & 0xFF);
// Size (4 bytes, little-endian)
int size = entry.size;
for (int i = 0; i < 4; ++i) buffer.push_back((size >> (i * 8)) & 0xFF);
}
// Entry count (4 bytes, little-endian)
int count = static_cast<int>(freeList.size());
for (int i = 0; i < 4; ++i) buffer.push_back((count >> (i * 8)) & 0xFF);
return buffer;
}
inline std::vector<BT_FreeListEntry> decodeFreeList(const std::vector<uint8_t>& buffer) {
std::vector<BT_FreeListEntry> freeList;
if (buffer.size() < 4) return freeList;
int count = 0;
for (int i = 0; i < 4; ++i) count |= (buffer[buffer.size() - 4 + i] << (i * 8));
if (count == 0) return freeList;
int entrySize = 8 + 4;
int freeListSize = count * entrySize;
if (buffer.size() < static_cast<size_t>(freeListSize + 4)) return freeList;
for (int i = 0; i < count; ++i) {
int offset = i * entrySize;
int64_t addr = 0;
for (int j = 0; j < 8; ++j) addr |= (static_cast<int64_t>(buffer[offset + j]) << (j * 8));
int size = 0;
for (int j = 0; j < 4; ++j) size |= (buffer[offset + 8 + j] << (j * 8));
freeList.emplace_back(BT_Pointer(addr), size);
}
return freeList;
}
// Helper to print BT_Value variant
inline std::string printBTValue(const BT_Value& v) {
if (std::holds_alternative<int>(v)) return std::to_string(std::get<int>(v));
if (std::holds_alternative<double>(v)) return std::to_string(std::get<double>(v));
if (std::holds_alternative<std::string>(v)) return '"' + std::get<std::string>(v) + '"';
if (std::holds_alternative<std::vector<int>>(v)) {
const auto& arr = std::get<std::vector<int>>(v);
std::ostringstream oss; oss << "[";
for (size_t i = 0; i < arr.size(); ++i) { if (i) oss << ", "; oss << arr[i]; }
oss << "]"; return oss.str();
}
if (std::holds_alternative<std::vector<double>>(v)) {
const auto& arr = std::get<std::vector<double>>(v);
std::ostringstream oss; oss << "[";
for (size_t i = 0; i < arr.size(); ++i) { if (i) oss << ", "; oss << arr[i]; }
oss << "]"; return oss.str();
}
if (std::holds_alternative<BT_UniformArray>(v)) return std::get<BT_UniformArray>(v).to_string(true);
return "<unknown>";
}
// --- Main function for testing ---
#ifdef BINARY_TABLE_MAIN
#include <iostream>
#include <fstream>
// Manual smoke test, compiled only when BINARY_TABLE_MAIN is defined.
// It recreates "example.bin", stores three arrays, modifies two of them through
// BT_UniformArray handles and prints the file contents before and after.
// NOTE(review): BinaryTable::set / BinaryTable::get, BT_UniformArray::add /
// addAll and binaryDump are used here but are not defined in the visible part
// of this header; the holds_alternative<BT_UniformArray> checks also require
// BT_UniformArray to be an alternative of the value type returned by get().
int main() {
const std::string filename = "example.bin";
// Start from an empty file: delete any previous run's output and recreate it.
std::remove(filename.c_str());
std::ofstream(filename).close();
BinaryTable table(filename);
// Dump the file as it looks right after the table has been opened.
std::cout << "File dump:" << std::endl;
{
std::ifstream f(filename, std::ios::binary);
std::vector<uint8_t> data((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
std::cout << binaryDump(data) << std::endl;
std::cout << "File size: " << data.size() << " bytes\n" << std::endl;
}
// Store one int array, one float array and one empty array.
table.set("int_array", std::vector<int>{6, 3, 9, 2, 5});
table.set("float_array", std::vector<double>{1.5, 2.5, 3.5});
table.set("empty", std::vector<int>{});
// Modify arrays
auto v1 = table.get("int_array");
auto v2 = table.get("float_array");
if (std::holds_alternative<BT_UniformArray>(v1)) {
BT_UniformArray intArr = std::get<BT_UniformArray>(v1);
// Overwrite element 0, then append one value and a batch of values.
intArr.set(0, 1);
intArr.add(10);
intArr.addAll({420, 69, 1337, 1738});
std::cout << "int_array pointer: " << intArr._pointer.to_string() << std::endl;
std::cout << "Readback1: " << intArr.to_string(true) << std::endl;
} else {
std::cout << "int_array is not a BT_UniformArray!\n";
}
if (std::holds_alternative<BT_UniformArray>(v2)) {
BT_UniformArray floatArr = std::get<BT_UniformArray>(v2);
// Same sequence of operations for the float array.
floatArr.set(1, 4.5);
floatArr.add(5.5);
floatArr.addAll({6.5, 7.5, 8.5});
std::cout << "float_array pointer: " << floatArr._pointer.to_string() << std::endl;
std::cout << "Readback2: " << floatArr.to_string(true) << std::endl;
} else {
std::cout << "float_array is not a BT_UniformArray!\n";
}
// The empty array is only read back, never modified.
auto v3 = table.get("empty");
if (std::holds_alternative<BT_UniformArray>(v3)) {
BT_UniformArray emptyArr = std::get<BT_UniformArray>(v3);
std::cout << "Readback3: " << emptyArr.to_string(true) << std::endl;
} else {
std::cout << "empty is not a BT_UniformArray!\n";
}
// Dump the file again to show the effect of the modifications.
std::cout << "\nFile dump:" << std::endl;
{
std::ifstream f(filename, std::ios::binary);
std::vector<uint8_t> data((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
std::cout << binaryDump(data) << std::endl;
std::cout << "File size: " << data.size() << " bytes" << std::endl;
}
return 0;
}
#endif