Fixed Windows deadlocks; performance is still poor, though
This commit is contained in:
@@ -4,6 +4,9 @@
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <stdexcept>
|
||||
#include <map>
|
||||
|
||||
#include "sweepstore/utils/timing.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
@@ -21,6 +24,22 @@ public:
|
||||
enum class Mode { Shared, Exclusive };
|
||||
|
||||
private:
|
||||
// Key: file path + offset + length, Value: Mode
|
||||
/// Identifies one locked byte range of one file; used as the ordered key of
/// the per-thread lock registry.
struct LockKey {
    std::string path;   ///< File the range belongs to
    uint64_t offset;    ///< First byte of the range
    uint64_t length;    ///< Size of the range in bytes

    /// Strict weak ordering: path first, then offset, then length.
    bool operator<(const LockKey& other) const {
        const int pathOrder = path.compare(other.path);
        if (pathOrder != 0) {
            return pathOrder < 0;
        }
        if (offset != other.offset) {
            return offset < other.offset;
        }
        return length < other.length;
    }
};
|
||||
|
||||
// Track active locks per thread to prevent self-deadlock
|
||||
static thread_local std::map<LockKey, Mode> activeLocks;
|
||||
|
||||
std::string filePath;
|
||||
uint64_t offset;
|
||||
uint64_t length;
|
||||
@@ -56,6 +75,22 @@ private:
|
||||
}
|
||||
|
||||
void acquire() {
|
||||
LockKey key{filePath, offset, length};
|
||||
|
||||
// Check if we already hold a lock on this region
|
||||
auto it = activeLocks.find(key);
|
||||
if (it != activeLocks.end()) {
|
||||
// If we're trying to upgrade from shared to exclusive, release first
|
||||
if (it->second == Mode::Shared && mode == Mode::Exclusive) {
|
||||
releaseInternal(); // Release the old shared lock
|
||||
activeLocks.erase(it);
|
||||
} else {
|
||||
// Already hold compatible or same lock
|
||||
locked = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
HANDLE handle = getOrOpenHandle(filePath);
|
||||
OVERLAPPED overlapped = {}; // Proper zero-initialization
|
||||
overlapped.Offset = static_cast<DWORD>(offset & 0xFFFFFFFF);
|
||||
@@ -69,9 +104,10 @@ private:
|
||||
throw std::runtime_error("Failed to acquire file lock");
|
||||
}
|
||||
locked = true;
|
||||
activeLocks[key] = mode;
|
||||
}
|
||||
|
||||
void release() {
|
||||
void releaseInternal() {
|
||||
if (locked) {
|
||||
HANDLE handle = getOrOpenHandle(filePath);
|
||||
OVERLAPPED overlapped = {};
|
||||
@@ -85,6 +121,14 @@ private:
|
||||
locked = false;
|
||||
}
|
||||
}
|
||||
|
||||
void release() {
|
||||
if (locked) {
|
||||
LockKey key{filePath, offset, length};
|
||||
releaseInternal();
|
||||
activeLocks.erase(key);
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Thread-local FD cache - each thread has its own FD per file
|
||||
static thread_local std::unordered_map<std::string, int> fdCache;
|
||||
@@ -105,6 +149,22 @@ private:
|
||||
}
|
||||
|
||||
void acquire() {
|
||||
LockKey key{filePath, offset, length};
|
||||
|
||||
// Check if we already hold a lock on this region
|
||||
auto it = activeLocks.find(key);
|
||||
if (it != activeLocks.end()) {
|
||||
// If we're trying to upgrade from shared to exclusive, release first
|
||||
if (it->second == Mode::Shared && mode == Mode::Exclusive) {
|
||||
releaseInternal(); // Release the old shared lock
|
||||
activeLocks.erase(it);
|
||||
} else {
|
||||
// Already hold compatible or same lock
|
||||
locked = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int fd = getOrOpenFD(filePath);
|
||||
|
||||
struct flock lock_info;
|
||||
@@ -118,9 +178,10 @@ private:
|
||||
throw std::runtime_error("Failed to acquire file lock");
|
||||
}
|
||||
locked = true;
|
||||
activeLocks[key] = mode;
|
||||
}
|
||||
|
||||
void release() {
|
||||
void releaseInternal() {
|
||||
if (locked) {
|
||||
int fd = getOrOpenFD(filePath);
|
||||
|
||||
@@ -135,6 +196,14 @@ private:
|
||||
locked = false;
|
||||
}
|
||||
}
|
||||
|
||||
void release() {
|
||||
if (locked) {
|
||||
LockKey key{filePath, offset, length};
|
||||
releaseInternal();
|
||||
activeLocks.erase(key);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
@@ -145,10 +214,12 @@ public:
|
||||
// Calls release() on destruction, so a lock still held by this object is
// dropped when it goes out of scope; release() is a no-op when not locked.
~SweepstoreFileLock() { release(); }
|
||||
|
||||
void lock() {
|
||||
SWEEPSTORE_TIME_FUNCTION();
|
||||
if (!locked) acquire();
|
||||
}
|
||||
|
||||
// Releases the lock by delegating to release(), which does nothing when the
// object is not locked. The call is instrumented with
// SWEEPSTORE_TIME_FUNCTION().
void unlock() {
|
||||
SWEEPSTORE_TIME_FUNCTION();
|
||||
release();
|
||||
}
|
||||
|
||||
|
||||
@@ -341,6 +341,13 @@ inline void preciseSleep(std::chrono::nanoseconds duration) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
#ifdef _WIN32
|
||||
// Set timer resolution to 1ms once per process
|
||||
[[maybe_unused]] static bool timerResolutionSet = []() {
|
||||
timeBeginPeriod(1);
|
||||
std::atexit([]() { timeEndPeriod(1); });
|
||||
return true;
|
||||
}();
|
||||
|
||||
const auto windowsMinSleepTime = std::chrono::milliseconds(1);
|
||||
|
||||
if (duration < windowsMinSleepTime) {
|
||||
|
||||
213
cpp/src/Public/sweepstore/utils/timing.h
Normal file
213
cpp/src/Public/sweepstore/utils/timing.h
Normal file
@@ -0,0 +1,213 @@
|
||||
#ifndef SWEEPSTORE_TIMING_H
|
||||
#define SWEEPSTORE_TIMING_H
|
||||
|
||||
/**
|
||||
* @file timing.h
|
||||
* @brief Hierarchical scope timing system for SweepStore
|
||||
*
|
||||
* Provides microsecond-precision performance profiling for arbitrary scopes
|
||||
* (functions, loops, code blocks) with automatic parent/child relationship
|
||||
* tracking. Outputs console statistics, CSV data, and Chrome Tracing JSON.
|
||||
*
|
||||
* Usage:
|
||||
* #include "sweepstore/utils/timing.h"
|
||||
*
|
||||
* void myFunction() {
|
||||
* SWEEPSTORE_TIME_FUNCTION(); // Times entire function
|
||||
*
|
||||
* {
|
||||
* SWEEPSTORE_TIME_SCOPE("init"); // Times specific block
|
||||
* // initialization code...
|
||||
* }
|
||||
*
|
||||
* for (int i = 0; i < n; i++) {
|
||||
* SWEEPSTORE_TIME_SCOPE("loop_iteration"); // Times each iteration
|
||||
* // loop body...
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* Build with timing enabled:
|
||||
* cmake -DENABLE_TIMING=ON -B build
|
||||
* cmake --build build
|
||||
*
|
||||
* View results:
|
||||
* 1. Console statistics (automatic at program exit)
|
||||
* 2. sweepstore_timing.csv (flat aggregated data)
|
||||
* 3. sweepstore_trace.json (Chrome Tracing format: chrome://tracing)
|
||||
*
|
||||
* Performance:
|
||||
* - Enabled: ~150-300ns overhead per instrumented scope
|
||||
* - Disabled: Zero overhead (macros compile to nothing)
|
||||
*
|
||||
* Thread Safety:
|
||||
* - Completely lock-free during hot paths
|
||||
* - Thread-local storage eliminates contention
|
||||
* - Each thread maintains independent timing tree
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
/**
|
||||
* @brief Aggregated timing statistics for a scope
|
||||
*
|
||||
* Stores cumulative statistics across all invocations of a scope.
|
||||
*/
|
||||
struct ScopeTimingStats {
    /// Number of times the scope has been entered and left.
    uint64_t callCount{0};

    /// Sum of all recorded execution times, in microseconds.
    uint64_t totalMicros{0};

    /// Shortest single execution, in microseconds. Starts at the largest
    /// representable value so the first sample always replaces it.
    uint64_t minMicros{UINT64_MAX};

    /// Longest single execution, in microseconds.
    uint64_t maxMicros{0};
};
|
||||
|
||||
/**
|
||||
* @brief Single timing event for Chrome Tracing format
|
||||
*
|
||||
* Represents one execution of a scope. Chrome Tracing viewer automatically
|
||||
* builds hierarchical trees from timestamp overlaps.
|
||||
*/
|
||||
struct TimingTraceEvent {
    // Every numeric field carries a default so that a default-constructed
    // event never exposes indeterminate values. The struct remains an
    // aggregate (C++14 and later), so brace initialization listing all six
    // fields keeps working unchanged.

    std::string name;             ///< Scope name (function name or custom label)
    uint64_t startMicros = 0;     ///< Start timestamp in microseconds since the timing clock's epoch
    uint64_t durationMicros = 0;  ///< Duration in microseconds
    uint64_t threadId = 0;        ///< Thread ID (unique per thread)
    uint64_t parentEventId = 0;   ///< Parent event ID (0 if root scope)
    uint64_t eventId = 0;         ///< Unique ID for this event
};
|
||||
|
||||
/**
|
||||
* @brief Internal timing functions (do not call directly)
|
||||
*
|
||||
* These functions are called automatically by the RAII timer class.
|
||||
* Use SWEEPSTORE_TIME_FUNCTION() or SWEEPSTORE_TIME_SCOPE() instead.
|
||||
*/
|
||||
namespace SweepstoreTiming {

// --- Process-level lifecycle -------------------------------------------------

/// Call once at program start.
void initOutputFile();

/// Call before a thread exits to save its timing data.
void flushThreadData();

/// Call once at program end.
void finalizeOutputFile();

// --- Identifiers -------------------------------------------------------------

/// Returns a unique sequential ID for the calling thread.
uint64_t getThreadId();

/// Returns the ID to assign to a newly started scope.
/// NOTE(review): presumably allocates a fresh ID on each call -- confirm in
/// the implementation file.
uint64_t getNextEventId();

/// Returns the ID of the current event.
/// NOTE(review): exact semantics live in the implementation file, which is
/// not visible here.
uint64_t getCurrentEventId();

/// Returns the ID of the parent event.
/// NOTE(review): presumably the enclosing scope on the calling thread, or 0
/// for a root scope -- confirm in the implementation file.
uint64_t getParentEventId();

// --- Scope recording (invoked by SweepstoreScopeTimer) -----------------------

/// Records that the scope @p name with ID @p eventId has started.
void recordScopeStart(const char* name, uint64_t eventId);

/// Records that the scope @p name with ID @p eventId has finished.
/// @param startMicros     Start timestamp in microseconds.
/// @param durationMicros  Elapsed time in microseconds.
/// @param threadId        ID of the thread that ran the scope.
void recordScopeEnd(const char* name, uint64_t eventId, uint64_t startMicros, uint64_t durationMicros, uint64_t threadId);

}
|
||||
|
||||
/**
|
||||
* @brief RAII timer for automatic scope timing
|
||||
*
|
||||
* This class implements RAII (Resource Acquisition Is Initialization) pattern
|
||||
* for timing. Timer starts in constructor, stops in destructor. Automatically
|
||||
* tracks parent/child relationships via thread-local scope stack.
|
||||
*
|
||||
* DO NOT instantiate directly - use the macros instead:
|
||||
* - SWEEPSTORE_TIME_FUNCTION() for functions
|
||||
* - SWEEPSTORE_TIME_SCOPE(name) for arbitrary scopes
|
||||
*
|
||||
* Thread Safety:
|
||||
* - Thread-local storage ensures zero contention
|
||||
* - Each thread maintains independent scope stack
|
||||
* - Safe for concurrent use across multiple threads
|
||||
*
|
||||
* Performance:
|
||||
* - Constructor: ~5ns
|
||||
* - Destructor: ~150-300ns (includes stats update and trace event recording)
|
||||
*/
|
||||
class SweepstoreScopeTimer {
|
||||
private:
|
||||
const char* scopeName;
|
||||
std::chrono::high_resolution_clock::time_point startTime;
|
||||
uint64_t myEventId;
|
||||
uint64_t parentEventId;
|
||||
|
||||
public:
|
||||
explicit SweepstoreScopeTimer(const char* name)
|
||||
: scopeName(name)
|
||||
, startTime(std::chrono::high_resolution_clock::now())
|
||||
, myEventId(SweepstoreTiming::getNextEventId())
|
||||
, parentEventId(SweepstoreTiming::getParentEventId())
|
||||
{
|
||||
SweepstoreTiming::recordScopeStart(scopeName, myEventId);
|
||||
}
|
||||
|
||||
~SweepstoreScopeTimer() {
|
||||
auto endTime = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime);
|
||||
auto startMicros = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
startTime.time_since_epoch()
|
||||
).count();
|
||||
|
||||
// Get thread ID (use sequential thread ID instead of hash)
|
||||
uint64_t threadId = SweepstoreTiming::getThreadId();
|
||||
|
||||
SweepstoreTiming::recordScopeEnd(
|
||||
scopeName,
|
||||
myEventId,
|
||||
startMicros,
|
||||
duration.count(),
|
||||
threadId
|
||||
);
|
||||
}
|
||||
|
||||
// Disable copy and move
|
||||
SweepstoreScopeTimer(const SweepstoreScopeTimer&) = delete;
|
||||
SweepstoreScopeTimer& operator=(const SweepstoreScopeTimer&) = delete;
|
||||
SweepstoreScopeTimer(SweepstoreScopeTimer&&) = delete;
|
||||
SweepstoreScopeTimer& operator=(SweepstoreScopeTimer&&) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Timing instrumentation macros
|
||||
*
|
||||
* These macros provide a simple interface for instrumenting code with timing.
|
||||
* They compile to nothing when SWEEPSTORE_ENABLE_TIMING is 0 (default).
|
||||
*
|
||||
* @def SWEEPSTORE_TIME_FUNCTION()
|
||||
* Times the entire function. Uses __FUNCTION__ for scope name.
|
||||
* Place at the start of the function body.
|
||||
*
|
||||
* Example:
|
||||
* void myFunction() {
|
||||
* SWEEPSTORE_TIME_FUNCTION();
|
||||
* // function body...
|
||||
* }
|
||||
*
|
||||
* @def SWEEPSTORE_TIME_SCOPE(name)
|
||||
* Times an arbitrary scope with a custom name. Useful for:
|
||||
* - Code blocks
|
||||
* - Loop iterations
|
||||
* - Lambda functions
|
||||
* - Critical sections
|
||||
* - Class methods (use "ClassName::methodName" as the name)
|
||||
*
|
||||
* Example:
|
||||
* for (int i = 0; i < n; i++) {
|
||||
* SWEEPSTORE_TIME_SCOPE("loop_iteration");
|
||||
* // loop body...
|
||||
* }
|
||||
*
|
||||
* void MyClass::myMethod() {
|
||||
* SWEEPSTORE_TIME_SCOPE("MyClass::myMethod");
|
||||
* // method body...
|
||||
* }
|
||||
*
|
||||
* auto lambda = [&]() {
|
||||
* SWEEPSTORE_TIME_SCOPE("lambda_processing");
|
||||
* // lambda body...
|
||||
* };
|
||||
*/
|
||||
// Timing is off unless the build defines SWEEPSTORE_ENABLE_TIMING to non-zero.
#ifndef SWEEPSTORE_ENABLE_TIMING
#define SWEEPSTORE_ENABLE_TIMING 0
#endif

// Two-level token pasting. `##` does not macro-expand its operands, so pasting
// `__LINE__` directly yields the literal identifier suffix "__LINE__" and every
// timer variable ends up with the same name. Routing the arguments through an
// extra macro forces `__LINE__` to expand to the line number first, giving each
// timer a distinct name so several can coexist in one scope.
#define SWEEPSTORE_TIMING_CONCAT_IMPL(a, b) a##b
#define SWEEPSTORE_TIMING_CONCAT(a, b) SWEEPSTORE_TIMING_CONCAT_IMPL(a, b)

#if SWEEPSTORE_ENABLE_TIMING
// The variable prefix avoids leading double underscores, which are reserved
// for the implementation.
#define SWEEPSTORE_TIME_FUNCTION() \
    SweepstoreScopeTimer SWEEPSTORE_TIMING_CONCAT(sweepstoreScopeTimer_, __LINE__)(__FUNCTION__)
#define SWEEPSTORE_TIME_SCOPE(name) \
    SweepstoreScopeTimer SWEEPSTORE_TIMING_CONCAT(sweepstoreScopeTimer_, __LINE__)(name)
#else
// Disabled: both macros expand to a no-op expression statement.
#define SWEEPSTORE_TIME_FUNCTION() ((void)0)
#define SWEEPSTORE_TIME_SCOPE(name) ((void)0)
#endif
|
||||
|
||||
#endif // SWEEPSTORE_TIMING_H
|
||||
Reference in New Issue
Block a user