#pragma once

#include <algorithm>
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <print>
#include <string_view>
#include <vector>

// Compile-time switch for the tracing facilities in this header:
// true when the TRACE macro is defined at the point of inclusion, false otherwise.
#if defined(TRACE)
constexpr bool Trace_Enabled = true;
#else
constexpr bool Trace_Enabled = false;
#endif


/// Identifies the kind of work a trace sample belongs to.
///
/// The enumerator values double as indices into per-event arrays, so they must
/// stay contiguous and start at zero.
enum class TraceEvent {
    Generate,
    Compile,
    Execute,
    HashAndFill,
    Max,  ///< Sentinel: number of real events, used to size per-event arrays.
};

/// Human-readable name for each TraceEvent, indexed by the enumerator's value.
///
/// Declared `inline` so every translation unit including this header shares one
/// object; the table is odr-used (via operator[]) from inline functions.
inline constexpr std::array<std::string_view, static_cast<std::size_t>(TraceEvent::Max)> Trace_Event_Names {
    "Generate",
    "Compile",
    "Execute",
    "HashAndFill",
};

// A missing initializer would value-initialize the trailing entries to empty
// views; catch an enumerator that was added without a matching name.
static_assert(!Trace_Event_Names.back().empty(),
    "Trace_Event_Names must provide a name for every TraceEvent enumerator");

/// Running statistics for a single trace event. Every field is atomic so the
/// counters can be updated without an external lock.
struct TraceData {
    /// Sum of all recorded durations, in time-stamp-counter ticks.
    std::atomic<std::size_t> clock_sum{ 0U };
    /// Number of durations that have been recorded.
    std::atomic<std::size_t> clock_samples{ 0U };
    /// Shortest recorded duration; starts at the largest value so any sample lowers it.
    std::atomic<std::size_t> clock_min{ (std::numeric_limits<std::size_t>::max)() };
    /// Longest recorded duration; starts at zero so any sample raises it.
    std::atomic<std::size_t> clock_max{ 0U };
};

// Process-wide statistics table: one TraceData slot per TraceEvent, indexed by
// the enumerator's integral value. `inline` gives the header a single shared instance.
inline std::array<TraceData, static_cast<std::size_t>(TraceEvent::Max)>
    trace_data{};

/// Scoped timer: measures the time-stamp-counter ticks elapsed between its
/// construction and destruction and adds them to the `trace_data` slot of `evt`.
///
/// When `Trace_Enabled` is false the constructor and destructor bodies are
/// discarded at compile time and the object does nothing.
/// The type is neither copyable nor movable.
///
/// @tparam evt  Event whose `trace_data` entry receives the sample.
template<TraceEvent evt>
struct Trace {
    /// Time-stamp-counter value read at construction (stays 0 when tracing is disabled).
    size_t clock_start{ 0 };

    /// Records the starting time-stamp-counter value when tracing is enabled.
    [[maybe_unused]] explicit Trace() {
        if constexpr (Trace_Enabled) {
            clock_start = static_cast<size_t>(__rdtsc());
        }
    }

    Trace(const Trace&) = delete;
    Trace& operator=(const Trace&) = delete;
    Trace(Trace&&) = delete;
    Trace& operator=(Trace&&) = delete;

    /// Reads the counter again and folds the elapsed ticks into the sum, sample
    /// count, minimum and maximum of this event's statistics.
    ~Trace() {
        if constexpr (Trace_Enabled) {
            // Keep every value as size_t (the counters' value type) so no
            // mixed-type comparison is needed on platforms where size_t and
            // uint64_t are distinct types.
            const size_t clock_stop{ static_cast<size_t>(__rdtsc()) };
            const size_t clock_diff{ clock_stop - clock_start };

            auto& td{ trace_data[static_cast<size_t>(evt)] };
            td.clock_sum.fetch_add(clock_diff, std::memory_order_relaxed);
            td.clock_samples.fetch_add(1, std::memory_order_relaxed);

            // A separate load followed by a store can overwrite a better value
            // written by another thread in between. Retry with compare-exchange
            // until this sample is published or is no longer an improvement;
            // on failure `seen_*` is refreshed with the current stored value.
            size_t seen_min{ td.clock_min.load(std::memory_order_relaxed) };
            while (clock_diff < seen_min &&
                   !td.clock_min.compare_exchange_weak(seen_min, clock_diff, std::memory_order_relaxed)) {
            }

            size_t seen_max{ td.clock_max.load(std::memory_order_relaxed) };
            while (clock_diff > seen_max &&
                   !td.clock_max.compare_exchange_weak(seen_max, clock_diff, std::memory_order_relaxed)) {
            }
        }
    }
};


/// Prints a summary of everything accumulated in `trace_data` when it is destroyed.
///
/// When `Trace_Enabled` is false the destructor does nothing. The report is
/// built from a local snapshot, so `trace_data` itself is left untouched and
/// the report can be produced more than once with consistent results.
/// The type is neither copyable nor movable.
struct TraceResults {
    [[maybe_unused]] TraceResults() = default;

    TraceResults(const TraceResults&) = delete;
    TraceResults& operator=(const TraceResults&) = delete;
    TraceResults(TraceResults&&) = delete;
    TraceResults& operator=(TraceResults&&) = delete;

    /// Writes one line per event that has at least one (scaled) sample:
    /// sample count, min/max/average ticks and share of the total ticks.
    ~TraceResults() {
        if constexpr (Trace_Enabled) {
            constexpr size_t event_count{ static_cast<size_t>(TraceEvent::Max) };
            // HashAndFill's sample count and tick sum are divided by this factor
            // before reporting.
            // NOTE(review): presumably one logical HashAndFill step is recorded
            // 8 times (e.g. once per worker/lane) — confirm against the call sites.
            constexpr size_t hash_and_fill_divisor{ 8 };

            // Plain-value copy of one event's counters, taken once for the report.
            struct Snapshot {
                size_t samples{ 0 };
                size_t sum{ 0 };
                size_t min{ 0 };
                size_t max{ 0 };
            };

            std::array<Snapshot, event_count> snapshots{};
            size_t total_clocks{ 0 };
            for (size_t i = 0; i < event_count; ++i) {
                const auto& td{ trace_data[i] };
                auto& snap{ snapshots[i] };
                snap.samples = td.clock_samples.load();
                snap.sum = td.clock_sum.load();
                snap.min = td.clock_min.load();
                snap.max = td.clock_max.load();

                // Scale the local copy only; writing the scaled values back into
                // the shared counters would corrupt them for any later reader.
                if (i == static_cast<size_t>(TraceEvent::HashAndFill)) {
                    snap.samples /= hash_and_fill_divisor;
                    snap.sum /= hash_and_fill_divisor;
                }

                total_clocks += snap.sum;
            }

            std::println("\nTrace results:");
            for (size_t i = 0; i < event_count; ++i) {
                const auto& snap{ snapshots[i] };
                if (snap.samples == 0) {
                    continue;
                }

                // Avoid printing NaN when every recorded duration was zero ticks.
                const double percent_of_total{
                    total_clocks == 0 ? 0.0 : 100.0 * snap.sum / total_clocks };

                std::println("Trace event ({}): {} samples, clocks (min: {}, max: {}, avg: {}), {:.2f}% of total time",
                    Trace_Event_Names[i], snap.samples, snap.min, snap.max,
                    (snap.sum / snap.samples), percent_of_total);
            }
        }
    }
};

// Generated by OpenCppCoverage (Version: 0.9.9.0)