#pragma once

/*
* Superscalar instruction set information: https://github.com/tevador/RandomX/blob/master/doc/specs.md#61-instructions
*/

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

namespace modernRX {
    // Simulated CPU execution ports. Every single port occupies its own bit,
    // so each port combination is the bitwise OR of the ports it contains.
    enum class ExecutionPort : uint8_t {
        NONE = 0,
        P5 = 1 << 0,
        P0 = 1 << 1,
        P1 = 1 << 2,
        P01 = P0 | P1,
        P05 = P0 | P5,
        P15 = P1 | P5,
        P015 = P0 | P1 | P5,
    };

    // Defines all instruction types used in superscalar programs. Order must be preserved:
    // the instruction set table is validated so that every entry sits at the index equal to its type value.
    // The trailing columns summarize the x86 macro-ops each instruction is modelled with;
    // multi-part entries (a+b+c) list the values of the individual macro-ops in program order.
    enum class SuperscalarInstructionType : uint8_t {
                                                                //uOPs (decode)   execution ports         latency       code size
        ISUB_R = 0,                                             //1               p015                    1             3 (sub)
        IXOR_R = 1,                                             //1               p015                    1             3 (xor)
        IADD_RS = 2,                                            //1               p01                     1             4 (lea)
        IMUL_R = 3,                                             //1               p1                      3             4 (imul)
        IROR_C = 4,                                             //1               p05                     1             4 (ror)
        IADD_C7 = 5,                                            //1               p015                    1             7 (add)
        IXOR_C7 = 6,                                            //1               p015                    1             7 (xor)
        IADD_C8 = 7,                                            //1+0             p015                    1             7+1 (add+nop)
        IXOR_C8 = 8,                                            //1+0             p015                    1             7+1 (xor+nop)
        IADD_C9 = 9,                                            //1+0             p015                    1             7+2 (add+nop)
        IXOR_C9 = 10,                                           //1+0             p015                    1             7+2 (xor+nop)
        IMULH_R = 11,                                           //1+2+1           0+(p1,p5)+0             4             3+3+3 (mov+mul+mov)
        ISMULH_R = 12,                                          //1+2+1           0+(p1,p5)+0             4             3+3+3 (mov+imul+mov)
        IMUL_RCP = 13,                                          //1+1             p015+p1                 4             10+4 (mov+imul)

        INVALID = 14,                                           // Sentinel for an invalid/unset instruction type.
    };

    // Holds information about single macro operation.
    struct MacroOp {
        std::array<ExecutionPort, 2> ports{ ExecutionPort::NONE, ExecutionPort::NONE }; // If MacroOp consists of 2 uOps, second execution port will not be NONE.
        uint8_t size{ 0 }; // Size in bytes.
        uint8_t latency{ 0 }; // Latency in CPU clock cycles.
        bool dependent{ false }; // Is dependent on previous macro op.

        // Returns true if operation needs to be scheduled at any ExectionPort, false otherwise (eg. eliminated MOV instructions does not require any port).
        [[nodiscard]] bool requiresPort() const noexcept {
            return ports[0] != ExecutionPort::NONE;
        }
        
        // Returns true if macro-op is fused from two uOps.
        [[nodiscard]] bool fused() const noexcept {
            return ports[1] != ExecutionPort::NONE;
        }
    };

    // Static description (template) shared by all instances of one superscalar instruction type.
    struct SuperscalarInstructionInfo {
        // Macro operations the instruction consists of; unused slots stay default-initialized.
        std::array<MacroOp, 4> ops = {};
        // Superscalar instruction type.
        SuperscalarInstructionType type = SuperscalarInstructionType::INVALID;
        // Superscalar instruction group type.
        SuperscalarInstructionType group = SuperscalarInstructionType::INVALID;

        // Index of the macro-op that requires the source register (nullopt if no source is required).
        std::optional<uint8_t> src_op_index = std::nullopt;
        // Index of the macro-op that requires the destination register.
        uint8_t dst_op_index = 0;
        // Index of the macro-op that stores the result (updates the register).
        uint8_t result_op_index = 0;

        // Specifies whether the source register should be used as a source value.
        bool src_register_as_src_value = false;
        // Defines whether the destination register can be used as a source register.
        bool dst_register_as_src_register = false;
    };

    // Holds superscalar instruction templates.
    using SuperscalarInstructionSet = std::array<SuperscalarInstructionInfo, 15>;

    // Superscalar instruction set.
    // Table of instruction templates in which the entry at index N describes the instruction
    // whose SuperscalarInstructionType value is N. The table is built and validated by an
    // immediately-invoked consteval lambda, so any inconsistency is reported at compile time.
    inline constexpr SuperscalarInstructionSet isa = []() consteval {
        constexpr SuperscalarInstructionSet isa_{
            // [0] ISUB_R
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::ISUB_R },
                .group{ SuperscalarInstructionType::IADD_RS }, // Shares its group with IADD_RS.
                .src_op_index{ 0 },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ false },
            },
            // [1] IXOR_R
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IXOR_R },
                .group{ SuperscalarInstructionType::IXOR_R },
                .src_op_index{ 0 },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ false },
            },
            // [2] IADD_RS
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P01, ExecutionPort::NONE },
                        .size{ 4 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IADD_RS },
                .group{ SuperscalarInstructionType::IADD_RS },
                .src_op_index{ 0 },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ false }, // According to specification this should be true, but original implementation does not support that.
            },
            // [3] IMUL_R
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P1, ExecutionPort::NONE },
                        .size{ 4 },
                        .latency{ 3 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IMUL_R },
                .group{ SuperscalarInstructionType::IMUL_R },
                .src_op_index{ 0 },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ false },
            },
            // [4] IROR_C (no source register)
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P05, ExecutionPort::NONE },
                        .size{ 4 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IROR_C },
                .group{ SuperscalarInstructionType::IROR_C },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [5] IADD_C7 (no source register)
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 7 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IADD_C7 },
                .group{ SuperscalarInstructionType::IADD_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [6] IXOR_C7 (no source register)
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 7 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IXOR_C7 },
                .group{ SuperscalarInstructionType::IXOR_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [7] IADD_C8: same as IADD_C7 except for the code size (8 bytes); belongs to the IADD_C7 group.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 8 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IADD_C8 },
                .group{ SuperscalarInstructionType::IADD_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [8] IXOR_C8: same as IXOR_C7 except for the code size (8 bytes); belongs to the IXOR_C7 group.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 8 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IXOR_C8 },
                .group{ SuperscalarInstructionType::IXOR_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [9] IADD_C9: same as IADD_C7 except for the code size (9 bytes); belongs to the IADD_C7 group.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 9 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IADD_C9 },
                .group{ SuperscalarInstructionType::IADD_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [10] IXOR_C9: same as IXOR_C7 except for the code size (9 bytes); belongs to the IXOR_C7 group.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 9 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IXOR_C9 },
                .group{ SuperscalarInstructionType::IXOR_C7 },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [11] IMULH_R: three macro-ops; the first and the last one require no execution port,
            // the middle one is fused from two uOps (P1 and P5).
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::NONE, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 0 },
                        .dependent{ false },
                    },
                    MacroOp{
                        .ports{ ExecutionPort::P1, ExecutionPort::P5 },
                        .size{ 3 },
                        .latency{ 4 },
                        .dependent{ false },
                    },
                    MacroOp{
                        .ports{ ExecutionPort::NONE, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 0 },
                        .dependent{ false },
                    },
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IMULH_R },
                .group{ SuperscalarInstructionType::ISMULH_R }, // NOTE(review): group differs from type here, while ISMULH_R below uses its own type; the reference RandomX generator appears to use IMULH_R as the group for IMULH_R - confirm that sharing the group is intended.
                .src_op_index{ 1 },
                .dst_op_index{ 0 },
                .result_op_index{ 1 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ true },
            },
            // [12] ISMULH_R: same macro-op layout as IMULH_R.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::NONE, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 0 },
                        .dependent{ false },
                    },
                    MacroOp{
                        .ports{ ExecutionPort::P1, ExecutionPort::P5 },
                        .size{ 3 },
                        .latency{ 4 },
                        .dependent{ false },
                    },
                    MacroOp{
                        .ports{ ExecutionPort::NONE, ExecutionPort::NONE },
                        .size{ 3 },
                        .latency{ 0 },
                        .dependent{ false },
                    },
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::ISMULH_R },
                .group{ SuperscalarInstructionType::ISMULH_R },
                .src_op_index{ 1 },
                .dst_op_index{ 0 },
                .result_op_index{ 1 },
                .src_register_as_src_value{ true },
                .dst_register_as_src_register{ true },
            },
            // [13] IMUL_RCP: two macro-ops, the second one depends on the first one.
            // No source register; destination and result belong to the second macro-op.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{
                        .ports{ ExecutionPort::P015, ExecutionPort::NONE },
                        .size{ 10 },
                        .latency{ 1 },
                        .dependent{ false },
                    },
                    MacroOp{
                        .ports{ ExecutionPort::P1, ExecutionPort::NONE },
                        .size{ 4 },
                        .latency{ 3 },
                        .dependent{ true },
                    },
                    MacroOp{},
                    MacroOp{},
                },
                .type{ SuperscalarInstructionType::IMUL_RCP },
                .group{ SuperscalarInstructionType::IMUL_RCP },
                .src_op_index{ std::nullopt },
                .dst_op_index{ 1 },
                .result_op_index{ 1 },
                .src_register_as_src_value{ false },
                .dst_register_as_src_register{ false },
            },
            // [14] INVALID: placeholder entry without any macro-ops.
            // Fields that are not listed (group and both flags) keep their default member initializers.
            SuperscalarInstructionInfo{
                .ops = std::array<MacroOp, 4>{
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                    MacroOp{},
                },
                .type = SuperscalarInstructionType::INVALID,
                .src_op_index{ 0 },
                .dst_op_index{ 0 },
                .result_op_index{ 0 },
            },
        };

        // This is just to ensure that enum values point to proper instruction templates.
        // The lambda is consteval, so reaching the throw below during constant evaluation
        // turns a misplaced entry into a compilation error.
        for (uint32_t i = 0; i < isa_.size(); ++i) {
            if (isa_[i].type != static_cast<SuperscalarInstructionType>(i)) {
                throw "Array index must have equal value to underlying instruction type.";
            }
        }

        return isa_;
    }();

    // Tells whether the given instruction type is one of the multiplication instructions
    // (IMUL_R, IMULH_R, ISMULH_R or IMUL_RCP).
    constexpr bool isMultiplication(const SuperscalarInstructionType type) {
        switch (type) {
        case SuperscalarInstructionType::IMUL_R:
        case SuperscalarInstructionType::IMULH_R:
        case SuperscalarInstructionType::ISMULH_R:
        case SuperscalarInstructionType::IMUL_RCP:
            return true;
        default:
            return false;
        }
    }

    // Scans every macro-op of every instruction in the given set and returns the highest latency found.
    // The result must be 4 for reference CPU (Ivy Bridge); any other value ends in a throw,
    // which fails the compilation because the function is consteval.
    consteval uint8_t maxOpLatency(const SuperscalarInstructionSet& isa) {
        uint8_t highest{ 0 };

        for (const SuperscalarInstructionInfo& instruction : isa) {
            for (const MacroOp& macro_op : instruction.ops) {
                if (highest < macro_op.latency) {
                    highest = macro_op.latency;
                }
            }
        }

        if (highest == 4) {
            return highest;
        }

        throw "Maximum latency of all operations in the instruction set must be 4 for reference CPU (Ivy Bridge).";
    }
}

// Generated by OpenCppCoverage (Version: 0.9.9.0)