#include <format>

#include "aes.hpp"
#include "aes1rrandom.hpp"
#include "assertume.hpp"
#include "cast.hpp"
#include "randomxparams.hpp"
#include "sse.hpp"

namespace modernRX::aes {
    // Explicitly instantiate both variants of fill1R; the template definition follows below.
    template void fill1R<true>(std::span<std::byte> output, std::span<std::byte, 64> seed) noexcept;
    template void fill1R<false>(std::span<std::byte> output, std::span<std::byte, 64> seed) noexcept;

    // Fills `output` with pseudo-random data, 64 bytes per iteration, and advances `seed` in place.
    //
    // The 64-byte seed is treated as four independent 16-byte states. Every iteration transforms
    // each state with its own constant key (states 0 and 2 via intrinsics::aes::decode, states 1 and 3
    // via intrinsics::aes::encode) and then stores the four states as the next 64 bytes of output.
    // After the last iteration the final states are written back to `seed`, so a subsequent call
    // continues the same stream.
    //
    // Template parameter:
    //   Fixed  - true:  exactly Rx_Scratchpad_L3_Size bytes are produced, regardless of output.size()
    //                   (the trip count is a compile-time constant).
    //            false: exactly output.size() bytes are produced.
    //
    // Preconditions (checked with ASSERTUME):
    //   - output.size() is non-zero and a multiple of 64.
    //   - output holds at least as many bytes as will be produced (matters for Fixed == true).
    //
    // NOTE(review): output and seed are accessed through intrinsics::xmm128i_t references, which
    // presumably requires both buffers to be 16-byte aligned - confirm against the callers.
    template<bool Fixed>
    void fill1R(std::span<std::byte> output, std::span<std::byte, 64> seed) noexcept {
        ASSERTUME(output.size() > 0 && output.size() % 64 == 0);

        // Number of bytes this call produces. For Fixed == true this is a constant that does not
        // depend on output.size(), so make sure the destination is actually large enough.
        const auto bytesToFill{ Fixed ? Rx_Scratchpad_L3_Size : output.size() };
        ASSERTUME(output.size() >= bytesToFill);

        // Views 16 bytes starting at `ptr` as a single 128-bit lane.
        const auto lane = [](std::byte* ptr) noexcept -> intrinsics::xmm128i_t& {
            return *reinterpret_cast<intrinsics::xmm128i_t*>(ptr);
        };

        // key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys")
        // key0 = 53 a5 ac 6d 09 66 71 62 2b 55 b5 db 17 49 f4 b4
        // key1 = 07 af 7c 6d 0d 71 6a 84 78 d3 25 17 4e dc a1 0d
        // key2 = f1 62 12 3f c6 7e 94 9f 4f 79 c0 f4 45 e3 20 3e
        // key3 = 35 81 ef 6a 7c 31 ba b1 88 4c 31 16 54 91 16 49
        constexpr auto key0{ intrinsics::fromChars(0x53, 0xa5, 0xac, 0x6d, 0x09, 0x66, 0x71, 0x62, 0x2b, 0x55, 0xb5, 0xdb, 0x17, 0x49, 0xf4, 0xb4) };
        constexpr auto key1{ intrinsics::fromChars(0x07, 0xaf, 0x7c, 0x6d, 0x0d, 0x71, 0x6a, 0x84, 0x78, 0xd3, 0x25, 0x17, 0x4e, 0xdc, 0xa1, 0x0d) };
        constexpr auto key2{ intrinsics::fromChars(0xf1, 0x62, 0x12, 0x3f, 0xc6, 0x7e, 0x94, 0x9f, 0x4f, 0x79, 0xc0, 0xf4, 0x45, 0xe3, 0x20, 0x3e) };
        constexpr auto key3{ intrinsics::fromChars(0x35, 0x81, 0xef, 0x6a, 0x7c, 0x31, 0xba, 0xb1, 0x88, 0x4c, 0x31, 0x16, 0x54, 0x91, 0x16, 0x49) };

        // The four working states start out as the four 16-byte quarters of the seed.
        auto state0{ intrinsics::sse::vload<int>(seed.data()) };
        auto state1{ intrinsics::sse::vload<int>(seed.data() + 16) };
        auto state2{ intrinsics::sse::vload<int>(seed.data() + 32) };
        auto state3{ intrinsics::sse::vload<int>(seed.data() + 48) };

        for (size_t offset = 0; offset < bytesToFill; offset += 64) {
            // Even states are decoded, odd states are encoded, each with its own key.
            intrinsics::aes::decode(state0, key0);
            intrinsics::aes::encode(state1, key1);
            intrinsics::aes::decode(state2, key2);
            intrinsics::aes::encode(state3, key3);

            // Emit the updated states as the next 64 bytes of output.
            std::byte* const chunk{ output.data() + offset };
            lane(chunk) = state0;
            lane(chunk + 16) = state1;
            lane(chunk + 32) = state2;
            lane(chunk + 48) = state3;
        }

        // Persist the final states so the caller's seed reflects the generator's new position.
        lane(seed.data()) = state0;
        lane(seed.data() + 16) = state1;
        lane(seed.data() + 32) = state2;
        lane(seed.data() + 48) = state3;
    }
}

// Generated by OpenCppCoverage (Version: 0.9.9.0)