diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c24849..d926174 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ cmake_minimum_required(VERSION 2.8.7) set (randomx_sources src/aes_hash.cpp src/argon2_ref.c +src/bytecode_machine.cpp src/dataset.cpp src/soft_aes.cpp src/virtual_memory.cpp diff --git a/makefile b/makefile index 9e72600..b0149a0 100644 --- a/makefile +++ b/makefile @@ -11,8 +11,8 @@ TESTDIR=src/tests OBJDIR=obj LDFLAGS=-lpthread RXA=$(BINDIR)/librandomx.a -BINARIES=$(RXA) $(BINDIR)/benchmark $(BINDIR)/code-generator -RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o) +BINARIES=$(RXA) $(BINDIR)/randomx-benchmark $(BINDIR)/randomx-generator $(BINDIR)/randomx-tests +RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o bytecode_machine.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o) ifeq ($(PLATFORM),amd64) RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) CXXFLAGS += -maes @@ -51,7 +51,7 @@ debug: $(BINARIES) profile: CXXFLAGS += -pg profile: CCFLAGS += -pg profile: LDFLAGS += -pg -profile: $(BINDIR)/benchmark +profile: $(BINDIR)/randomx-benchmark test: CXXFLAGS += -O0 @@ -64,7 +64,7 @@ $(BINDIR): $(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \ $(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h $(CXX) $(CXXFLAGS) -pthread -c $< -o $@ -$(BINDIR)/benchmark: $(OBJDIR)/benchmark.o $(RXA) +$(BINDIR)/randomx-benchmark: $(OBJDIR)/benchmark.o $(RXA) 
$(CXX) $(LDFLAGS) -pthread $< $(RXA) -o $@ $(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \ $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ @@ -74,13 +74,30 @@ $(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \ $(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp $(CXX) $(CXXFLAGS) -c $< -o $@ -$(BINDIR)/code-generator: $(OBJDIR)/code-generator.o $(RXA) +$(BINDIR)/randomx-generator: $(OBJDIR)/code-generator.o $(RXA) + $(CXX) $(LDFLAGS) $< $(RXA) -o $@ +$(OBJDIR)/tests.o: $(TESTDIR)/tests.cpp $(TESTDIR)/utility.hpp \ + $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h \ + $(SRCDIR)/randomx.h $(SRCDIR)/intrin_portable.h \ + $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ + $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp \ + $(SRCDIR)/allocator.hpp $(SRCDIR)/blake2/blake2.h \ + $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/superscalar.hpp \ + $(SRCDIR)/reciprocal.h $(SRCDIR)/jit_compiler.hpp \ + $(SRCDIR)/jit_compiler_x86.hpp + $(CXX) $(CXXFLAGS) -c $< -o $@ +$(BINDIR)/randomx-tests: $(OBJDIR)/tests.o $(RXA) $(CXX) $(LDFLAGS) $< $(RXA) -o $@ $(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR) $(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ $(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \ $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \ $(SRCDIR)/blake2/blake2.h +$(OBJDIR)/bytecode_machine.o: $(SRCDIR)/bytecode_machine.cpp $(SRCDIR)/bytecode_machine.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ + $(SRCDIR)/reciprocal.h $(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \ 
$(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(CC) $(CCFLAGS) -c $< -o $@ @@ -108,11 +125,11 @@ $(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDI $(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h $(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp $(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/virtual_machine.hpp \ + $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/instruction_weights.hpp \ $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h \ - $(SRCDIR)/instruction_weights.hpp + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h $(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \ $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h @@ -129,7 +146,7 @@ $(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \ $(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \ - $(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h + $(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h $(SRCDIR)/bytecode_machine.hpp $(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h 
$(SRCDIR)/program.hpp \ $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \ $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \ @@ -144,7 +161,8 @@ $(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \ $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp \ + $(SRCDIR)/bytecode_machine.hpp $(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \ $(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \ diff --git a/randomx.sln b/randomx.sln index 3da2b50..3f003b7 100644 --- a/randomx.sln +++ b/randomx.sln @@ -31,6 +31,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime-distr", "vcxproj\ru EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx-dll", "vcxproj\randomx-dll.vcxproj", "{59560AD8-18E3-463E-A941-BBD808EC7C83}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", "vcxproj\tests.vcxproj", "{41F3F4DF-8113-4029-9915-FDDC44C43D49}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -143,6 +145,14 @@ Global {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.Build.0 = Release|x64 {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.ActiveCfg = Release|Win32 {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.Build.0 = Release|Win32 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.ActiveCfg = Debug|x64 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.Build.0 = Debug|x64 + 
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.ActiveCfg = Debug|Win32 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.Build.0 = Debug|Win32 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.ActiveCfg = Release|x64 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.Build.0 = Release|x64 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.ActiveCfg = Release|Win32 + {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -159,6 +169,7 @@ Global {535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923} {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} = {4A4A689F-86AF-41C0-A974-1080506D0923} {F207EC8C-C55F-46C0-8851-887A71574F54} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {41F3F4DF-8113-4029-9915-FDDC44C43D49} = {4A4A689F-86AF-41C0-A974-1080506D0923} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {4EBC03DB-AE37-4141-8147-692F16E0ED02} diff --git a/src/bytecode_machine.cpp b/src/bytecode_machine.cpp new file mode 100644 index 0000000..20c4e10 --- /dev/null +++ b/src/bytecode_machine.cpp @@ -0,0 +1,482 @@ +/* +Copyright (c) 2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "bytecode_machine.hpp" +#include "reciprocal.h" + +namespace randomx { + + const int_reg_t BytecodeMachine::zero = 0; + +#define INSTR_CASE(x) case InstructionType::x: \ + exe_ ## x(ibc, pc, scratchpad, config); \ + break; + + void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) { + switch (ibc.type) + { + INSTR_CASE(IADD_RS) + INSTR_CASE(IADD_M) + INSTR_CASE(ISUB_R) + INSTR_CASE(ISUB_M) + INSTR_CASE(IMUL_R) + INSTR_CASE(IMUL_M) + INSTR_CASE(IMULH_R) + INSTR_CASE(IMULH_M) + INSTR_CASE(ISMULH_R) + INSTR_CASE(ISMULH_M) + INSTR_CASE(INEG_R) + INSTR_CASE(IXOR_R) + INSTR_CASE(IXOR_M) + INSTR_CASE(IROR_R) + INSTR_CASE(IROL_R) + INSTR_CASE(ISWAP_R) + INSTR_CASE(FSWAP_R) + INSTR_CASE(FADD_R) + INSTR_CASE(FADD_M) + INSTR_CASE(FSUB_R) + INSTR_CASE(FSUB_M) + INSTR_CASE(FSCAL_R) + INSTR_CASE(FMUL_R) + INSTR_CASE(FDIV_M) + INSTR_CASE(FSQRT_R) + INSTR_CASE(CBRANCH) + INSTR_CASE(CFROUND) + INSTR_CASE(ISTORE) + + case InstructionType::NOP: + break; + + case InstructionType::IMUL_RCP: //executed as IMUL_R + default: + UNREACHABLE; + } + } + + void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) { + int opcode = instr.opcode; + + if (opcode < ceil_IADD_RS) { + auto dst = instr.dst % RegistersCount; + 
auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IADD_RS; + ibc.idst = &nreg->r[dst]; + if (dst != RegisterNeedsDisplacement) { + ibc.isrc = &nreg->r[src]; + ibc.shift = instr.getModShift(); + ibc.imm = 0; + } + else { + ibc.isrc = &nreg->r[src]; + ibc.shift = instr.getModShift(); + ibc.imm = signExtend2sCompl(instr.getImm32()); + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IADD_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IADD_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_ISUB_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISUB_R; + ibc.idst = &nreg->r[dst]; + if (src != dst) { + ibc.isrc = &nreg->r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_ISUB_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISUB_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IMUL_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMUL_R; + ibc.idst = &nreg->r[dst]; + if (src != dst) { + ibc.isrc = &nreg->r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IMUL_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMUL_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IMULH_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMULH_R; + ibc.idst = &nreg->r[dst]; + ibc.isrc = &nreg->r[src]; + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IMULH_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMULH_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_ISMULH_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISMULH_R; + ibc.idst = &nreg->r[dst]; + ibc.isrc = &nreg->r[src]; + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_ISMULH_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISMULH_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IMUL_RCP) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::IMUL_R; + ibc.idst = &nreg->r[dst]; + ibc.imm = randomx_reciprocal(divisor); + ibc.isrc = &ibc.imm; + registerUsage[dst] = i; + } + else { + ibc.type = InstructionType::NOP; + } + return; + } + + if (opcode < ceil_INEG_R) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::INEG_R; + ibc.idst = &nreg->r[dst]; + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IXOR_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IXOR_R; + ibc.idst = &nreg->r[dst]; + if (src != dst) { + ibc.isrc = &nreg->r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IXOR_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IXOR_M; + ibc.idst = &nreg->r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); 
+ if (src != dst) { + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IROR_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IROR_R; + ibc.idst = &nreg->r[dst]; + if (src != dst) { + ibc.isrc = &nreg->r[src]; + } + else { + ibc.imm = instr.getImm32(); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_IROL_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IROL_R; + ibc.idst = &nreg->r[dst]; + if (src != dst) { + ibc.isrc = &nreg->r[src]; + } + else { + ibc.imm = instr.getImm32(); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + return; + } + + if (opcode < ceil_ISWAP_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + if (src != dst) { + ibc.idst = &nreg->r[dst]; + ibc.isrc = &nreg->r[src]; + ibc.type = InstructionType::ISWAP_R; + registerUsage[dst] = i; + registerUsage[src] = i; + } + else { + ibc.type = InstructionType::NOP; + } + return; + } + + if (opcode < ceil_FSWAP_R) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::FSWAP_R; + if (dst < RegisterCountFlt) + ibc.fdst = &nreg->f[dst]; + else + ibc.fdst = &nreg->e[dst - RegisterCountFlt]; + return; + } + + if (opcode < ceil_FADD_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FADD_R; + ibc.fdst = &nreg->f[dst]; + ibc.fsrc = &nreg->a[src]; + return; + } + + if (opcode < ceil_FADD_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FADD_M; + ibc.fdst = &nreg->f[dst]; + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + return; + } + + if (opcode < ceil_FSUB_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FSUB_R; + ibc.fdst = &nreg->f[dst]; + ibc.fsrc = &nreg->a[src]; + return; + } + + if (opcode < ceil_FSUB_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FSUB_M; + ibc.fdst = &nreg->f[dst]; + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + return; + } + + if (opcode < ceil_FSCAL_R) { + auto dst = instr.dst % RegisterCountFlt; + ibc.fdst = &nreg->f[dst]; + ibc.type = InstructionType::FSCAL_R; + return; + } + + if (opcode < ceil_FMUL_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FMUL_R; + ibc.fdst = &nreg->e[dst]; + ibc.fsrc = &nreg->a[src]; + return; + } + + if (opcode < ceil_FDIV_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FDIV_M; + ibc.fdst = &nreg->e[dst]; + ibc.isrc = &nreg->r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + return; + } + + if (opcode < ceil_FSQRT_R) { + auto dst = instr.dst % RegisterCountFlt; + ibc.type = InstructionType::FSQRT_R; + ibc.fdst = &nreg->e[dst]; + return; + } + + if (opcode < ceil_CBRANCH) { + ibc.type = InstructionType::CBRANCH; + //jump condition + int creg = instr.dst % RegistersCount; + ibc.idst = &nreg->r[creg]; + ibc.target = registerUsage[creg]; + int shift = instr.getModCond() + ConditionOffset; + ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); + if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 + ibc.imm &= ~(1ULL << (shift - 1)); + ibc.memMask = ConditionMask << shift; + //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { + registerUsage[j] = i; + } + return; + } + + if (opcode < ceil_CFROUND) { + auto src = instr.src % RegistersCount; + ibc.isrc = &nreg->r[src]; + ibc.type = InstructionType::CFROUND; + ibc.imm = instr.getImm32() & 63; + return; + } + + if (opcode < ceil_ISTORE) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISTORE; + ibc.idst = &nreg->r[dst]; + ibc.isrc = &nreg->r[src]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (instr.getModCond() < StoreL3Condition) + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + else + ibc.memMask = ScratchpadL3Mask; + return; + } + + if (opcode < ceil_NOP) { + ibc.type = InstructionType::NOP; + return; + } + + UNREACHABLE; + } +} diff --git a/src/bytecode_machine.hpp b/src/bytecode_machine.hpp new file mode 100644 index 0000000..5e82e0d --- /dev/null +++ b/src/bytecode_machine.hpp @@ -0,0 +1,322 @@ +/* +Copyright (c) 2019, tevador + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#pragma once + +#include "common.hpp" +#include "intrin_portable.h" +#include "instruction.hpp" +#include "program.hpp" + +namespace randomx { + + //register file in machine byte order + struct NativeRegisterFile { + int_reg_t r[RegistersCount] = { 0 }; + rx_vec_f128 f[RegisterCountFlt]; + rx_vec_f128 e[RegisterCountFlt]; + rx_vec_f128 a[RegisterCountFlt]; + }; + + struct InstructionByteCode { + union { + int_reg_t* idst; + rx_vec_f128* fdst; + }; + union { + const int_reg_t* isrc; + const rx_vec_f128* fsrc; + }; + union { + uint64_t imm; + int64_t simm; + }; + InstructionType type; + union { + int16_t target; + uint16_t shift; + }; + uint32_t memMask; + }; + +#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr; + constexpr int ceil_NULL = 0; + OPCODE_CEIL_DECLARE(IADD_RS, NULL); + OPCODE_CEIL_DECLARE(IADD_M, IADD_RS); + OPCODE_CEIL_DECLARE(ISUB_R, IADD_M); + OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R); + OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M); + OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R); + OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M); + OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R); + OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M); + OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R); + OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M); + OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP); + OPCODE_CEIL_DECLARE(IXOR_R, INEG_R); + OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R); + OPCODE_CEIL_DECLARE(IROR_R, IXOR_M); + OPCODE_CEIL_DECLARE(IROL_R, IROR_R); + OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R); + OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R); + OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R); + OPCODE_CEIL_DECLARE(FADD_M, FADD_R); + OPCODE_CEIL_DECLARE(FSUB_R, FADD_M); + OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R); + OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M); + OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R); + OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R); + OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M); + OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R); + OPCODE_CEIL_DECLARE(CFROUND, CBRANCH); + OPCODE_CEIL_DECLARE(ISTORE, CFROUND); + 
OPCODE_CEIL_DECLARE(NOP, ISTORE); +#undef OPCODE_CEIL_DECLARE + +#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config +#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc + + class BytecodeMachine; + + typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS); + + class BytecodeMachine { /* translates a Program into InstructionByteCode whose operands point into a NativeRegisterFile, and executes that bytecode */ + public: + void beginCompilation(NativeRegisterFile& regFile) { /* resets registerUsage to -1 for every integer register and stores the address of regFile in nreg */ + for (unsigned i = 0; i < RegistersCount; ++i) { + registerUsage[i] = -1; + } + nreg = &regFile; /* compileInstruction takes operand addresses through nreg, so regFile must outlive the compiled bytecode */ + } + + void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) { /* calls beginCompilation(regFile), then compileInstruction for each of the RANDOMX_PROGRAM_SIZE instructions in order */ + beginCompilation(regFile); + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { + auto& instr = program(i); + auto& ibc = bytecode[i]; + compileInstruction(instr, i, ibc); + } + } + + static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) { /* pc is handed to executeInstruction by reference, so exe_CBRANCH can overwrite it to jump */ + for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) { + auto& ibc = bytecode[pc]; + executeInstruction(ibc, pc, scratchpad, config); + } + } + + void compileInstruction(RANDOMX_GEN_ARGS) +#ifdef RANDOMX_GEN_TABLE + { + auto generator = genTable[instr.opcode]; + (this->*generator)(instr, i, ibc); + } +#else + ; +#endif + + static void executeInstruction(RANDOMX_EXE_ARGS); + + static void exe_IADD_RS(RANDOMX_EXE_ARGS) { + *ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm; + } + + static void exe_IADD_M(RANDOMX_EXE_ARGS) { + *ibc.idst += load64(getScratchpadAddress(ibc, scratchpad)); + } + + static void exe_ISUB_R(RANDOMX_EXE_ARGS) { + *ibc.idst -= *ibc.isrc; + } + + static void exe_ISUB_M(RANDOMX_EXE_ARGS) { + *ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad)); + } + + static void exe_IMUL_R(RANDOMX_EXE_ARGS) { + *ibc.idst *= *ibc.isrc; + } + + static void exe_IMUL_M(RANDOMX_EXE_ARGS) { + *ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad)); + } + + static void
exe_IMULH_R(RANDOMX_EXE_ARGS) { + *ibc.idst = mulh(*ibc.idst, *ibc.isrc); + } + + static void exe_IMULH_M(RANDOMX_EXE_ARGS) { + *ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad))); + } + + static void exe_ISMULH_R(RANDOMX_EXE_ARGS) { + *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc)); + } + + static void exe_ISMULH_M(RANDOMX_EXE_ARGS) { + *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad)))); + } + + static void exe_INEG_R(RANDOMX_EXE_ARGS) { + *ibc.idst = ~(*ibc.idst) + 1; //two's complement negative + } + + static void exe_IXOR_R(RANDOMX_EXE_ARGS) { + *ibc.idst ^= *ibc.isrc; + } + + static void exe_IXOR_M(RANDOMX_EXE_ARGS) { + *ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad)); + } + + static void exe_IROR_R(RANDOMX_EXE_ARGS) { + *ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63); + } + + static void exe_IROL_R(RANDOMX_EXE_ARGS) { + *ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63); + } + + static void exe_ISWAP_R(RANDOMX_EXE_ARGS) { + int_reg_t temp = *ibc.isrc; + *(int_reg_t*)ibc.isrc = *ibc.idst; + *ibc.idst = temp; + } + + static void exe_FSWAP_R(RANDOMX_EXE_ARGS) { + *ibc.fdst = rx_swap_vec_f128(*ibc.fdst); + } + + static void exe_FADD_R(RANDOMX_EXE_ARGS) { + *ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc); + } + + static void exe_FADD_M(RANDOMX_EXE_ARGS) { + rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)); + *ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc); + } + + static void exe_FSUB_R(RANDOMX_EXE_ARGS) { + *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc); + } + + static void exe_FSUB_M(RANDOMX_EXE_ARGS) { + rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)); + *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc); + } + + static void exe_FSCAL_R(RANDOMX_EXE_ARGS) { + const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000); + *ibc.fdst = 
rx_xor_vec_f128(*ibc.fdst, mask); + } + + static void exe_FMUL_R(RANDOMX_EXE_ARGS) { + *ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc); + } + + static void exe_FDIV_M(RANDOMX_EXE_ARGS) { + rx_vec_f128 fsrc = maskRegisterExponentMantissa( + config, + rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)) + ); + *ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc); + } + + static void exe_FSQRT_R(RANDOMX_EXE_ARGS) { + *ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst); + } + + static void exe_CBRANCH(RANDOMX_EXE_ARGS) { + *ibc.idst += ibc.imm; + if ((*ibc.idst & ibc.memMask) == 0) { + pc = ibc.target; + } + } + + static void exe_CFROUND(RANDOMX_EXE_ARGS) { + rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4); + } + + static void exe_ISTORE(RANDOMX_EXE_ARGS) { + store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); + } + protected: + static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) { + const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask); + const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask); + x = rx_and_vec_f128(x, xmantissaMask); + x = rx_or_vec_f128(x, xexponentMask); + return x; + } + + private: + static const int_reg_t zero; + int registerUsage[RegistersCount]; + NativeRegisterFile* nreg; + + static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) { + uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; + return scratchpad + addr; + } + +#ifdef RANDOMX_GEN_TABLE + static InstructionGenBytecode genTable[256]; + + void gen_IADD_RS(RANDOMX_GEN_ARGS); + void gen_IADD_M(RANDOMX_GEN_ARGS); + void gen_ISUB_R(RANDOMX_GEN_ARGS); + void gen_ISUB_M(RANDOMX_GEN_ARGS); + void gen_IMUL_R(RANDOMX_GEN_ARGS); + void gen_IMUL_M(RANDOMX_GEN_ARGS); + void gen_IMULH_R(RANDOMX_GEN_ARGS); + void gen_IMULH_M(RANDOMX_GEN_ARGS); + void gen_ISMULH_R(RANDOMX_GEN_ARGS); + void gen_ISMULH_M(RANDOMX_GEN_ARGS); + void 
gen_IMUL_RCP(RANDOMX_GEN_ARGS); + void gen_INEG_R(RANDOMX_GEN_ARGS); + void gen_IXOR_R(RANDOMX_GEN_ARGS); + void gen_IXOR_M(RANDOMX_GEN_ARGS); + void gen_IROR_R(RANDOMX_GEN_ARGS); + void gen_IROL_R(RANDOMX_GEN_ARGS); + void gen_ISWAP_R(RANDOMX_GEN_ARGS); + void gen_FSWAP_R(RANDOMX_GEN_ARGS); + void gen_FADD_R(RANDOMX_GEN_ARGS); + void gen_FADD_M(RANDOMX_GEN_ARGS); + void gen_FSUB_R(RANDOMX_GEN_ARGS); + void gen_FSUB_M(RANDOMX_GEN_ARGS); + void gen_FSCAL_R(RANDOMX_GEN_ARGS); + void gen_FMUL_R(RANDOMX_GEN_ARGS); + void gen_FDIV_M(RANDOMX_GEN_ARGS); + void gen_FSQRT_R(RANDOMX_GEN_ARGS); + void gen_CBRANCH(RANDOMX_GEN_ARGS); + void gen_CFROUND(RANDOMX_GEN_ARGS); + void gen_ISTORE(RANDOMX_GEN_ARGS); + void gen_NOP(RANDOMX_GEN_ARGS); +#endif + }; +} diff --git a/src/common.hpp b/src/common.hpp index e6682ff..960a99b 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -108,12 +108,15 @@ namespace randomx { #endif #if defined(_M_X64) || defined(__x86_64__) + #define RANDOMX_HAVE_COMPILER 1 class JitCompilerX86; using JitCompiler = JitCompilerX86; #elif defined(__aarch64__) + #define RANDOMX_HAVE_COMPILER 0 class JitCompilerA64; using JitCompiler = JitCompilerA64; #else + #define RANDOMX_HAVE_COMPILER 0 class JitCompilerFallback; using JitCompiler = JitCompilerFallback; #endif @@ -160,14 +163,14 @@ namespace randomx { uint8_t* memory = nullptr; }; + //register file in little-endian byte order struct RegisterFile { int_reg_t r[RegistersCount]; - fpu_reg_t f[RegistersCount / 2]; - fpu_reg_t e[RegistersCount / 2]; - fpu_reg_t a[RegistersCount / 2]; + fpu_reg_t f[RegisterCountFlt]; + fpu_reg_t e[RegisterCountFlt]; + fpu_reg_t a[RegisterCountFlt]; }; - typedef void(DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(&reg)[RegistersCount]); typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); diff --git
a/src/instruction_weights.hpp b/src/instruction_weights.hpp index baafe82..f6c8873 100644 --- a/src/instruction_weights.hpp +++ b/src/instruction_weights.hpp @@ -71,43 +71,3 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define REPN(x,N) REPNX(x,N) #define NUM(x) x #define WT(x) NUM(RANDOMX_FREQ_##x) - -#define REPCASE0(x) -#define REPCASE1(x) case __COUNTER__: -#define REPCASE2(x) REPCASE1(x) case __COUNTER__: -#define REPCASE3(x) REPCASE2(x) case __COUNTER__: -#define REPCASE4(x) REPCASE3(x) case __COUNTER__: -#define REPCASE5(x) REPCASE4(x) case __COUNTER__: -#define REPCASE6(x) REPCASE5(x) case __COUNTER__: -#define REPCASE7(x) REPCASE6(x) case __COUNTER__: -#define REPCASE8(x) REPCASE7(x) case __COUNTER__: -#define REPCASE9(x) REPCASE8(x) case __COUNTER__: -#define REPCASE10(x) REPCASE9(x) case __COUNTER__: -#define REPCASE11(x) REPCASE10(x) case __COUNTER__: -#define REPCASE12(x) REPCASE11(x) case __COUNTER__: -#define REPCASE13(x) REPCASE12(x) case __COUNTER__: -#define REPCASE14(x) REPCASE13(x) case __COUNTER__: -#define REPCASE15(x) REPCASE14(x) case __COUNTER__: -#define REPCASE16(x) REPCASE15(x) case __COUNTER__: -#define REPCASE17(x) REPCASE16(x) case __COUNTER__: -#define REPCASE18(x) REPCASE17(x) case __COUNTER__: -#define REPCASE19(x) REPCASE18(x) case __COUNTER__: -#define REPCASE20(x) REPCASE19(x) case __COUNTER__: -#define REPCASE21(x) REPCASE20(x) case __COUNTER__: -#define REPCASE22(x) REPCASE21(x) case __COUNTER__: -#define REPCASE23(x) REPCASE22(x) case __COUNTER__: -#define REPCASE24(x) REPCASE23(x) case __COUNTER__: -#define REPCASE25(x) REPCASE24(x) case __COUNTER__: -#define REPCASE26(x) REPCASE25(x) case __COUNTER__: -#define REPCASE27(x) REPCASE26(x) case __COUNTER__: -#define REPCASE28(x) REPCASE27(x) case __COUNTER__: -#define REPCASE29(x) REPCASE28(x) case __COUNTER__: -#define REPCASE30(x) REPCASE29(x) case __COUNTER__: -#define REPCASE31(x) REPCASE30(x) case __COUNTER__: -#define REPCASE32(x) REPCASE31(x) 
case __COUNTER__: -#define REPCASE64(x) REPCASE32(x) REPCASE32(x) -#define REPCASE128(x) REPCASE64(x) REPCASE64(x) -#define REPCASE256(x) REPCASE128(x) REPCASE128(x) -#define REPCASENX(x,N) REPCASE##N(x) -#define REPCASEN(x,N) REPCASENX(x,N) -#define CASE_REP(x) REPCASEN(x, WT(x)) diff --git a/src/instructions_portable.cpp b/src/instructions_portable.cpp index 7d09690..8e9abaa 100644 --- a/src/instructions_portable.cpp +++ b/src/instructions_portable.cpp @@ -26,7 +26,6 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#pragma STDC FENV_ACCESS ON #include #include #include "common.hpp" diff --git a/src/reciprocal.c b/src/reciprocal.c index 3c46759..5552821 100644 --- a/src/reciprocal.c +++ b/src/reciprocal.c @@ -67,3 +67,11 @@ uint64_t randomx_reciprocal(uint64_t divisor) { return quotient; } + +#if !RANDOMX_HAVE_FAST_RECIPROCAL + +uint64_t randomx_reciprocal_fast(uint64_t divisor) { + return randomx_reciprocal(divisor); +} + +#endif diff --git a/src/reciprocal.h b/src/reciprocal.h index 69d2592..8858df2 100644 --- a/src/reciprocal.h +++ b/src/reciprocal.h @@ -30,6 +30,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#if defined(_M_X64) || defined(__x86_64__) +#define RANDOMX_HAVE_FAST_RECIPROCAL 1 +#else +#define RANDOMX_HAVE_FAST_RECIPROCAL 0 +#endif + #if defined(__cplusplus) extern "C" { #endif diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp index 145006a..bff586c 100644 --- a/src/superscalar_program.hpp +++ b/src/superscalar_program.hpp @@ -52,7 +52,7 @@ namespace randomx { int getAddressRegister() { return addrReg; } - void setAddressRegister(uint32_t val) { + void setAddressRegister(int val) { addrReg = val; } diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp new file mode 100644 index 0000000..6584217 --- /dev/null +++ b/src/tests/tests.cpp @@ -0,0 +1,1024 @@ +#ifdef NDEBUG +#undef NDEBUG +#endif + +#include +#include +#include "utility.hpp" +#include "../bytecode_machine.hpp" +#include "../dataset.hpp" +#include "../blake2/endian.h" +#include "../blake2/blake2.h" +#include "../blake2_generator.hpp" +#include "../superscalar.hpp" +#include "../reciprocal.h" +#include "../intrin_portable.h" +#include "../jit_compiler.hpp" + +struct CacheKey { + void* key; + size_t size = 0; +}; + +randomx_cache* cache; +randomx_vm* vm = nullptr; +CacheKey currentKey; + +template +void initCache(const char (&key)[N]) { + assert(cache != nullptr); + if (N - 1 == currentKey.size && memcmp(currentKey.key, key, N - 1) == 0) + return; + //std::cout << "randomx_init_cache with key "; + //outputHex(std::cout, key, N - 1); + //std::cout << std::endl; + randomx_init_cache(cache, key, N - 1); + currentKey.key = (void*)key; + currentKey.size = N - 1; + if (vm != nullptr) + randomx_vm_set_cache(vm, cache); +} + +template +void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) { + initCache(key); + assert(vm != nullptr); + randomx_calculate_hash(vm, input, H - 1, output); +} + +template +void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) { + initCache(key); + assert(vm != nullptr); + char input[H / 2]; + 
hex2bin((char*)hex, H - 1, input); + randomx_calculate_hash(vm, input, sizeof(input), output); +} + +int testNo = 0; +int skipped = 0; + +template +void runTest(const char* name, bool condition, FUNC f) { + std::cout << "["; + std::cout.width(2); + std::cout << std::right << ++testNo << "] "; + std::cout.width(40); + std::cout << std::left << name << " ... "; + std::cout.flush(); + if (condition) { + f(); + std::cout << "PASSED" << std::endl; + } + else { + std::cout << "SKIPPED" << std::endl; + skipped++; + } +} + +int main() { + char testHash[32]; + + //std::cout << "Allocating randomx_cache..." << std::endl; + cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); + + runTest("Cache initialization", RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { + initCache("test key 000"); + uint64_t* cacheMemory = (uint64_t*)cache->memory; + assert(cacheMemory[0] == 0x191e0e1d23c02186); + assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1); + assert(cacheMemory[33554431] == 0x1f47f056d05cd99b); + }); + + runTest("SuperscalarHash generator", RANDOMX_SUPERSCALAR_LATENCY == 170, []() { + char sprogHash[32]; + randomx::SuperscalarProgram sprog; + const char key[] = "test key 000"; + constexpr size_t keySize = sizeof(key) - 1; + randomx::Blake2Generator gen(key, keySize); + + const char superscalarReferences[10][65] = { + "d3a4a6623738756f77e6104469102f082eff2a3e60be7ad696285ef7dfc72a61", + "f5e7e0bbc7e93c609003d6359208688070afb4a77165a552ff7be63b38dfbc86", + "85ed8b11734de5b3e9836641413a8f36e99e89694f419c8cd25c3f3f16c40c5a", + "5dd956292cf5d5704ad99e362d70098b2777b2a1730520be52f772ca48cd3bc0", + "6f14018ca7d519e9b48d91af094c0f2d7e12e93af0228782671a8640092af9e5", + "134be097c92e2c45a92f23208cacd89e4ce51f1009a0b900dbe83b38de11d791", + "268f9392c20c6e31371a5131f82bd7713d3910075f2f0468baafaa1abd2f3187", + "c668a05fd909714ed4a91e8d96d67b17e44329e88bc71e0672b529a3fc16be47", + 
"99739351315840963011e4c5d8e90ad0bfed3facdcb713fe8f7138fbf01c4c94", + "14ab53d61880471f66e80183968d97effd5492b406876060e595fcf9682f9295", + }; + + for (int i = 0; i < 10; ++i) { + randomx::generateSuperscalar(sprog, gen); + blake2b(sprogHash, sizeof(sprogHash), &sprog.programBuffer, sizeof(randomx::Instruction) * sprog.getSize(), nullptr, 0); + assert(equalsHex(sprogHash, superscalarReferences[i])); + } + }); + + runTest("Dataset initialization (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { + initCache("test key 000"); + uint64_t datasetItem[8]; + randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 0); + assert(datasetItem[0] == 0x680588a85ae222db); + randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 10000000); + assert(datasetItem[0] == 0x7943a1f6186ffb72); + randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 20000000); + assert(datasetItem[0] == 0x9035244d718095e1); + randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 30000000); + assert(datasetItem[0] == 0x145a5091f7853099); + }); + + runTest("Dataset initialization (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { + initCache("test key 000"); + randomx::JitCompiler jit; + jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache); + jit.generateDatasetInitCode(); + uint64_t datasetItem[8]; + jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 0, 1); + assert(datasetItem[0] == 0x680588a85ae222db); + jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 10000000, 10000001); + assert(datasetItem[0] == 0x7943a1f6186ffb72); + jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 20000000, 20000001); + assert(datasetItem[0] == 0x9035244d718095e1); + jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 30000000, 30000001); + assert(datasetItem[0] == 0x145a5091f7853099); + }); + + runTest("randomx_reciprocal", true, []() { + assert(randomx_reciprocal(3) == 12297829382473034410U); + assert(randomx_reciprocal(13) 
== 11351842506898185609U); + assert(randomx_reciprocal(33) == 17887751829051686415U); + assert(randomx_reciprocal(65537) == 18446462603027742720U); + assert(randomx_reciprocal(15000001) == 10316166306300415204U); + assert(randomx_reciprocal(3845182035) == 10302264209224146340U); + assert(randomx_reciprocal(0xffffffff) == 9223372039002259456U); + }); + + runTest("randomx_reciprocal_fast", RANDOMX_HAVE_FAST_RECIPROCAL, []() { + assert(randomx_reciprocal_fast(3) == 12297829382473034410U); + assert(randomx_reciprocal_fast(13) == 11351842506898185609U); + assert(randomx_reciprocal_fast(33) == 17887751829051686415U); + assert(randomx_reciprocal_fast(65537) == 18446462603027742720U); + assert(randomx_reciprocal_fast(15000001) == 10316166306300415204U); + assert(randomx_reciprocal_fast(3845182035) == 10302264209224146340U); + assert(randomx_reciprocal_fast(0xffffffff) == 9223372039002259456U); + }); + + randomx::NativeRegisterFile reg; + randomx::BytecodeMachine decoder; + randomx::InstructionByteCode ibc; + alignas(16) randomx::ProgramConfiguration config; + constexpr int registerHigh = 192; + constexpr int registerDst = 0; + constexpr int registerSrc = 1; + int pc = 0; + constexpr uint32_t imm32 = 3234567890; + constexpr uint64_t imm64 = signExtend2sCompl(imm32); + + decoder.beginCompilation(reg); + + runTest("IADD_RS (decode)", RANDOMX_FREQ_IADD_RS > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IADD_RS - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.mod = UINT8_MAX; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IADD_RS); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.shift == 3); + assert(ibc.imm == 0); + }); + + runTest("IADD_RS (execute)", RANDOMX_FREQ_IADD_RS > 0, [&] { + reg.r[registerDst] = 0x8000000000000000; + reg.r[registerSrc] = 0x1000000000000000; + 
decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0); + }); + + runTest("IADD_RS with immediate (decode)", RANDOMX_FREQ_IADD_RS > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IADD_RS - 1; + instr.mod = 8; + instr.dst = registerHigh | randomx::RegisterNeedsDisplacement; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IADD_RS); + assert(ibc.idst == &reg.r[randomx::RegisterNeedsDisplacement]); + assert(ibc.isrc == &reg.r[registerSrc]); + assert(ibc.shift == 2); + assert(ibc.imm == imm64); + }); + + runTest("IADD_RS with immediate (execute)", RANDOMX_FREQ_IADD_RS > 0, [&] { + reg.r[randomx::RegisterNeedsDisplacement] = 0x8000000000000000; + reg.r[registerSrc] = 0x2000000000000000; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[randomx::RegisterNeedsDisplacement] == imm64); + }); + + runTest("IADD_M (decode)", RANDOMX_FREQ_IADD_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IADD_M - 1; + instr.mod = 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IADD_M); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL1Mask); + }); + + runTest("ISUB_R (decode)", RANDOMX_FREQ_ISUB_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISUB_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISUB_R); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + }); + + runTest("ISUB_R (execute)", RANDOMX_FREQ_ISUB_R > 0, [&] {
+ reg.r[registerDst] = 1; + reg.r[registerSrc] = 0xFFFFFFFF; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0xFFFFFFFF00000002); + }); + + runTest("ISUB_R with immediate (decode)", RANDOMX_FREQ_ISUB_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISUB_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISUB_R); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &ibc.imm); + }); + + runTest("ISUB_R with immediate (execute)", RANDOMX_FREQ_ISUB_R > 0, [&] { + reg.r[registerDst] = 0; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == (~imm64 + 1)); + }); + + runTest("ISUB_M (decode)", RANDOMX_FREQ_ISUB_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISUB_M - 1; + instr.mod = 0; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISUB_M); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL2Mask); + }); + + runTest("IMUL_R (decode)", RANDOMX_FREQ_IMUL_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMUL_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMUL_R); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + }); + + runTest("IMUL_R (execute)", RANDOMX_FREQ_IMUL_R > 0, [&] { + reg.r[registerDst] = 0xBC550E96BA88A72B; + reg.r[registerSrc] = 0xF5391FA9F18D6273; + decoder.executeInstruction(ibc, pc, nullptr, config); +
assert(reg.r[registerDst] == 0x28723424A9108E51); + }); + + runTest("IMUL_R with immediate (decode)", RANDOMX_FREQ_IMUL_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMUL_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMUL_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == &ibc.imm); + }); + + runTest("IMUL_R with immediate (execute)", RANDOMX_FREQ_IMUL_R > 0, [&] { + reg.r[registerDst] = 1; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == imm64); + }); + + runTest("IMUL_M (decode)", RANDOMX_FREQ_IMUL_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMUL_M - 1; + instr.mod = 0; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMUL_M); + assert(ibc.idst == ®.r[registerDst]); + assert(*ibc.isrc == 0); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL3Mask); + }); + + runTest("IMULH_R (decode)", RANDOMX_FREQ_IMULH_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMULH_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMULH_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + }); + + runTest("IMULH_R (execute)", RANDOMX_FREQ_IMULH_R > 0, [&] { + reg.r[registerDst] = 0xBC550E96BA88A72B; + reg.r[registerSrc] = 0xF5391FA9F18D6273; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0xB4676D31D2B34883); + }); + + runTest("IMULH_R squared (decode)", RANDOMX_FREQ_IMULH_R > 0, [&] { + 
randomx::Instruction instr; + instr.opcode = randomx::ceil_IMULH_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMULH_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerDst]); + }); + + runTest("IMULH_M (decode)", RANDOMX_FREQ_IMULH_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMULH_M - 1; + instr.mod = 0; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMULH_M); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL2Mask); + }); + + runTest("ISMULH_R (decode)", RANDOMX_FREQ_ISMULH_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISMULH_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISMULH_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + }); + + runTest("ISMULH_R (execute)", RANDOMX_FREQ_ISMULH_R > 0, [&] { + reg.r[registerDst] = 0xBC550E96BA88A72B; + reg.r[registerSrc] = 0xF5391FA9F18D6273; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0x02D93EF1269D3EE5); + }); + + runTest("ISMULH_R squared (decode)", RANDOMX_FREQ_ISMULH_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISMULH_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISMULH_R); + assert(ibc.idst == 
®.r[registerDst]); + assert(ibc.isrc == ®.r[registerDst]); + }); + + runTest("ISMULH_M (decode)", RANDOMX_FREQ_ISMULH_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISMULH_M - 1; + instr.mod = 3; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISMULH_M); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL1Mask); + }); + + runTest("IMUL_RCP (decode)", RANDOMX_FREQ_IMUL_RCP > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMUL_RCP - 1; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IMUL_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == &ibc.imm); + assert(ibc.imm == randomx_reciprocal(imm32)); + }); + + runTest("IMUL_RCP zero imm32 (decode)", RANDOMX_FREQ_IMUL_RCP > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IMUL_RCP - 1; + instr.setImm32(0); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::NOP); + }); + + runTest("INEG_R (decode)", RANDOMX_FREQ_INEG_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_INEG_R - 1; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::INEG_R); + assert(ibc.idst == ®.r[registerDst]); + }); + + runTest("INEG_R (execute)", RANDOMX_FREQ_INEG_R > 0, [&] { + reg.r[registerDst] = 0xFFFFFFFFFFFFFFFF; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 1); + }); + + runTest("IXOR_R (decode)", RANDOMX_FREQ_IXOR_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IXOR_R 
- 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IXOR_R); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + }); + + runTest("IXOR_R (execute)", RANDOMX_FREQ_IXOR_R > 0, [&] { + reg.r[registerDst] = 0x8888888888888888; + reg.r[registerSrc] = 0xAAAAAAAAAAAAAAAA; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0x2222222222222222); + }); + + runTest("IXOR_R with immediate (decode)", RANDOMX_FREQ_IXOR_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IXOR_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IXOR_R); + assert(ibc.idst == &reg.r[registerDst]); + assert(ibc.isrc == &ibc.imm); + }); + + runTest("IXOR_R with immediate (execute)", RANDOMX_FREQ_IXOR_R > 0, [&] { + reg.r[registerDst] = 0xFFFFFFFFFFFFFFFF; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == ~imm64); + }); + + runTest("IXOR_M (decode)", RANDOMX_FREQ_IXOR_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IXOR_M - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IXOR_M); + assert(ibc.idst == &reg.r[registerDst]); + assert(*ibc.isrc == 0); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL3Mask); + }); + + runTest("IROR_R (decode)", RANDOMX_FREQ_IROR_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IROR_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); +
decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IROR_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + }); + + runTest("IROR_R (execute)", RANDOMX_FREQ_IROR_R > 0, [&] { + reg.r[registerDst] = 953360005391419562; + reg.r[registerSrc] = 4569451684712230561; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 0xD835C455069D81EF); + }); + + runTest("IROL_R (decode)", RANDOMX_FREQ_IROL_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_IROL_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::IROL_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + }); + + runTest("IROL_R (execute)", RANDOMX_FREQ_IROL_R > 0, [&] { + reg.r[registerDst] = 953360005391419562; + reg.r[registerSrc] = 4569451684712230561; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 6978065200552740799); + }); + + runTest("ISWAP_R (decode)", RANDOMX_FREQ_ISWAP_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISWAP_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISWAP_R); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + }); + + runTest("ISWAP_R (execute)", RANDOMX_FREQ_ISWAP_R > 0, [&] { + reg.r[registerDst] = 953360005391419562; + reg.r[registerSrc] = 4569451684712230561; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(reg.r[registerDst] == 4569451684712230561); + assert(reg.r[registerSrc] == 953360005391419562); + }); + + runTest("FSWAP_R (decode)", RANDOMX_FREQ_FSWAP_R > 0, [&] { + randomx::Instruction instr; + 
instr.opcode = randomx::ceil_FSWAP_R - 1; + instr.dst = registerHigh | registerDst; + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FSWAP_R); + assert(ibc.fdst == ®.f[registerDst]); + }); + + runTest("FSWAP_R (execute)", RANDOMX_FREQ_FSWAP_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(953360005391419562, 4569451684712230561); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex((const char*)&vec, "aa886bb0df033b0da12e95e518f4693f")); + }); + + runTest("FADD_R (decode)", RANDOMX_FREQ_FADD_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FADD_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FADD_R); + assert(ibc.fdst == ®.f[registerDst]); + assert(ibc.fsrc == ®.a[registerSrc]); + }); + + runTest("FADD_R RoundToNearest (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); + reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); + rx_set_rounding_mode(RoundToNearest); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex(&vec, "b932e048a730cec1fea6ea633bcc2d40")); + }); + + runTest("FADD_R RoundDown (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); + reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); + rx_set_rounding_mode(RoundDown); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex(&vec, "b932e048a730cec1fda6ea633bcc2d40")); 
+ }); + + runTest("FADD_R RoundUp (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); + reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); + rx_set_rounding_mode(RoundUp); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex(&vec, "b832e048a730cec1fea6ea633bcc2d40")); + }); + + runTest("FADD_R RoundToZero (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); + reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); + rx_set_rounding_mode(RoundToZero); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex(&vec, "b832e048a730cec1fda6ea633bcc2d40")); + }); + + runTest("FADD_M (decode)", RANDOMX_FREQ_FADD_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FADD_M - 1; + instr.mod = 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FADD_M); + assert(ibc.fdst == &reg.f[registerDst]); + assert(ibc.isrc == &reg.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL1Mask); + }); + + runTest("FADD_M (execute)", RANDOMX_FREQ_FADD_M > 0, [&] { + uint64_t mockScratchpad; + store64(&mockScratchpad, 0x1234567890abcdef); + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0, 0); + reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; + rx_set_rounding_mode(RoundToNearest); + decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex(&vec, "000040840cd5dbc1000000785634b241")); + }); + +
runTest("FSUB_R (decode)", RANDOMX_FREQ_FSUB_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FSUB_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FSUB_R); + assert(ibc.fdst == ®.f[registerDst]); + assert(ibc.fsrc == ®.a[registerSrc]); + }); + + runTest("FSUB_M (decode)", RANDOMX_FREQ_FSUB_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FSUB_M - 1; + instr.mod = 2; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FSUB_M); + assert(ibc.fdst == ®.f[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL1Mask); + }); + + runTest("FSCAL_R (decode)", RANDOMX_FREQ_FSCAL_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FSCAL_R - 1; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FSCAL_R); + assert(ibc.fdst == ®.f[registerDst]); + }); + + runTest("FSCAL_R (execute)", RANDOMX_FREQ_FSCAL_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.f[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.f[registerDst]); + assert(equalsHex((const char*)&vec, "073d17b6abfd0dc0838724ef5cc32bc1")); + }); + + runTest("FMUL_R (decode)", RANDOMX_FREQ_FMUL_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FMUL_R - 1; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == 
randomx::InstructionType::FMUL_R); + assert(ibc.fdst == ®.e[registerDst]); + assert(ibc.fsrc == ®.a[registerSrc]); + }); + + runTest("FMUL_R RoundToNearest (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); + reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); + rx_set_rounding_mode(RoundToNearest); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "69697aff350fd3422f1589cdecfed742")); + }); + + runTest("FMUL_R RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); + reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); + rx_set_rounding_mode(RoundDown); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "69697aff350fd3422e1589cdecfed742")); + }); + + runTest("FMUL_R RoundUp (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); + reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); + rx_set_rounding_mode(RoundUp); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "6a697aff350fd3422f1589cdecfed742")); + }); + + runTest("FDIV_M (decode)", RANDOMX_FREQ_FDIV_M > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FDIV_M - 1; + instr.mod = 3; + instr.dst = registerHigh | registerDst; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FDIV_M); + assert(ibc.fdst == ®.e[registerDst]); + assert(ibc.isrc 
== ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL1Mask); + }); + + runTest("FDIV_M RoundToNearest (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { + alignas(16) uint64_t vec[2]; + alignas(16) uint32_t mockScratchpad[2]; + store32(&mockScratchpad[0], 0xd350a1b6); + store32(&mockScratchpad[1], 0x8b2460d9); + store64(&config.eMask[0], 0x3a0000000005d11a); + store64(&config.eMask[1], 0x39000000001ba31e); + reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 0x411b414296ce93b6); + reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; + rx_set_rounding_mode(RoundToNearest); + decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e7b269639484434632474a66635ba547")); + }); + + runTest("FDIV_M RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { + alignas(16) uint64_t vec[2]; + alignas(16) uint32_t mockScratchpad[2]; + store32(&mockScratchpad[0], 0xd350a1b6); + store32(&mockScratchpad[1], 0x8b2460d9); + store64(&config.eMask[0], 0x3a0000000005d11a); + store64(&config.eMask[1], 0x39000000001ba31e); + reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 0x411b414296ce93b6); + reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; + rx_set_rounding_mode(RoundDown); + decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e6b269639484434632474a66635ba547")); + }); + + runTest("FDIV_M RoundUp (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { + alignas(16) uint64_t vec[2]; + alignas(16) uint32_t mockScratchpad[2]; + store32(&mockScratchpad[0], 0xd350a1b6); + store32(&mockScratchpad[1], 0x8b2460d9); + store64(&config.eMask[0], 0x3a0000000005d11a); + store64(&config.eMask[1], 0x39000000001ba31e); + reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 0x411b414296ce93b6); + reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; + rx_set_rounding_mode(RoundUp); + 
decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e7b269639484434633474a66635ba547")); + }); + + runTest("FSQRT_R (decode)", RANDOMX_FREQ_FSQRT_R > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_FSQRT_R - 1; + instr.dst = registerHigh | registerDst; + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::FSQRT_R); + assert(ibc.fdst == ®.e[registerDst]); + }); + + runTest("FSQRT_R RoundToNearest (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); + rx_set_rounding_mode(RoundToNearest); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e81f300b612a21408dbaa33f570ed340")); + }); + + runTest("FSQRT_R RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); + rx_set_rounding_mode(RoundDown); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e81f300b612a21408cbaa33f570ed340")); + }); + + runTest("FSQRT_R RoundUp (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { + alignas(16) uint64_t vec[2]; + reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); + rx_set_rounding_mode(RoundUp); + decoder.executeInstruction(ibc, pc, nullptr, config); + rx_store_vec_f128((double*)&vec, reg.e[registerDst]); + assert(equalsHex(&vec, "e91f300b612a21408dbaa33f570ed340")); + }); + + runTest("CBRANCH (decode) 100", RANDOMX_FREQ_CBRANCH > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_CBRANCH - 1; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + instr.mod = 48; + 
decoder.compileInstruction(instr, 100, ibc); + assert(ibc.type == randomx::InstructionType::CBRANCH); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.imm == 0xFFFFFFFFC0CB9AD2); + assert(ibc.memMask == 0x7F800); + assert(ibc.target == pc); + }); + + runTest("CBRANCH (decode) 200", RANDOMX_FREQ_CBRANCH > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_CBRANCH - 1; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + instr.mod = 48; + decoder.compileInstruction(instr, pc = 200, ibc); + assert(ibc.type == randomx::InstructionType::CBRANCH); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.imm == 0xFFFFFFFFC0CB9AD2); + assert(ibc.memMask == 0x7F800); + assert(ibc.target == 100); + }); + + runTest("CBRANCH not taken (execute)", RANDOMX_FREQ_CBRANCH > 0, [&] { + reg.r[registerDst] = 0; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(pc == 200); + }); + + runTest("CBRANCH taken (execute)", RANDOMX_FREQ_CBRANCH > 0, [&] { + reg.r[registerDst] = 0xFFFFFFFFFFFC6800; + decoder.executeInstruction(ibc, pc, nullptr, config); + assert(pc == ibc.target); + }); + + runTest("CFROUND (decode)", RANDOMX_FREQ_CFROUND > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_CFROUND - 1; + instr.src = registerHigh | registerSrc; + instr.setImm32(imm32); + decoder.compileInstruction(instr, 100, ibc); + assert(ibc.type == randomx::InstructionType::CFROUND); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == 18); + }); + + runTest("ISTORE L1 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISTORE - 1; + instr.src = registerHigh | registerSrc; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + instr.mod = 1; + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISTORE); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask 
== randomx::ScratchpadL1Mask); + }); + + runTest("ISTORE L2 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISTORE - 1; + instr.src = registerHigh | registerSrc; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + instr.mod = 0; + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISTORE); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL2Mask); + }); + + runTest("ISTORE L3 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { + randomx::Instruction instr; + instr.opcode = randomx::ceil_ISTORE - 1; + instr.src = registerHigh | registerSrc; + instr.dst = registerHigh | registerDst; + instr.setImm32(imm32); + instr.mod = 224; + decoder.compileInstruction(instr, pc, ibc); + assert(ibc.type == randomx::InstructionType::ISTORE); + assert(ibc.idst == ®.r[registerDst]); + assert(ibc.isrc == ®.r[registerSrc]); + assert(ibc.imm == imm64); + assert(ibc.memMask == randomx::ScratchpadL3Mask); + }); + + vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr); + + auto test_a = [&] { + char hash[RANDOMX_HASH_SIZE]; + calcStringHash("test key 000", "This is a test", &hash); + assert(equalsHex(hash, "207d7cedf2a16590bd33d758e413ad129ce9888e05417984f46296252a7ba3d0")); + }; + + auto test_b = [&] { + char hash[RANDOMX_HASH_SIZE]; + calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash); + assert(equalsHex(hash, "76dd2da840d56d38153e0beaca33e7f862c5ead91a052380d99f3a62bf84579b")); + }; + + auto test_c = [&] { + char hash[RANDOMX_HASH_SIZE]; + calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); + assert(equalsHex(hash, "109f6a405efe09d302336dce4389127e33aa62d4c782aca7797a628e87839a61")); + }; + + auto test_d = [&] { + char hash[RANDOMX_HASH_SIZE]; + calcStringHash("test key 001", "sed do eiusmod tempor incididunt 
ut labore et dolore magna aliqua", &hash); + assert(equalsHex(hash, "3cbb82edf9541ab80233cdc47384cea719c8567a8bbaca8f3ff038488ce9c16c")); + }; + + auto test_e = [&] { + char hash[RANDOMX_HASH_SIZE]; + calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash); + + //outputHex(std::cout, (const char*)hash, sizeof(hash)); + //std::cout << std::endl; + + assert(equalsHex(hash, "e003ef128b1f96d99d4a0490e03253ef11186002a8ec018cbd4e07b8ec8c82e8")); + }; + + runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); + + runTest("Hash test 1b (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_b); + + runTest("Hash test 1c (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_c); + + runTest("Hash test 1d (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_d); + + runTest("Hash test 1e (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e); + + randomx_release_cache(cache); + cache = randomx_alloc_cache(RANDOMX_FLAG_JIT); + currentKey.size = 0; + randomx_destroy_vm(vm); + vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr); + + runTest("Hash test 2a (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); + + runTest("Hash test 2b (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_b); + + runTest("Hash test 2c (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_c); + + runTest("Hash test 2d (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_d); + + runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e); + + std::cout << std::endl << "All tests PASSED" << std::endl; + + if (skipped) { + std::cout << skipped << " 
tests were SKIPPED due to incompatible configuration (see above)" << std::endl; + } +} \ No newline at end of file diff --git a/src/tests/utility.hpp b/src/tests/utility.hpp index a036aee..d294f67 100644 --- a/src/tests/utility.hpp +++ b/src/tests/utility.hpp @@ -41,6 +41,36 @@ inline void outputHex(std::ostream& os, const char* data, int length) { } } +char parseNibble(char hex) { + hex &= ~0x20; + if (hex & 0x40) { + hex -= 'A' - 10; + } + else { + hex &= 0xf; + } + return hex; +} + +void hex2bin(char *in, int length, char *out) { + for (int i = 0; i < length; i += 2) { + char nibble1 = parseNibble(*in++); + char nibble2 = parseNibble(*in++); + *out++ = nibble1 << 4 | nibble2; + } +} + +constexpr bool stringsEqual(char const * a, char const * b) { + return *a == *b && (*a == '\0' || stringsEqual(a + 1, b + 1)); +} + +template +bool equalsHex(const void* hash, const char (&hex)[N]) { + char reference[N / 2]; + hex2bin((char*)hex, N - 1, reference); + return memcmp(hash, reference, sizeof(reference)) == 0; +} + inline void dump(const char* buffer, uint64_t count, const char* name) { std::ofstream fout(name, std::ios::out | std::ios::binary); fout.write(buffer, count); diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index dd5f217..64243c3 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -39,8 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace randomx { - static int_reg_t Zero = 0; - template void InterpretedVm::setDataset(randomx_dataset* dataset) { datasetPtr = dataset; @@ -54,223 +52,64 @@ namespace randomx { execute(); } - template - void InterpretedVm::executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) { - for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) { - executeBytecode(pc, r, f, e, a); - } - } - - template - FORCE_INLINE void* InterpretedVm::getScratchpadAddress(InstructionByteCode& ibc) { - uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; - return scratchpad + addr; - } - - template - FORCE_INLINE rx_vec_f128 InterpretedVm::maskRegisterExponentMantissa(rx_vec_f128 x) { - const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask); - const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask); - x = rx_and_vec_f128(x, xmantissaMask); - x = rx_or_vec_f128(x, xexponentMask); - return x; - } - - template - void InterpretedVm::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) { - auto& ibc = byteCode[pc]; - switch (ibc.type) - { - case InstructionType::IADD_RS: { - *ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm; - } break; - - case InstructionType::IADD_M: { - *ibc.idst += load64(getScratchpadAddress(ibc)); - } break; - - case InstructionType::ISUB_R: { - *ibc.idst -= *ibc.isrc; - } break; - - case InstructionType::ISUB_M: { - *ibc.idst -= load64(getScratchpadAddress(ibc)); - } break; - - case InstructionType::IMUL_R: { //also handles IMUL_RCP - *ibc.idst *= *ibc.isrc; - } break; - - case InstructionType::IMUL_M: { - *ibc.idst *= load64(getScratchpadAddress(ibc)); - } break; - - case InstructionType::IMULH_R: { - *ibc.idst = mulh(*ibc.idst, *ibc.isrc); - } break; - - case InstructionType::IMULH_M: { - *ibc.idst = 
mulh(*ibc.idst, load64(getScratchpadAddress(ibc))); - } break; - - case InstructionType::ISMULH_R: { - *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc)); - } break; - - case InstructionType::ISMULH_M: { - *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc)))); - } break; - - case InstructionType::INEG_R: { - *ibc.idst = ~(*ibc.idst) + 1; //two's complement negative - } break; - - case InstructionType::IXOR_R: { - *ibc.idst ^= *ibc.isrc; - } break; - - case InstructionType::IXOR_M: { - *ibc.idst ^= load64(getScratchpadAddress(ibc)); - } break; - - case InstructionType::IROR_R: { - *ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63); - } break; - - case InstructionType::IROL_R: { - *ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63); - } break; - - case InstructionType::ISWAP_R: { - int_reg_t temp = *ibc.isrc; - *ibc.isrc = *ibc.idst; - *ibc.idst = temp; - } break; - - case InstructionType::FSWAP_R: { - *ibc.fdst = rx_swap_vec_f128(*ibc.fdst); - } break; - - case InstructionType::FADD_R: { - *ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc); - } break; - - case InstructionType::FADD_M: { - rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc)); - *ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc); - } break; - - case InstructionType::FSUB_R: { - *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc); - } break; - - case InstructionType::FSUB_M: { - rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc)); - *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc); - } break; - - case InstructionType::FSCAL_R: { - const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000); - *ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask); - } break; - - case InstructionType::FMUL_R: { - *ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc); - } break; - - case InstructionType::FDIV_M: { - rx_vec_f128 fsrc = 
maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc))); - *ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc); - } break; - - case InstructionType::FSQRT_R: { - *ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst); - } break; - - case InstructionType::CBRANCH: { - *ibc.isrc += ibc.imm; - if ((*ibc.isrc & ibc.memMask) == 0) { - pc = ibc.target; - } - } break; - - case InstructionType::CFROUND: { - rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4); - } break; - - case InstructionType::ISTORE: { - store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); - } break; - - case InstructionType::NOP: { - //nothing - } break; - - default: - UNREACHABLE; - } - } - template void InterpretedVm::execute() { - int_reg_t r[RegistersCount] = { 0 }; - rx_vec_f128 f[RegisterCountFlt]; - rx_vec_f128 e[RegisterCountFlt]; - rx_vec_f128 a[RegisterCountFlt]; + + NativeRegisterFile nreg; for(unsigned i = 0; i < RegisterCountFlt; ++i) - a[i] = rx_load_vec_f128(®.a[i].lo); + nreg.a[i] = rx_load_vec_f128(®.a[i].lo); - precompileProgram(r, f, e, a); + compileProgram(program, bytecode, nreg); uint32_t spAddr0 = mem.mx; uint32_t spAddr1 = mem.ma; for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { - uint64_t spMix = r[config.readReg0] ^ r[config.readReg1]; + uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1]; spAddr0 ^= spMix; spAddr0 &= ScratchpadL3Mask64; spAddr1 ^= spMix >> 32; spAddr1 &= ScratchpadL3Mask64; for (unsigned i = 0; i < RegistersCount; ++i) - r[i] ^= load64(scratchpad + spAddr0 + 8 * i); + nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i); for (unsigned i = 0; i < RegisterCountFlt; ++i) - f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i); + nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i); for (unsigned i = 0; i < RegisterCountFlt; ++i) - e[i] = maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i))); + nreg.e[i] = 
maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i))); - executeBytecode(r, f, e, a); + executeBytecode(bytecode, scratchpad, config); - mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; + mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3]; mem.mx &= CacheLineAlignMask; datasetPrefetch(datasetOffset + mem.mx); - datasetRead(datasetOffset + mem.ma, r); + datasetRead(datasetOffset + mem.ma, nreg.r); std::swap(mem.mx, mem.ma); for (unsigned i = 0; i < RegistersCount; ++i) - store64(scratchpad + spAddr1 + 8 * i, r[i]); + store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]); for (unsigned i = 0; i < RegisterCountFlt; ++i) - f[i] = rx_xor_vec_f128(f[i], e[i]); + nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]); for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), f[i]); + rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]); spAddr0 = 0; spAddr1 = 0; } for (unsigned i = 0; i < RegistersCount; ++i) - store64(®.r[i], r[i]); + store64(®.r[i], nreg.r[i]); for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128(®.f[i].lo, f[i]); + rx_store_vec_f128(®.f[i].lo, nreg.f[i]); for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128(®.e[i].lo, e[i]); + rx_store_vec_f128(®.e[i].lo, nreg.e[i]); } template @@ -285,391 +124,6 @@ namespace randomx { rx_prefetch_nta(mem.memory + address); } -#include "instruction_weights.hpp" - - template - void InterpretedVm::precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) { - int registerUsage[RegistersCount]; - for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i] = -1; - } - for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { - auto& instr = program(i); - auto& ibc = byteCode[i]; - switch (instr.opcode) { - CASE_REP(IADD_RS) { - auto dst = instr.dst % RegistersCount; 
- auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IADD_RS; - ibc.idst = &r[dst]; - if (dst != RegisterNeedsDisplacement) { - ibc.isrc = &r[src]; - ibc.shift = instr.getModShift(); - ibc.imm = 0; - } - else { - ibc.isrc = &r[src]; - ibc.shift = instr.getModShift(); - ibc.imm = signExtend2sCompl(instr.getImm32()); - } - registerUsage[dst] = i; - } break; - - CASE_REP(IADD_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IADD_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(ISUB_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISUB_R; - ibc.idst = &r[dst]; - if (src != dst) { - ibc.isrc = &r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - } break; - - CASE_REP(ISUB_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISUB_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IMUL_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMUL_R; - ibc.idst = &r[dst]; - if (src != dst) { - ibc.isrc = &r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IMUL_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMUL_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IMULH_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMULH_R; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - registerUsage[dst] = i; - } break; - - CASE_REP(IMULH_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMULH_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(ISMULH_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISMULH_R; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - registerUsage[dst] = i; - } break; - - CASE_REP(ISMULH_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISMULH_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IMUL_RCP) { - uint64_t divisor = instr.getImm32(); - if (!isPowerOf2(divisor)) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::IMUL_R; - ibc.idst = &r[dst]; - ibc.imm = randomx_reciprocal(divisor); - ibc.isrc = &ibc.imm; - registerUsage[dst] = i; - } - else { - ibc.type = InstructionType::NOP; - } - } break; - - CASE_REP(INEG_R) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::INEG_R; - ibc.idst = &r[dst]; - registerUsage[dst] = i; - } break; - - CASE_REP(IXOR_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IXOR_R; - ibc.idst = &r[dst]; - if (src != dst) { - ibc.isrc = &r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IXOR_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IXOR_M; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &Zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IROR_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IROR_R; - ibc.idst = &r[dst]; - if (src != dst) { - ibc.isrc = &r[src]; - } - else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - } break; - - CASE_REP(IROL_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IROL_R; - ibc.idst = &r[dst]; - if (src != dst) { - ibc.isrc = &r[src]; - } - else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - } break; - - CASE_REP(ISWAP_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - if (src != dst) { - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - ibc.type = InstructionType::ISWAP_R; - registerUsage[dst] = i; - registerUsage[src] = i; - } - else { - ibc.type = InstructionType::NOP; - } - } break; - - CASE_REP(FSWAP_R) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::FSWAP_R; - if (dst < RegisterCountFlt) - ibc.fdst = &f[dst]; - else - ibc.fdst = &e[dst - RegisterCountFlt]; - } break; - - CASE_REP(FADD_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FADD_R; - ibc.fdst = &f[dst]; - ibc.fsrc = &a[src]; - } break; - - CASE_REP(FADD_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FADD_M; - ibc.fdst = &f[dst]; - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - } break; - - CASE_REP(FSUB_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FSUB_R; - ibc.fdst = &f[dst]; - ibc.fsrc = &a[src]; - } break; - - CASE_REP(FSUB_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FSUB_M; - ibc.fdst = &f[dst]; - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - } break; - - CASE_REP(FSCAL_R) { - auto dst = instr.dst % RegisterCountFlt; - ibc.fdst = &f[dst]; - ibc.type = InstructionType::FSCAL_R; - } break; - - CASE_REP(FMUL_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FMUL_R; - ibc.fdst = &e[dst]; - ibc.fsrc = &a[src]; - } break; - - CASE_REP(FDIV_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FDIV_M; - ibc.fdst = &e[dst]; - ibc.isrc = &r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - } break; - - CASE_REP(FSQRT_R) { - auto dst = instr.dst % RegisterCountFlt; - ibc.type = InstructionType::FSQRT_R; - ibc.fdst = &e[dst]; - } break; - - CASE_REP(CBRANCH) { - ibc.type = InstructionType::CBRANCH; - //jump condition - int reg = instr.dst % RegistersCount; - ibc.isrc = &r[reg]; - ibc.target = registerUsage[reg]; - int shift = instr.getModCond() + ConditionOffset; - const uint64_t conditionMask = ConditionMask << shift; - ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); - if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 - ibc.imm &= ~(1ULL << (shift - 1)); - ibc.memMask = ConditionMask << shift; - //mark all registers as used - for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j] = i; - } - } break; - - CASE_REP(CFROUND) { - auto src = instr.src % RegistersCount; - ibc.isrc = &r[src]; - ibc.type = InstructionType::CFROUND; - ibc.imm = instr.getImm32() & 63; - } break; - - CASE_REP(ISTORE) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISTORE; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (instr.getModCond() < StoreL3Condition) - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - else - ibc.memMask = ScratchpadL3Mask; - } break; - - CASE_REP(NOP) { - ibc.type = InstructionType::NOP; - } break; - - default: - UNREACHABLE; - } - } - } - template class InterpretedVm, false>; template class InterpretedVm, true>; template class InterpretedVm; diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index 25795a6..2fac2ed 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -32,36 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "common.hpp" #include "virtual_machine.hpp" +#include "bytecode_machine.hpp" #include "intrin_portable.h" #include "allocator.hpp" namespace randomx { - struct InstructionByteCode { - union { - int_reg_t* idst; - rx_vec_f128* fdst; - }; - union { - int_reg_t* isrc; - rx_vec_f128* fsrc; - }; - union { - uint64_t imm; - int64_t simm; - }; - InstructionType type; - union { - int16_t target; - uint16_t shift; - }; - uint32_t memMask; - }; - - static_assert(sizeof(InstructionByteCode) == 32, "Invalid packing of struct InstructionByteCode"); - template - class InterpretedVm : public VmBase { + class InterpretedVm : public VmBase, public BytecodeMachine { public: using VmBase::mem; using VmBase::scratchpad; @@ -86,13 +64,8 @@ namespace randomx { virtual void datasetPrefetch(uint64_t blockNumber); private: void execute(); - void precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); - void executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); - void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); - void* getScratchpadAddress(InstructionByteCode& ibc); - rx_vec_f128 maskRegisterExponentMantissa(rx_vec_f128); - InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; + InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE]; }; using InterpretedVmDefault = InterpretedVm, true>; diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj index 59bc214..fe16367 100644 --- a/vcxproj/randomx.vcxproj +++ b/vcxproj/randomx.vcxproj @@ -106,13 +106,14 @@ - Level3 + Level4 MaxSpeed true true false true AssemblyCode + _MBCS;NDEBUG;%(PreprocessorDefinitions) true @@ -135,6 +136,7 @@ SET ERRORLEVEL = 0 + @@ -164,6 +166,7 @@ SET ERRORLEVEL = 0 + diff --git 
a/vcxproj/randomx.vcxproj.filters b/vcxproj/randomx.vcxproj.filters index d21e1b1..12f6187 100644 --- a/vcxproj/randomx.vcxproj.filters +++ b/vcxproj/randomx.vcxproj.filters @@ -78,6 +78,9 @@ Source Files + + Source Files + @@ -179,6 +182,9 @@ Header Files + + Header Files + diff --git a/vcxproj/tests.vcxproj b/vcxproj/tests.vcxproj new file mode 100644 index 0000000..8f9a11b --- /dev/null +++ b/vcxproj/tests.vcxproj @@ -0,0 +1,132 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {41F3F4DF-8113-4029-9915-FDDC44C43D49} + tests + 10.0.17763.0 + tests + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + + + \ No newline at end of file diff --git a/vcxproj/tests.vcxproj.filters b/vcxproj/tests.vcxproj.filters new file mode 100644 index 0000000..d04c815 --- /dev/null +++ b/vcxproj/tests.vcxproj.filters @@ -0,0 +1,27 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + \ No newline at end of file