Regression tests (#73)

* instruction decode/execute separated into class BytecodeMachine
* added randomx-tests project
* removed the use of non-portable __COUNTER__ macro
* removed the use of unsupported FENV_ACCESS pragma
1.1.6-wow
tevador 5 years ago committed by GitHub
parent 776723dd40
commit 07293a9378
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -31,6 +31,7 @@ cmake_minimum_required(VERSION 2.8.7)
set (randomx_sources
src/aes_hash.cpp
src/argon2_ref.c
src/bytecode_machine.cpp
src/dataset.cpp
src/soft_aes.cpp
src/virtual_memory.cpp

@ -11,8 +11,8 @@ TESTDIR=src/tests
OBJDIR=obj
LDFLAGS=-lpthread
RXA=$(BINDIR)/librandomx.a
BINARIES=$(RXA) $(BINDIR)/benchmark $(BINDIR)/code-generator
RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o)
BINARIES=$(RXA) $(BINDIR)/randomx-benchmark $(BINDIR)/randomx-generator $(BINDIR)/randomx-tests
RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o bytecode_machine.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o)
ifeq ($(PLATFORM),amd64)
RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o)
CXXFLAGS += -maes
@ -51,7 +51,7 @@ debug: $(BINARIES)
profile: CXXFLAGS += -pg
profile: CCFLAGS += -pg
profile: LDFLAGS += -pg
profile: $(BINDIR)/benchmark
profile: $(BINDIR)/randomx-benchmark
test: CXXFLAGS += -O0
@ -64,7 +64,7 @@ $(BINDIR):
$(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \
$(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h
$(CXX) $(CXXFLAGS) -pthread -c $< -o $@
$(BINDIR)/benchmark: $(OBJDIR)/benchmark.o $(RXA)
$(BINDIR)/randomx-benchmark: $(OBJDIR)/benchmark.o $(RXA)
$(CXX) $(LDFLAGS) -pthread $< $(RXA) -o $@
$(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
@ -74,13 +74,30 @@ $(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \
$(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(BINDIR)/code-generator: $(OBJDIR)/code-generator.o $(RXA)
$(BINDIR)/randomx-generator: $(OBJDIR)/code-generator.o $(RXA)
$(CXX) $(LDFLAGS) $< $(RXA) -o $@
$(OBJDIR)/tests.o: $(TESTDIR)/tests.cpp $(TESTDIR)/utility.hpp \
$(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h \
$(SRCDIR)/randomx.h $(SRCDIR)/intrin_portable.h \
$(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp \
$(SRCDIR)/allocator.hpp $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/reciprocal.h $(SRCDIR)/jit_compiler.hpp \
$(SRCDIR)/jit_compiler_x86.hpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(BINDIR)/randomx-tests: $(OBJDIR)/tests.o $(RXA)
$(CXX) $(LDFLAGS) $< $(RXA) -o $@
$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR)
$(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
$(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \
$(SRCDIR)/blake2/blake2.h
$(OBJDIR)/bytecode_machine.o: $(SRCDIR)/bytecode_machine.cpp $(SRCDIR)/bytecode_machine.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/reciprocal.h
$(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h
$(CC) $(CCFLAGS) -c $< -o $@
@ -108,11 +125,11 @@ $(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDI
$(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h
$(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp
$(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/virtual_machine.hpp \
$(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/instruction_weights.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h \
$(SRCDIR)/instruction_weights.hpp
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h
$(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \
$(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h
@ -129,7 +146,7 @@ $(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset
$(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \
$(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \
$(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h
$(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h $(SRCDIR)/bytecode_machine.hpp
$(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h $(SRCDIR)/program.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \
$(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \
@ -144,7 +161,8 @@ $(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/bytecode_machine.hpp
$(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \
$(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h
$(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \

@ -31,6 +31,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime-distr", "vcxproj\ru
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx-dll", "vcxproj\randomx-dll.vcxproj", "{59560AD8-18E3-463E-A941-BBD808EC7C83}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", "vcxproj\tests.vcxproj", "{41F3F4DF-8113-4029-9915-FDDC44C43D49}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
@ -143,6 +145,14 @@ Global
{59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.Build.0 = Release|x64
{59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.ActiveCfg = Release|Win32
{59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.Build.0 = Release|Win32
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.ActiveCfg = Debug|x64
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.Build.0 = Debug|x64
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.ActiveCfg = Debug|Win32
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.Build.0 = Debug|Win32
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.ActiveCfg = Release|x64
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.Build.0 = Release|x64
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.ActiveCfg = Release|Win32
{41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -159,6 +169,7 @@ Global
{535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923}
{F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} = {4A4A689F-86AF-41C0-A974-1080506D0923}
{F207EC8C-C55F-46C0-8851-887A71574F54} = {4A4A689F-86AF-41C0-A974-1080506D0923}
{41F3F4DF-8113-4029-9915-FDDC44C43D49} = {4A4A689F-86AF-41C0-A974-1080506D0923}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {4EBC03DB-AE37-4141-8147-692F16E0ED02}

@ -0,0 +1,482 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bytecode_machine.hpp"
#include "reciprocal.h"
namespace randomx {
const int_reg_t BytecodeMachine::zero = 0;
#define INSTR_CASE(x) case InstructionType::x: \
exe_ ## x(ibc, pc, scratchpad, config); \
break;
void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) {
switch (ibc.type)
{
INSTR_CASE(IADD_RS)
INSTR_CASE(IADD_M)
INSTR_CASE(ISUB_R)
INSTR_CASE(ISUB_M)
INSTR_CASE(IMUL_R)
INSTR_CASE(IMUL_M)
INSTR_CASE(IMULH_R)
INSTR_CASE(IMULH_M)
INSTR_CASE(ISMULH_R)
INSTR_CASE(ISMULH_M)
INSTR_CASE(INEG_R)
INSTR_CASE(IXOR_R)
INSTR_CASE(IXOR_M)
INSTR_CASE(IROR_R)
INSTR_CASE(IROL_R)
INSTR_CASE(ISWAP_R)
INSTR_CASE(FSWAP_R)
INSTR_CASE(FADD_R)
INSTR_CASE(FADD_M)
INSTR_CASE(FSUB_R)
INSTR_CASE(FSUB_M)
INSTR_CASE(FSCAL_R)
INSTR_CASE(FMUL_R)
INSTR_CASE(FDIV_M)
INSTR_CASE(FSQRT_R)
INSTR_CASE(CBRANCH)
INSTR_CASE(CFROUND)
INSTR_CASE(ISTORE)
case InstructionType::NOP:
break;
case InstructionType::IMUL_RCP: //executed as IMUL_R
default:
UNREACHABLE;
}
}
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
int opcode = instr.opcode;
if (opcode < ceil_IADD_RS) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS;
ibc.idst = &nreg->r[dst];
if (dst != RegisterNeedsDisplacement) {
ibc.isrc = &nreg->r[src];
ibc.shift = instr.getModShift();
ibc.imm = 0;
}
else {
ibc.isrc = &nreg->r[src];
ibc.shift = instr.getModShift();
ibc.imm = signExtend2sCompl(instr.getImm32());
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IADD_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_ISUB_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_R;
ibc.idst = &nreg->r[dst];
if (src != dst) {
ibc.isrc = &nreg->r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_ISUB_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IMUL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_R;
ibc.idst = &nreg->r[dst];
if (src != dst) {
ibc.isrc = &nreg->r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IMUL_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_R;
ibc.idst = &nreg->r[dst];
ibc.isrc = &nreg->r[src];
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_ISMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_R;
ibc.idst = &nreg->r[dst];
ibc.isrc = &nreg->r[src];
registerUsage[dst] = i;
return;
}
if (opcode < ceil_ISMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!isPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;
ibc.idst = &nreg->r[dst];
ibc.imm = randomx_reciprocal(divisor);
ibc.isrc = &ibc.imm;
registerUsage[dst] = i;
}
else {
ibc.type = InstructionType::NOP;
}
return;
}
if (opcode < ceil_INEG_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &nreg->r[dst];
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IXOR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_R;
ibc.idst = &nreg->r[dst];
if (src != dst) {
ibc.isrc = &nreg->r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IXOR_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M;
ibc.idst = &nreg->r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IROR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROR_R;
ibc.idst = &nreg->r[dst];
if (src != dst) {
ibc.isrc = &nreg->r[src];
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_IROL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROL_R;
ibc.idst = &nreg->r[dst];
if (src != dst) {
ibc.isrc = &nreg->r[src];
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
return;
}
if (opcode < ceil_ISWAP_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
if (src != dst) {
ibc.idst = &nreg->r[dst];
ibc.isrc = &nreg->r[src];
ibc.type = InstructionType::ISWAP_R;
registerUsage[dst] = i;
registerUsage[src] = i;
}
else {
ibc.type = InstructionType::NOP;
}
return;
}
if (opcode < ceil_FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
if (dst < RegisterCountFlt)
ibc.fdst = &nreg->f[dst];
else
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
return;
}
if (opcode < ceil_FADD_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
ibc.fdst = &nreg->f[dst];
ibc.fsrc = &nreg->a[src];
return;
}
if (opcode < ceil_FADD_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
ibc.fdst = &nreg->f[dst];
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
if (opcode < ceil_FSUB_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
ibc.fdst = &nreg->f[dst];
ibc.fsrc = &nreg->a[src];
return;
}
if (opcode < ceil_FSUB_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
ibc.fdst = &nreg->f[dst];
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
if (opcode < ceil_FSCAL_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &nreg->f[dst];
ibc.type = InstructionType::FSCAL_R;
return;
}
if (opcode < ceil_FMUL_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
ibc.fdst = &nreg->e[dst];
ibc.fsrc = &nreg->a[src];
return;
}
if (opcode < ceil_FDIV_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
ibc.fdst = &nreg->e[dst];
ibc.isrc = &nreg->r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
if (opcode < ceil_FSQRT_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &nreg->e[dst];
return;
}
if (opcode < ceil_CBRANCH) {
ibc.type = InstructionType::CBRANCH;
//jump condition
int creg = instr.dst % RegistersCount;
ibc.idst = &nreg->r[creg];
ibc.target = registerUsage[creg];
int shift = instr.getModCond() + ConditionOffset;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = ConditionMask << shift;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
}
return;
}
if (opcode < ceil_CFROUND) {
auto src = instr.src % RegistersCount;
ibc.isrc = &nreg->r[src];
ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63;
return;
}
if (opcode < ceil_ISTORE) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISTORE;
ibc.idst = &nreg->r[dst];
ibc.isrc = &nreg->r[src];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.getModCond() < StoreL3Condition)
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
else
ibc.memMask = ScratchpadL3Mask;
return;
}
if (opcode < ceil_NOP) {
ibc.type = InstructionType::NOP;
return;
}
UNREACHABLE;
}
}

@ -0,0 +1,322 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "common.hpp"
#include "intrin_portable.h"
#include "instruction.hpp"
#include "program.hpp"
namespace randomx {
//register file in machine byte order
struct NativeRegisterFile {
int_reg_t r[RegistersCount] = { 0 };
rx_vec_f128 f[RegisterCountFlt];
rx_vec_f128 e[RegisterCountFlt];
rx_vec_f128 a[RegisterCountFlt];
};
struct InstructionByteCode {
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
const int_reg_t* isrc;
const rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
};
#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
constexpr int ceil_NULL = 0;
OPCODE_CEIL_DECLARE(IADD_RS, NULL);
OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
OPCODE_CEIL_DECLARE(NOP, ISTORE);
#undef OPCODE_CEIL_DECLARE
#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc
class BytecodeMachine;
typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS);
class BytecodeMachine {
public:
void beginCompilation(NativeRegisterFile& regFile) {
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}
nreg = &regFile;
}
void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) {
beginCompilation(regFile);
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
auto& instr = program(i);
auto& ibc = bytecode[i];
compileInstruction(instr, i, ibc);
}
}
static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) {
for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
auto& ibc = bytecode[pc];
executeInstruction(ibc, pc, scratchpad, config);
}
}
void compileInstruction(RANDOMX_GEN_ARGS)
#ifdef RANDOMX_GEN_TABLE
{
auto generator = genTable[instr.opcode];
(this->*generator)(instr, i, ibc);
}
#else
;
#endif
static void executeInstruction(RANDOMX_EXE_ARGS);
static void exe_IADD_RS(RANDOMX_EXE_ARGS) {
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
}
static void exe_IADD_M(RANDOMX_EXE_ARGS) {
*ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
}
static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
*ibc.idst -= *ibc.isrc;
}
static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
*ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
}
static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
*ibc.idst *= *ibc.isrc;
}
static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
*ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
}
static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
}
static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
}
static void exe_ISMULH_R(RANDOMX_EXE_ARGS) {
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
}
static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
}
static void exe_INEG_R(RANDOMX_EXE_ARGS) {
*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
}
static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
*ibc.idst ^= *ibc.isrc;
}
static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
*ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
}
static void exe_IROR_R(RANDOMX_EXE_ARGS) {
*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
}
static void exe_IROL_R(RANDOMX_EXE_ARGS) {
*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
}
static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
int_reg_t temp = *ibc.isrc;
*(int_reg_t*)ibc.isrc = *ibc.idst;
*ibc.idst = temp;
}
static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
}
static void exe_FADD_R(RANDOMX_EXE_ARGS) {
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
}
static void exe_FADD_M(RANDOMX_EXE_ARGS) {
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
}
static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
}
static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
}
static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
}
static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
}
static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
rx_vec_f128 fsrc = maskRegisterExponentMantissa(
config,
rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
);
*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
}
static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
}
static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
*ibc.idst += ibc.imm;
if ((*ibc.idst & ibc.memMask) == 0) {
pc = ibc.target;
}
}
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
}
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
}
protected:
static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) {
const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
x = rx_and_vec_f128(x, xmantissaMask);
x = rx_or_vec_f128(x, xexponentMask);
return x;
}
private:
static const int_reg_t zero;
int registerUsage[RegistersCount];
NativeRegisterFile* nreg;
static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
return scratchpad + addr;
}
#ifdef RANDOMX_GEN_TABLE
static InstructionGenBytecode genTable[256];
void gen_IADD_RS(RANDOMX_GEN_ARGS);
void gen_IADD_M(RANDOMX_GEN_ARGS);
void gen_ISUB_R(RANDOMX_GEN_ARGS);
void gen_ISUB_M(RANDOMX_GEN_ARGS);
void gen_IMUL_R(RANDOMX_GEN_ARGS);
void gen_IMUL_M(RANDOMX_GEN_ARGS);
void gen_IMULH_R(RANDOMX_GEN_ARGS);
void gen_IMULH_M(RANDOMX_GEN_ARGS);
void gen_ISMULH_R(RANDOMX_GEN_ARGS);
void gen_ISMULH_M(RANDOMX_GEN_ARGS);
void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
void gen_INEG_R(RANDOMX_GEN_ARGS);
void gen_IXOR_R(RANDOMX_GEN_ARGS);
void gen_IXOR_M(RANDOMX_GEN_ARGS);
void gen_IROR_R(RANDOMX_GEN_ARGS);
void gen_IROL_R(RANDOMX_GEN_ARGS);
void gen_ISWAP_R(RANDOMX_GEN_ARGS);
void gen_FSWAP_R(RANDOMX_GEN_ARGS);
void gen_FADD_R(RANDOMX_GEN_ARGS);
void gen_FADD_M(RANDOMX_GEN_ARGS);
void gen_FSUB_R(RANDOMX_GEN_ARGS);
void gen_FSUB_M(RANDOMX_GEN_ARGS);
void gen_FSCAL_R(RANDOMX_GEN_ARGS);
void gen_FMUL_R(RANDOMX_GEN_ARGS);
void gen_FDIV_M(RANDOMX_GEN_ARGS);
void gen_FSQRT_R(RANDOMX_GEN_ARGS);
void gen_CBRANCH(RANDOMX_GEN_ARGS);
void gen_CFROUND(RANDOMX_GEN_ARGS);
void gen_ISTORE(RANDOMX_GEN_ARGS);
void gen_NOP(RANDOMX_GEN_ARGS);
#endif
};
}

@ -108,12 +108,15 @@ namespace randomx {
#endif
#if defined(_M_X64) || defined(__x86_64__)
#define RANDOMX_HAVE_COMPILER 1
class JitCompilerX86;
using JitCompiler = JitCompilerX86;
#elif defined(__aarch64__)
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerA64;
using JitCompiler = JitCompilerA64;
#else
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback;
using JitCompiler = JitCompilerFallback;
#endif
@ -160,14 +163,14 @@ namespace randomx {
uint8_t* memory = nullptr;
};
//register file in little-endian byte order
struct RegisterFile {
int_reg_t r[RegistersCount];
fpu_reg_t f[RegistersCount / 2];
fpu_reg_t e[RegistersCount / 2];
fpu_reg_t a[RegistersCount / 2];
fpu_reg_t f[RegisterCountFlt];
fpu_reg_t e[RegisterCountFlt];
fpu_reg_t a[RegisterCountFlt];
};
typedef void(DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(&reg)[RegistersCount]);
typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);

@ -71,43 +71,3 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define REPN(x,N) REPNX(x,N)
#define NUM(x) x
#define WT(x) NUM(RANDOMX_FREQ_##x)
#define REPCASE0(x)
#define REPCASE1(x) case __COUNTER__:
#define REPCASE2(x) REPCASE1(x) case __COUNTER__:
#define REPCASE3(x) REPCASE2(x) case __COUNTER__:
#define REPCASE4(x) REPCASE3(x) case __COUNTER__:
#define REPCASE5(x) REPCASE4(x) case __COUNTER__:
#define REPCASE6(x) REPCASE5(x) case __COUNTER__:
#define REPCASE7(x) REPCASE6(x) case __COUNTER__:
#define REPCASE8(x) REPCASE7(x) case __COUNTER__:
#define REPCASE9(x) REPCASE8(x) case __COUNTER__:
#define REPCASE10(x) REPCASE9(x) case __COUNTER__:
#define REPCASE11(x) REPCASE10(x) case __COUNTER__:
#define REPCASE12(x) REPCASE11(x) case __COUNTER__:
#define REPCASE13(x) REPCASE12(x) case __COUNTER__:
#define REPCASE14(x) REPCASE13(x) case __COUNTER__:
#define REPCASE15(x) REPCASE14(x) case __COUNTER__:
#define REPCASE16(x) REPCASE15(x) case __COUNTER__:
#define REPCASE17(x) REPCASE16(x) case __COUNTER__:
#define REPCASE18(x) REPCASE17(x) case __COUNTER__:
#define REPCASE19(x) REPCASE18(x) case __COUNTER__:
#define REPCASE20(x) REPCASE19(x) case __COUNTER__:
#define REPCASE21(x) REPCASE20(x) case __COUNTER__:
#define REPCASE22(x) REPCASE21(x) case __COUNTER__:
#define REPCASE23(x) REPCASE22(x) case __COUNTER__:
#define REPCASE24(x) REPCASE23(x) case __COUNTER__:
#define REPCASE25(x) REPCASE24(x) case __COUNTER__:
#define REPCASE26(x) REPCASE25(x) case __COUNTER__:
#define REPCASE27(x) REPCASE26(x) case __COUNTER__:
#define REPCASE28(x) REPCASE27(x) case __COUNTER__:
#define REPCASE29(x) REPCASE28(x) case __COUNTER__:
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
#define REPCASE64(x) REPCASE32(x) REPCASE32(x)
#define REPCASE128(x) REPCASE64(x) REPCASE64(x)
#define REPCASE256(x) REPCASE128(x) REPCASE128(x)
#define REPCASENX(x,N) REPCASE##N(x)
#define REPCASEN(x,N) REPCASENX(x,N)
#define CASE_REP(x) REPCASEN(x, WT(x))

@ -26,7 +26,6 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma STDC FENV_ACCESS ON
#include <cfenv>
#include <cmath>
#include "common.hpp"

@ -67,3 +67,11 @@ uint64_t randomx_reciprocal(uint64_t divisor) {
return quotient;
}
#if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
return randomx_reciprocal(divisor);
}
#endif

@ -30,6 +30,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#if defined(_M_X64) || defined(__x86_64__)
#define RANDOMX_HAVE_FAST_RECIPROCAL 1
#else
#define RANDOMX_HAVE_FAST_RECIPROCAL 0
#endif
#if defined(__cplusplus)
extern "C" {
#endif

@ -52,7 +52,7 @@ namespace randomx {
int getAddressRegister() {
return addrReg;
}
void setAddressRegister(uint32_t val) {
void setAddressRegister(int val) {
addrReg = val;
}

File diff suppressed because it is too large Load Diff

@ -41,6 +41,36 @@ inline void outputHex(std::ostream& os, const char* data, int length) {
}
}
char parseNibble(char hex) {
hex &= ~0x20;
if (hex & 0x40) {
hex -= 'A' - 10;
}
else {
hex &= 0xf;
}
return hex;
}
void hex2bin(char *in, int length, char *out) {
for (int i = 0; i < length; i += 2) {
char nibble1 = parseNibble(*in++);
char nibble2 = parseNibble(*in++);
*out++ = nibble1 << 4 | nibble2;
}
}
constexpr bool stringsEqual(char const * a, char const * b) {
return *a == *b && (*a == '\0' || stringsEqual(a + 1, b + 1));
}
template<size_t N>
bool equalsHex(const void* hash, const char (&hex)[N]) {
char reference[N / 2];
hex2bin((char*)hex, N - 1, reference);
return memcmp(hash, reference, sizeof(reference)) == 0;
}
inline void dump(const char* buffer, uint64_t count, const char* name) {
std::ofstream fout(name, std::ios::out | std::ios::binary);
fout.write(buffer, count);

@ -39,8 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
static int_reg_t Zero = 0;
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
datasetPtr = dataset;
@ -54,223 +52,64 @@ namespace randomx {
execute();
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
executeBytecode(pc, r, f, e, a);
}
}
template<class Allocator, bool softAes>
FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
return scratchpad + addr;
}
template<class Allocator, bool softAes>
FORCE_INLINE rx_vec_f128 InterpretedVm<Allocator, softAes>::maskRegisterExponentMantissa(rx_vec_f128 x) {
const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
x = rx_and_vec_f128(x, xmantissaMask);
x = rx_or_vec_f128(x, xexponentMask);
return x;
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
auto& ibc = byteCode[pc];
switch (ibc.type)
{
case InstructionType::IADD_RS: {
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
} break;
case InstructionType::IADD_M: {
*ibc.idst += load64(getScratchpadAddress(ibc));
} break;
case InstructionType::ISUB_R: {
*ibc.idst -= *ibc.isrc;
} break;
case InstructionType::ISUB_M: {
*ibc.idst -= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IMUL_R: { //also handles IMUL_RCP
*ibc.idst *= *ibc.isrc;
} break;
case InstructionType::IMUL_M: {
*ibc.idst *= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IMULH_R: {
*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
} break;
case InstructionType::IMULH_M: {
*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc)));
} break;
case InstructionType::ISMULH_R: {
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
} break;
case InstructionType::ISMULH_M: {
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc))));
} break;
case InstructionType::INEG_R: {
*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
} break;
case InstructionType::IXOR_R: {
*ibc.idst ^= *ibc.isrc;
} break;
case InstructionType::IXOR_M: {
*ibc.idst ^= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IROR_R: {
*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
} break;
case InstructionType::IROL_R: {
*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
} break;
case InstructionType::ISWAP_R: {
int_reg_t temp = *ibc.isrc;
*ibc.isrc = *ibc.idst;
*ibc.idst = temp;
} break;
case InstructionType::FSWAP_R: {
*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
} break;
case InstructionType::FADD_R: {
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
} break;
case InstructionType::FADD_M: {
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc));
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
} break;
case InstructionType::FSUB_R: {
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
} break;
case InstructionType::FSUB_M: {
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc));
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
} break;
case InstructionType::FSCAL_R: {
const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
} break;
case InstructionType::FMUL_R: {
*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
} break;
case InstructionType::FDIV_M: {
rx_vec_f128 fsrc = maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc)));
*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
} break;
case InstructionType::FSQRT_R: {
*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
} break;
case InstructionType::CBRANCH: {
*ibc.isrc += ibc.imm;
if ((*ibc.isrc & ibc.memMask) == 0) {
pc = ibc.target;
}
} break;
case InstructionType::CFROUND: {
rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
} break;
case InstructionType::ISTORE: {
store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
} break;
case InstructionType::NOP: {
//nothing
} break;
default:
UNREACHABLE;
}
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::execute() {
int_reg_t r[RegistersCount] = { 0 };
rx_vec_f128 f[RegisterCountFlt];
rx_vec_f128 e[RegisterCountFlt];
rx_vec_f128 a[RegisterCountFlt];
NativeRegisterFile nreg;
for(unsigned i = 0; i < RegisterCountFlt; ++i)
a[i] = rx_load_vec_f128(&reg.a[i].lo);
nreg.a[i] = rx_load_vec_f128(&reg.a[i].lo);
precompileProgram(r, f, e, a);
compileProgram(program, bytecode, nreg);
uint32_t spAddr0 = mem.mx;
uint32_t spAddr1 = mem.ma;
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1];
spAddr0 ^= spMix;
spAddr0 &= ScratchpadL3Mask64;
spAddr1 ^= spMix >> 32;
spAddr1 &= ScratchpadL3Mask64;
for (unsigned i = 0; i < RegistersCount; ++i)
r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
e[i] = maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
nreg.e[i] = maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
executeBytecode(r, f, e, a);
executeBytecode(bytecode, scratchpad, config);
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
mem.mx &= CacheLineAlignMask;
datasetPrefetch(datasetOffset + mem.mx);
datasetRead(datasetOffset + mem.ma, r);
datasetRead(datasetOffset + mem.ma, nreg.r);
std::swap(mem.mx, mem.ma);
for (unsigned i = 0; i < RegistersCount; ++i)
store64(scratchpad + spAddr1 + 8 * i, r[i]);
store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
f[i] = rx_xor_vec_f128(f[i], e[i]);
nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]);
spAddr0 = 0;
spAddr1 = 0;
}
for (unsigned i = 0; i < RegistersCount; ++i)
store64(&reg.r[i], r[i]);
store64(&reg.r[i], nreg.r[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
rx_store_vec_f128(&reg.f[i].lo, f[i]);
rx_store_vec_f128(&reg.f[i].lo, nreg.f[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
rx_store_vec_f128(&reg.e[i].lo, e[i]);
rx_store_vec_f128(&reg.e[i].lo, nreg.e[i]);
}
template<class Allocator, bool softAes>
@ -285,391 +124,6 @@ namespace randomx {
rx_prefetch_nta(mem.memory + address);
}
#include "instruction_weights.hpp"
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
int registerUsage[RegistersCount];
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
auto& instr = program(i);
auto& ibc = byteCode[i];
switch (instr.opcode) {
CASE_REP(IADD_RS) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS;
ibc.idst = &r[dst];
if (dst != RegisterNeedsDisplacement) {
ibc.isrc = &r[src];
ibc.shift = instr.getModShift();
ibc.imm = 0;
}
else {
ibc.isrc = &r[src];
ibc.shift = instr.getModShift();
ibc.imm = signExtend2sCompl(instr.getImm32());
}
registerUsage[dst] = i;
} break;
CASE_REP(IADD_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(ISUB_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_R;
ibc.idst = &r[dst];
if (src != dst) {
ibc.isrc = &r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
} break;
CASE_REP(ISUB_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_R;
ibc.idst = &r[dst];
if (src != dst) {
ibc.isrc = &r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(IMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
registerUsage[dst] = i;
} break;
CASE_REP(IMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!isPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;
ibc.idst = &r[dst];
ibc.imm = randomx_reciprocal(divisor);
ibc.isrc = &ibc.imm;
registerUsage[dst] = i;
}
else {
ibc.type = InstructionType::NOP;
}
} break;
CASE_REP(INEG_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &r[dst];
registerUsage[dst] = i;
} break;
CASE_REP(IXOR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_R;
ibc.idst = &r[dst];
if (src != dst) {
ibc.isrc = &r[src];
}
else {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
} break;
CASE_REP(IXOR_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[dst] = i;
} break;
CASE_REP(IROR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROR_R;
ibc.idst = &r[dst];
if (src != dst) {
ibc.isrc = &r[src];
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
} break;
CASE_REP(IROL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROL_R;
ibc.idst = &r[dst];
if (src != dst) {
ibc.isrc = &r[src];
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[dst] = i;
} break;
CASE_REP(ISWAP_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
if (src != dst) {
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.type = InstructionType::ISWAP_R;
registerUsage[dst] = i;
registerUsage[src] = i;
}
else {
ibc.type = InstructionType::NOP;
}
} break;
CASE_REP(FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
if (dst < RegisterCountFlt)
ibc.fdst = &f[dst];
else
ibc.fdst = &e[dst - RegisterCountFlt];
} break;
CASE_REP(FADD_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FADD_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSUB_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FSUB_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSCAL_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &f[dst];
ibc.type = InstructionType::FSCAL_R;
} break;
CASE_REP(FMUL_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
ibc.fdst = &e[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FDIV_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
ibc.fdst = &e[dst];
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSQRT_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &e[dst];
} break;
CASE_REP(CBRANCH) {
ibc.type = InstructionType::CBRANCH;
//jump condition
int reg = instr.dst % RegistersCount;
ibc.isrc = &r[reg];
ibc.target = registerUsage[reg];
int shift = instr.getModCond() + ConditionOffset;
const uint64_t conditionMask = ConditionMask << shift;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = ConditionMask << shift;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
}
} break;
CASE_REP(CFROUND) {
auto src = instr.src % RegistersCount;
ibc.isrc = &r[src];
ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63;
} break;
CASE_REP(ISTORE) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISTORE;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.getModCond() < StoreL3Condition)
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
else
ibc.memMask = ScratchpadL3Mask;
} break;
CASE_REP(NOP) {
ibc.type = InstructionType::NOP;
} break;
default:
UNREACHABLE;
}
}
}
template class InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
template class InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
template class InterpretedVm<LargePageAllocator, false>;

@ -32,36 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <vector>
#include "common.hpp"
#include "virtual_machine.hpp"
#include "bytecode_machine.hpp"
#include "intrin_portable.h"
#include "allocator.hpp"
namespace randomx {
struct InstructionByteCode {
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
};
static_assert(sizeof(InstructionByteCode) == 32, "Invalid packing of struct InstructionByteCode");
template<class Allocator, bool softAes>
class InterpretedVm : public VmBase<Allocator, softAes> {
class InterpretedVm : public VmBase<Allocator, softAes>, public BytecodeMachine {
public:
using VmBase<Allocator, softAes>::mem;
using VmBase<Allocator, softAes>::scratchpad;
@ -86,13 +64,8 @@ namespace randomx {
virtual void datasetPrefetch(uint64_t blockNumber);
private:
void execute();
void precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]);
void executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]);
void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]);
void* getScratchpadAddress(InstructionByteCode& ibc);
rx_vec_f128 maskRegisterExponentMantissa(rx_vec_f128);
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE];
};
using InterpretedVmDefault = InterpretedVm<AlignedAllocator<CacheLineSize>, true>;

@ -106,13 +106,14 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>false</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
<PreprocessorDefinitions>_MBCS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -135,6 +136,7 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\vm_compiled_light.cpp" />
<ClCompile Include="..\src\vm_compiled.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
@ -164,6 +166,7 @@ SET ERRORLEVEL = 0</Command>
<ClInclude Include="..\src\blake2\blamka-round-ref.h" />
<ClInclude Include="..\src\blake2\endian.h" />
<ClInclude Include="..\src\blake2_generator.hpp" />
<ClInclude Include="..\src\bytecode_machine.hpp" />
<ClInclude Include="..\src\common.hpp" />
<ClInclude Include="..\src\jit_compiler.hpp" />
<ClInclude Include="..\src\jit_compiler_a64.hpp" />

@ -78,6 +78,9 @@
<ClCompile Include="..\src\blake2_generator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\argon2.h">
@ -179,6 +182,9 @@
<ClInclude Include="..\src\blake2\blake2.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\bytecode_machine.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\jit_compiler_x86_static.asm">

@ -0,0 +1,132 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{41F3F4DF-8113-4029-9915-FDDC44C43D49}</ProjectGuid>
<RootNamespace>tests</RootNamespace>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
<ProjectName>tests</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\src\tests\tests.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="randomx.vcxproj">
<Project>{3346a4ad-c438-4324-8b77-47a16452954b}</Project>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\tests\utility.hpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\tests\utility.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\tests\tests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>
Loading…
Cancel
Save