Code cleanup & refactoring

1.1.6-wow
tevador 5 years ago
parent 22a3aa8d79
commit 7f6bdd9a52

4
.gitignore vendored

@ -3,4 +3,6 @@ obj/
*.user
*.suo
.vs
x64
x64/
Release/
Debug/

@ -3,7 +3,7 @@
AR=gcc-ar
PLATFORM=$(shell uname -m)
CXXFLAGS=-std=c++11
CCFLAGS=
CCFLAGS=-std=c99
ARFLAGS=rcs
BINDIR=bin
SRCDIR=src
@ -80,7 +80,8 @@ $(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
$(SRCDIR)/intrin_portable.h
$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
@ -90,7 +91,6 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi
$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \
$(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
$(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
$(SRCDIR)/asm/program_read_dataset_light.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
$(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \

@ -1,5 +0,0 @@
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
mov ecx, ebp ;# ecx = ma
and ecx, 2147483584 ;# align "ma" to the start of a cache line
shr ecx, 6 ;# ecx = Dataset block number

@ -27,12 +27,12 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace randomx {
static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" };
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
static const char* tempRegx = "xmm12";
static const char* mantissaMask = "xmm13";
@ -49,7 +49,9 @@ namespace randomx {
}
asmCode.str(std::string()); //clear
for (unsigned i = 0; i < prog.getSize(); ++i) {
#if RANDOMX_JUMP
asmCode << "randomx_isn_" << i << ":" << std::endl;
#endif
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
@ -469,14 +471,14 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@ -484,14 +486,14 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@ -499,20 +501,20 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl;
@ -522,7 +524,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
traceflt(instr);
}
@ -566,7 +568,7 @@ namespace randomx {
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
registerUsage[reg] = i;
@ -579,7 +581,9 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
#if RANDOMX_JUMP
handleCondition(instr, i);
#endif
asmCode << "\txor ecx, ecx" << std::endl;
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
@ -602,7 +606,6 @@ namespace randomx {
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
//Integer
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
@ -620,27 +623,18 @@ namespace randomx {
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
//Common floating point
INST_HANDLE(FSWAP_R)
//Floating point group F
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
//Floating point group E
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
//Control
INST_HANDLE(COND_R)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
}

@ -19,6 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once
#include "common.hpp"
#include <sstream>
namespace randomx {
@ -48,40 +49,40 @@ namespace randomx {
void traceint(Instruction&);
void traceflt(Instruction&);
void tracenop(Instruction&);
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_ISDIV_C(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_COND_R(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_ISDIV_C(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_COND_R(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
static InstructionGenerator engine[256];
std::stringstream asmCode;
int registerUsage[8];
int registerUsage[RegistersCount];
};
}

@ -51,8 +51,6 @@ namespace randomx {
static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
using addr_t = uint32_t;
constexpr int ArgonBlockSize = 1024;
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
@ -78,6 +76,10 @@ namespace randomx {
#endif
#endif
#define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0)
using addr_t = uint32_t;
using int_reg_t = uint64_t;
struct fpu_reg_t {
@ -95,6 +97,7 @@ namespace randomx {
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
constexpr int RegistersCount = 8;
constexpr int RegisterCountFlt = RegistersCount / 2;
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
constexpr int RegisterNeedsSib = 4; //x86 r12 register
@ -118,5 +121,3 @@ namespace randomx {
typedef void(*CacheDeallocFunc)(randomx_cache*);
typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t);
}
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);

@ -34,7 +34,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 170
//The maximum size of a SuperscalarHash program (number of instructions).
#define RANDOMX_SUPERSCALAR_MAX_SIZE 512
//Dataset base size in bytes. Must be a power of 2.
@ -61,8 +64,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
//How many register bits must be zero for a jump condition to be triggered
#define RANDOMX_CONDITION_BITS 7
//How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
#define RANDOMX_JUMP_BITS 7
/*
Instruction frequencies (per 256 opcodes)

@ -39,6 +39,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "blake2/endian.h"
#include "argon2.h"
#include "argon2_core.h"
#include "jit_compiler_x86.hpp"
#include "intrin_portable.h"
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
@ -146,6 +148,7 @@ namespace randomx {
rl[7] = rl[0] ^ superscalarAdd7;
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
mixBlock = getMixBlock(registerValue, cache->memory);
PREFETCHNTA(mixBlock);
SuperscalarProgram& prog = cache->programs[i];
executeSuperscalar(rl, prog, &cache->reciprocalCache);

@ -24,7 +24,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <type_traits>
#include "common.hpp"
#include "superscalar_program.hpp"
#include "jit_compiler_x86.hpp"
#include "allocator.hpp"
/* Global scope for C binding */
@ -33,6 +32,10 @@ struct randomx_dataset {
randomx::DatasetDeallocFunc dealloc;
};
namespace randomx {
class JitCompilerX86;
}
/* Global scope for C binding */
struct randomx_cache {
uint8_t* memory = nullptr;

@ -29,12 +29,12 @@ namespace randomx {
}
void Instruction::genAddressReg(std::ostream& os) const {
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
}
void Instruction::genAddressRegDst(std::ostream& os) const {
if (getModCond())
os << ((mod % 4) ? "L1" : "L2");
os << (getModMem() ? "L1" : "L2");
else
os << "L3";
os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
@ -49,7 +49,7 @@ namespace randomx {
if(dst == RegisterNeedsDisplacement) {
os << ", " << (int32_t)getImm32();
}
os << ", LSH " << (int)(mod % 4) << std::endl;
os << ", LSH " << (int)getModMem() << std::endl;
}
void Instruction::h_IADD_M(std::ostream& os) const {
@ -65,7 +65,6 @@ namespace randomx {
}
}
//1 uOP
void Instruction::h_ISUB_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
@ -197,57 +196,57 @@ namespace randomx {
}
void Instruction::h_FSWAP_R(std::ostream& os) const {
const char reg = (dst >= 4) ? 'e' : 'f';
auto dstIndex = dst % 4;
const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f';
auto dstIndex = dst % RegisterCountFlt;
os << reg << dstIndex << std::endl;
}
void Instruction::h_FADD_R(std::ostream& os) const {
auto dstIndex = dst % 4;
auto srcIndex = src % 4;
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegisterCountFlt;
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FADD_M(std::ostream& os) const {
auto dstIndex = dst % 4;
auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSUB_R(std::ostream& os) const {
auto dstIndex = dst % 4;
auto srcIndex = src % 4;
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegisterCountFlt;
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FSUB_M(std::ostream& os) const {
auto dstIndex = dst % 4;
auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSCAL_R(std::ostream& os) const {
auto dstIndex = dst % 4;
auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << std::endl;
}
void Instruction::h_FMUL_R(std::ostream& os) const {
auto dstIndex = dst % 4;
auto srcIndex = src % 4;
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegisterCountFlt;
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FDIV_M(std::ostream& os) const {
auto dstIndex = dst % 4;
auto dstIndex = dst % RegisterCountFlt;
os << "e" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSQRT_R(std::ostream& os) const {
auto dstIndex = dst % 4;
auto dstIndex = dst % RegisterCountFlt;
os << "e" << dstIndex << std::endl;
}
@ -280,7 +279,7 @@ namespace randomx {
}
void Instruction::h_COND_R(std::ostream& os) const {
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl;
}
void Instruction::h_ISTORE(std::ostream& os) const {
@ -297,7 +296,6 @@ namespace randomx {
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
const char* Instruction::names[256] = {
//Integer
INST_NAME(IADD_RS)
INST_NAME(IADD_M)
INST_NAME(ISUB_R)
@ -314,33 +312,22 @@ namespace randomx {
INST_NAME(IXOR_M)
INST_NAME(IROR_R)
INST_NAME(ISWAP_R)
//Common floating point
INST_NAME(FSWAP_R)
//Floating point group F
INST_NAME(FADD_R)
INST_NAME(FADD_M)
INST_NAME(FSUB_R)
INST_NAME(FSUB_M)
INST_NAME(FSCAL_R)
//Floating point group E
INST_NAME(FMUL_R)
INST_NAME(FDIV_M)
INST_NAME(FSQRT_R)
//Control
INST_NAME(COND_R)
INST_NAME(CFROUND)
INST_NAME(ISTORE)
INST_NAME(NOP)
};
InstructionFormatter Instruction::engine[256] = {
//Integer
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
@ -358,22 +345,15 @@ namespace randomx {
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
//Common floating point
INST_HANDLE(FSWAP_R)
//Floating point group F
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
//Floating point group E
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(COND_R)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)

@ -103,36 +103,36 @@ namespace randomx {
void genAddressReg(std::ostream& os) const;
void genAddressImm(std::ostream& os) const;
void genAddressRegDst(std::ostream&) const;
void h_IADD_RS(std::ostream&) const;
void h_IADD_M(std::ostream&) const;
void h_ISUB_R(std::ostream&) const;
void h_ISUB_M(std::ostream&) const;
void h_IMUL_R(std::ostream&) const;
void h_IMUL_M(std::ostream&) const;
void h_IMULH_R(std::ostream&) const;
void h_IMULH_M(std::ostream&) const;
void h_ISMULH_R(std::ostream&) const;
void h_ISMULH_M(std::ostream&) const;
void h_IMUL_RCP(std::ostream&) const;
void h_INEG_R(std::ostream&) const;
void h_IXOR_R(std::ostream&) const;
void h_IXOR_M(std::ostream&) const;
void h_IROR_R(std::ostream&) const;
void h_IROL_R(std::ostream&) const;
void h_ISWAP_R(std::ostream&) const;
void h_FSWAP_R(std::ostream&) const;
void h_FADD_R(std::ostream&) const;
void h_FADD_M(std::ostream&) const;
void h_FSUB_R(std::ostream&) const;
void h_FSUB_M(std::ostream&) const;
void h_FSCAL_R(std::ostream&) const;
void h_FMUL_R(std::ostream&) const;
void h_FDIV_M(std::ostream&) const;
void h_FSQRT_R(std::ostream&) const;
void h_COND_R(std::ostream&) const;
void h_CFROUND(std::ostream&) const;
void h_ISTORE(std::ostream&) const;
void h_NOP(std::ostream&) const;
void h_IADD_RS(std::ostream&) const;
void h_IADD_M(std::ostream&) const;
void h_ISUB_R(std::ostream&) const;
void h_ISUB_M(std::ostream&) const;
void h_IMUL_R(std::ostream&) const;
void h_IMUL_M(std::ostream&) const;
void h_IMULH_R(std::ostream&) const;
void h_IMULH_M(std::ostream&) const;
void h_ISMULH_R(std::ostream&) const;
void h_ISMULH_M(std::ostream&) const;
void h_IMUL_RCP(std::ostream&) const;
void h_INEG_R(std::ostream&) const;
void h_IXOR_R(std::ostream&) const;
void h_IXOR_M(std::ostream&) const;
void h_IROR_R(std::ostream&) const;
void h_IROL_R(std::ostream&) const;
void h_ISWAP_R(std::ostream&) const;
void h_FSWAP_R(std::ostream&) const;
void h_FADD_R(std::ostream&) const;
void h_FADD_M(std::ostream&) const;
void h_FSUB_R(std::ostream&) const;
void h_FSUB_M(std::ostream&) const;
void h_FSCAL_R(std::ostream&) const;
void h_FMUL_R(std::ostream&) const;
void h_FDIV_M(std::ostream&) const;
void h_FSQRT_R(std::ostream&) const;
void h_COND_R(std::ostream&) const;
void h_CFROUND(std::ostream&) const;
void h_ISTORE(std::ostream&) const;
void h_NOP(std::ostream&) const;
};
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");

@ -318,18 +318,6 @@ constexpr uint64_t ieee_get_exponent_mask() {
return (uint64_t)(E + 1023U) << 52;
}
template<int E>
__m128d ieee_set_exponent(__m128d x) {
static_assert(E > -1023, "Invalid exponent value");
constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
constexpr uint64_t exponent64 = (uint64_t)(E + 1023U) << 52;
const __m128d exponentMask = _mm_castsi128_pd(_mm_set_epi64x(exponent64, exponent64));
x = _mm_and_pd(x, mantissaMask);
x = _mm_or_pd(x, exponentMask);
return x;
}
double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t);

@ -20,8 +20,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <stdexcept>
#include "jit_compiler_x86.hpp"
#define RANDOMX_JUMP
#if !defined(_M_X64) && !defined(__x86_64__)
namespace randomx {
@ -113,7 +111,6 @@ namespace randomx {
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
@ -128,8 +125,7 @@ namespace randomx {
const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight;
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
@ -299,7 +295,7 @@ namespace randomx {
}
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
#ifdef RANDOMX_JUMP
#if RANDOMX_JUMP
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
@ -336,7 +332,7 @@ namespace randomx {
}
void JitCompilerX86::generateCode(Instruction& instr, int i) {
#ifdef RANDOMX_JUMP
#if RANDOMX_JUMP
instructionOffsets.push_back(codePos);
#endif
auto generator = engine[instr.opcode];
@ -467,15 +463,6 @@ namespace randomx {
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
/*if (instr.src != instr.dst) {
emit(REX_ADD_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
emit(REX_81);
emitByte(0xc0 + instr.dst);
emit32(instr.getImm32());
}*/
emit(REX_LEA);
if (instr.dst == RegisterNeedsDisplacement)
emitByte(0xac);
@ -505,14 +492,6 @@ namespace randomx {
emitByte((scale << 6) | (index << 3) | base);
}
void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_LEA);
emitByte(0x84 + 8 * instr.dst);
genSIB(0, instr.src, instr.dst);
emit32(instr.getImm32());
}
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
@ -541,14 +520,6 @@ namespace randomx {
}
}
void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_LEA);
emitByte(0x84 + 8 * instr.dst);
genSIB(3, instr.dst, instr.dst);
emit32(instr.getImm32());
}
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
@ -645,10 +616,6 @@ namespace randomx {
}
}
void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
}
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_NEG);
@ -729,17 +696,14 @@ namespace randomx {
}
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_ADDPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
//emit(REX_PADD);
//emitByte(PADD_OPCODES[instr.mod % 4]);
//emitByte(0xf8 + instr.dst);
}
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ADDPD);
@ -747,17 +711,14 @@ namespace randomx {
}
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_SUBPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
//emit(REX_PADD);
//emitByte(PADD_OPCODES[instr.mod % 4]);
//emitByte(0xf8 + instr.dst);
}
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_SUBPD);
@ -765,40 +726,20 @@ namespace randomx {
}
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
emit(REX_XORPS);
emitByte(0xc7 + 8 * instr.dst);
}
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_MULPD);
emitByte(0xe0 + instr.src + 8 * instr.dst);
}
void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
instr.dst %= 4;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ANDPS_XMM12);
emit(REX_MULPD);
emitByte(0xe4 + 8 * instr.dst);
emit(REX_MAXPD);
emitByte(0xe5 + 8 * instr.dst);
}
void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
emit(REX_DIVPD);
emitByte(0xe0 + instr.src + 8 * instr.dst);
emit(REX_MAXPD);
emitByte(0xe5 + 8 * instr.dst);
}
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ANDPS_XMM12);
@ -807,7 +748,7 @@ namespace randomx {
}
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.dst %= RegisterCountFlt;
emit(SQRTPD);
emitByte(0xe4 + 9 * instr.dst);
}
@ -883,7 +824,7 @@ namespace randomx {
void JitCompilerX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
emit(REX_ADD_I);
@ -900,7 +841,7 @@ namespace randomx {
}
void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
#ifdef RANDOMX_JUMP
#if RANDOMX_JUMP
handleCondition(instr, i);
#endif
emit(XOR_ECX_ECX);
@ -914,40 +855,15 @@ namespace randomx {
emitByte(0xc1 + 8 * instr.dst);
}
void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
#ifdef RANDOMX_JUMP
handleCondition(instr, i);
#endif
emit(XOR_ECX_ECX);
genAddressReg(instr);
emit(REX_CMP_M32I);
emit32(instr.getImm32());
emitByte(0x0f);
emitByte(condition(instr));
emitByte(0xc1);
emit(REX_ADD_RM);
emitByte(0xc1 + 8 * instr.dst);
}
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
genAddressRegDst(instr);
//if (instr.getModCond())
emit(REX_MOV_MR);
//else
// emit(MOVNTI);
emitByte(0x04 + 8 * instr.src);
emitByte(0x06);
}
void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
genAddressRegDst(instr, true);
emit(MOVAPD);
emitByte(0x04 + 8 * instr.src);
emitByte(0x06);
}
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
emitByte(0x90);
emit(NOP1);
}
#include "instruction_weights.hpp"

@ -110,43 +110,36 @@ namespace randomx {
codePos += count;
}
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_IADD_RC(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_9C(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_ISDIV_C(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FMUL_M(Instruction&, int);
void h_FDIV_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_COND_R(Instruction&, int);
void h_COND_M(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_FSTORE(Instruction&, int);
void h_NOP(Instruction&, int);
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_COND_R(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
};
}

@ -31,7 +31,6 @@
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_light)
.global DECL(randomx_program_read_dataset_sshash_init)
.global DECL(randomx_program_read_dataset_sshash_fin)
.global DECL(randomx_program_loop_store)
@ -66,9 +65,6 @@ DECL(randomx_program_start):
DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc"
DECL(randomx_program_read_dataset_light):
#include "asm/program_read_dataset_light.inc"
DECL(randomx_program_read_dataset_sshash_init):
#include "asm/program_read_dataset_sshash_init.inc"

@ -24,7 +24,6 @@ PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_read_dataset_light
PUBLIC randomx_program_read_dataset_sshash_init
PUBLIC randomx_program_read_dataset_sshash_fin
PUBLIC randomx_dataset_init
@ -62,10 +61,6 @@ randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
randomx_program_read_dataset_light PROC
include asm/program_read_dataset_light.inc
randomx_program_read_dataset_light ENDP
randomx_program_read_dataset_sshash_init PROC
include asm/program_read_dataset_sshash_init.inc
randomx_program_read_dataset_sshash_init ENDP

@ -25,7 +25,6 @@ extern "C" {
void randomx_program_loop_load();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_read_dataset_light();
void randomx_program_read_dataset_sshash_init();
void randomx_program_read_dataset_sshash_fin();
void randomx_program_loop_store();

@ -76,22 +76,6 @@ void randomx_vm::initialize() {
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
}
//TODO
// Writes a textual dump of a register file to `os`: first every integer
// register r[i], then the f, e and a groups. For each floating point register
// the "hi" half is printed first and the "lo" half on the following line, each
// as its raw 64-bit pattern in hexadecimal followed by the value in parentheses.
// The stream is switched back to decimal after every register. Returns `os`.
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
	for (int idx = 0; idx < randomx::RegistersCount; ++idx) {
		os << std::hex;
		os << "r" << idx << " = " << rf.r[idx] << std::endl;
		os << std::dec;
	}
	for (int idx = 0; idx < 4; ++idx) {
		// reinterpret the stored values as 64-bit integers to show their bit patterns
		const uint64_t hiBits = *(uint64_t*)&rf.f[idx].hi;
		const uint64_t loBits = *(uint64_t*)&rf.f[idx].lo;
		os << std::hex << "f" << idx << " = " << hiBits << " (" << rf.f[idx].hi << ")" << std::endl;
		os << " = " << loBits << " (" << rf.f[idx].lo << ")" << std::endl << std::dec;
	}
	for (int idx = 0; idx < 4; ++idx) {
		const uint64_t hiBits = *(uint64_t*)&rf.e[idx].hi;
		const uint64_t loBits = *(uint64_t*)&rf.e[idx].lo;
		os << std::hex << "e" << idx << " = " << hiBits << " (" << rf.e[idx].hi << ")" << std::endl;
		os << " = " << loBits << " (" << rf.e[idx].lo << ")" << std::endl << std::dec;
	}
	for (int idx = 0; idx < 4; ++idx) {
		const uint64_t hiBits = *(uint64_t*)&rf.a[idx].hi;
		const uint64_t loBits = *(uint64_t*)&rf.a[idx].lo;
		os << std::hex << "a" << idx << " = " << hiBits << " (" << rf.a[idx].hi << ")" << std::endl;
		os << " = " << loBits << " (" << rf.a[idx].lo << ")" << std::endl << std::dec;
	}
	return os;
}
namespace randomx {
alignas(16) volatile static __m128i aesDummy;

@ -17,10 +17,6 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
//#define TRACE
//#define FPUCHECK
#define RANDOMX_JUMP
#include <iostream>
#include <iomanip>
#include <stdexcept>
@ -33,12 +29,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "intrin_portable.h"
#include "reciprocal.h"
#ifdef FPUCHECK
constexpr bool fpuCheck = true;
#else
constexpr bool fpuCheck = false;
#endif
namespace randomx {
static int_reg_t Zero = 0;
@ -53,49 +43,16 @@ namespace randomx {
void InterpretedVm<Allocator, softAes>::run(void* seed) {
VmBase<Allocator, softAes>::generateProgram(seed);
randomx_vm::initialize();
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
program(i).src %= RegistersCount;
program(i).dst %= RegistersCount;
}
execute();
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
executeBytecode(ic, r, f, e, a);
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
executeBytecode(pc, r, f, e, a);
}
}
// Prints an integer register value to std::cout in hexadecimal, padded with
// '0' to a minimum width of 16 characters, then ends the line (std::endl also
// flushes the stream).
static void print(int_reg_t r) {
	std::cout << std::hex;
	// setw only affects the next formatted output, which is the value itself
	std::cout << std::setw(16) << std::setfill('0');
	std::cout << r;
	std::cout << std::endl;
}
// Prints a 128-bit floating point register to std::cout as two zero-padded
// 16-digit hexadecimal words in the form "<upper 64 bits>-<lower 64 bits>",
// followed by std::endl.
static void print(__m128d f) {
	// view the register as two consecutive 64-bit words
	const uint64_t* words = (uint64_t*)&f;
	const uint64_t lower = words[0];
	const uint64_t upper = words[1];
	std::cout << std::hex << std::setw(16) << std::setfill('0') << upper;
	std::cout << '-';
	std::cout << std::hex << std::setw(16) << std::setfill('0') << lower;
	std::cout << std::endl;
}
// Dumps the complete register state to std::cout, one register per line in the
// form "<group><index> = <value>": the 8 integer registers r0-r7 first, then
// the 4-register groups f, e and a (in that order). The values themselves are
// formatted by the print() overloads.
static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
	int index = 0;
	for (int_reg_t& value : r) {
		std::cout << "r" << index << " = ";
		print(value);
		++index;
	}

	// the three floating point groups share the same layout, so walk them via a table
	struct Group {
		const char* label;
		__m128d* regs;
	};
	const Group groups[] = { { "f", f }, { "e", e }, { "a", a } };
	for (const Group& group : groups) {
		for (int k = 0; k < 4; ++k) {
			std::cout << group.label << k << " = ";
			print(group.regs[k]);
		}
	}
}
// Returns true when x is a subnormal (denormalized) floating point number,
// i.e. std::fpclassify reports FP_SUBNORMAL; zero, normal values, infinities
// and NaN all yield false.
static bool isDenormal(double x) {
	const int category = std::fpclassify(x);
	return category == FP_SUBNORMAL;
}
template<class Allocator, bool softAes>
FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
@ -113,9 +70,8 @@ namespace randomx {
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
auto& ibc = byteCode[pc];
switch (ibc.type)
{
case InstructionType::IADD_RS: {
@ -225,11 +181,11 @@ namespace randomx {
} break;
case InstructionType::COND_R: {
#ifdef RANDOMX_JUMP
#if RANDOMX_JUMP
*ibc.creg += (1 << ibc.shift);
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift;
if ((*ibc.creg & conditionMask) == 0) {
ic = ibc.target;
pc = ibc.target;
break;
}
#endif
@ -251,50 +207,23 @@ namespace randomx {
default:
UNREACHABLE;
}
if (trace && ibc.type != InstructionType::NOP) {
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
print(*ibc.idst);
else //if(ibc.type >= 20 && ibc.type <= 30)
print(0);
}
#ifdef FPUCHECK
if (ibc.type >= 26 && ibc.type <= 30) {
double lo = *(((double*)ibc.fdst) + 0);
double hi = *(((double*)ibc.fdst) + 1);
if (lo <= 0 || hi <= 0) {
std::stringstream ss;
ss << "Underflow in operation " << ibc.type;
printState(r, f, e, a);
throw std::runtime_error(ss.str());
}
}
#endif
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::execute() {
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
__m128d f[4];
__m128d e[4];
__m128d a[4];
int_reg_t r[RegistersCount] = { 0 };
__m128d f[RegisterCountFlt];
__m128d e[RegisterCountFlt];
__m128d a[RegisterCountFlt];
a[0] = _mm_load_pd(&reg.a[0].lo);
a[1] = _mm_load_pd(&reg.a[1].lo);
a[2] = _mm_load_pd(&reg.a[2].lo);
a[3] = _mm_load_pd(&reg.a[3].lo);
for(unsigned i = 0; i < RegisterCountFlt; ++i)
a[i] = _mm_load_pd(&reg.a[i].lo);
precompileProgram(r, f, e, a);
uint32_t spAddr0 = mem.mx;
uint32_t spAddr1 = mem.ma;
if (trace) {
std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
printState(r, f, e, a);
}
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
spAddr0 ^= spMix;
@ -302,31 +231,14 @@ namespace randomx {
spAddr1 ^= spMix >> 32;
spAddr1 &= ScratchpadL3Mask64;
r[0] ^= load64(scratchpad + spAddr0 + 0);
r[1] ^= load64(scratchpad + spAddr0 + 8);
r[2] ^= load64(scratchpad + spAddr0 + 16);
r[3] ^= load64(scratchpad + spAddr0 + 24);
r[4] ^= load64(scratchpad + spAddr0 + 32);
r[5] ^= load64(scratchpad + spAddr0 + 40);
r[6] ^= load64(scratchpad + spAddr0 + 48);
r[7] ^= load64(scratchpad + spAddr0 + 56);
f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0);
f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
if (trace) {
std::cout << "iteration " << std::dec << ic << std::endl;
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
printState(r, f, e, a);
std::cout << "-----------------------------------" << std::endl;
}
for (unsigned i = 0; i < RegistersCount; ++i)
r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
f[i] = load_cvt_i32x2(scratchpad + spAddr1 + 8 * i);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
e[i] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
executeBytecode(r, f, e, a);
@ -335,72 +247,33 @@ namespace randomx {
datasetRead(datasetOffset + mem.ma, r);
std::swap(mem.mx, mem.ma);
if (trace) {
std::cout << "iteration " << std::dec << ic << std::endl;
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
printState(r, f, e, a);
std::cout << "===================================" << std::endl;
}
for (unsigned i = 0; i < RegistersCount; ++i)
store64(scratchpad + spAddr1 + 8 * i, r[i]);
store64(scratchpad + spAddr1 + 0, r[0]);
store64(scratchpad + spAddr1 + 8, r[1]);
store64(scratchpad + spAddr1 + 16, r[2]);
store64(scratchpad + spAddr1 + 24, r[3]);
store64(scratchpad + spAddr1 + 32, r[4]);
store64(scratchpad + spAddr1 + 40, r[5]);
store64(scratchpad + spAddr1 + 48, r[6]);
store64(scratchpad + spAddr1 + 56, r[7]);
f[0] = _mm_xor_pd(f[0], e[0]);
f[1] = _mm_xor_pd(f[1], e[1]);
f[2] = _mm_xor_pd(f[2], e[2]);
f[3] = _mm_xor_pd(f[3], e[3]);
#ifdef FPUCHECK
for(int i = 0; i < 4; ++i) {
double lo = *(((double*)&f[i]) + 0);
double hi = *(((double*)&f[i]) + 1);
if (isDenormal(lo) || isDenormal(hi)) {
std::stringstream ss;
ss << "Denormal f" << i;
throw std::runtime_error(ss.str());
}
}
#endif
for (unsigned i = 0; i < RegisterCountFlt; ++i)
f[i] = _mm_xor_pd(f[i], e[i]);
_mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]);
_mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]);
_mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]);
_mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
_mm_store_pd((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
spAddr0 = 0;
spAddr1 = 0;
}
store64(&reg.r[0], r[0]);
store64(&reg.r[1], r[1]);
store64(&reg.r[2], r[2]);
store64(&reg.r[3], r[3]);
store64(&reg.r[4], r[4]);
store64(&reg.r[5], r[5]);
store64(&reg.r[6], r[6]);
store64(&reg.r[7], r[7]);
_mm_store_pd(&reg.f[0].lo, f[0]);
_mm_store_pd(&reg.f[1].lo, f[1]);
_mm_store_pd(&reg.f[2].lo, f[2]);
_mm_store_pd(&reg.f[3].lo, f[3]);
_mm_store_pd(&reg.e[0].lo, e[0]);
_mm_store_pd(&reg.e[1].lo, e[1]);
_mm_store_pd(&reg.e[2].lo, e[2]);
_mm_store_pd(&reg.e[3].lo, e[3]);
for (unsigned i = 0; i < RegistersCount; ++i)
store64(&reg.r[i], r[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
_mm_store_pd(&reg.f[i].lo, f[i]);
for (unsigned i = 0; i < RegisterCountFlt; ++i)
_mm_store_pd(&reg.e[i].lo, e[i]);
}
static int getConditionRegister(int(&registerUsage)[8]) {
static int getConditionRegister(int(&registerUsage)[RegistersCount]) {
int min = INT_MAX;
int minIndex;
for (unsigned i = 0; i < 8; ++i) {
for (unsigned i = 0; i < RegistersCount; ++i) {
if (registerUsage[i] < min) {
min = registerUsage[i];
minIndex = i;
@ -410,7 +283,7 @@ namespace randomx {
}
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[RegistersCount]) {
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
for (int i = 0; i < RegistersCount; ++i)
r[i] ^= datasetLine[i];
@ -419,9 +292,9 @@ namespace randomx {
#include "instruction_weights.hpp"
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
int registerUsage[8];
for (unsigned i = 0; i < 8; ++i) {
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
int registerUsage[RegistersCount];
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
@ -443,7 +316,7 @@ namespace randomx {
ibc.shift = instr.getModMem();
ibc.imm = signExtend2sCompl(instr.getImm32());
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IADD_M) {
@ -452,7 +325,7 @@ namespace randomx {
ibc.type = InstructionType::IADD_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -460,7 +333,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(ISUB_R) {
@ -475,7 +348,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(ISUB_M) {
@ -484,7 +357,7 @@ namespace randomx {
ibc.type = InstructionType::ISUB_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -492,7 +365,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_R) {
@ -507,7 +380,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_M) {
@ -516,7 +389,7 @@ namespace randomx {
ibc.type = InstructionType::IMUL_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -524,7 +397,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IMULH_R) {
@ -533,7 +406,7 @@ namespace randomx {
ibc.type = InstructionType::IMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IMULH_M) {
@ -542,7 +415,7 @@ namespace randomx {
ibc.type = InstructionType::IMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -550,7 +423,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_R) {
@ -559,7 +432,7 @@ namespace randomx {
ibc.type = InstructionType::ISMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_M) {
@ -568,7 +441,7 @@ namespace randomx {
ibc.type = InstructionType::ISMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -576,7 +449,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IMUL_RCP) {
@ -587,7 +460,7 @@ namespace randomx {
ibc.idst = &r[dst];
ibc.imm = randomx_reciprocal(divisor);
ibc.isrc = &ibc.imm;
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
}
else {
ibc.type = InstructionType::NOP;
@ -598,7 +471,7 @@ namespace randomx {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &r[dst];
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IXOR_R) {
@ -613,7 +486,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IXOR_M) {
@ -622,7 +495,7 @@ namespace randomx {
ibc.type = InstructionType::IXOR_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@ -630,7 +503,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IROR_R) {
@ -645,7 +518,7 @@ namespace randomx {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(IROL_R) {
@ -660,7 +533,7 @@ namespace randomx {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
registerUsage[dst] = i;
} break;
CASE_REP(ISWAP_R) {
@ -670,8 +543,8 @@ namespace randomx {
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.type = InstructionType::ISWAP_R;
registerUsage[instr.dst] = i;
registerUsage[instr.src] = i;
registerUsage[dst] = i;
registerUsage[src] = i;
}
else {
ibc.type = InstructionType::NOP;
@ -681,23 +554,23 @@ namespace randomx {
CASE_REP(FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
if (dst < 4)
if (dst < RegisterCountFlt)
ibc.fdst = &f[dst];
else
ibc.fdst = &e[dst - 4];
ibc.fdst = &e[dst - RegisterCountFlt];
} break;
CASE_REP(FADD_R) {
auto dst = instr.dst % 4;
auto src = instr.src % 4;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FADD_M) {
auto dst = instr.dst % 4;
auto src = instr.src % 8;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
@ -706,16 +579,16 @@ namespace randomx {
} break;
CASE_REP(FSUB_R) {
auto dst = instr.dst % 4;
auto src = instr.src % 4;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FSUB_M) {
auto dst = instr.dst % 4;
auto src = instr.src % 8;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
@ -724,22 +597,22 @@ namespace randomx {
} break;
CASE_REP(FSCAL_R) {
auto dst = instr.dst % 4;
auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &f[dst];
ibc.type = InstructionType::FSCAL_R;
} break;
CASE_REP(FMUL_R) {
auto dst = instr.dst % 4;
auto src = instr.src % 4;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
ibc.fdst = &e[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FDIV_M) {
auto dst = instr.dst % 4;
auto src = instr.src % 8;
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
ibc.fdst = &e[dst];
ibc.isrc = &r[src];
@ -748,7 +621,7 @@ namespace randomx {
} break;
CASE_REP(FSQRT_R) {
auto dst = instr.dst % 4;
auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &e[dst];
} break;
@ -766,13 +639,13 @@ namespace randomx {
ibc.target = registerUsage[reg];
ibc.shift = instr.getModShift();
ibc.creg = &r[reg];
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
registerUsage[j] = i;
}
} break;
CASE_REP(CFROUND) {
auto src = instr.src % 8;
auto src = instr.src % RegistersCount;
ibc.isrc = &r[src];
ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63;

@ -71,12 +71,12 @@ namespace randomx {
void run(void* seed) override;
void setDataset(randomx_dataset* dataset) override;
protected:
virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]);
virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[RegistersCount]);
private:
void execute();
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
void executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
void* getScratchpadAddress(InstructionByteCode& ibc);
__m128d maskRegisterExponentMantissa(__m128d);

@ -106,7 +106,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<SDLCheck>false</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>

@ -26,20 +26,20 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>

Loading…
Cancel
Save