diff --git a/doc/program.asm b/doc/program.asm index 4016aa5..c203d1c 100644 --- a/doc/program.asm +++ b/doc/program.asm @@ -19,9 +19,9 @@ randomx_isn_4: mov ecx, r13d ror r11, cl randomx_isn_5: - ; CBRANCH -1891017657, COND 15 - add r10, -1886823353 - test r10, 2139095040 + ; CBRANCH r7, -1891017657, COND 15 + add r15, -1886823353 + test r15, 2139095040 jz randomx_isn_0 randomx_isn_6: ; ISUB_M r3, L1[r7-1023302103] @@ -41,10 +41,10 @@ randomx_isn_10: ; FADD_R f0, a3 addpd xmm0, xmm11 randomx_isn_11: - ; CBRANCH -1981570318, COND 4 - add r8, -1981566222 - test r8, 1044480 - jz randomx_isn_6 + ; CBRANCH r3, -1981570318, COND 4 + add r11, -1981566222 + test r11, 1044480 + jz randomx_isn_10 randomx_isn_12: ; FSUB_R f0, a1 subpd xmm0, xmm9 @@ -55,9 +55,9 @@ randomx_isn_14: ; FSQRT_R e2 sqrtpd xmm6, xmm6 randomx_isn_15: - ; CBRANCH -1278791788, COND 14 - add r11, -1278791788 - test r11, 1069547520 + ; CBRANCH r5, -1278791788, COND 14 + add r13, -1278791788 + test r13, 1069547520 jz randomx_isn_12 randomx_isn_16: ; ISUB_R r3, -1310797453 @@ -167,10 +167,10 @@ randomx_isn_42: ; ISUB_R r4, r3 sub r12, r11 randomx_isn_43: - ; CBRANCH 335851892, COND 5 - add r13, 335847796 - test r13, 2088960 - jz randomx_isn_16 + ; CBRANCH r6, 335851892, COND 5 + add r14, 335847796 + test r14, 2088960 + jz randomx_isn_25 randomx_isn_44: ; IADD_RS r7, r5, SHFT 3 lea r15, [r15+r13*8] @@ -194,10 +194,10 @@ randomx_isn_48: ; IMUL_R r0, r5 imul r8, r13 randomx_isn_49: - ; CBRANCH -272659465, COND 15 - add r9, -272659465 - test r9, 2139095040 - jz randomx_isn_44 + ; CBRANCH r2, -272659465, COND 15 + add r10, -272659465 + test r10, 2139095040 + jz randomx_isn_48 randomx_isn_50: ; ISTORE L1[r6+1414933948], r5 lea eax, [r14d+1414933948] @@ -212,9 +212,9 @@ randomx_isn_52: ; FSCAL_R f1 xorps xmm1, xmm15 randomx_isn_53: - ; CBRANCH -2143810604, COND 1 - add r12, -2143810860 - test r12, 130560 + ; CBRANCH r6, -2143810604, COND 1 + add r14, -2143810860 + test r14, 130560 jz randomx_isn_50 
randomx_isn_54: ; ISUB_M r3, L1[r1-649360673] @@ -241,7 +241,7 @@ randomx_isn_58: ; IADD_RS r4, r2, SHFT 1 lea r12, [r12+r10*2] randomx_isn_59: - ; CBRANCH -704407571, COND 10 + ; CBRANCH r6, -704407571, COND 10 add r14, -704276499 test r14, 66846720 jz randomx_isn_54 @@ -263,9 +263,9 @@ randomx_isn_64: and eax, 2097144 mov qword ptr [rsi+rax], r8 randomx_isn_65: - ; CBRANCH -67701844, COND 5 - add r15, -67705940 - test r15, 2088960 + ; CBRANCH r1, -67701844, COND 5 + add r9, -67705940 + test r9, 2088960 jz randomx_isn_60 randomx_isn_66: ; IROR_R r3, r1 @@ -345,10 +345,10 @@ randomx_isn_86: mul r12 mov r11, rdx randomx_isn_87: - ; CBRANCH -1821955951, COND 5 - add r8, -1821955951 - test r8, 2088960 - jz randomx_isn_66 + ; CBRANCH r3, -1821955951, COND 5 + add r11, -1821955951 + test r11, 2088960 + jz randomx_isn_87 randomx_isn_88: ; FADD_R f2, a3 addpd xmm2, xmm11 @@ -356,9 +356,9 @@ randomx_isn_89: ; IXOR_R r6, r3 xor r14, r11 randomx_isn_90: - ; CBRANCH -1780348372, COND 15 - add r9, -1784542676 - test r9, 2139095040 + ; CBRANCH r4, -1780348372, COND 15 + add r12, -1784542676 + test r12, 2139095040 jz randomx_isn_88 randomx_isn_91: ; IROR_R r4, 55 @@ -403,10 +403,10 @@ randomx_isn_102: ; IXOR_R r4, r7 xor r12, r15 randomx_isn_103: - ; CBRANCH -607792642, COND 4 - add r11, -607792642 - test r11, 1044480 - jz randomx_isn_91 + ; CBRANCH r7, -607792642, COND 4 + add r15, -607792642 + test r15, 1044480 + jz randomx_isn_99 randomx_isn_104: ; FMUL_R e1, a1 mulpd xmm5, xmm9 @@ -484,10 +484,10 @@ randomx_isn_123: ; FSQRT_R e2 sqrtpd xmm6, xmm6 randomx_isn_124: - ; CBRANCH -1807592127, COND 12 - add r15, -1806543551 - test r15, 267386880 - jz randomx_isn_104 + ; CBRANCH r1, -1807592127, COND 12 + add r9, -1806543551 + test r9, 267386880 + jz randomx_isn_118 randomx_isn_125: ; IADD_RS r4, r4, SHFT 0 lea r12, [r12+r12*1] @@ -703,10 +703,10 @@ randomx_isn_180: and eax, 16376 xor r15, qword ptr [rsi+rax] randomx_isn_181: - ; CBRANCH -759703940, COND 2 - add r14, -759704452 - 
test r14, 261120 - jz randomx_isn_144 + ; CBRANCH r2, -759703940, COND 2 + add r10, -759704452 + test r10, 261120 + jz randomx_isn_175 randomx_isn_182: ; FADD_R f1, a2 addpd xmm1, xmm10 @@ -781,9 +781,9 @@ randomx_isn_202: ; FSUB_R f0, a0 subpd xmm0, xmm8 randomx_isn_203: - ; CBRANCH -1282235504, COND 2 - add r12, -1282234992 - test r12, 261120 + ; CBRANCH r1, -1282235504, COND 2 + add r9, -1282234992 + test r9, 261120 jz randomx_isn_182 randomx_isn_204: ; IMUL_M r1, L3[176744] @@ -792,9 +792,9 @@ randomx_isn_205: ; FSWAP_R e1 shufpd xmm5, xmm5, 1 randomx_isn_206: - ; CBRANCH -1557284726, COND 14 - add r10, -1555187574 - test r10, 1069547520 + ; CBRANCH r0, -1557284726, COND 14 + add r8, -1555187574 + test r8, 1069547520 jz randomx_isn_204 randomx_isn_207: ; IADD_M r3, L1[r0+72267507] @@ -945,10 +945,10 @@ randomx_isn_246: mov rax, 9887096364157721599 imul r12, rax randomx_isn_247: - ; CBRANCH -722123512, COND 2 - add r13, -722123512 - test r13, 261120 - jz randomx_isn_217 + ; CBRANCH r3, -722123512, COND 2 + add r11, -722123512 + test r11, 261120 + jz randomx_isn_246 randomx_isn_248: ; ISMULH_R r7, r6 mov rax, r15 @@ -977,7 +977,7 @@ randomx_isn_254: ; FMUL_R e3, a2 mulpd xmm7, xmm10 randomx_isn_255: - ; CBRANCH -2007380935, COND 9 - add r9, -2007315399 - test r9, 33423360 - jz randomx_isn_248 + ; CBRANCH r7, -2007380935, COND 9 + add r15, -2007315399 + test r15, 33423360 + jz randomx_isn_249 diff --git a/doc/specs.md b/doc/specs.md index 5154849..ed1d136 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -598,51 +598,42 @@ There are 2 control instructions. 
|frequency|instruction|dst|src|operation| |-|-|-|-|-| -|1/256|CFROUND|`fprc`|R|`fprc = src >>> imm32` -|16/256|CBRANCH|-|-|(conditional jump) +|1/256|CFROUND|-|R|`fprc = src >>> imm32` +|16/256|CBRANCH|R|-|`dst = dst + cimm`, conditional jump #### 5.4.1 CFROUND This instruction calculates a 2-bit value by rotating the source register right by `imm32` bits and taking the 2 least significant bits (the value of the source register is unaffected). The result is stored in the `fprc` register. This changes the rounding mode of all subsequent floating point instructions. #### 5.4.2 CBRANCH -This instruction performs a conditional jump in the Program Buffer. It uses an implicit integer register operand `creg`. This register is determined based on preceding instructions. For this purpose, the VM assigns each integer register two tag values: +This instruction adds an immediate value `cimm` (constructed from `imm32`, see below) to the destination register and then performs a conditional jump in the Program Buffer based on the value of the destination register. The target of the jump is the instruction following the instruction when register `dst` was last modified. -* `lastUsed` - the index of the instruction when the register was last modified. The initial value at the start of each program iteration is `-1`, meaning the register is unmodified. -* `count` - the number of times the register has been selected as the operand of a CBRANCH instruction. The initial value at the start of each program iteration is `0`. - -A register is considered as modified by an instruction in the following cases: +At the beginning of each program iteration, all registers are considered to be unmodified. A register is considered as modified by an instruction in the following cases: * It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R. * It is the destination register of IMUL_RCP and `imm32` is not zero or a power of 2. 
* It is the source or the destination register of ISWAP_R and the destination and source registers are distinct. * The CBRANCH instruction is considered to modify all integer registers. -There are 3 rules for the selection of the `creg` register, evaluated in this order: - -1. The register with the lowest value of `lastUsed` tag is selected. -1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected from them. -1. In case multiple registers have the same values of both `lastUsed` and `count` tags, the register with the lowest index is selected (`r0` before `r1` etc.) from them. - -Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1. +If register `dst` has not been modified yet, the jump target is the first instruction in the Program Buffer. The CBRANCH instruction performs the following steps: 1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`. 1. A constant `cimm` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`). -1. `cimm` is added to `creg`. -1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of `creg` are zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified). +1. `cimm` is added to the destination register. +1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of the destination register are zero, the jump is executed (target is the instruction following the instruction where `dst` was last modified). Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this: ``` cimm = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM -creg = ..............................................XXXXXXXX.......... 
+ dst = ..............................................XXXXXXXX.......... ``` -`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. The 9th bit is set to 0 and the 10th bit is set to 1. This value would be added to `creg`. +`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. The 9th bit is set to 0 and the 10th bit is set to 1. This value will be added to `dst`. -The second line uses `X` to mark bits of `creg` that would be checked by the condition. If all these bits are 0 after adding `cimm`, the jump is executed. +The second line uses `X` to mark bits of `dst` that will be checked by the condition. If all these bits are 0 after adding `cimm`, the jump is executed. The construction of the CBRANCH instruction ensures that no inifinite loops are possible in the program. diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 2414e4b..b9866d6 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -53,8 +53,7 @@ namespace randomx { void AssemblyGeneratorX86::generateProgram(Program& prog) { for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i].lastUsed = -1; - registerUsage[i].count = 0; + registerUsage[i] = -1; } asmCode.str(std::string()); //clear for (unsigned i = 0; i < prog.getSize(); ++i) { @@ -270,7 +269,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if(instr.dst == RegisterNeedsDisplacement) asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; else @@ -279,7 +278,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << 
"\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; @@ -291,7 +290,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -302,7 +301,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; @@ -314,7 +313,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -325,7 +324,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; @@ -337,7 +336,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\tmul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; @@ -345,7 +344,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << 
std::endl; @@ -360,7 +359,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\timul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; @@ -368,7 +367,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; @@ -383,13 +382,13 @@ namespace randomx { } void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; asmCode << "\tneg " << regR[instr.dst] << std::endl; traceint(instr); } void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -400,7 +399,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; @@ -412,7 +411,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl; @@ -424,7 +423,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + 
registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl; @@ -438,7 +437,7 @@ namespace randomx { void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { uint64_t divisor = instr.getImm32(); if (!isPowerOf2(divisor)) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; traceint(instr); @@ -450,8 +449,8 @@ namespace randomx { void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { - registerUsage[instr.dst].lastUsed = i; - registerUsage[instr.src].lastUsed = i; + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; traceint(instr); } @@ -538,9 +537,8 @@ namespace randomx { } void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) { - int reg = getConditionRegister(registerUsage); - int target = registerUsage[reg].lastUsed + 1; - registerUsage[reg].count++; + int reg = instr.dst; + int target = registerUsage[reg] + 1; int shift = instr.getModCond() + ConditionOffset; int32_t imm = instr.getImm32() | (1L << shift); if (ConditionOffset > 0 || shift > 0) @@ -550,7 +548,7 @@ namespace randomx { asmCode << "\tjz randomx_isn_" << target << std::endl; //mark all registers as used for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j].lastUsed = i; + registerUsage[j] = i; } } diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp index 58b18a4..e962398 100644 --- a/src/assembly_generator_x86.hpp +++ b/src/assembly_generator_x86.hpp @@ -89,6 +89,6 @@ namespace randomx { static InstructionGenerator engine[256]; std::stringstream asmCode; - RegisterUsage registerUsage[RegistersCount]; + int registerUsage[RegistersCount]; 
}; } \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index 926c8fd..9df86f5 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -112,11 +112,6 @@ namespace randomx { double hi; }; - struct RegisterUsage { - int32_t lastUsed; - int32_t count; - }; - constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t); constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t); constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t); @@ -131,21 +126,6 @@ namespace randomx { constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register constexpr int RegisterNeedsSib = 4; //x86 r12 register - inline int getConditionRegister(RegisterUsage(&registerUsage)[RegistersCount]) { - int min = INT_MAX; - int minCount = 0; - int minIndex; - //prefer registers that have been used as a condition register fewer times - for (unsigned i = 0; i < RegistersCount; ++i) { - if (registerUsage[i].lastUsed < min || (registerUsage[i].lastUsed == min && registerUsage[i].count < minCount)) { - min = registerUsage[i].lastUsed; - minCount = registerUsage[i].count; - minIndex = i; - } - } - return minIndex; - } - inline bool isPowerOf2(uint64_t x) { return (x & (x - 1)) == 0; } diff --git a/src/instruction.cpp b/src/instruction.cpp index 221fb6f..feb53f1 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -288,7 +288,7 @@ namespace randomx { } void Instruction::h_CBRANCH(std::ostream& os) const { - os << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl; + os << "r" << (int)dst << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl; } void Instruction::h_ISTORE(std::ostream& os) const { diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 6955ed6..bb2ae76 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -267,8 +267,7 @@ namespace randomx { void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { 
instructionOffsets.clear(); for (unsigned i = 0; i < 8; ++i) { - registerUsage[i].lastUsed = -1; - registerUsage[i].count = 0; + registerUsage[i] = -1; } codePos = prologueSize; memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); @@ -435,7 +434,7 @@ namespace randomx { } void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; emit(REX_LEA); if (instr.dst == RegisterNeedsDisplacement) emitByte(0xac); @@ -447,7 +446,7 @@ namespace randomx { } void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_ADD_RM); @@ -466,7 +465,7 @@ namespace randomx { } void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_SUB_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -479,7 +478,7 @@ namespace randomx { } void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_SUB_RM); @@ -494,7 +493,7 @@ namespace randomx { } void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_IMUL_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -507,7 +506,7 @@ namespace randomx { } void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_IMUL_RM); @@ -522,7 +521,7 @@ namespace randomx { } void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ 
-532,7 +531,7 @@ namespace randomx { } void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, false); emit(REX_MOV_RR64); @@ -551,7 +550,7 @@ namespace randomx { } void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -561,7 +560,7 @@ namespace randomx { } void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, false); emit(REX_MOV_RR64); @@ -582,7 +581,7 @@ namespace randomx { void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { uint64_t divisor = instr.getImm32(); if (!isPowerOf2(divisor)) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; emit(MOV_RAX_I); emit64(randomx_reciprocal_fast(divisor)); emit(REX_IMUL_RM); @@ -591,13 +590,13 @@ namespace randomx { } void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; emit(REX_NEG); emitByte(0xd8 + instr.dst); } void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_XOR_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -610,7 +609,7 @@ namespace randomx { } void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_XOR_RM); @@ -625,7 +624,7 @@ namespace randomx { } void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_MOV_RR); emitByte(0xc8 + 
instr.src); @@ -640,7 +639,7 @@ namespace randomx { } void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { - registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_MOV_RR); emitByte(0xc8 + instr.src); @@ -656,8 +655,8 @@ namespace randomx { void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { - registerUsage[instr.dst].lastUsed = i; - registerUsage[instr.src].lastUsed = i; + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; emit(REX_XCHG); emitByte(0xc0 + instr.src + 8 * instr.dst); } @@ -739,9 +738,8 @@ namespace randomx { } void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { - int reg = getConditionRegister(registerUsage); - int target = registerUsage[reg].lastUsed + 1; - registerUsage[reg].count++; + int reg = instr.dst; + int target = registerUsage[reg] + 1; emit(REX_ADD_I); emitByte(0xc0 + reg); int shift = instr.getModCond() + ConditionOffset; @@ -756,7 +754,7 @@ namespace randomx { emit32(instructionOffsets[target] - (codePos + 4)); //mark all registers as used for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j].lastUsed = i; + registerUsage[j] = i; } } diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index 6282778..bd068c7 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -67,7 +67,7 @@ namespace randomx { private: static InstructionGeneratorX86 engine[256]; std::vector instructionOffsets; - RegisterUsage registerUsage[RegistersCount]; + int registerUsage[RegistersCount]; uint8_t* code; int32_t codePos; diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 1891afa..5b33719 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -241,7 +241,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: 
d908c4ce0329e2e104c08c3a76b427dd9dad3622a04b06af965cd00cd62b2d2e" << std::endl; + std::cout << "Reference result: 0063222e8c4c687cc7c91ea86f3747d8dbd53af6bdf937167736b9284e4d7dac" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 072a27d..f62b536 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -283,10 +283,9 @@ namespace randomx { template void InterpretedVm::precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) { - RegisterUsage registerUsage[RegistersCount]; + int registerUsage[RegistersCount]; for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i].lastUsed = -1; - registerUsage[i].count = 0; + registerUsage[i] = -1; } for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { auto& instr = program(i); @@ -307,7 +306,7 @@ namespace randomx { ibc.shift = instr.getModShift(); ibc.imm = signExtend2sCompl(instr.getImm32()); } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IADD_M) { @@ -324,7 +323,7 @@ namespace randomx { ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(ISUB_R) { @@ -339,7 +338,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.isrc = &ibc.imm; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(ISUB_M) { @@ -356,7 +355,7 @@ namespace randomx { ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IMUL_R) { @@ -371,7 +370,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.isrc = &ibc.imm; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IMUL_M) { @@ -388,7 +387,7 @@ namespace randomx { 
ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IMULH_R) { @@ -397,7 +396,7 @@ namespace randomx { ibc.type = InstructionType::IMULH_R; ibc.idst = &r[dst]; ibc.isrc = &r[src]; - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IMULH_M) { @@ -414,7 +413,7 @@ namespace randomx { ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(ISMULH_R) { @@ -423,7 +422,7 @@ namespace randomx { ibc.type = InstructionType::ISMULH_R; ibc.idst = &r[dst]; ibc.isrc = &r[src]; - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(ISMULH_M) { @@ -440,7 +439,7 @@ namespace randomx { ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IMUL_RCP) { @@ -451,7 +450,7 @@ namespace randomx { ibc.idst = &r[dst]; ibc.imm = randomx_reciprocal(divisor); ibc.isrc = &ibc.imm; - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } else { ibc.type = InstructionType::NOP; @@ -462,7 +461,7 @@ namespace randomx { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::INEG_R; ibc.idst = &r[dst]; - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IXOR_R) { @@ -477,7 +476,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.isrc = &ibc.imm; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IXOR_M) { @@ -494,7 +493,7 @@ namespace randomx { ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IROR_R) { @@ -509,7 +508,7 @@ namespace randomx { ibc.imm = instr.getImm32(); ibc.isrc = &ibc.imm; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(IROL_R) { @@ -524,7 +523,7 @@ namespace randomx { ibc.imm = instr.getImm32(); 
ibc.isrc = &ibc.imm; } - registerUsage[dst].lastUsed = i; + registerUsage[dst] = i; } break; CASE_REP(ISWAP_R) { @@ -534,8 +533,8 @@ namespace randomx { ibc.idst = &r[dst]; ibc.isrc = &r[src]; ibc.type = InstructionType::ISWAP_R; - registerUsage[dst].lastUsed = i; - registerUsage[src].lastUsed = i; + registerUsage[dst] = i; + registerUsage[src] = i; } else { ibc.type = InstructionType::NOP; @@ -620,10 +619,9 @@ namespace randomx { CASE_REP(CBRANCH) { ibc.type = InstructionType::CBRANCH; //jump condition - int reg = getConditionRegister(registerUsage); + int reg = instr.dst % RegistersCount; ibc.isrc = &r[reg]; - ibc.target = registerUsage[reg].lastUsed; - registerUsage[reg].count++; + ibc.target = registerUsage[reg]; int shift = instr.getModCond() + ConditionOffset; const uint64_t conditionMask = ConditionMask << shift; ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); @@ -632,7 +630,7 @@ namespace randomx { ibc.memMask = ConditionMask << shift; //mark all registers as used for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j].lastUsed = i; + registerUsage[j] = i; } } break;