From a09bee8d60bc078b6dcd119dc133550a98aa8c15 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 28 Dec 2018 14:18:41 +0100 Subject: [PATCH] js -> jz to enable macro-op fusion on Intel CPUs (~1% speed-up) --- src/AssemblyGeneratorX86.cpp | 2 +- src/JitCompilerX86.cpp | 10 +++++----- src/executeProgram-win64.asm | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 1922b0e..0d61f43 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -49,7 +49,7 @@ namespace RandomX { void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl; asmCode << "\tdec edi" << std::endl; - asmCode << "\tjs rx_finish" << std::endl; + asmCode << "\tjz rx_finish" << std::endl; auto generator = engine[instr.opcode]; (this->*generator)(instr, i); } diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 644fd9b..fe10229 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -81,10 +81,10 @@ namespace RandomX { */ - constexpr uint8_t ic3 = (InstructionCount >> 24); - constexpr uint8_t ic2 = (InstructionCount >> 16); - constexpr uint8_t ic1 = (InstructionCount >> 8); - constexpr uint8_t ic0 = (InstructionCount >> 0); + constexpr uint8_t ic3 = ((InstructionCount + 1) >> 24); + constexpr uint8_t ic2 = ((InstructionCount + 1) >> 16); + constexpr uint8_t ic1 = ((InstructionCount + 1) >> 8); + constexpr uint8_t ic0 = ((InstructionCount + 1) >> 0); const uint8_t prologue[] = { 0x53, //push rbx @@ -245,7 +245,7 @@ namespace RandomX { void JitCompilerX86::generateCode(Instruction& instr, int i) { instructionOffsets.push_back(codePos); - emit(0x880fcfff); //dec edx; js + emit(0x840fcfff); //dec edi; jz emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative) gena(instr); auto generator = engine[instr.opcode]; diff --git a/src/executeProgram-win64.asm b/src/executeProgram-win64.asm index
0d92b60..e5ff87d 100644 --- a/src/executeProgram-win64.asm +++ b/src/executeProgram-win64.asm @@ -84,7 +84,7 @@ executeProgram PROC push r9 mov rbp, rsp ; beginning of VM stack - mov rdi, 1048576 ; number of VM instructions to execute + mov rdi, 1048577 ; number of VM instructions to execute + 1 ; load VM register values mov r8, qword ptr [rcx+0]