From c02ee4291d174e8a6511ba2f8839a15b3e34f257 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 11 Jan 2019 10:52:12 +0100 Subject: [PATCH] FPROUND - variable flag offset --- doc/isa-ops.md | 3 ++- doc/isa.md | 9 +++++---- src/AssemblyGeneratorX86.cpp | 4 +++- src/JitCompilerX86.cpp | 10 +++++++++- src/program.inc | 2 +- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/doc/isa-ops.md b/doc/isa-ops.md index fd5f286..5e389e3 100644 --- a/doc/isa-ops.md +++ b/doc/isa-ops.md @@ -12,6 +12,7 @@ There are 31 unique instructions divided into 3 groups: ## Integer instructions There are 22 integer instructions. They are divided into 3 classes (MATH, DIV, SHIFT) with different B operand selection rules. + |# opcodes|instruction|class|signed|A width|B width|C|C width| |-|-|-|-|-|-|-|-| |12|ADD_64|MATH|no|64|64|`A + B`|64| @@ -55,7 +56,7 @@ The shift/rotate instructions use just the bottom 6 bits of the `B` operand (`im There are 5 floating point instructions. All floating point instructions are vector instructions that operate on two packed double precision floating point values. |# opcodes|instruction|C| -|-|-|-|-| +|-|-|-| |20|FPADD|`A + B`| |20|FPSUB|`A - B`| |22|FPMUL|`A * B`| diff --git a/doc/isa.md b/doc/isa.md index 4f1cc5d..cedece9 100644 --- a/doc/isa.md +++ b/doc/isa.md @@ -9,6 +9,7 @@ The encoding of each 128-bit instruction word is following: There are 256 opcodes, which are distributed between 3 groups of instructions. There are 31 distinct operations (each operation can be encoded using multiple opcodes - for example opcodes `0x00` to `0x0d` correspond to integer addition). **Table 1: Instruction groups** + |group|# operations|# opcodes|| |---------|-----------------|----|-| |integer (IA)|22|144|56.3%| @@ -31,8 +32,8 @@ The `A.LOC.W` flag determines the address width when reading operand A from the **Table 3: Operand A read address width** -|`A.LOC.W`|address width (W) -|---------|-|-| +|`A.LOC.W`|address width (W)| +|---------|-| |0|15 bits (256 KiB)| |1-3|11 bits (16 KiB)| @@ -125,8 +126,8 @@ The `C.LOC.W` flag determines the address width when writing operand C to the sc **Table 10: Operand C write address width** -|`C.LOC.W`|address width (W) -|---------|-|-| +|`C.LOC.W`|address width (W)| +|---------|-| |0|15 bits (256 KiB)| |1-3|11 bits (16 KiB)| diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 2b8db69..1fbf2f2 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -466,7 +466,9 @@ namespace RandomX { void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) { genar(instr, i); asmCode << "\tmov rcx, rax" << std::endl; - asmCode << "\tshl eax, 13" << std::endl; + int rotate = (13 - (instr.imm8 & 63)) & 63; + if (rotate != 0) + asmCode << "\trol rax, " << rotate << std::endl; asmCode << "\tand eax, 24576" << std::endl; asmCode << "\tor eax, 40896" << std::endl; asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl; diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 7018b97..1f09cd9 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -574,7 +574,15 @@ namespace RandomX { void JitCompilerX86::h_FPROUND(Instruction& instr, int i) { genar(instr); - emit(0x00250de0c1c88b48); //mov rcx,rax; shl eax,0xd + emitByte(0x48); + emit(uint16_t(0xc88b)); //mov rcx,rax + int rotate = (13 - (instr.imm8 & 63)) & 63; + if (rotate != 0) { + emitByte(0x48); + emit(uint16_t(0xc0c1)); //rol rax + emitByte(rotate); + } + emit(uint16_t(0x0025)); emit(0x00009fc00d000060); //and eax,0x6000; or eax,0x9fc0 emit(0x2454ae0ff8244489); //ldmxcsr DWORD PTR [rsp-0x8] emitByte(0xf8); diff --git a/src/program.inc b/src/program.inc index dd8cb36..66b9147 100644 --- a/src/program.inc +++ b/src/program.inc @@ -8859,7 +8859,7 @@ rx_body_509: and ecx, 2047 mov rax, qword ptr [rsi+rcx*8] mov rcx, rax - shl eax, 13 + rol rax, 34 and eax, 24576 or eax, 40896 mov dword ptr [rsp - 8], eax