Updated JIT compiler and assembly generator for new int -> float conversion

feature/branches 0.3.2-beta
tevador 5 years ago
parent 790b382eda
commit d9bc6cfeda

@ -40,6 +40,8 @@ For floating point instructions, the destination can be a group F or group E reg
Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8 byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`.
Memory operands for group E registers are loaded as described above, then their sign bit is cleared and their exponent value is set to `0x30F` (corresponds to 2<sup>-240</sup>).
|frequency|instruction|dst|src|operation|
|-|-|-|-|-|
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
@ -58,8 +60,7 @@ This instruction negates the number and multiplies it by <code>2<sup>x</sup></co
The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`.
#### Denormal and NaN values
Due to restrictions on the values of the floating point registers, no operation results in `NaN`.
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number.
#### Rounding
All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register:

@ -385,9 +385,9 @@ namespace RandomX {
instr.dst %= 4;
genAddressReg(instr);
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
asmCode << "\tandps xmm12, xmm14" << std::endl;
asmCode << "\tandps xmm12, xmm13" << std::endl;
asmCode << "\torps xmm12, xmm14" << std::endl;
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
traceflt(instr);
}

@ -73,9 +73,9 @@ namespace RandomX {
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> DBL_MIN
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
; xmm15 -> sign mask 0x80000000000000008000000000000000
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/
@ -165,7 +165,7 @@ namespace RandomX {
static const uint8_t JMP = 0xe9;
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0f, 0x54, 0xe6 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
@ -556,8 +556,6 @@ namespace RandomX {
emit(REX_ANDPS_XMM12);
emit(REX_DIVPD);
emitByte(0xe4 + 8 * instr.dst);
emit(REX_MAXPD);
emitByte(0xe5 + 8 * instr.dst);
}
void JitCompilerX86::h_FSQRT_R(Instruction& instr) {

@ -22,7 +22,11 @@
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14

@ -8,10 +8,10 @@
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2

@ -32,8 +32,8 @@
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr minDbl[rip]
movapd xmm14, xmmword ptr absMask[rip]
movapd xmm15, xmmword ptr signMask[rip]
movapd xmm13, xmmword ptr mantissaMask[rip]
movapd xmm14, xmmword ptr exp240[rip]
movapd xmm15, xmmword ptr scaleMask[rip]
jmp DECL(randomx_program_loop_begin)

@ -45,8 +45,8 @@
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
jmp randomx_program_loop_begin

@ -1,6 +1,6 @@
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
mantissaMask:
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
exp240:
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129

@ -52,9 +52,9 @@ executeProgram PROC
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> DBL_MIN
; xmm14 -> absolute value mask
; xmm15 -> sign mask
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
; store callee-saved registers
push rbx
@ -103,18 +103,18 @@ executeProgram PROC
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
jmp program_begin
ALIGN 64
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
mantissaMask:
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
exp240:
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
ALIGN 64
@ -145,10 +145,14 @@ program_begin:
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
;# 256 instructions
include program.inc
@ -181,10 +185,10 @@ IF 1
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2

@ -341,7 +341,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(programCount == 1000)
std::cout << "Reference result: d3ae5a9365196ed48bb98ebfc3316498e29443ea7f056ecbd272f749c6af7730" << std::endl;
std::cout << "Reference result: e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
}

Loading…
Cancel
Save