diff --git a/src/asm/program_prologue_linux.inc b/src/asm/program_prologue_linux.inc index ffde152..033584a 100644 --- a/src/asm/program_prologue_linux.inc +++ b/src/asm/program_prologue_linux.inc @@ -15,6 +15,7 @@ mov rsi, rdx ;# uint8_t* scratchpad mov rax, rbp + ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/asm/program_prologue_win64.inc b/src/asm/program_prologue_win64.inc index 590a98d..10f21d3 100644 --- a/src/asm/program_prologue_win64.inc +++ b/src/asm/program_prologue_win64.inc @@ -28,6 +28,7 @@ mov rbx, r9 ;# loop counter mov rax, rbp + ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/asm/program_read_dataset.inc b/src/asm/program_read_dataset.inc index b81d0c3..9c61092 100644 --- a/src/asm/program_read_dataset.inc +++ b/src/asm/program_read_dataset.inc @@ -1,17 +1,16 @@ + mov ecx, ebp ;# ecx = ma + and ecx, RANDOMX_DATASET_BASE_MASK + xor r8, qword ptr [rdi+rcx] + ror rbp, 32 ;# swap "ma" and "mx" xor rbp, rax ;# modify "mx" mov edx, ebp ;# edx = mx and edx, RANDOMX_DATASET_BASE_MASK prefetchnta byte ptr [rdi+rdx] - ror rbp, 32 ;# swap "ma" and "mx" - mov edx, ebp ;# edx = ma - and edx, RANDOMX_DATASET_BASE_MASK - lea rcx, [rdi+rdx] ;# dataset cache line - xor r8, qword ptr [rcx+0] - xor r9, qword ptr [rcx+8] - xor r10, qword ptr [rcx+16] - xor r11, qword ptr [rcx+24] - xor r12, qword ptr [rcx+32] - xor r13, qword ptr [rcx+40] - xor r14, qword ptr [rcx+48] - xor r15, qword ptr [rcx+56] + xor r9, qword ptr [rdi+rcx+8] + xor r10, qword ptr [rdi+rcx+16] + xor r11, qword ptr [rdi+rcx+24] + xor r12, qword ptr [rdi+rcx+32] + xor r13, qword ptr [rdi+rcx+40] + xor r14, qword ptr [rdi+rcx+48] + xor r15, qword ptr [rdi+rcx+56] \ No newline at end of file diff --git a/src/asm/program_read_dataset_sshash_init.inc b/src/asm/program_read_dataset_sshash_init.inc index 6fe9525..9491f3d 100644 --- a/src/asm/program_read_dataset_sshash_init.inc +++ b/src/asm/program_read_dataset_sshash_init.inc @@ -8,10 +8,10 @@ mov qword ptr [rsp+16], 
r13 mov qword ptr [rsp+8], r14 mov qword ptr [rsp+0], r15 - xor rbp, rax ;# modify "mx" ror rbp, 32 ;# swap "ma" and "mx" - mov ebx, ebp ;# ecx = ma - and ebx, RANDOMX_DATASET_BASE_MASK - shr ebx, 6 ;# ebx = Dataset block number + xor rbp, rax ;# modify "mx" + mov rbx, rbp ;# rbx = rbp; "ma" is now in the upper 32 bits + shr rbx, 38 + and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number ;# add ebx, datasetOffset / 64 ;# call 32768 \ No newline at end of file diff --git a/src/jit_compiler_a64_static.S b/src/jit_compiler_a64_static.S index 598eca2..7fe6599 100644 --- a/src/jit_compiler_a64_static.S +++ b/src/jit_compiler_a64_static.S @@ -307,6 +307,9 @@ literal_v14: .fill 2,8,0 literal_v15: .fill 2,8,0 DECL(randomx_program_aarch64_vm_instructions_end): + # Calculate dataset pointer for dataset read + # Do it here to break false dependency from readReg2 and readReg3 (see next line) + lsr x10, x9, 32 # mx ^= r[readReg2] ^ r[readReg3]; eor x9, x9, x18 @@ -324,8 +327,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1): # mx <-> ma ror x9, x9, 32 - # Calculate dataset pointer for dataset read - mov w10, w9 DECL(randomx_program_aarch64_cacheline_align_mask2): # Actual mask will be inserted by JIT compiler and x10, x10, 1