parent
360c8ed913
commit
41b51a4858
@ -1,155 +0,0 @@
|
||||
;# initBlock body: builds one 64-byte block from the cache and stores it.
;# This text is included by both the MASM and the GNU as wrappers, so every
;# comment starts with the two-character marker that both assemblers skip.
;#
;# Register contract on entry (established by the including wrapper):
;#   rdi -> cache base address (addressed in 64-byte blocks)
;#   rbp -> 64-byte output buffer
;#   r8  =  block number, used as the first accumulator
;#   rsi =  number of loop iterations
;# Working registers:
;#   r8-r15 = eight 64-bit accumulators (r9-r15 start at zero)
;#   rbx    -> cache block selected by the current step
;#   rcx    =  input of the hash routine, rax = its result
;# The hash routine is called with its input in rcx and returns in rax.
	prefetcht0 byte ptr [rbp]
	xor r9, r9
	xor r10, r10
	xor r11, r11
	xor r12, r12
	xor r13, r13
	xor r14, r14
	xor r15, r15
	;# A zero iteration count would wrap around in the counter at the bottom
	;# of the loop and run 2^64 iterations, so skip the loop in that case.
	test rsi, rsi
	jz initBlock_done
initBlock_loop:
	;# c0: select block by low 22 bits of r8, then r9 = hash(r8 + r9)
	mov rbx, r8
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r8+r9]
	call squareHash
	mov r9, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c1: select block by low 22 bits of r9, then r10 = hash(r9 + r10)
	mov rbx, r9
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r9+r10]
	call squareHash
	mov r10, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c2: select block by low 22 bits of r10, then r11 = hash(r10 + r11)
	mov rbx, r10
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r10+r11]
	call squareHash
	mov r11, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c3: select block by low 22 bits of r11, then r12 = hash(r11 + r12)
	mov rbx, r11
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r11+r12]
	call squareHash
	mov r12, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c4: select block by low 22 bits of r12, then r13 = hash(r12 + r13)
	mov rbx, r12
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r12+r13]
	call squareHash
	mov r13, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c5: select block by low 22 bits of r13, then r14 = hash(r13 + r14)
	mov rbx, r13
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r13+r14]
	call squareHash
	mov r14, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c6: select block by low 22 bits of r14, then r15 = hash(r14 + r15)
	mov rbx, r14
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r14+r15]
	call squareHash
	mov r15, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	;# c7: select block by low 22 bits of r15, then r8 = hash(r15 + r8)
	mov rbx, r15
	and rbx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r15+r8]
	call squareHash
	mov r8, rax
	xor r8, qword ptr [rbx+0]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]
	sub rsi, 1
	jnz initBlock_loop
initBlock_done:
	;# write the eight accumulators to the output buffer
	mov qword ptr [rbp+0], r8
	mov qword ptr [rbp+8], r9
	mov qword ptr [rbp+16], r10
	mov qword ptr [rbp+24], r11
	mov qword ptr [rbp+32], r12
	mov qword ptr [rbp+40], r13
	mov qword ptr [rbp+48], r14
	mov qword ptr [rbp+56], r15
|
@ -1,21 +0,0 @@
|
||||
;# Prologue fragment: clears the integer registers r8-r15 and loads the
;# constant xmm registers. Expects rcx to point at a structure holding four
;# 16-byte constants and rbp to hold a value that is copied into rax.
	mov rax, rbp

	;# zero r8-r15 (a 32-bit xor also clears the upper half of the register)
	xor r8d, r8d
	xor r9d, r9d
	xor r10d, r10d
	xor r11d, r11d
	xor r12d, r12d
	xor r13d, r13d
	xor r14d, r14d
	xor r15d, r15d

	;# advance rcx by 120 bytes; the four constants are then read from
	;# offsets 72, 88, 104 and 120 of the new base (movapd needs 16-byte
	;# aligned addresses)
	lea rcx, [rcx+120]
	movapd xmm8, xmmword ptr [rcx+72]
	movapd xmm9, xmmword ptr [rcx+88]
	movapd xmm10, xmmword ptr [rcx+104]
	movapd xmm11, xmmword ptr [rcx+120]

	;# constants defined outside this fragment
	movapd xmm13, xmmword ptr [minDbl]
	movapd xmm14, xmmword ptr [absMask]
	movapd xmm15, xmmword ptr [signMask]
|
@ -1,171 +0,0 @@
|
||||
;# Generates one 64-byte block from the cache (a single pass of eight steps)
;# and XORs it into the values the caller holds in r8-r15.
;#   In:      rdi -> cache, rcx = dataset block number, r8-r15 = caller values
;#   Out:     r8-r15 = caller values XOR the eight generated quadwords
;#   Scratch: rax, rcx, rdx; rbx is used as well but restored before return
;#   Stack:   72 bytes hold rbx and the incoming r8-r15; rax is not cleared

	;# save frame: rbx ends up at [rsp+64], r8 at [rsp+56] ... r15 at [rsp+0]
	push rbx
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15

	;# accumulators: r8 = block number, r9-r15 = 0
	mov r8, rcx
	xor r9d, r9d
	xor r10d, r10d
	xor r11d, r11d
	xor r12d, r12d
	xor r13d, r13d
	xor r14d, r14d
	xor r15d, r15d

	;# step c0: block picked by r8, r9 replaced by hash(r8 + r9)
	mov rbx, r8
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r8+r9]
	call squareHashSub
	mov r9, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c1: block picked by r9, r10 replaced by hash(r9 + r10)
	mov rbx, r9
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r9+r10]
	call squareHashSub
	mov r10, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c2: block picked by r10, r11 replaced by hash(r10 + r11)
	mov rbx, r10
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r10+r11]
	call squareHashSub
	mov r11, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c3: block picked by r11, r12 replaced by hash(r11 + r12)
	mov rbx, r11
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r11+r12]
	call squareHashSub
	mov r12, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c4: block picked by r12, r13 replaced by hash(r12 + r13)
	mov rbx, r12
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r12+r13]
	call squareHashSub
	mov r13, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c5: block picked by r13, r14 replaced by hash(r13 + r14)
	mov rbx, r13
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r13+r14]
	call squareHashSub
	mov r14, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c6: block picked by r14, r15 replaced by hash(r14 + r15)
	mov rbx, r14
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r14+r15]
	call squareHashSub
	mov r15, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# step c7: block picked by r15, r8 replaced by hash(r15 + r8)
	mov rbx, r15
	and ebx, 4194303
	shl rbx, 6
	add rbx, rdi
	prefetchnta byte ptr [rbx]
	lea rcx, [r15+r8]
	call squareHashSub
	mov r8, rax
	xor r8, qword ptr [rbx]
	xor r9, qword ptr [rbx+8]
	xor r10, qword ptr [rbx+16]
	xor r11, qword ptr [rbx+24]
	xor r12, qword ptr [rbx+32]
	xor r13, qword ptr [rbx+40]
	xor r14, qword ptr [rbx+48]
	xor r15, qword ptr [rbx+56]

	;# fold the saved caller values into the result and release the frame
	xor r8, qword ptr [rsp+56]
	xor r9, qword ptr [rsp+48]
	xor r10, qword ptr [rsp+40]
	xor r11, qword ptr [rsp+32]
	xor r12, qword ptr [rsp+24]
	xor r13, qword ptr [rsp+16]
	xor r14, qword ptr [rsp+8]
	xor r15, qword ptr [rsp]
	mov rbx, qword ptr [rsp+64]
	add rsp, 72
	ret
|
@ -1,87 +0,0 @@
|
||||
;# Hash routine body, included by the assembler wrappers.
;#   In:  rcx = 64-bit input (left unmodified)
;#   Out: rax = hash value
;#   Clobbers: rdx, flags
;# The input is offset by a fixed constant, then 42 rounds follow. Each round
;# squares rax into the 128-bit pair rdx:rax and keeps low half minus high
;# half as the new 64-bit state.
	mov rax, 9507361525245169745
	add rax, rcx

	mul rax
	sub rax, rdx	;# round 1
	mul rax
	sub rax, rdx	;# round 2
	mul rax
	sub rax, rdx	;# round 3
	mul rax
	sub rax, rdx	;# round 4
	mul rax
	sub rax, rdx	;# round 5
	mul rax
	sub rax, rdx	;# round 6
	mul rax
	sub rax, rdx	;# round 7
	mul rax
	sub rax, rdx	;# round 8
	mul rax
	sub rax, rdx	;# round 9
	mul rax
	sub rax, rdx	;# round 10
	mul rax
	sub rax, rdx	;# round 11
	mul rax
	sub rax, rdx	;# round 12
	mul rax
	sub rax, rdx	;# round 13
	mul rax
	sub rax, rdx	;# round 14
	mul rax
	sub rax, rdx	;# round 15
	mul rax
	sub rax, rdx	;# round 16
	mul rax
	sub rax, rdx	;# round 17
	mul rax
	sub rax, rdx	;# round 18
	mul rax
	sub rax, rdx	;# round 19
	mul rax
	sub rax, rdx	;# round 20
	mul rax
	sub rax, rdx	;# round 21
	mul rax
	sub rax, rdx	;# round 22
	mul rax
	sub rax, rdx	;# round 23
	mul rax
	sub rax, rdx	;# round 24
	mul rax
	sub rax, rdx	;# round 25
	mul rax
	sub rax, rdx	;# round 26
	mul rax
	sub rax, rdx	;# round 27
	mul rax
	sub rax, rdx	;# round 28
	mul rax
	sub rax, rdx	;# round 29
	mul rax
	sub rax, rdx	;# round 30
	mul rax
	sub rax, rdx	;# round 31
	mul rax
	sub rax, rdx	;# round 32
	mul rax
	sub rax, rdx	;# round 33
	mul rax
	sub rax, rdx	;# round 34
	mul rax
	sub rax, rdx	;# round 35
	mul rax
	sub rax, rdx	;# round 36
	mul rax
	sub rax, rdx	;# round 37
	mul rax
	sub rax, rdx	;# round 38
	mul rax
	sub rax, rdx	;# round 39
	mul rax
	sub rax, rdx	;# round 40
	mul rax
	sub rax, rdx	;# round 41
	mul rax
	sub rax, rdx	;# round 42

	ret
|
@ -1,41 +0,0 @@
|
||||
/*
 * GNU as wrapper (Intel syntax, run through the C preprocessor) exporting
 * squareHash and initBlock for callers that pass arguments in
 * rdi, rsi, rdx, rcx (System V AMD64 order).
 */
.intel_syntax noprefix
#if defined(__APPLE__)
.text
#else
.section .text
#endif
#if defined(__WIN32__) || defined(__APPLE__)
#define DECL(x) _##x
#else
#define DECL(x) x
#endif

#include "configuration.h"

.global DECL(squareHash)
.global DECL(initBlock)

/*
 * uint64_t squareHash(uint64_t x)
 *   In:  rdi = x
 *   Out: rax = hash
 *   Clobbers: rcx, rdx, flags
 *
 * The shared hash body expects its input in rcx, so the public entry point
 * moves the first argument there and falls through into squareHashSub.
 *
 * squareHashSub is the internal entry point: input already in rcx. Code in
 * this file that prepares rcx itself must call this label. Going through
 * the public entry would overwrite rcx with rdi before hashing.
 */
DECL(squareHash):
	mov rcx, rdi
squareHashSub:
	#include "asm/squareHash.inc"

/*
 * initBlock(cache, out, blockNumber, iterations)
 *   In: rdi -> structure whose first quadword is the cache memory pointer
 *       rsi -> 64-byte output buffer
 *       rdx =  block number
 *       rcx =  iteration count
 *
 * The included body wants: rdi = cache memory, rbp = out, r8 = block number,
 * rsi = iterations. It uses rbx, rbp and r12-r15, which are callee-saved and
 * therefore preserved here.
 */
DECL(initBlock):
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	mov rdi, qword ptr [rdi]
	mov rbp, rsi
	mov r8, rdx
	mov rsi, rcx
	/*
	 * Inside the body rdi holds the cache pointer and rcx holds the hash
	 * input, so every call must target the rcx-based internal entry point
	 * rather than the public one, which reloads rcx from rdi.
	 */
	#define squareHash squareHashSub
	#include "asm/initBlock.inc"
	pop r15
	pop r14
	pop r13
	pop r12
	pop rbp
	pop rbx
	ret
|
@ -1,43 +0,0 @@
|
||||
; MASM wrapper exporting squareHash and initBlock.
; NOTE(review): the IFDEF RAX guard presumably limits assembly to the 64-bit
; assembler, where RAX is a known symbol - confirm.
IFDEF RAX

PUBLIC squareHash
PUBLIC initBlock

.code

; squareHash: the included body reads its input from rcx and returns the
; hash in rax (rdx is overwritten by the multiplications).
squareHash PROC
	include asm/squareHash.inc
squareHash ENDP

; initBlock arguments on entry:
;   rcx -> structure whose first quadword is the cache memory pointer
;   rdx -> output buffer
;   r8  =  block number
;   r9  =  iteration count
; The included body wants rdi = cache memory, rbp = output, r8 = block number
; and rsi = iteration count, and it uses rbx and r12-r15 as working registers,
; so all of those are saved and restored around it.
initBlock PROC
	push rbx
	push rbp
	push rsi
	push rdi
	push r12
	push r13
	push r14
	push r15
	mov rsi, r9			; loop counter
	mov rbp, rdx			; output pointer
	mov rdi, qword ptr [rcx]	; cache memory (rcx is reused by the body)
	; r8 already holds the block number
	include asm/initBlock.inc
	pop r15
	pop r14
	pop r13
	pop r12
	pop rdi
	pop rsi
	pop rbp
	pop rbx
	ret
initBlock ENDP

ENDIF

END
|
@ -1,76 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
Based on the original idea by SChernykh:
|
||||
https://github.com/SChernykh/xmr-stak-cpu/issues/1#issuecomment-414336613
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(_M_X64) && !defined(__x86_64__)
|
||||
|
||||
/* Unsigned 128-bit value split into two 64-bit halves. */
typedef struct {
	uint64_t lo;	/* bits 0..63 */
	uint64_t hi;	/* bits 64..127 */
} uint128_t;

#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)

/*
 * Returns the full 128-bit square of x using only 64-bit arithmetic.
 *
 * x is split into 32-bit halves xh:xl, so
 *   x^2 = xh*xh * 2^64 + 2 * xl*xh * 2^32 + xl*xl.
 * m1, m2 and m3 accumulate the 32-bit columns starting at bits 32, 64 and 96
 * respectively, each carrying its upper half into the next column.
 */
static inline uint128_t square128(uint64_t x) {
	uint64_t xh = HI(x), xl = LO(x);
	uint64_t xll = xl * xl;
	uint64_t xlh = xl * xh;
	uint64_t xhh = xh * xh;
	uint64_t m1 = 2 * LO(xlh) + HI(xll);
	uint64_t m2 = 2 * HI(xlh) + LO(xhh) + HI(m1);
	uint64_t m3 = HI(xhh) + HI(m2);

	uint128_t x2;

	x2.lo = (m1 << 32) + LO(xll);
	x2.hi = (m3 << 32) + LO(m2);

	return x2;
}
#undef LO
#undef HI

/*
 * Portable squareHash: adds a fixed constant to x, then applies 42 rounds of
 * "square to 128 bits, subtract the high half from the low half".
 *
 * Declared static inline: with plain `inline` a C99 compiler emits no
 * external definition for a function defined in a header, so any call that
 * is not inlined fails to link.
 */
static inline uint64_t squareHash(uint64_t x) {
	x += 9507361525245169745ULL;
	for (int i = 0; i < 42; ++i) {
		uint128_t x2 = square128(x);
		x = x2.lo - x2.hi;
	}
	return x;
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint64_t squareHash(uint64_t);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Reference in new issue