Compare commits

...

10 Commits

Author SHA1 Message Date
wowario 27b099b6dd
RandomWOW parameters
6 months ago
tevador 102f8acf90 bump benchmark version to 1.2.1
7 months ago
tevador 73d70fe82a
Merge pull request #286 from tevador/pr-zext
7 months ago
tevador e895d451a3 Avoid `zext.b`
7 months ago
tevador e5b9c02417
Merge pull request #285 from tevador/pr-120
7 months ago
tevador 06a7cc1c33 Update README and benchmark version
7 months ago
SChernykh 7db92b73f7
Merge pull request #284 from SChernykh/opt-rcp
7 months ago
SChernykh 5c49ab12a0 Optimized randomx_reciprocal
7 months ago
SChernykh 5fc512e71c
Merge pull request #282 from SChernykh/fix-ub
7 months ago
SChernykh 8f91d31b8b Fixed UB in ARM64 JIT compiler
7 months ago

@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by
### Linux
Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).
To build optimized binaries for your machine, run:
```
@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)|
Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|
Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower.
### GPU performance
@ -129,6 +129,7 @@ The reference implementation has been validated on the following platforms:
* ARMv7+VFPv3 (32-bit, little-endian)
* ARMv8 (64-bit, little-endian)
* PPC64 (64-bit, big-endian)
* RISCV64 (64-bit, little-endian)
### Can FPGAs mine RandomX?

@ -175,10 +175,10 @@ template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d
#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450
#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4
#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
@ -197,10 +197,6 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
@ -210,23 +206,23 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state2 = aesdec<softAes>(state2, key0);
state3 = aesenc<softAes>(state3, key0);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state2 = aesdec<softAes>(state2, key1);
state3 = aesenc<softAes>(state3, key1);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state2 = aesdec<softAes>(state2, key2);
state3 = aesenc<softAes>(state3, key2);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
state2 = aesdec<softAes>(state2, key3);
state3 = aesenc<softAes>(state3, key3);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);

@ -15,7 +15,7 @@ RANDOMX_SCRATCHPAD_L2 EQU 262144t
RANDOMX_SCRATCHPAD_L1 EQU 16384t
RANDOMX_JUMP_BITS EQU 8t
RANDOMX_JUMP_OFFSET EQU 8t
RANDOMX_FREQ_IADD_RS EQU 16t
RANDOMX_FREQ_IADD_RS EQU 25t
RANDOMX_FREQ_IADD_M EQU 7t
RANDOMX_FREQ_ISUB_R EQU 16t
RANDOMX_FREQ_ISUB_M EQU 7t
@ -29,19 +29,19 @@ RANDOMX_FREQ_IMUL_RCP EQU 8t
RANDOMX_FREQ_INEG_R EQU 2t
RANDOMX_FREQ_IXOR_R EQU 15t
RANDOMX_FREQ_IXOR_M EQU 5t
RANDOMX_FREQ_IROR_R EQU 8t
RANDOMX_FREQ_IROL_R EQU 2t
RANDOMX_FREQ_IROR_R EQU 10t
RANDOMX_FREQ_IROL_R EQU 0t
RANDOMX_FREQ_ISWAP_R EQU 4t
RANDOMX_FREQ_FSWAP_R EQU 4t
RANDOMX_FREQ_FADD_R EQU 16t
RANDOMX_FREQ_FSWAP_R EQU 8t
RANDOMX_FREQ_FADD_R EQU 20t
RANDOMX_FREQ_FADD_M EQU 5t
RANDOMX_FREQ_FSUB_R EQU 16t
RANDOMX_FREQ_FSUB_R EQU 20t
RANDOMX_FREQ_FSUB_M EQU 5t
RANDOMX_FREQ_FSCAL_R EQU 6t
RANDOMX_FREQ_FMUL_R EQU 32t
RANDOMX_FREQ_FMUL_R EQU 20t
RANDOMX_FREQ_FDIV_M EQU 4t
RANDOMX_FREQ_FSQRT_R EQU 6t
RANDOMX_FREQ_CBRANCH EQU 25t
RANDOMX_FREQ_CBRANCH EQU 16t
RANDOMX_FREQ_CFROUND EQU 1t
RANDOMX_FREQ_ISTORE EQU 16t
RANDOMX_FREQ_NOP EQU 0t

@ -445,7 +445,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

@ -243,7 +243,7 @@ namespace randomx {
}
if (opcode < ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;

@ -67,7 +67,7 @@ namespace randomx {
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company
All rights reserved.
@ -38,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_ARGON_LANES 1
//Argon2d salt
#define RANDOMX_ARGON_SALT "RandomX\x03"
#define RANDOMX_ARGON_SALT "RandomWOW\x01"
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
@ -56,16 +57,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_PROGRAM_SIZE 256
//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 2048
#define RANDOMX_PROGRAM_ITERATIONS 1024
//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 8
#define RANDOMX_PROGRAM_COUNT 16
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_L3 1048576
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144
#define RANDOMX_SCRATCHPAD_L2 131072
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384
@ -82,7 +83,7 @@ Total sum of frequencies must be 256
*/
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_RS 25
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
@ -96,23 +97,23 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_IROR_R 10
#define RANDOMX_FREQ_IROL_R 0
#define RANDOMX_FREQ_ISWAP_R 4
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FSWAP_R 8
#define RANDOMX_FREQ_FADD_R 20
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_R 20
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FMUL_R 20
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CBRANCH 16
#define RANDOMX_FREQ_CFROUND 1
//Store instruction

@ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
{
const uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (isZeroOrPowerOf2(divisor))
return;
@ -695,21 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
const uint64_t q = N / divisor;
const uint64_t r = N % divisor;
#ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll(divisor);
#else
uint64_t shift = 32;
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
if (literal_id < 12)
{

@ -81,7 +81,7 @@ namespace randomx {
static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
{
*(uint64_t*)(code + codePos) = val;
memcpy(code + codePos, &val, sizeof(val));
codePos += sizeof(val);
}

@ -776,7 +776,7 @@ namespace randomx {
}
static void v1_IMUL_RCP(HANDLER_ARGS) {
uint64_t divisor = isn.getImm32();
const uint32_t divisor = isn.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
state.registerUsage[isn.dst] = i;
if (state.rcpCount < 4) {

@ -745,7 +745,7 @@ softaes_enc:
#endif
/* byte 0 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
addi x12, x13, -2048
#ifdef __riscv_zba
@ -757,7 +757,7 @@ softaes_enc:
lwu x14, -2048(x14)
/* byte 1 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -769,7 +769,7 @@ softaes_enc:
xor x8, x8, x14
/* byte 2 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -781,7 +781,7 @@ softaes_enc:
xor x11, x11, x15
/* byte 3 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -793,7 +793,7 @@ softaes_enc:
xor x10, x10, x14
/* byte 4 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -805,7 +805,7 @@ softaes_enc:
xor x9, x9, x15
/* byte 5 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -817,7 +817,7 @@ softaes_enc:
xor x9, x9, x14
/* byte 6 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -829,7 +829,7 @@ softaes_enc:
xor x8, x8, x15
/* byte 7 */
zext.b x15, x30
andi x15, x30, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -840,7 +840,7 @@ softaes_enc:
xor x11, x11, x14
/* byte 8 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -852,7 +852,7 @@ softaes_enc:
xor x10, x10, x15
/* byte 9 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -864,7 +864,7 @@ softaes_enc:
xor x10, x10, x14
/* byte 10 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -876,7 +876,7 @@ softaes_enc:
xor x9, x9, x15
/* byte 11 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -888,7 +888,7 @@ softaes_enc:
xor x8, x8, x14
/* byte 12 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -900,7 +900,7 @@ softaes_enc:
xor x11, x11, x15
/* byte 13 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -912,7 +912,7 @@ softaes_enc:
xor x11, x11, x14
/* byte 14 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -924,7 +924,7 @@ softaes_enc:
xor x10, x10, x15
/* byte 15 */
zext.b x15, x31
andi x15, x31, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -960,7 +960,7 @@ softaes_dec:
#endif
/* byte 0 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
addi x12, x13, -2048
#ifdef __riscv_zba
@ -972,7 +972,7 @@ softaes_dec:
lwu x14, -2048(x14)
/* byte 1 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -984,7 +984,7 @@ softaes_dec:
xor x8, x8, x14
/* byte 2 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -996,7 +996,7 @@ softaes_dec:
xor x9, x9, x15
/* byte 3 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -1008,7 +1008,7 @@ softaes_dec:
xor x10, x10, x14
/* byte 4 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1020,7 +1020,7 @@ softaes_dec:
xor x11, x11, x15
/* byte 5 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1032,7 +1032,7 @@ softaes_dec:
xor x9, x9, x14
/* byte 6 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1044,7 +1044,7 @@ softaes_dec:
xor x10, x10, x15
/* byte 7 */
zext.b x15, x30
andi x15, x30, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -1055,7 +1055,7 @@ softaes_dec:
xor x11, x11, x14
/* byte 8 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1067,7 +1067,7 @@ softaes_dec:
xor x8, x8, x15
/* byte 9 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1079,7 +1079,7 @@ softaes_dec:
xor x10, x10, x14
/* byte 10 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1091,7 +1091,7 @@ softaes_dec:
xor x11, x11, x15
/* byte 11 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -1103,7 +1103,7 @@ softaes_dec:
xor x8, x8, x14
/* byte 12 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1115,7 +1115,7 @@ softaes_dec:
xor x9, x9, x15
/* byte 13 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1127,7 +1127,7 @@ softaes_dec:
xor x11, x11, x14
/* byte 14 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1139,7 +1139,7 @@ softaes_dec:
xor x8, x8, x15
/* byte 15 */
zext.b x15, x31
andi x15, x31, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else

@ -618,7 +618,7 @@ namespace randomx {
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);

@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ret
*/
uint64_t randomx_reciprocal(uint64_t divisor) {
uint64_t randomx_reciprocal(uint32_t divisor) {
assert(divisor != 0);
const uint64_t p2exp63 = 1ULL << 63;
const uint64_t q = p2exp63 / divisor;
const uint64_t r = p2exp63 % divisor;
#ifdef __GNUC__
const uint32_t shift = 64 - __builtin_clzll(divisor);
#else
uint32_t shift = 32;
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
unsigned bsr = 0; //highest set bit in divisor
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
bsr++;
for (unsigned shift = 0; shift < bsr; shift++) {
if (remainder >= divisor - remainder) {
quotient = quotient * 2 + 1;
remainder = remainder * 2 - divisor;
}
else {
quotient = quotient * 2;
remainder = remainder * 2;
}
}
return quotient;
return (q << shift) + ((r << shift) / divisor);
}
#if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
return randomx_reciprocal(divisor);
}

@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
uint64_t randomx_reciprocal(uint64_t);
uint64_t randomx_reciprocal_fast(uint64_t);
uint64_t randomx_reciprocal(uint32_t);
uint64_t randomx_reciprocal_fast(uint32_t);
#if defined(__cplusplus)
}

@ -180,7 +180,7 @@ int main(int argc, char** argv) {
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.12" << std::endl;
std::cout << "RandomX benchmark v1.2.1" << std::endl;
if (help) {
printUsage(argv[0]);

@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
}
if (opcode < randomx::ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!randomx::isZeroOrPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;

Loading…
Cancel
Save