Compare commits

...

14 Commits

Author SHA1 Message Date
wowario 62c6ea1176 RandomWOW parameters
3 years ago
tevador fe4324e8c0
Merge pull request #213 from hyc/m1typo
3 years ago
SChernykh 3c8c7ee097
Optimized dataset read (#211)
3 years ago
Howard Chu 1b3db29fb8 Fix typo for M1 Mac build
3 years ago
tevador c12097400b
Merge pull request #212 from hyc/m1prot
3 years ago
tevador 0db4c57823
Merge pull request #198 from tevador/pr-applem1
3 years ago
tevador 4aae0d834d
Merge pull request #202 from SChernykh/fix-crash
3 years ago
Howard Chu d9b7e8c25d Faster W^X policy for apple silicon macs
3 years ago
SChernykh 2ba7df2844 Fix illegal instruction crash on some ARM systems
3 years ago
tevador a44d07c89f
Merge pull request #201 from cryptonote-social/removecode
3 years ago
cryptonote-social 529b933fcf remove unnecessary first-load initialization code
3 years ago
tevador a38ce601fd Apple silicon: force W^X, enable hardware AES
4 years ago
tevador 5bfd021e8f
Merge pull request #183 from selsta/map_jit
4 years ago
selsta 7c172f746f
virtual_memory: add MAP_JIT on macOS
4 years ago

@ -153,7 +153,7 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
src/jit_compiler_a64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# not sure if this check is needed
include(CheckIncludeFile)

@ -175,10 +175,10 @@ template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d
#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450
#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4
#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
@ -197,10 +197,6 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
@ -210,23 +206,23 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state2 = aesdec<softAes>(state2, key0);
state3 = aesenc<softAes>(state3, key0);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state2 = aesdec<softAes>(state2, key1);
state3 = aesenc<softAes>(state3, key1);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state2 = aesdec<softAes>(state2, key2);
state3 = aesenc<softAes>(state3, key2);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
state2 = aesdec<softAes>(state2, key3);
state3 = aesenc<softAes>(state3, key3);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);

@ -15,7 +15,7 @@ RANDOMX_SCRATCHPAD_L2 EQU 262144t
RANDOMX_SCRATCHPAD_L1 EQU 16384t
RANDOMX_JUMP_BITS EQU 8t
RANDOMX_JUMP_OFFSET EQU 8t
RANDOMX_FREQ_IADD_RS EQU 16t
RANDOMX_FREQ_IADD_RS EQU 25t
RANDOMX_FREQ_IADD_M EQU 7t
RANDOMX_FREQ_ISUB_R EQU 16t
RANDOMX_FREQ_ISUB_M EQU 7t
@ -29,19 +29,19 @@ RANDOMX_FREQ_IMUL_RCP EQU 8t
RANDOMX_FREQ_INEG_R EQU 2t
RANDOMX_FREQ_IXOR_R EQU 15t
RANDOMX_FREQ_IXOR_M EQU 5t
RANDOMX_FREQ_IROR_R EQU 8t
RANDOMX_FREQ_IROL_R EQU 2t
RANDOMX_FREQ_IROR_R EQU 10t
RANDOMX_FREQ_IROL_R EQU 0t
RANDOMX_FREQ_ISWAP_R EQU 4t
RANDOMX_FREQ_FSWAP_R EQU 4t
RANDOMX_FREQ_FADD_R EQU 16t
RANDOMX_FREQ_FSWAP_R EQU 8t
RANDOMX_FREQ_FADD_R EQU 20t
RANDOMX_FREQ_FADD_M EQU 5t
RANDOMX_FREQ_FSUB_R EQU 16t
RANDOMX_FREQ_FSUB_R EQU 20t
RANDOMX_FREQ_FSUB_M EQU 5t
RANDOMX_FREQ_FSCAL_R EQU 6t
RANDOMX_FREQ_FMUL_R EQU 32t
RANDOMX_FREQ_FMUL_R EQU 20t
RANDOMX_FREQ_FDIV_M EQU 4t
RANDOMX_FREQ_FSQRT_R EQU 6t
RANDOMX_FREQ_CBRANCH EQU 25t
RANDOMX_FREQ_CBRANCH EQU 16t
RANDOMX_FREQ_CFROUND EQU 1t
RANDOMX_FREQ_ISTORE EQU 16t
RANDOMX_FREQ_NOP EQU 0t

@ -15,6 +15,7 @@
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -28,6 +28,7 @@
mov rbx, r9 ;# loop counter
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -1,17 +1,16 @@
mov ecx, ebp ;# ecx = ma
and ecx, RANDOMX_DATASET_BASE_MASK
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
and edx, RANDOMX_DATASET_BASE_MASK
lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]

@ -8,10 +8,10 @@
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
mov ebx, ebp ;# ecx = ma
and ebx, RANDOMX_DATASET_BASE_MASK
shr ebx, 6 ;# ebx = Dataset block number
xor rbp, rax ;# modify "mx"
mov rbx, rbp ;# ebx = ma
shr rbx, 38
and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

@ -67,7 +67,7 @@ namespace randomx {
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company
All rights reserved.
@ -38,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_ARGON_LANES 1
//Argon2d salt
#define RANDOMX_ARGON_SALT "RandomX\x03"
#define RANDOMX_ARGON_SALT "RandomWOW\x01"
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
@ -56,16 +57,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_PROGRAM_SIZE 256
//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 2048
#define RANDOMX_PROGRAM_ITERATIONS 1024
//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 8
#define RANDOMX_PROGRAM_COUNT 16
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_L3 1048576
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144
#define RANDOMX_SCRATCHPAD_L2 131072
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384
@ -82,7 +83,7 @@ Total sum of frequencies must be 256
*/
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_RS 25
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
@ -96,23 +97,23 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_IROR_R 10
#define RANDOMX_FREQ_IROL_R 0
#define RANDOMX_FREQ_ISWAP_R 4
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FSWAP_R 8
#define RANDOMX_FREQ_FADD_R 20
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_R 20
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FMUL_R 20
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CBRANCH 16
#define RANDOMX_FREQ_CFROUND 1
//Store instruction

@ -62,9 +62,13 @@ namespace randomx {
cpuid(info, 0x00000007);
avx2_ = (info[1] & (1 << 5)) != 0;
}
#elif defined(__aarch64__) && defined(HWCAP_AES)
#elif defined(__aarch64__)
#if defined(HWCAP_AES)
long hwcaps = getauxval(AT_HWCAP);
aes_ = (hwcaps & HWCAP_AES) != 0;
#elif defined(__APPLE__)
aes_ = true;
#endif
#endif
//TODO POWER8 AES
}

@ -36,6 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__)
#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
#define RANDOMX_FORCE_SECURE
#endif

@ -95,6 +95,10 @@ JitCompilerA64::JitCompilerA64()
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
#endif
}
JitCompilerA64::~JitCompilerA64()

@ -307,6 +307,9 @@ literal_v14: .fill 2,8,0
literal_v15: .fill 2,8,0
DECL(randomx_program_aarch64_vm_instructions_end):
# Calculate dataset pointer for dataset read
# Do it here to break false dependency from readReg2 and readReg3 (see next line)
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
@ -324,8 +327,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# mx <-> ma
ror x9, x9, 32
# Calculate dataset pointer for dataset read
mov w10, w9
DECL(randomx_program_aarch64_cacheline_align_mask2):
# Actual mask will be inserted by JIT compiler
and x10, x10, 1

@ -299,10 +299,6 @@ namespace randomx {
registerUsage[i] = -1;
}
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);

@ -40,7 +40,6 @@
.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
@ -88,10 +87,6 @@ DECL(randomx_program_prologue):
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]
DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32

@ -31,7 +31,6 @@ _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
@ -75,17 +74,12 @@ randomx_program_prologue PROC
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
randomx_program_prologue ENDP
randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP
randomx_program_prologue ENDP
ALIGN 64
include asm/program_xmm_constants.inc
@ -224,4 +218,4 @@ _RANDOMX_JITX86_STATIC ENDS
ENDIF
END
END

@ -32,7 +32,6 @@ extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();

@ -35,6 +35,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
# if defined(__aarch64__) && TARGET_OS_OSX
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
@ -99,9 +104,27 @@ void* allocMemoryPages(std::size_t bytes) {
#else
#define RESERVED_FLAGS 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#ifdef __APPLE__
#include <TargetConditionals.h>
#ifdef TARGET_OS_OSX
#define MEXTRA MAP_JIT
#else
#define MEXTRA 0
#endif
#else
#define MEXTRA 0
#endif
#ifdef USE_PTHREAD_JIT_WP
#define PEXTRA PROT_EXEC
#else
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#endif
#endif
return mem;
}
@ -119,11 +142,19 @@ static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
}
void setPagesRW(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#else
pageProtect(ptr, bytes, PAGE_READWRITE);
#endif
}
void setPagesRX(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(true);
#else
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {

Loading…
Cancel
Save