Compare commits

...

69 Commits

Author SHA1 Message Date
SChernykh 0ff83a2a81 Merge remote-tracking branch 'upstream/master'
3 years ago
tevador fe4324e8c0
Merge pull request #213 from hyc/m1typo
3 years ago
SChernykh 3c8c7ee097
Optimized dataset read (#211)
3 years ago
Howard Chu 1b3db29fb8 Fix typo for M1 Mac build
3 years ago
SChernykh a44c5a47ec Merge remote-tracking branch 'upstream/master'
3 years ago
tevador c12097400b
Merge pull request #212 from hyc/m1prot
3 years ago
tevador 0db4c57823
Merge pull request #198 from tevador/pr-applem1
3 years ago
tevador 4aae0d834d
Merge pull request #202 from SChernykh/fix-crash
3 years ago
Howard Chu d9b7e8c25d Faster W^X policy for apple silicon macs
3 years ago
SChernykh 2ba7df2844 Fix illegal instruction crash on some ARM systems
3 years ago
SChernykh 862556f239 Fix illegal instruction crash on some ARM systems
3 years ago
SChernykh f50635efd6 Merge remote-tracking branch 'upstream/master'
3 years ago
tevador a44d07c89f
Merge pull request #201 from cryptonote-social/removecode
3 years ago
cryptonote-social 529b933fcf remove unnecessary first-load initialization code
3 years ago
tevador a38ce601fd Apple silicon: force W^X, enable hardware AES
4 years ago
tevador 5bfd021e8f
Merge pull request #183 from selsta/map_jit
4 years ago
SChernykh 1ce204fb80 Merge remote-tracking branch 'upstream/master'
4 years ago
tevador 5ce5f4906c add --noBatch benchmark option
4 years ago
selsta 7c172f746f
virtual_memory: add MAP_JIT on macOS
4 years ago
tevador 9905ec9c5a
Merge pull request #188 from cryptonote-social/master
4 years ago
tevador 863765bbe6
Merge pull request #185 from tevador/pr-crosscomp
4 years ago
tevador a1c08a2f41
Merge pull request #187 from tevador/pr-netbsd
4 years ago
tevador 708a4e50c5 Fix compilation and JIT support on NetBSD:
4 years ago
tevador 6a4afc721f
Merge pull request #189 from tevador/pr-set-cache
4 years ago
tevador 32ab5dea54 fix potential use-after-free when reallocating cache
4 years ago
cryptonote-social a7733de1e7 replace hardcoded literal with its appropriate symbol
4 years ago
tevador bece0a7206 fix #184
4 years ago
tevador 7741eb1e97
Merge pull request #182 from tevador/pr-restore-fpstate
4 years ago
tevador 148b923f71 fix test 92 not failing properly on GCC/amd64
4 years ago
tevador 6a764e90d0 Preserve floating point state when calling randomx_calculate_hash
4 years ago
SChernykh bbca8b1ea4 Merge remote-tracking branch 'origin/dev'
4 years ago
tevador ac574e3743
Merge pull request #179 from tevador/pr-hash-batch
4 years ago
tevador 01381ccef3 Add a missing function to calculate a batch of hashes
4 years ago
tevador 913e495c53 Merge branch 'master' of git@github.com:tevador/RandomX.git
4 years ago
SChernykh cdeb06ab3f Merge remote-tracking branch 'upstream/master'
4 years ago
tevador 72ac5e49b6 Update dll project
4 years ago
tevador bbbb34757b
Add a note about building portable binaries
4 years ago
tevador a223b6b33b
Fixed an incorrect URL the the documentation
4 years ago
tevador 30969c0e25 Benchmark version
5 years ago
tevador 6e842d22bb
Merge pull request #171 from tevador/pr-affinityfix
5 years ago
tevador aa19c5b9b6 Disable thread affinity on FreeBSD and Android
5 years ago
tevador 70d4b0f2f1
Merge pull request #169 from tevador/pr-gitignore
5 years ago
tevador f872ce0b94
Merge pull request #170 from tevador/pr-temphash
5 years ago
tevador 3910d49b49 Hide tempHash from the public API
5 years ago
SChernykh 219c02e1e5 Combined hash and fill AES loop (#166)
5 years ago
tevador 6235852e00 Add .gitattributes
5 years ago
tevador e3561d661e Updated readme with RandomX Sniffer
5 years ago
SChernykh f7f821631b Merge remote-tracking branch 'upstream/master'
5 years ago
SChernykh 3eaff21eaf Added Intel JCC bug detection
5 years ago
SChernykh 1537612d53 Mitigation for Intel JCC erratum
5 years ago
SChernykh 82e5aa4817 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 42fa6f74a3 Enabled CFROUND
5 years ago
SChernykh d5627e9f0a Enabled FP instructions
5 years ago
SChernykh 1a431a8853 Enabled FSWAP_R and FSCAL_R
5 years ago
SChernykh 1bb8957c2c Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 4b42969a91 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 0db8631b73 Enabled CBRANCH
5 years ago
SChernykh 93afb248bf Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh e431502f95 Update instruction_weights.hpp
5 years ago
SChernykh 4a546761b9 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 930ea8ff20 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh ea81a4d854 Enabled ISTORE
5 years ago
SChernykh b165774643 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 9548422379 Enabled all integer instructions
5 years ago
SChernykh d6512a3a33 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 37cd162b17 Enabled IADD_RS
5 years ago
SChernykh dec384eaed Temporarily disabled all instructions
5 years ago
SChernykh 2d05741f07 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh e5347497e9 Disabled code execution in interpreted VM
5 years ago

3
.gitattributes vendored

@ -0,0 +1,3 @@
.gitignore export-ignore
.gitattributes export-ignore
audits export-ignore

@ -153,7 +153,7 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
src/jit_compiler_a64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# not sure if this check is needed
include(CheckIncludeFile)

@ -48,6 +48,8 @@ cmake -DARCH=native ..
make
```
To build portable binaries, omit the `ARCH` option when executing cmake.
### Windows
On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided).
@ -63,6 +65,8 @@ RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmo
* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`.
* The input `H` is the standard hashing blob with a selected nonce value.
RandomX was successfully activated on the Monero network on the 30th November 2019.
If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md).
**Note**: To achieve ASIC resistance, the key `K` must change and must not be miner-selectable. We recommend to use blockchain data as the key in a similar way to the Monero example above. If blockchain data cannot be used for some reason, use a predefined sequence of keys.
@ -108,7 +112,12 @@ Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX. M
* DDR4 memory is limited to about 4000-6000 H/s per channel (depending on frequency and timings)
### Does RandomX facilitate botnets/malware mining or web mining?
Efficient mining requires more than 2 GiB of memory, which is difficult to hide in an infected computer and disqualifies many low-end machines such as IoT devices. Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly.
Due to the way the algorithm works, mining malware is much easier to detect. [RandomX Sniffer](https://github.com/tevador/randomx-sniffer) is a proof of concept tool that can detect illicit mining activity on Windows.
Efficient mining requires more than 2 GiB of memory, which also disqualifies many low-end machines such as IoT devices, which are often parts of large botnets.
Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly.
### Since RandomX uses floating point math, does it give reproducible results on different platforms?

@ -255,7 +255,7 @@ The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [
|----------------|----------|----------|----------|------|
ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)]
|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]|
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]
The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution.
@ -638,7 +638,7 @@ state3 = 00000000000000000000000000000000
[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for
Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8
@ -647,4 +647,4 @@ Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Tab
[29] 7-Zip File archiver - https://www.7-zip.org/
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html

@ -239,3 +239,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
// initial state
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
constexpr int PREFETCH_DISTANCE = 4096;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;
for (int i = 0; i < 2; ++i) {
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
rx_prefetch_t0(prefetchPtr);
scratchpadPtr += 64;
prefetchPtr += 64;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;
}
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
//two extra rounds to achieve full diffusion
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
//output hash
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
}
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

@ -15,6 +15,7 @@
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -28,6 +28,7 @@
mov rbx, r9 ;# loop counter
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -1,17 +1,16 @@
mov ecx, ebp ;# ecx = ma
and ecx, RANDOMX_DATASET_BASE_MASK
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
and edx, RANDOMX_DATASET_BASE_MASK
lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]

@ -8,10 +8,10 @@
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
mov ebx, ebp ;# ecx = ma
and ebx, RANDOMX_DATASET_BASE_MASK
shr ebx, 6 ;# ebx = Dataset block number
xor rbp, rax ;# modify "mx"
mov rbx, rbp ;# ebx = ma
shr rbx, 38
and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

@ -53,8 +53,15 @@ namespace randomx {
int info[4];
cpuid(info, 0);
int nIds = info[0];
manufacturer_string[0] = info[1];
manufacturer_string[1] = info[3];
manufacturer_string[2] = info[2];
manufacturer_string[3] = 0;
if (nIds >= 0x00000001) {
cpuid(info, 0x00000001);
processor_info_data = info[0];
ssse3_ = (info[2] & (1 << 9)) != 0;
aes_ = (info[2] & (1 << 25)) != 0;
}
@ -62,9 +69,13 @@ namespace randomx {
cpuid(info, 0x00000007);
avx2_ = (info[1] & (1 << 5)) != 0;
}
#elif defined(__aarch64__) && defined(HWCAP_AES)
#elif defined(__aarch64__)
#if defined(HWCAP_AES)
long hwcaps = getauxval(AT_HWCAP);
aes_ = (hwcaps & HWCAP_AES) != 0;
#elif defined(__APPLE__)
aes_ = true;
#endif
#endif
//TODO POWER8 AES
}

@ -42,8 +42,34 @@ namespace randomx {
bool hasAvx2() const {
return avx2_;
}
const char* manufacturer() const {
return (const char*) manufacturer_string;
}
struct ProcessorInfo
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
};
ProcessorInfo processorInfo() const {
return processor_info;
}
private:
bool aes_, ssse3_, avx2_;
int manufacturer_string[4];
union
{
ProcessorInfo processor_info;
int processor_info_data;
};
};
}

@ -157,6 +157,21 @@ void rx_set_rounding_mode(uint32_t mode) {
}
}
uint32_t rx_get_rounding_mode() {
switch (fegetround()) {
case FE_DOWNWARD:
return RoundDown;
case FE_UPWARD:
return RoundUp;
case FE_TOWARDZERO:
return RoundToZero;
case FE_TONEAREST:
return RoundToNearest;
default:
UNREACHABLE;
}
}
#endif
#ifdef RANDOMX_USE_X87

@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
#define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd
@ -172,6 +173,10 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
_mm_setcsr(rx_mxcsr_default | (mode << 13));
}
FORCE_INLINE uint32_t rx_get_rounding_mode() {
return (_mm_getcsr() >> 13) & 3;
}
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
#include <cstdint>
#include <stdexcept>
@ -201,6 +206,7 @@ typedef union{
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
@ -399,6 +405,10 @@ inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
inline void rx_prefetch_t0(const void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
return vld1q_f64((const float64_t*)pd);
}
@ -532,6 +542,7 @@ typedef union {
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
rx_vec_f128 x;
@ -729,6 +740,8 @@ void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
uint32_t rx_get_rounding_mode();
#endif
double loadDoublePortable(const void* addr);

@ -35,3 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
#define RANDOMX_FORCE_SECURE
#endif

@ -95,6 +95,10 @@ JitCompilerA64::JitCompilerA64()
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
#endif
}
JitCompilerA64::~JitCompilerA64()

@ -307,6 +307,9 @@ literal_v14: .fill 2,8,0
literal_v15: .fill 2,8,0
DECL(randomx_program_aarch64_vm_instructions_end):
# Calculate dataset pointer for dataset read
# Do it here to break false dependency from readReg2 and readReg3 (see next line)
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
@ -324,8 +327,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# mx <-> ma
ror x9, x9, 32
# Calculate dataset pointer for dataset read
mov w10, w9
DECL(randomx_program_aarch64_cacheline_align_mask2):
# Actual mask will be inserted by JIT compiler
and x10, x10, 1

@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "cpu.hpp"
namespace randomx {
/*
@ -197,6 +198,7 @@ namespace randomx {
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t JZ_SHORT = 0x74;
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@ -213,11 +215,52 @@ namespace randomx {
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{},
{0x2E},
{0x2E, 0x2E},
{0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
bool JitCompilerX86::BranchesWithin32B = false;
size_t JitCompilerX86::getCodeSize() {
return CodeSize;
}
JitCompilerX86::JitCompilerX86() {
// CPU-specific tweaks
Cpu cpu;
if (strcmp(cpu.manufacturer(), "GenuineIntel") == 0) {
Cpu::ProcessorInfo info = cpu.processorInfo();
// Intel JCC erratum mitigation
if (info.family == 6) {
const uint32_t model = info.model | (info.ext_model << 4);
const uint32_t stepping = info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
code = (uint8_t*)allocMemoryPages(CodeSize);
memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
@ -295,14 +338,10 @@ namespace randomx {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
@ -327,6 +366,22 @@ namespace randomx {
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
uint32_t alignment_size = 32 - (branch_begin & 31);
if (alignment_size > 8) {
emit(NOPX[alignment_size - 9], alignment_size - 8);
alignment_size = 8;
}
emit(NOPX[alignment_size - 1], alignment_size);
}
}
emit(SUB_EBX);
emit(JNZ);
emit32(prologueSize - codePos - 4);
@ -775,18 +830,42 @@ namespace randomx {
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
int target = registerUsage[reg] + 1;
int32_t jmp_offset = instructionOffsets[target] - (codePos + 16);
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
const uint32_t alignment_size = 32 - (branch_begin & 31);
jmp_offset -= alignment_size;
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size);
}
}
emit(REX_ADD_I);
emitByte(0xc0 + reg);
int shift = instr.getModCond() + ConditionOffset;
const int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
if (jmp_offset >= -128) {
emitByte(JZ_SHORT);
emitByte(jmp_offset);
}
else {
emit(JZ);
emit32(jmp_offset - 4);
}
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;

@ -65,6 +65,9 @@ namespace randomx {
void enableWriting();
void enableExecution();
void enableAll();
static bool BranchesWithin32B;
private:
static InstructionGeneratorX86 engine[256];
std::vector<int32_t> instructionOffsets;

@ -40,7 +40,6 @@
.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
@ -88,10 +87,6 @@ DECL(randomx_program_prologue):
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]
DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32

@ -31,7 +31,6 @@ _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
@ -75,17 +74,12 @@ randomx_program_prologue PROC
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
randomx_program_prologue ENDP
randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP
randomx_program_prologue ENDP
ALIGN 64
include asm/program_xmm_constants.inc
@ -224,4 +218,4 @@ _RANDOMX_JITX86_STATIC ENDS
ENDIF
END
END

@ -32,7 +32,6 @@ extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();

@ -36,13 +36,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#include <cfenv>
extern "C" {
randomx_flags randomx_get_flags() {
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
randomx::Cpu cpu;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
if (flags == RANDOMX_FLAG_JIT) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -328,7 +329,7 @@ extern "C" {
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
if (machine->cacheKey != cache->cacheKey) {
if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) {
machine->setCache(cache);
machine->cacheKey = cache->cacheKey;
}
@ -349,6 +350,8 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
fenv_t fpstate;
fegetenv(&fpstate);
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -361,6 +364,34 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
fesetenv(&fpstate);
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(machine->tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
machine->resetRoundingMode();
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
}

@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_H
#include <stddef.h>
#include <stdint.h>
#define RANDOMX_HASH_SIZE 32
#define RANDOMX_DATASET_ITEM_SIZE 64
@ -238,6 +239,27 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
*/
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
/**
* Set of functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first will begin a hash calculation.
* randomx_calculate_hash_next will output the hash value of the previous input
* and begin the calculation of the next hash.
* randomx_calculate_hash_last will output the hash value of the previous input.
*
* WARNING: These functions may alter the floating point rounding mode of the calling thread.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param input is a pointer to memory to be hashed. Must not be NULL.
* @param inputSize is the number of bytes to be hashed.
* @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
* @param nextInputSize is the number of bytes to be hashed for the next hash.
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
#if defined(__cplusplus)
}
#endif

@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
(thread_policy_t)&policy, 1);
#elif defined(_WIN32) || defined(__CYGWIN__)
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
#elif !defined(__OpenBSD__)
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__)
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(cpuid, &cs);

@ -40,9 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "../dataset.hpp"
#include "../blake2/endian.h"
#include "../common.hpp"
#include "../jit_compiler.hpp"
#ifdef _WIN32
#include <windows.h>
#include <VersionHelpers.h>
#include <versionhelpers.h>
#endif
#include "affinity.hpp"
@ -94,6 +95,7 @@ void printUsage(const char* executable) {
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
}
struct MemoryException : public std::exception {
@ -109,11 +111,14 @@ struct DatasetAllocException : public MemoryException {
}
};
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid=-1) {
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
if (rc) {
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
}
}
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
@ -122,16 +127,27 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
if (batch) {
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
}
while (nonce < noncesCount) {
if (batch) {
nonce = atomicNonce.fetch_add(1);
}
store32(noncePtr, nonce);
randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
nonce = atomicNonce.fetch_add(1);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
}
}
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool ssse3, avx2, autoFlags, noBatch;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
int32_t seedValue;
@ -155,10 +171,11 @@ int main(int argc, char** argv) {
readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.5" << std::endl;
std::cout << "RandomX benchmark v1.1.8" << std::endl;
if (help) {
printUsage(argv[0]);
@ -196,7 +213,7 @@ int main(int argc, char** argv) {
}
if (jit) {
flags |= RANDOMX_FLAG_JIT;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
flags |= RANDOMX_FLAG_SECURE;
#endif
}
@ -208,7 +225,7 @@ int main(int argc, char** argv) {
if (miningMode) {
flags |= RANDOMX_FLAG_FULL_MEM;
}
#ifndef __OpenBSD__
#ifndef RANDOMX_FORCE_SECURE
if (secure) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -260,6 +277,16 @@ int main(int argc, char** argv) {
std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl;
}
MineFunc* func;
if (noBatch) {
func = &mine<false>;
}
else {
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
}
std::cout << "Initializing";
if (miningMode)
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
@ -330,14 +357,14 @@ int main(int argc, char** argv) {
int cpuid = -1;
if (threadAffinity)
cpuid = cpuid_from_mask(threadAffinity, i);
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1);
}
double elapsed = sw.getElapsed();

@ -143,7 +143,7 @@ int main() {
randomx::JitCompiler jit;
jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
jit.generateDatasetInitCode();
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
jit.enableExecution();
#else
jit.enableAll();
@ -954,7 +954,7 @@ int main() {
assert(ibc.memMask == randomx::ScratchpadL3Mask);
});
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
@ -1009,10 +1009,10 @@ int main() {
vm = nullptr;
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
initCache("test key 000");
#ifdef __OpenBSD__
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
#endif
}
@ -1026,9 +1026,6 @@ int main() {
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
randomx_destroy_vm(vm);
vm = nullptr;
auto flags = randomx_get_flags();
randomx_release_cache(cache);
@ -1054,6 +1051,40 @@ int main() {
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
});
if (cache != nullptr)
randomx_release_cache(cache);
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash1[RANDOMX_HASH_SIZE];
char hash2[RANDOMX_HASH_SIZE];
char hash3[RANDOMX_HASH_SIZE];
initCache("test key 000");
char input1[] = "This is a test";
char input2[] = "Lorem ipsum dolor sit amet";
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
randomx_calculate_hash_last(vm, &hash3);
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
});
runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() {
rx_set_rounding_mode(RoundToNearest);
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(rx_get_rounding_mode() == RoundToNearest);
});
randomx_destroy_vm(vm);
vm = nullptr;
if (cache != nullptr)
randomx_release_cache(cache);

@ -120,6 +120,12 @@ namespace randomx {
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, &reg.a, fill_state);
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);

@ -38,6 +38,7 @@ public:
virtual ~randomx_vm() = 0;
virtual void allocate() = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0;
virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0;
@ -53,6 +54,9 @@ public:
{
return program;
}
const uint8_t* getMemory() const {
return mem.memory;
}
protected:
void initialize();
alignas(64) randomx::Program program;
@ -67,6 +71,7 @@ protected:
uint64_t datasetOffset;
public:
std::string cacheKey;
alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
};
namespace randomx {
@ -78,6 +83,7 @@ namespace randomx {
void allocate() override;
void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override;
void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override;
protected:
void generateProgram(void* seed);
};

@ -35,6 +35,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
# if defined(__aarch64__) && TARGET_OS_OSX
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
@ -94,9 +99,32 @@ void* allocMemoryPages(std::size_t bytes) {
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
#else
mem = mmap(nullptr, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#if defined(__NetBSD__)
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
#else
#define RESERVED_FLAGS 0
#endif
#ifdef __APPLE__
#include <TargetConditionals.h>
#ifdef TARGET_OS_OSX
#define MEXTRA MAP_JIT
#else
#define MEXTRA 0
#endif
#else
#define MEXTRA 0
#endif
#ifdef USE_PTHREAD_JIT_WP
#define PEXTRA PROT_EXEC
#else
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#endif
#endif
return mem;
}
@ -114,11 +142,19 @@ static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
}
void setPagesRW(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#else
pageProtect(ptr, bytes, PAGE_READWRITE);
#endif
}
void setPagesRX(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(true);
#else
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {
@ -141,7 +177,7 @@ void* allocLargePagesMemory(std::size_t bytes) {
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__)
#elif defined(__OpenBSD__) || defined(__NetBSD__)
mem = MAP_FAILED; // OpenBSD does not support huge pages
#else
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);

@ -54,12 +54,17 @@
<ItemGroup>
<ClCompile Include="..\src\aes_hash.cpp" />
<ClCompile Include="..\src\allocator.cpp" />
<ClCompile Include="..\src\argon2_avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_ssse3.c" />
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\cpu.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
<ClCompile Include="..\src\instruction.cpp" />
<ClCompile Include="..\src\instructions_portable.cpp" />

@ -172,5 +172,14 @@
<ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_avx2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>
Loading…
Cancel
Save