Compare commits

..

3 Commits

Author SHA1 Message Date
SChernykh 5f053881df Fixed incorrect sizeof
5 years ago
SChernykh a76ac019b4 Removed C++ code from C API
5 years ago
SChernykh b6d27972f2 Combined hash and fill AES loop
5 years ago

3
.gitattributes vendored

@ -1,3 +0,0 @@
.gitignore export-ignore
.gitattributes export-ignore
audits export-ignore

@ -153,7 +153,7 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
src/jit_compiler_a64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# not sure if this check is needed
include(CheckIncludeFile)

@ -48,8 +48,6 @@ cmake -DARCH=native ..
make
```
To build portable binaries, omit the `ARCH` option when executing cmake.
### Windows
On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided).
@ -65,8 +63,6 @@ RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmo
* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`.
* The input `H` is the standard hashing blob with a selected nonce value.
RandomX was successfully activated on the Monero network on the 30th November 2019.
If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md).
**Note**: To achieve ASIC resistance, the key `K` must change and must not be miner-selectable. We recommend using blockchain data as the key, in a similar way to the Monero example above. If blockchain data cannot be used for some reason, use a predefined sequence of keys.
@ -112,12 +108,7 @@ Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX. M
* DDR4 memory is limited to about 4000-6000 H/s per channel (depending on frequency and timings)
### Does RandomX facilitate botnets/malware mining or web mining?
Due to the way the algorithm works, mining malware is much easier to detect. [RandomX Sniffer](https://github.com/tevador/randomx-sniffer) is a proof of concept tool that can detect illicit mining activity on Windows.
Efficient mining requires more than 2 GiB of memory, which also disqualifies many low-end machines such as IoT devices, which are often parts of large botnets.
Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating-point operations in both JavaScript and WebAssembly.
Efficient mining requires more than 2 GiB of memory, which is difficult to hide in an infected computer and disqualifies many low-end machines such as IoT devices. Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating-point operations in both JavaScript and WebAssembly.
### Since RandomX uses floating point math, does it give reproducible results on different platforms?

@ -255,7 +255,7 @@ The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [
|----------------|----------|----------|----------|------|
|ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)]|
|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]|
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]
The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution.
@ -638,7 +638,7 @@ state3 = 00000000000000000000000000000000
[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for
Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8
@ -647,4 +647,4 @@ Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Tab
[29] 7-Zip File archiver - https://www.7-zip.org/
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html

@ -15,7 +15,6 @@
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -28,7 +28,6 @@
mov rbx, r9 ;# loop counter
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -1,16 +1,17 @@
mov ecx, ebp ;# ecx = ma
and ecx, RANDOMX_DATASET_BASE_MASK
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
and edx, RANDOMX_DATASET_BASE_MASK
lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

@ -8,10 +8,10 @@
;# store r13, r14 and r15 into the stack slots at rsp+16, rsp+8 and rsp+0
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov rbx, rbp ;# ebx = ma
shr rbx, 38
and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
ror rbp, 32 ;# swap "ma" and "mx"
mov ebx, ebp ;# ebx = ma (low 32 bits of rbp after the swap)
and ebx, RANDOMX_DATASET_BASE_MASK
shr ebx, 6 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

@ -53,15 +53,8 @@ namespace randomx {
int info[4];
cpuid(info, 0);
int nIds = info[0];
manufacturer_string[0] = info[1];
manufacturer_string[1] = info[3];
manufacturer_string[2] = info[2];
manufacturer_string[3] = 0;
if (nIds >= 0x00000001) {
cpuid(info, 0x00000001);
processor_info_data = info[0];
ssse3_ = (info[2] & (1 << 9)) != 0;
aes_ = (info[2] & (1 << 25)) != 0;
}
@ -69,13 +62,9 @@ namespace randomx {
cpuid(info, 0x00000007);
avx2_ = (info[1] & (1 << 5)) != 0;
}
#elif defined(__aarch64__)
#if defined(HWCAP_AES)
#elif defined(__aarch64__) && defined(HWCAP_AES)
long hwcaps = getauxval(AT_HWCAP);
aes_ = (hwcaps & HWCAP_AES) != 0;
#elif defined(__APPLE__)
aes_ = true;
#endif
#endif
//TODO POWER8 AES
}

@ -42,34 +42,8 @@ namespace randomx {
// Returns the cached avx2_ flag. The detection code sets it from bit 5 of
// info[1] returned by CPUID leaf 7.
bool hasAvx2() const {
return avx2_;
}
// Exposes the stored vendor-id words as a C string. The detection code fills
// three words from CPUID leaf 0 and sets the fourth to zero, which acts as
// the string terminator.
const char* manufacturer() const {
    const char* vendor = reinterpret_cast<const char*>(manufacturer_string);
    return vendor;
}
// Bit-field view of one 32-bit word (field widths sum to 32). The enclosing
// class overlays this struct with an int in an anonymous union, and that int
// is assigned info[0] of CPUID leaf 1.
// NOTE(review): the exact bit positions depend on the compiler's bit-field
// allocation order; the layout presumably mirrors the CPUID signature — confirm.
struct ProcessorInfo
{
unsigned int stepping : 4; // stepping number; compared directly by the JIT erratum check
unsigned int model : 4; // low part of the model number
unsigned int family : 4; // family number; the JIT erratum check tests for family 6
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4; // high part of the model: model | (ext_model << 4)
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
};
// Returns a copy of the processor_info bit-field struct by value.
ProcessorInfo processorInfo() const {
return processor_info;
}
private:
// Feature flags filled in by the CPU detection code.
bool aes_, ssse3_, avx2_;
// Three vendor-id words plus a fourth word that the detection code sets to 0.
int manufacturer_string[4];
// The same storage seen either as decoded bit-fields or as the raw int.
union
{
ProcessorInfo processor_info;
int processor_info_data;
};
};
}

@ -157,21 +157,6 @@ void rx_set_rounding_mode(uint32_t mode) {
}
}
// Queries the current floating-point rounding direction through fegetround()
// and maps it to the corresponding Round* constant.
uint32_t rx_get_rounding_mode() {
    const int fe_mode = fegetround();
    if (fe_mode == FE_DOWNWARD) {
        return RoundDown;
    }
    if (fe_mode == FE_UPWARD) {
        return RoundUp;
    }
    if (fe_mode == FE_TOWARDZERO) {
        return RoundToZero;
    }
    if (fe_mode == FE_TONEAREST) {
        return RoundToNearest;
    }
    // fegetround() reported none of the four modes handled above.
    UNREACHABLE;
}
#endif
#ifdef RANDOMX_USE_X87

@ -173,10 +173,6 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
_mm_setcsr(rx_mxcsr_default | (mode << 13));
}
// Reads MXCSR and returns the two-bit field starting at bit 13, which is the
// position rx_set_rounding_mode writes the mode to (mode << 13).
FORCE_INLINE uint32_t rx_get_rounding_mode() {
    const uint32_t mxcsr = _mm_getcsr();
    const uint32_t rounding_bits = mxcsr >> 13;
    return rounding_bits & 3;
}
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
#include <cstdint>
#include <stdexcept>
@ -740,8 +736,6 @@ void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
uint32_t rx_get_rounding_mode();
#endif
double loadDoublePortable(const void* addr);

@ -35,7 +35,3 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
#define RANDOMX_FORCE_SECURE
#endif

@ -95,10 +95,6 @@ JitCompilerA64::JitCompilerA64()
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
#endif
}
JitCompilerA64::~JitCompilerA64()

@ -307,9 +307,6 @@ literal_v14: .fill 2,8,0
literal_v15: .fill 2,8,0
DECL(randomx_program_aarch64_vm_instructions_end):
# Calculate dataset pointer for dataset read
# Do it here to break false dependency from readReg2 and readReg3 (see next line)
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
@ -327,6 +324,8 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# mx <-> ma
ror x9, x9, 32
# Calculate dataset pointer for dataset read
mov w10, w9
DECL(randomx_program_aarch64_cacheline_align_mask2):
# Actual mask will be inserted by JIT compiler
and x10, x10, 1

@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "cpu.hpp"
namespace randomx {
/*
@ -198,7 +197,6 @@ namespace randomx {
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t JZ_SHORT = 0x74;
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@ -215,52 +213,11 @@ namespace randomx {
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
// Padding byte sequences indexed by length: row N provides the N bytes that
// h_CBRANCH emits ahead of its add/test/jz sequence when that branch would
// otherwise cross or touch a 32-byte boundary. Rows 1-8 are made up only of
// 0x2E bytes; rows 9-13 begin with 1-5 other bytes followed by eight 0x2E bytes.
// NOTE(review): 0x2E appears to be the x86 CS segment-override prefix and the
// leading bytes of rows 9-13 appear to be NOP encodings — confirm against the
// Intel SDM before changing any row.
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{},
{0x2E},
{0x2E, 0x2E},
{0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
bool JitCompilerX86::BranchesWithin32B = false;
// Returns CodeSize, the byte count the constructor passes to allocMemoryPages
// when allocating the code buffer.
size_t JitCompilerX86::getCodeSize() {
return CodeSize;
}
JitCompilerX86::JitCompilerX86() {
// CPU-specific tweaks
Cpu cpu;
if (strcmp(cpu.manufacturer(), "GenuineIntel") == 0) {
Cpu::ProcessorInfo info = cpu.processorInfo();
// Intel JCC erratum mitigation
if (info.family == 6) {
const uint32_t model = info.model | (info.ext_model << 4);
const uint32_t stepping = info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
code = (uint8_t*)allocMemoryPages(CodeSize);
memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
@ -338,10 +295,14 @@ namespace randomx {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
for (unsigned i = 0; i < RegistersCount; ++i) {
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
}
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
@ -366,22 +327,6 @@ namespace randomx {
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
uint32_t alignment_size = 32 - (branch_begin & 31);
if (alignment_size > 8) {
emit(NOPX[alignment_size - 9], alignment_size - 8);
alignment_size = 8;
}
emit(NOPX[alignment_size - 1], alignment_size);
}
}
emit(SUB_EBX);
emit(JNZ);
emit32(prologueSize - codePos - 4);
@ -830,42 +775,18 @@ namespace randomx {
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
int target = registerUsage[reg] + 1;
int32_t jmp_offset = instructionOffsets[target] - (codePos + 16);
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
const uint32_t alignment_size = 32 - (branch_begin & 31);
jmp_offset -= alignment_size;
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size);
}
}
emit(REX_ADD_I);
emitByte(0xc0 + reg);
const int shift = instr.getModCond() + ConditionOffset;
int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
if (jmp_offset >= -128) {
emitByte(JZ_SHORT);
emitByte(jmp_offset);
}
else {
emit(JZ);
emit32(jmp_offset - 4);
}
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;

@ -65,9 +65,6 @@ namespace randomx {
void enableWriting();
void enableExecution();
void enableAll();
static bool BranchesWithin32B;
private:
static InstructionGeneratorX86 engine[256];
std::vector<int32_t> instructionOffsets;

@ -40,6 +40,7 @@
.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
@ -87,6 +88,10 @@ DECL(randomx_program_prologue):
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]
DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32

@ -31,6 +31,7 @@ _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
@ -74,12 +75,17 @@ randomx_program_prologue PROC
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
randomx_program_prologue ENDP
randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue ENDP
randomx_program_prologue_first_load ENDP
ALIGN 64
include asm/program_xmm_constants.inc
@ -218,4 +224,4 @@ _RANDOMX_JITX86_STATIC ENDS
ENDIF
END
END

@ -32,6 +32,7 @@ extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();

@ -36,14 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#include <cfenv>
extern "C" {
randomx_flags randomx_get_flags() {
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
randomx::Cpu cpu;
#ifdef RANDOMX_FORCE_SECURE
#ifdef __OpenBSD__
if (flags == RANDOMX_FLAG_JIT) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -329,7 +328,7 @@ extern "C" {
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) {
if (machine->cacheKey != cache->cacheKey) {
machine->setCache(cache);
machine->cacheKey = cache->cacheKey;
}
@ -350,8 +349,6 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
fenv_t fpstate;
fegetenv(&fpstate);
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -364,34 +361,23 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
fesetenv(&fpstate);
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(machine->tempHash);
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t *tempHash, const void* input, size_t inputSize) {
blake2b(tempHash, sizeof(uint64_t) * 8, input, inputSize, nullptr, 0);
machine->initScratchpad(tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t *tempHash, const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
machine->run(tempHash);
blake2b(tempHash, sizeof(uint64_t) * 8, machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->run(tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
machine->resetRoundingMode();
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
blake2b(tempHash, sizeof(uint64_t) * 8, nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
}
}

@ -240,15 +240,10 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
/**
* Set of functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first will begin a hash calculation.
* randomx_calculate_hash_next will output the hash value of the previous input
* and begin the calculation of the next hash.
* randomx_calculate_hash_last will output the hash value of the previous input.
*
* WARNING: These functions may alter the floating point rounding mode of the calling thread.
* Paired functions used to calculate multiple RandomX hashes, for example during mining.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param tempHash an array of 8 64-bit values used to store intermediate data between calls to randomx_calculate_hash_first and randomx_calculate_hash_next.
* @param input is a pointer to memory to be hashed. Must not be NULL.
* @param inputSize is the number of bytes to be hashed.
* @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
@ -256,9 +251,8 @@ RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *inpu
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t *tempHash, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t *tempHash, const void* nextInput, size_t nextInputSize, void* output);
#if defined(__cplusplus)
}

@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
(thread_policy_t)&policy, 1);
#elif defined(_WIN32) || defined(__CYGWIN__)
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__)
#elif !defined(__OpenBSD__)
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(cpuid, &cs);

@ -40,10 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "../dataset.hpp"
#include "../blake2/endian.h"
#include "../common.hpp"
#include "../jit_compiler.hpp"
#ifdef _WIN32
#include <windows.h>
#include <versionhelpers.h>
#include <VersionHelpers.h>
#endif
#include "affinity.hpp"
@ -95,7 +94,6 @@ void printUsage(const char* executable) {
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
}
struct MemoryException : public std::exception {
@ -111,14 +109,11 @@ struct DatasetAllocException : public MemoryException {
}
};
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid=-1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
if (rc) {
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
}
}
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
@ -127,27 +122,21 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
if (batch) {
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
}
uint64_t tempHash[8];
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, tempHash, blockTemplate, sizeof(blockTemplate));
while (nonce < noncesCount) {
if (batch) {
nonce = atomicNonce.fetch_add(1);
}
nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
randomx_calculate_hash_next(vm, tempHash, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
}
}
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool ssse3, avx2, autoFlags, noBatch;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
int32_t seedValue;
@ -171,11 +160,10 @@ int main(int argc, char** argv) {
readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.8" << std::endl;
std::cout << "RandomX benchmark v1.1.5" << std::endl;
if (help) {
printUsage(argv[0]);
@ -213,7 +201,7 @@ int main(int argc, char** argv) {
}
if (jit) {
flags |= RANDOMX_FLAG_JIT;
#ifdef RANDOMX_FORCE_SECURE
#ifdef __OpenBSD__
flags |= RANDOMX_FLAG_SECURE;
#endif
}
@ -225,7 +213,7 @@ int main(int argc, char** argv) {
if (miningMode) {
flags |= RANDOMX_FLAG_FULL_MEM;
}
#ifndef RANDOMX_FORCE_SECURE
#ifndef __OpenBSD__
if (secure) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -277,16 +265,6 @@ int main(int argc, char** argv) {
std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl;
}
MineFunc* func;
if (noBatch) {
func = &mine<false>;
}
else {
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
}
std::cout << "Initializing";
if (miningMode)
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
@ -357,14 +335,14 @@ int main(int argc, char** argv) {
int cpuid = -1;
if (threadAffinity)
cpuid = cpuid_from_mask(threadAffinity, i);
threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1);
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
}
double elapsed = sw.getElapsed();

@ -143,7 +143,7 @@ int main() {
randomx::JitCompiler jit;
jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
jit.generateDatasetInitCode();
#ifdef RANDOMX_FORCE_SECURE
#ifdef __OpenBSD__
jit.enableExecution();
#else
jit.enableAll();
@ -954,7 +954,7 @@ int main() {
assert(ibc.memMask == randomx::ScratchpadL3Mask);
});
#ifdef RANDOMX_FORCE_SECURE
#ifdef __OpenBSD__
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
@ -1009,10 +1009,10 @@ int main() {
vm = nullptr;
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
initCache("test key 000");
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr);
#ifdef __OpenBSD__
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
#endif
}
@ -1026,6 +1026,9 @@ int main() {
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
randomx_destroy_vm(vm);
vm = nullptr;
auto flags = randomx_get_flags();
randomx_release_cache(cache);
@ -1051,40 +1054,6 @@ int main() {
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
});
if (cache != nullptr)
randomx_release_cache(cache);
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash1[RANDOMX_HASH_SIZE];
char hash2[RANDOMX_HASH_SIZE];
char hash3[RANDOMX_HASH_SIZE];
initCache("test key 000");
char input1[] = "This is a test";
char input2[] = "Lorem ipsum dolor sit amet";
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
randomx_calculate_hash_last(vm, &hash3);
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
});
runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() {
rx_set_rounding_mode(RoundToNearest);
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(rx_get_rounding_mode() == RoundToNearest);
});
randomx_destroy_vm(vm);
vm = nullptr;
if (cache != nullptr)
randomx_release_cache(cache);

@ -54,9 +54,6 @@ public:
{
return program;
}
// Returns the mem.memory pointer without transferring ownership;
// randomx_vm_set_cache compares it against the cache's memory pointer.
const uint8_t* getMemory() const {
return mem.memory;
}
protected:
void initialize();
alignas(64) randomx::Program program;
@ -71,7 +68,6 @@ protected:
uint64_t datasetOffset;
public:
std::string cacheKey;
alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
};
namespace randomx {

@ -35,11 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
# if defined(__aarch64__) && TARGET_OS_OSX
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
@ -99,32 +94,9 @@ void* allocMemoryPages(std::size_t bytes) {
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
#else
#if defined(__NetBSD__)
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
#else
#define RESERVED_FLAGS 0
#endif
#ifdef __APPLE__
#include <TargetConditionals.h>
#ifdef TARGET_OS_OSX
#define MEXTRA MAP_JIT
#else
#define MEXTRA 0
#endif
#else
#define MEXTRA 0
#endif
#ifdef USE_PTHREAD_JIT_WP
#define PEXTRA PROT_EXEC
#else
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
mem = mmap(nullptr, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#endif
#endif
return mem;
}
@ -142,19 +114,11 @@ static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
}
// Switches JIT memory to read/write.
// With USE_PTHREAD_JIT_WP defined, ptr and bytes are unused and the call is
// pthread_jit_write_protect_np(false); otherwise PAGE_READWRITE is applied to
// the range [ptr, ptr + bytes) through pageProtect.
// NOTE(review): pthread_jit_write_protect_np presumably acts per thread, not
// per range — confirm against Apple's documentation.
void setPagesRW(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#else
pageProtect(ptr, bytes, PAGE_READWRITE);
#endif
}
// Switches JIT memory to read/execute.
// With USE_PTHREAD_JIT_WP defined, ptr and bytes are unused and the call is
// pthread_jit_write_protect_np(true); otherwise PAGE_EXECUTE_READ is applied
// to the range [ptr, ptr + bytes) through pageProtect.
// NOTE(review): pthread_jit_write_protect_np presumably acts per thread, not
// per range — confirm against Apple's documentation.
void setPagesRX(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(true);
#else
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {
@ -177,7 +141,7 @@ void* allocLargePagesMemory(std::size_t bytes) {
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__) || defined(__NetBSD__)
#elif defined(__OpenBSD__)
mem = MAP_FAILED; // OpenBSD does not support huge pages
#else
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);

@ -54,17 +54,12 @@
<ItemGroup>
<ClCompile Include="..\src\aes_hash.cpp" />
<ClCompile Include="..\src\allocator.cpp" />
<ClCompile Include="..\src\argon2_avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_ssse3.c" />
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\cpu.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
<ClCompile Include="..\src\instruction.cpp" />
<ClCompile Include="..\src\instructions_portable.cpp" />

@ -172,14 +172,5 @@
<ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_avx2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>
Loading…
Cancel
Save