ARMv8-a JIT (work in progress)

armv8-a-jit
SChernykh 5 years ago
parent 93fec18991
commit 228e718c04

@ -90,6 +90,7 @@ namespace randomx {
constexpr int StoreL3Condition = 14;
//Prevent some unsafe configurations.
#define RANDOMX_UNSAFE 1
#ifndef RANDOMX_UNSAFE
static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
@ -119,7 +120,7 @@ namespace randomx {
class JitCompilerX86;
using JitCompiler = JitCompilerX86;
#elif defined(__aarch64__)
#define RANDOMX_HAVE_COMPILER 0
#define RANDOMX_HAVE_COMPILER 1
class JitCompilerA64;
using JitCompiler = JitCompilerA64;
#else

@ -83,43 +83,43 @@ Total sum of frequencies must be 256
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4
#define RANDOMX_FREQ_IADD_M 0
#define RANDOMX_FREQ_ISUB_R 0
#define RANDOMX_FREQ_ISUB_M 0
#define RANDOMX_FREQ_IMUL_R 0
#define RANDOMX_FREQ_IMUL_M 0
#define RANDOMX_FREQ_IMULH_R 0
#define RANDOMX_FREQ_IMULH_M 0
#define RANDOMX_FREQ_ISMULH_R 0
#define RANDOMX_FREQ_ISMULH_M 0
#define RANDOMX_FREQ_IMUL_RCP 0
#define RANDOMX_FREQ_INEG_R 0
#define RANDOMX_FREQ_IXOR_R 0
#define RANDOMX_FREQ_IXOR_M 0
#define RANDOMX_FREQ_IROR_R 0
#define RANDOMX_FREQ_IROL_R 0
#define RANDOMX_FREQ_ISWAP_R 0
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
#define RANDOMX_FREQ_FSWAP_R 0
#define RANDOMX_FREQ_FADD_R 0
#define RANDOMX_FREQ_FADD_M 0
#define RANDOMX_FREQ_FSUB_R 0
#define RANDOMX_FREQ_FSUB_M 0
#define RANDOMX_FREQ_FSCAL_R 0
#define RANDOMX_FREQ_FMUL_R 0
#define RANDOMX_FREQ_FDIV_M 0
#define RANDOMX_FREQ_FSQRT_R 0
//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CFROUND 1
#define RANDOMX_FREQ_CBRANCH 0
#define RANDOMX_FREQ_CFROUND 0
//Store instruction
#define RANDOMX_FREQ_ISTORE 16
#define RANDOMX_FREQ_ISTORE 0
//No-op instruction
#define RANDOMX_FREQ_NOP 0
#define RANDOMX_FREQ_NOP 240
/* ------
256
*/

@ -66,6 +66,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Repetition helpers: REPn(x) is built from smaller REPn macros so that the counts add up to n
// (e.g. REP240 = 128 + 64 + 32 + 16).
// NOTE(review): REP16, REP24, REP32 and REP40 are defined outside this excerpt; presumably each
// expands to that many copies of x - confirm.
#define REP64(x) REP32(x) REP32(x)
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
#define REP240(x) REP128(x) REP64(x) REP32(x) REP16(x)
#define REP256(x) REP128(x) REP128(x)
// Two-level expansion: REPN expands its N argument (which may itself be a macro) before
// REPNX pastes it onto the REP prefix.
#define REPNX(x,N) REP##N(x)
#define REPN(x,N) REPNX(x,N)

@ -0,0 +1,177 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "jit_compiler_a64.hpp"
#include "program.hpp"
#include "virtual_memory.hpp"
// Base encodings of the AArch64 instructions emitted by the JIT compiler.
// Callers OR register numbers, shift amounts and immediates into these words.
namespace ARMV8A {
	// b <label>
	constexpr uint32_t B{ 0x14000000U };

	// eor Xd, Xn, Xm
	constexpr uint32_t EOR{ 0xCA000000U };
	// eor Wd, Wn, Wm
	constexpr uint32_t EOR32{ 0x4A000000U };

	// add Xd, Xn, Xm, lsl #shift
	constexpr uint32_t ADD{ 0x8B000000U };

	// movz Xd, #imm16
	constexpr uint32_t MOVZ{ 0xD2800000U };
	// movn Xd, #imm16
	constexpr uint32_t MOVN{ 0x92800000U };
	// movk Xd, #imm16
	constexpr uint32_t MOVK{ 0xF2800000U };
}
namespace randomx {
// Size in bytes of the whole assembly code template (from randomx_program_aarch64 to randomx_program_aarch64_end).
static const size_t CodeSize =
	reinterpret_cast<uint8_t*>(randomx_program_aarch64_end) - reinterpret_cast<uint8_t*>(randomx_program_aarch64);

// Byte offset, from the start of the template, of the buffer that receives the generated VM instructions.
static const size_t PrologueSize =
	reinterpret_cast<uint8_t*>(randomx_program_aarch64_vm_instructions) - reinterpret_cast<uint8_t*>(randomx_program_aarch64);

// AArch64 register numbers holding the integer registers r0-r7 (x4-x7 and x12-x15 in the template).
constexpr uint32_t IntRegMap[8] = {
	4, 5, 6, 7,
	12, 13, 14, 15
};
// Allocates a buffer of CodeSize bytes, copies the assembly code template into it
// and changes the page permissions through enableAll().
JitCompilerA64::JitCompilerA64()
	: code(reinterpret_cast<uint8_t*>(allocMemoryPages(CodeSize)))
{
	void* const templateStart = reinterpret_cast<void*>(randomx_program_aarch64);
	memcpy(code, templateStart, CodeSize);
	enableAll();
}
// Releases the code buffer that the constructor allocated.
JitCompilerA64::~JitCompilerA64()
{
	freePagedMemory(this->code, CodeSize);
}
void JitCompilerA64::enableAll()
{
setPagesRWX(code, CodeSize);
}
// Translates a RandomX program into AArch64 machine code inside the copied code template.
// The per-instruction handlers write into the instruction buffer that follows the prologue.
// Afterwards the spMix2 update, the branch out of the buffer and the spMix1 update are patched in;
// the two updates depend on the registers selected by the program configuration.
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config)
{
	uint8_t* const templateBase = reinterpret_cast<uint8_t*>(randomx_program_aarch64);
	uint32_t cursor = PrologueSize;

	// Reduce the register indices to the valid range, then let the handler selected by the opcode emit code
	for (uint32_t index = 0; index < program.getSize(); ++index)
	{
		Instruction& current = program(index);
		current.src %= RegistersCount;
		current.dst %= RegistersCount;

		const InstructionGeneratorA64 handler = engine[current.opcode];
		(this->*handler)(current, index, cursor);
	}

	// Update spMix2
	// eor w11, config.readReg2, config.readReg3
	const uint32_t spMix2Update = ARMV8A::EOR32 | 11 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16);
	emit32(spMix2Update, code, cursor);

	// Branch over the unused rest of the buffer to the template code that follows it;
	// the distance is measured from the branch instruction itself and encoded in 4-byte words
	const uint32_t branchDistance = (reinterpret_cast<uint8_t*>(randomx_program_aarch64_vm_instructions_end) - templateBase) - cursor;
	emit32(ARMV8A::B | (branchDistance / 4), code, cursor);

	// Update spMix1: overwrite the placeholder instruction in the template
	// eor x10, config.readReg0, config.readReg1
	cursor = reinterpret_cast<uint8_t*>(randomx_program_aarch64_update_spMix1) - templateBase;
	const uint32_t spMix1Update = ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16);
	emit32(spMix1Update, code, cursor);

#ifdef __GNUC__
	// Flush everything from the start of the instruction buffer up to the end of the patched spMix1 instruction
	char* const flushBegin = reinterpret_cast<char*>(code + PrologueSize);
	char* const flushEnd = reinterpret_cast<char*>(code + cursor);
	__builtin___clear_cache(flushBegin, flushEnd);
#endif
}
// Returns the size in bytes of the code buffer (the size of the assembly template).
size_t JitCompilerA64::getCodeSize()
{
	const size_t templateBytes = CodeSize;
	return templateBytes;
}
// Emits the code for IADD_RS: dst += src << shift.
// When dst is the register that needs a displacement, the sign-extended 32-bit immediate
// is built in x21 and added as well.
// codePos is advanced past the emitted instructions; i is not used.
void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos)
{
	constexpr uint32_t tmpReg = 21; // x21 is listed as a temporary in the template's register allocation

	const uint32_t srcReg = IntRegMap[instr.src];
	const uint32_t dstReg = IntRegMap[instr.dst];
	const uint32_t shiftAmount = instr.getModShift();

	uint32_t cursor = codePos;

	// add dst, src << shift
	emit32(ARMV8A::ADD | dstReg | (dstReg << 5) | (shiftAmount << 10) | (srcReg << 16), code, cursor);

	if (instr.dst == RegisterNeedsDisplacement)
	{
		const uint32_t imm32 = instr.getImm32();
		const bool isNegative = static_cast<int32_t>(imm32) < 0;

		// Negative: movn x21, ~imm32 (16 high bits), which leaves the upper 32 bits set
		// Otherwise: movz x21, imm32 (16 high bits)
		const uint32_t loadHighOpcode = isNegative ? ARMV8A::MOVN : ARMV8A::MOVZ;
		const uint32_t high16 = (isNegative ? ~imm32 : imm32) >> 16;
		emit32(loadHighOpcode | tmpReg | (1 << 21) | (high16 << 5), code, cursor);

		// movk x21, imm32 (16 low bits)
		emit32(ARMV8A::MOVK | tmpReg | ((imm32 & 0xFFFF) << 5), code, cursor);

		// add dst, x21
		emit32(ARMV8A::ADD | dstReg | (dstReg << 5) | (tmpReg << 16), code, cursor);
	}

	codePos = cursor;
}
void JitCompilerA64::h_NOP(Instruction& instr, int i, uint32_t& codePos)
{
}
#include "instruction_weights.hpp"
// INST_HANDLE(x) repeats the pointer to the handler h_x WT(x) times by means of REPN.
// NOTE(review): WT() comes from instruction_weights.hpp and presumably maps to the
// RANDOMX_FREQ_* constants - confirm.
#define INST_HANDLE(x) REPN(&JitCompilerA64::h_##x, WT(x))
// Dispatch table with one slot per possible value of the instruction opcode;
// generateProgram indexes it with instr.opcode to pick the code generator.
// NOTE(review): only h_IADD_RS and h_NOP are defined in this file, so the table presumably
// compiles only while every other instruction has weight 0 - confirm against the configuration.
InstructionGeneratorA64 JitCompilerA64::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
}

@ -32,21 +32,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <vector>
#include <stdexcept>
#include "common.hpp"
#include "jit_compiler_a64_static.hpp"
namespace randomx {
class Program;
class ProgramConfiguration;
class SuperscalarProgram;
class Instruction;
typedef void(JitCompilerA64::*InstructionGeneratorA64)(Instruction&, int, uint32_t&);
class JitCompilerA64 {
public:
JitCompilerA64() {
throw std::runtime_error("ARM64 JIT compiler is not implemented yet.");
}
void generateProgram(Program&, ProgramConfiguration&) {
JitCompilerA64();
~JitCompilerA64();
void generateProgram(Program&, ProgramConfiguration&);
}
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
}
@ -58,19 +61,31 @@ namespace randomx {
}
ProgramFunc* getProgramFunc() {
return nullptr;
return reinterpret_cast<ProgramFunc*>(code);
}
DatasetInitFunc* getDatasetInitFunc() {
return nullptr;
}
uint8_t* getCode() {
return nullptr;
}
size_t getCodeSize() {
return 0;
return code;
}
size_t getCodeSize();
void enableWriting() {}
void enableExecution() {}
void enableAll() {}
void enableAll();
private:
static InstructionGeneratorA64 engine[256];
uint8_t* code;
static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos)
{
*(uint32_t*)(code + codePos) = val;
codePos += sizeof(val);
}
void h_IADD_RS(Instruction&, int, uint32_t&);
void h_NOP(Instruction&, int, uint32_t&);
};
}
}

@ -0,0 +1,275 @@
.arch armv8-a
.text
.global randomx_program_aarch64
.global randomx_program_aarch64_vm_instructions
.global randomx_program_aarch64_vm_instructions_end
.global randomx_program_aarch64_update_spMix1
.global randomx_program_aarch64_end
# Register allocation
# x0 -> pointer to reg buffer
# x1 -> pointer to mem buffer and then to dataset
# x2 -> pointer to scratchpad
# x3 -> loop counter
# x4 -> "r0"
# x5 -> "r1"
# x6 -> "r2"
# x7 -> "r3"
# x8 -> temporary
# x9 -> mx, ma
# x10 -> spMix1
# x11 -> spMix2
# x12 -> "r4"
# x13 -> "r5"
# x14 -> "r6"
# x15 -> "r7"
# x16 -> ScratchpadL1Mask64
# x17 -> ScratchpadL2Mask64
# x18 -> ScratchpadL3Mask64
# x19 -> spAddr0
# x20 -> spAddr1
# x21 -> temporary
# x22 -> temporary
# x23 -> temporary
# x24 -> temporary
# x25 -> temporary
# x26 -> ScratchpadL1Mask8
# x27 -> ScratchpadL2Mask8
# x28 -> ScratchpadL3Mask8
# x29 -> CacheLineAlignMask
# v0-v7 -> temporary
# v8-v15 -> not used
# v16 -> "f0"
# v17 -> "f1"
# v18 -> "f2"
# v19 -> "f3"
# v20 -> "e0"
# v21 -> "e1"
# v22 -> "e2"
# v23 -> "e3"
# v24 -> "a0"
# v25 -> "a1"
# v26 -> "a2"
# v27 -> "a3"
# v28 -> temporary
# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
# v30 -> E 'or' mask = 0x3*00000000******3*00000000******
# v31 -> scale mask = 0x81f000000000000081f0000000000000
# Entry point of the code template. Following the C prototype, the arguments arrive as
# x0 = reg, x1 = mem, x2 = scratchpad, x3 = iterations.
randomx_program_aarch64:
# Save callee-saved registers
# NOTE(review): x16-x18 are stored here as well although AAPCS64 does not list them as
# callee-saved, and x18 is the platform register on some operating systems - confirm that
# using x18 for ScratchpadL3Mask64 is acceptable on all targets.
sub sp, sp, 128
stp x16, x17, [sp]
stp x18, x19, [sp, 16]
stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64]
stp x26, x27, [sp, 80]
stp x28, x29, [sp, 96]
stp x8, x30, [sp, 112]
# Zero integer registers
mov x4, xzr
mov x5, xzr
mov x6, xzr
mov x7, xzr
mov x12, xzr
mov x13, xzr
mov x14, xzr
mov x15, xzr
# Load ma, mx and dataset pointer
# x9 receives the first 8 bytes of the mem buffer, x1 is replaced by the following 8 bytes
ldp x9, x1, [x1]
# Load initial spMix value
mov x10, x9
# Load Scratchpad masks
# 16384, 262144 and 2097152 are the sizes behind the L1, L2 and L3 masks;
# subtracting 64 or 8 keeps the masked offset a multiple of 64 or 8 bytes
mov x16, 16384 - 64
mov x17, 262144 - 64
mov x18, 2097152 - 64
mov x26, 16384 - 8
mov x27, 262144 - 8
mov x28, 2097152 - 8
# Load CacheLineAlignMask
mov x29, 0x7FFFFFC0
# Load group A registers
ldp q24, q25, [x0, 192]
ldp q26, q27, [x0, 224]
# Load E 'and' mask
# The same 64-bit value is written to both lanes of v29
mov x21, 0x00FFFFFFFFFFFFFF
ins v29.d[0], x21
ins v29.d[1], x21
# Load E 'or' mask (stored in reg.f[0])
# The aarch64 path of CompiledVm::execute copies config.eMask to reg.f before calling this code
ldr q30, [x0, 64]
# Load scale mask
mov x21, 0x81f0000000000000
ins v31.d[0], x21
ins v31.d[1], x21
# Start of one loop iteration. x3 counts the remaining iterations and is decremented
# by the subs/bne pair near the end of the template.
main_loop:
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
# The 32-bit and instructions also clear the upper halves of x19 and x20
lsr x21, x10, 32
and w19, w10, w18
and w20, w21, w18
# x19 = scratchpad + spAddr0
# x20 = scratchpad + spAddr1
add x19, x19, x2
add x20, x20, x2
# xor integer registers with scratchpad data (spAddr0)
# 64 bytes are read, 8 bytes per integer register
ldp x21, x22, [x19]
ldp x23, x24, [x19, 16]
eor x4, x4, x21
eor x5, x5, x22
eor x6, x6, x23
eor x7, x7, x24
ldp x21, x22, [x19, 32]
ldp x23, x24, [x19, 48]
eor x12, x12, x21
eor x13, x13, x22
eor x14, x14, x23
eor x15, x15, x24
# Load group F registers (spAddr1)
# ldpsw loads two signed 32-bit words and sign-extends each to 64 bits;
# scvtf then converts both 64-bit lanes of a vector register to double precision
ldpsw x21, x22, [x20]
ldpsw x23, x24, [x20, 8]
ins v16.d[0], x21
ins v16.d[1], x22
ins v17.d[0], x23
ins v17.d[1], x24
ldpsw x21, x22, [x20, 16]
ldpsw x23, x24, [x20, 24]
ins v18.d[0], x21
ins v18.d[1], x22
ins v19.d[0], x23
ins v19.d[1], x24
scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d
scvtf v18.2d, v18.2d
scvtf v19.2d, v19.2d
# Load group E registers (spAddr1)
# Same conversion as for group F, using the next 32 bytes (offsets 32-63)
ldpsw x21, x22, [x20, 32]
ldpsw x23, x24, [x20, 40]
ins v20.d[0], x21
ins v20.d[1], x22
ins v21.d[0], x23
ins v21.d[1], x24
ldpsw x21, x22, [x20, 48]
ldpsw x23, x24, [x20, 56]
ins v22.d[0], x21
ins v22.d[1], x22
ins v23.d[0], x23
ins v23.d[1], x24
scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d
scvtf v22.2d, v22.2d
scvtf v23.2d, v23.2d
# Apply the E 'and' mask (v29) followed by the E 'or' mask (v30) to every group E register
and v20.16b, v20.16b, v29.16b
and v21.16b, v21.16b, v29.16b
and v22.16b, v22.16b, v29.16b
and v23.16b, v23.16b, v29.16b
orr v20.16b, v20.16b, v30.16b
orr v21.16b, v21.16b, v30.16b
orr v22.16b, v22.16b, v30.16b
orr v23.16b, v23.16b, v30.16b
# Execute VM instructions
# JitCompilerA64::generateProgram writes the generated code starting at this label and
# finishes it with a branch to randomx_program_aarch64_vm_instructions_end
randomx_program_aarch64_vm_instructions:
# 16 KB buffer for generated instructions
# (4096 zero-filled entries of 4 bytes each)
.fill 4096,4,0
# Target of the branch that ends the generated code. At this point x11 holds spMix2,
# written by the eor w11 instruction that generateProgram emits before the branch.
randomx_program_aarch64_vm_instructions_end:
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x11
# Calculate dataset pointer for dataset prefetch
# Low 32 bits of x9, masked with CacheLineAlignMask, added to the dataset pointer in x1
mov w25, w9
and x25, x25, x29
add x25, x25, x1
# Prefetch dataset data
prfm pldl2strm, [x25]
# mx <-> ma
# Rotating by 32 swaps the two 32-bit halves of x9
ror x9, x9, 32
# Calculate dataset pointer for dataset read
mov w25, w9
and x25, x25, x29
add x25, x25, x1
# xor integer registers with dataset data
# 64 bytes are read, 8 bytes per integer register
ldp x21, x22, [x25]
ldp x23, x24, [x25, 16]
eor x4, x4, x21
eor x5, x5, x22
eor x6, x6, x23
eor x7, x7, x24
ldp x21, x22, [x25, 32]
ldp x23, x24, [x25, 48]
eor x12, x12, x21
eor x13, x13, x22
eor x14, x14, x23
eor x15, x15, x24
# Placeholder instruction: JitCompilerA64::generateProgram overwrites this word with
# eor x10, <readReg0>, <readReg1> for the program being compiled
randomx_program_aarch64_update_spMix1:
eor x10, x0, x0
# Store integer registers to scratchpad (spAddr1)
stp x4, x5, [x20, 0]
stp x6, x7, [x20, 16]
stp x12, x13, [x20, 32]
stp x14, x15, [x20, 48]
# xor group F and group E registers
# The result is kept in the group F registers v16-v19
eor v16.16b, v16.16b, v20.16b
eor v17.16b, v17.16b, v21.16b
eor v18.16b, v18.16b, v22.16b
eor v19.16b, v19.16b, v23.16b
# Store FP registers to scratchpad (spAddr0)
stp q16, q17, [x19, 0]
stp q18, q19, [x19, 32]
# Decrement the loop counter and repeat until it reaches zero
subs x3, x3, 1
bne main_loop
# Store integer registers
# Written to the reg buffer at offsets 0-63
stp x4, x5, [x0, 0]
stp x6, x7, [x0, 16]
stp x12, x13, [x0, 32]
stp x14, x15, [x0, 48]
# Store FP registers
# v16-v19 go to offsets 64-127 and v20-v23 to offsets 128-191 of the reg buffer
stp q16, q17, [x0, 64]
stp q18, q19, [x0, 96]
stp q20, q21, [x0, 128]
stp q22, q23, [x0, 160]
# Restore callee-saved registers
# Mirrors the stores at the entry point
ldp x16, x17, [sp]
ldp x18, x19, [sp, 16]
ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64]
ldp x26, x27, [sp, 80]
ldp x28, x29, [sp, 96]
ldp x8, x30, [sp, 112]
add sp, sp, 128
ret
# End marker of the template; its address is used to compute the template size (CodeSize)
randomx_program_aarch64_end:

@ -0,0 +1,37 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Symbols exported by the AArch64 assembly code template.
extern "C" {
// Entry point of the template: runs the VM loop for the given number of iterations.
void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations);
// The remaining symbols are labels inside the template. They are declared as functions so that
// their addresses can be taken; the JIT compiler code only uses them to compute offsets and sizes.
// Start of the buffer that receives the generated VM instructions.
void randomx_program_aarch64_vm_instructions();
// First template instruction after that buffer (target of the branch ending the generated code).
void randomx_program_aarch64_vm_instructions_end();
// Placeholder instruction that is patched with the spMix1 update.
void randomx_program_aarch64_update_spMix1();
// End of the template.
void randomx_program_aarch64_end();
}

@ -159,7 +159,6 @@ int main(int argc, char** argv) {
}
std::atomic<uint32_t> atomicNonce(0);
AtomicHash result;
std::vector<randomx_vm*> vms;
std::vector<std::thread> threads;
randomx_dataset* dataset;
@ -231,22 +230,42 @@ int main(int argc, char** argv) {
if (dataset == nullptr) {
throw DatasetAllocException();
}
uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) {
auto perThread = datasetItemCount / initThreadCount;
auto remainder = datasetItemCount % initThreadCount;
uint32_t startItem = 0;
for (int i = 0; i < initThreadCount; ++i) {
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
startItem += count;
char* dataset_memory = reinterpret_cast<char*>(randomx_get_dataset_memory(dataset));
bool read_ok = false;
FILE* fp = fopen("dataset.bin", "rb");
if (fp)
{
read_ok = (fread(dataset_memory, 1, randomx::DatasetSize, fp) == randomx::DatasetSize);
fclose(fp);
}
if (!read_ok) {
uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) {
auto perThread = datasetItemCount / initThreadCount;
auto remainder = datasetItemCount % initThreadCount;
uint32_t startItem = 0;
for (int i = 0; i < initThreadCount; ++i) {
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
startItem += count;
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
else {
randomx_init_dataset(dataset, cache, 0, datasetItemCount);
}
fp = fopen("dataset.bin", "wb");
if (fp)
{
fwrite(dataset_memory, 1, randomx::DatasetSize, fp);
fclose(fp);
}
}
else {
randomx_init_dataset(dataset, cache, 0, datasetItemCount);
}
randomx_release_cache(cache);
cache = nullptr;
@ -267,7 +286,10 @@ int main(int argc, char** argv) {
}
vms.push_back(vm);
}
std::cout << "Running benchmark (" << noncesCount << " nonces) ..." << std::endl;
for (int iter = 0; iter < 100; ++iter) {
std::cout << "Running benchmark (" << noncesCount << " nonces, iteration " << iter << ") ..." << std::endl;
atomicNonce = 0;
AtomicHash result;
sw.restart();
if (threadCount > 1) {
for (unsigned i = 0; i < vms.size(); ++i) {
@ -282,18 +304,13 @@ int main(int argc, char** argv) {
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
threads.clear();
}
else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
}
double elapsed = sw.getElapsed();
for (unsigned i = 0; i < vms.size(); ++i)
randomx_destroy_vm(vms[i]);
if (miningMode)
randomx_release_dataset(dataset);
else
randomx_release_cache(cache);
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0)
@ -304,6 +321,13 @@ int main(int argc, char** argv) {
else {
std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl;
}
}
for (unsigned i = 0; i < vms.size(); ++i)
randomx_destroy_vm(vms[i]);
if (miningMode)
randomx_release_dataset(dataset);
else
randomx_release_cache(cache);
}
catch (MemoryException& e) {
std::cout << "ERROR: " << e.what() << std::endl;

@ -63,6 +63,9 @@ namespace randomx {
template<class Allocator, bool softAes, bool secureJit>
void CompiledVm<Allocator, softAes, secureJit>::execute() {
#ifdef __aarch64__
memcpy(reg.f, config.eMask, sizeof(config.eMask));
#endif
compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS);
}

Loading…
Cancel
Save