Added light mode

armv8-a-jit
SChernykh 5 years ago
parent cb43ef96e2
commit 7fb5cfdfce

@ -68,7 +68,6 @@ static const size_t CodeSize = ((uint8_t*)randomx_init_dataset_aarch64_end) - ((
// Byte offsets of labels inside the pre-assembled code template, each measured
// from the address of randomx_program_aarch64.

// Offset of the randomx_program_aarch64_main_loop label.
static const size_t MainLoopBegin = ((uint8_t*)randomx_program_aarch64_main_loop) - ((uint8_t*)randomx_program_aarch64);
// Offset of the randomx_program_aarch64_vm_instructions label.
static const size_t PrologueSize = ((uint8_t*)randomx_program_aarch64_vm_instructions) - ((uint8_t*)randomx_program_aarch64);
// Offset of the randomx_program_aarch64_imul_rcp_literals_end label.
static const size_t ImulRcpLiteralsEnd = ((uint8_t*)randomx_program_aarch64_imul_rcp_literals_end) - ((uint8_t*)randomx_program_aarch64);
// Offset of the randomx_program_aarch64_vm_instructions_end label.
static const size_t InstructionsEnd = ((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64);
static const size_t CalcDatasetItemSize =
(((uint8_t*)randomx_calc_dataset_item_aarch64_end) - ((uint8_t*)randomx_calc_dataset_item_aarch64)) +
@ -150,6 +149,63 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
#endif
}
// Generates the light-mode variant of a VM program by patching instructions into
// the pre-assembled AArch64 template held in `code`.
//
// program       - program to compile; the src/dst fields of every instruction are
//                 reduced modulo RegistersCount in place before code generation.
// config        - supplies readReg0..readReg3, the register indices used to build
//                 the spMix1/spMix2 update instructions.
// datasetOffset - divided by CacheLineSize and added to the item number in x2
//                 (presumably a byte offset into the dataset - TODO confirm).
//
// NOTE(review): every emit32 call writes at codePos; the back-to-back calls below
// only make sense if emit32 also advances codePos (presumably taken by reference),
// so the statement order in this function is significant.
void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
{
// Start patching 4 bytes (one A64 instruction) past the main loop label.
uint32_t codePos = MainLoopBegin + 4;
// and w16, w10, ScratchpadL3Mask64
// The mask width is encoded as Log2(RANDOMX_SCRATCHPAD_L3) - 7 in the field at bit 10.
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// Reset the emission state: instructions go to the vm_instructions label,
// literals start at ImulRcpLiteralsEnd, and no 32-bit literals are pending.
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
num32bitLiterals = 0;
// Initialise every register's entry to the start of the instruction area
// (presumably the position where the register was last modified - TODO confirm).
for (uint32_t i = 0; i < RegistersCount; ++i)
reg_changed_offset[i] = codePos;
// Compile each instruction by dispatching through the member-function table
// `engine`, indexed by opcode.
for (uint32_t i = 0; i < program.getSize(); ++i)
{
Instruction& instr = program(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
(this->*engine[instr.opcode])(instr, codePos);
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
// `offset` is the byte distance from the current position to the
// randomx_program_aarch64_vm_instructions_end_light label; dividing by 4 turns it
// into an instruction count for the branch immediate.
// NOTE(review): assumes the label lies after codePos (non-negative distance) - confirm.
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w2, w9, CacheLineAlignMask
// Overwrites the placeholder instruction at the light_cacheline_align_mask label.
codePos = (((uint8_t*)randomx_program_aarch64_light_cacheline_align_mask) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 2 | (9 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
// Update spMix1
// eor x10, config.readReg0, config.readReg1
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
// Apply dataset offset
// Overwrites the two placeholder `add x2, x2, 0` instructions at the
// light_dataset_offset label: the first receives the low 12 bits of the scaled
// offset, the second the remaining high bits.
codePos = ((uint8_t*)randomx_program_aarch64_light_dataset_offset) - ((uint8_t*)randomx_program_aarch64);
datasetOffset /= CacheLineSize;
const uint32_t imm_lo = datasetOffset & ((1 << 12) - 1);
// NOTE(review): imm_hi is not masked; if ADD_IMM_HI also takes a 12-bit immediate,
// a scaled offset of 2^24 or more would spill into neighbouring bits - confirm
// callers never pass such values.
const uint32_t imm_hi = datasetOffset >> 12;
emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
// Flush the instruction cache for the patched range, from the main loop label up
// to the current codePos, so the CPU does not execute stale instructions.
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
#endif
}
template<size_t N>
void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache)
{

@ -50,8 +50,7 @@ namespace randomx {
~JitCompilerA64();
void generateProgram(Program&, ProgramConfiguration&);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {}
// Light-mode program generator, defined out of line; the uint32_t argument is the dataset offset.
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &);

@ -35,6 +35,9 @@
.global randomx_program_aarch64_cacheline_align_mask1
.global randomx_program_aarch64_cacheline_align_mask2
.global randomx_program_aarch64_update_spMix1
.global randomx_program_aarch64_vm_instructions_end_light
.global randomx_program_aarch64_light_cacheline_align_mask
.global randomx_program_aarch64_light_dataset_offset
.global randomx_init_dataset_aarch64
.global randomx_init_dataset_aarch64_end
.global randomx_calc_dataset_item_aarch64
@ -318,6 +321,7 @@ randomx_program_aarch64_cacheline_align_mask2:
and x10, x10, 1
add x10, x10, x1
randomx_program_aarch64_xor_with_dataset_line:
# xor integer registers with dataset data
ldp x18, x19, [x10]
eor x4, x4, x18
@ -386,6 +390,46 @@ randomx_program_aarch64_update_spMix1:
ret
# Light-mode tail of the VM program. The JIT compiler emits a branch to this label
# after the generated VM instructions. The code calls
# randomx_calc_dataset_item_aarch64 with a stack buffer as output, then continues
# at randomx_program_aarch64_xor_with_dataset_line with x10 pointing at that buffer.
#
# Stack frame (96 bytes):
#   [sp, 0]   output buffer handed to randomx_calc_dataset_item_aarch64
#             (presumably one 64-byte dataset item - TODO confirm)
#   [sp, 64]  saved x0, x1
#   [sp, 80]  saved x2, x30
randomx_program_aarch64_vm_instructions_end_light:
sub sp, sp, 96
# Save the registers overwritten below; x30 (link register) is clobbered by bl
stp x0, x1, [sp, 64]
stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
# mx <-> ma
ror x9, x9, 32
# x0 -> pointer to cache memory
mov x0, x1
# x1 -> pointer to output
mov x1, sp
randomx_program_aarch64_light_cacheline_align_mask:
# Actual mask will be inserted by JIT compiler
and w2, w9, 1
# x2 -> item number
lsr x2, x2, 6
randomx_program_aarch64_light_dataset_offset:
# Apply dataset offset (filled in by JIT compiler)
add x2, x2, 0
add x2, x2, 0
bl randomx_calc_dataset_item_aarch64
# x10 -> computed item, read by randomx_program_aarch64_xor_with_dataset_line
mov x10, sp
# Restore the saved registers and release the frame
ldp x0, x1, [sp, 64]
ldp x2, x30, [sp, 80]
add sp, sp, 96
# NOTE(review): x10 now points below sp; the target reads [x10] after the frame
# has been released, so this relies on nothing (e.g. a signal handler) writing
# to that region in between - confirm this is acceptable.
b randomx_program_aarch64_xor_with_dataset_line
# Input parameters
#
# x0 -> pointer to cache

@ -38,6 +38,9 @@ extern "C" {
void randomx_program_aarch64_cacheline_align_mask1();
void randomx_program_aarch64_cacheline_align_mask2();
void randomx_program_aarch64_update_spMix1();
// Labels inside the light-mode part of the assembly template. They are declared as
// functions so that their addresses can be taken to compute patch offsets.
void randomx_program_aarch64_vm_instructions_end_light();
void randomx_program_aarch64_light_cacheline_align_mask();
void randomx_program_aarch64_light_dataset_offset();
void randomx_init_dataset_aarch64();
void randomx_init_dataset_aarch64_end();
void randomx_calc_dataset_item_aarch64();

@ -254,6 +254,7 @@ int main(int argc, char** argv) {
p += nBytes;
}
fclose(fp);
std::cout << "Dataset loaded in " << sw.getElapsed() << " s" << std::endl;
}
else
{
@ -261,6 +262,8 @@ int main(int argc, char** argv) {
}
if (!read_ok) {
Stopwatch dataset_initialization(true);
uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) {
auto perThread = datasetItemCount / initThreadCount;
@ -278,6 +281,7 @@ int main(int argc, char** argv) {
else {
randomx_init_dataset(dataset, cache, 0, datasetItemCount);
}
std::cout << "Dataset initialized in " << dataset_initialization.getElapsed() << " s" << std::endl;
fp = fopen("dataset.bin", "wb");
if (fp)
@ -290,7 +294,6 @@ int main(int argc, char** argv) {
cache = nullptr;
threads.clear();
}
std::cout << "Memory initialized in " << sw.getElapsed() << " s" << std::endl;
std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl;
for (int i = 0; i < threadCount; ++i) {
randomx_vm *vm = randomx_create_vm(flags, cache, dataset);

Loading…
Cancel
Save