Add a static aarch64 routine that computes one dataset item, and a stub
JitCompilerA64::initDataset that getDatasetInitFunc() now returns.

Review notes:
 * The loop in initDataset is still commented out, so the function returned
   by getDatasetInitFunc() currently writes nothing to the dataset.
 * randomx_calc_dataset_item_aarch64 seeds rl[0] from x2 (the item number,
   per its "Input parameters" comment) and derives rl[1..7] from x0 (rl[0]).
 * The 64-bit constants are emitted with .quad: GNU as uses only the low
   4 bytes of the value operand of ".fill 1,8,<value>".

diff --git a/src/jit_compiler_a64.cpp b/src/jit_compiler_a64.cpp
index 07a8114..67ad3bd 100644
--- a/src/jit_compiler_a64.cpp
+++ b/src/jit_compiler_a64.cpp
@@ -860,4 +860,10 @@ void JitCompilerA64::h_NOP(Instruction& instr, uint32_t& codePos)
 	INST_HANDLE(NOP)
 };
 
+void JitCompilerA64::initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem)
+{
+	//for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize)
+	//	initDatasetItem(cache, dataset, itemNumber);
+}
+
 }
diff --git a/src/jit_compiler_a64.hpp b/src/jit_compiler_a64.hpp
index 9c6fe11..7a4913a 100644
--- a/src/jit_compiler_a64.hpp
+++ b/src/jit_compiler_a64.hpp
@@ -51,21 +51,18 @@ namespace randomx {
 
 
 		void generateProgram(Program&, ProgramConfiguration&);
-		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
-
-		}
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {}
+
 		template<size_t N>
-		void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
+		void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {}
 
-		}
-		void generateDatasetInitCode() {
+		void generateDatasetInitCode() {}
 
-		}
 		ProgramFunc* getProgramFunc() {
 			return reinterpret_cast<ProgramFunc*>(code);
 		}
 		DatasetInitFunc* getDatasetInitFunc() {
-			return nullptr;
+			return initDataset;
 		}
 		uint8_t* getCode() {
 			return code;
@@ -128,5 +125,7 @@ namespace randomx {
 		void h_CFROUND(Instruction&, uint32_t&);
 		void h_ISTORE(Instruction&, uint32_t&);
 		void h_NOP(Instruction&, uint32_t&);
+
+		static void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
 	};
 }
diff --git a/src/jit_compiler_a64_static.S b/src/jit_compiler_a64_static.S
index 8ce200a..a4ac496 100644
--- a/src/jit_compiler_a64_static.S
+++ b/src/jit_compiler_a64_static.S
@@ -36,6 +36,11 @@
 	.global randomx_program_aarch64_cacheline_align_mask2
 	.global randomx_program_aarch64_update_spMix1
 	.global randomx_program_aarch64_end
+	.global randomx_calc_dataset_item_aarch64
+	.global randomx_calc_dataset_item_aarch64_prefetch
+	.global randomx_calc_dataset_item_aarch64_mix
+	.global randomx_calc_dataset_item_aarch64_store_result
+	.global randomx_calc_dataset_item_aarch64_end
 
 # Register allocation
 
@@ -381,3 +386,118 @@ randomx_program_aarch64_update_spMix1:
 	ret
 
 randomx_program_aarch64_end:
+
+# Input parameters
+#
+# x0 -> pointer to cache memory
+# x1 -> pointer to output
+# x2 -> item number
+#
+# Register allocation
+#
+# x0-x7 -> output value (calculated dataset item)
+# x8 -> pointer to cache memory
+# x9 -> pointer to output
+# x10 -> registerValue
+# x11 -> mixBlock
+# x12 -> temporary
+# x13 -> temporary
+
+randomx_calc_dataset_item_aarch64:
+	sub	sp, sp, 112
+	stp	x0, x1, [sp]
+	stp	x2, x3, [sp, 16]
+	stp	x4, x5, [sp, 32]
+	stp	x6, x7, [sp, 48]
+	stp	x8, x9, [sp, 64]
+	stp	x10, x11, [sp, 80]
+	stp	x12, x13, [sp, 96]
+
+	mov	x8, x0
+	mov	x9, x1
+	mov	x10, x2
+
+	# rl[0] = (itemNumber + 1) * superscalarMul0;
+	ldr	x12, superscalarMul0
+	madd	x0, x2, x12, x12
+
+	# rl[1] = rl[0] ^ superscalarAdd1;
+	ldr	x12, superscalarAdd1
+	eor	x1, x0, x12
+
+	# rl[2] = rl[0] ^ superscalarAdd2;
+	ldr	x12, superscalarAdd2
+	eor	x2, x0, x12
+
+	# rl[3] = rl[0] ^ superscalarAdd3;
+	ldr	x12, superscalarAdd3
+	eor	x3, x0, x12
+
+	# rl[4] = rl[0] ^ superscalarAdd4;
+	ldr	x12, superscalarAdd4
+	eor	x4, x0, x12
+
+	# rl[5] = rl[0] ^ superscalarAdd5;
+	ldr	x12, superscalarAdd5
+	eor	x5, x0, x12
+
+	# rl[6] = rl[0] ^ superscalarAdd6;
+	ldr	x12, superscalarAdd6
+	eor	x6, x0, x12
+
+	# rl[7] = rl[0] ^ superscalarAdd7;
+	ldr	x12, superscalarAdd7
+	eor	x7, x0, x12
+
+	b	randomx_calc_dataset_item_aarch64_prefetch
+
+superscalarMul0: .quad 6364136223846793005
+superscalarAdd1: .quad 9298411001130361340
+superscalarAdd2: .quad 12065312585734608966
+superscalarAdd3: .quad 9306329213124626780
+superscalarAdd4: .quad 5281919268842080866
+superscalarAdd5: .quad 10536153434571861004
+superscalarAdd6: .quad 3398623926847679864
+superscalarAdd7: .quad 9549104520008361294
+
+# Prefetch -> SuperScalar hash -> Mix will be repeated N times
+
+randomx_calc_dataset_item_aarch64_prefetch:
+	and	x11, x10, 4194303
+	add	x11, x8, x11, lsl 6
+	prfm	pldl2strm, [x11]
+
+	# Generated SuperScalar hash program goes here
+
+randomx_calc_dataset_item_aarch64_mix:
+	ldp	x12, x13, [x11]
+	eor	x0, x0, x12
+	eor	x1, x1, x13
+	ldp	x12, x13, [x11, 16]
+	eor	x2, x2, x12
+	eor	x3, x3, x13
+	ldp	x12, x13, [x11, 32]
+	eor	x4, x4, x12
+	eor	x5, x5, x13
+	ldp	x12, x13, [x11, 48]
+	eor	x6, x6, x12
+	eor	x7, x7, x13
+
+randomx_calc_dataset_item_aarch64_store_result:
+	stp	x0, x1, [x9]
+	stp	x2, x3, [x9, 16]
+	stp	x4, x5, [x9, 32]
+	stp	x6, x7, [x9, 48]
+
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, 16]
+	ldp	x4, x5, [sp, 32]
+	ldp	x6, x7, [sp, 48]
+	ldp	x8, x9, [sp, 64]
+	ldp	x10, x11, [sp, 80]
+	ldp	x12, x13, [sp, 96]
+	add	sp, sp, 112
+
+	ret
+
+randomx_calc_dataset_item_aarch64_end:
diff --git a/src/jit_compiler_a64_static.hpp b/src/jit_compiler_a64_static.hpp
index 0065ffd..556fa2f 100644
--- a/src/jit_compiler_a64_static.hpp
+++ b/src/jit_compiler_a64_static.hpp
@@ -39,4 +39,9 @@ extern "C" {
 	void randomx_program_aarch64_cacheline_align_mask2();
 	void randomx_program_aarch64_update_spMix1();
 	void randomx_program_aarch64_end();
+	void randomx_calc_dataset_item_aarch64();
+	void randomx_calc_dataset_item_aarch64_prefetch();
+	void randomx_calc_dataset_item_aarch64_mix();
+	void randomx_calc_dataset_item_aarch64_store_result();
+	void randomx_calc_dataset_item_aarch64_end();
 }