|
|
|
@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|
|
|
|
// and w16, w10, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
|
|
|
|
|
// and w17, w18, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
// and w17, w20, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
|
|
|
|
|
codePos = PrologueSize;
|
|
|
|
|
literalPos = ImulRcpLiteralsEnd;
|
|
|
|
@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update spMix2
|
|
|
|
|
// eor w18, config.readReg2, config.readReg3
|
|
|
|
|
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
|
|
|
|
// eor w20, config.readReg2, config.readReg3
|
|
|
|
|
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
|
|
|
|
|
|
|
|
|
// Jump back to the main loop
|
|
|
|
|
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
|
|
|
|
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
|
|
|
|
|
|
|
|
|
// and w18, w18, CacheLineAlignMask
|
|
|
|
|
// and w20, w20, CacheLineAlignMask
|
|
|
|
|
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
|
|
|
|
emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
|
|
|
|
|
emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
|
|
|
|
|
|
|
|
|
|
// and w10, w10, CacheLineAlignMask
|
|
|
|
|
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
|
|
|
@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
|
|
|
|
// and w16, w10, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
|
|
|
|
|
// and w17, w18, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
// and w17, w20, ScratchpadL3Mask64
|
|
|
|
|
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
|
|
|
|
|
|
|
|
|
|
codePos = PrologueSize;
|
|
|
|
|
literalPos = ImulRcpLiteralsEnd;
|
|
|
|
@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update spMix2
|
|
|
|
|
// eor w18, config.readReg2, config.readReg3
|
|
|
|
|
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
|
|
|
|
// eor w20, config.readReg2, config.readReg3
|
|
|
|
|
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
|
|
|
|
|
|
|
|
|
// Jump back to the main loop
|
|
|
|
|
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
|
|
|
@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMovImmediate(tmp_reg, imm, code, k);
|
|
|
|
|
|
|
|
|
|
// add dst, src, tmp_reg
|
|
|
|
@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
|
|
|
|
|
uint32_t k = codePos;
|
|
|
|
|
|
|
|
|
|
uint32_t imm = instr.getImm32();
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 19;
|
|
|
|
|
|
|
|
|
|
imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
|
|
|
|
|
emitAddImmediate(tmp_reg, src, imm, code, k);
|
|
|
|
@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// add dst, dst, tmp_reg
|
|
|
|
@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// sub dst, dst, tmp_reg
|
|
|
|
@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
if (src == dst)
|
|
|
|
|
{
|
|
|
|
|
src = 18;
|
|
|
|
|
src = 20;
|
|
|
|
|
emitMovImmediate(src, instr.getImm32(), code, k);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// mul dst, dst, tmp_reg
|
|
|
|
@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// umulh dst, dst, tmp_reg
|
|
|
|
@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// smulh dst, dst, tmp_reg
|
|
|
|
@ -692,7 +692,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
uint32_t k = codePos;
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint64_t N = 1ULL << 63;
|
|
|
|
@ -711,9 +711,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
literalPos -= sizeof(uint64_t);
|
|
|
|
|
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
|
|
|
|
|
|
|
|
|
|
if (literal_id < 13)
|
|
|
|
|
if (literal_id < 12)
|
|
|
|
|
{
|
|
|
|
|
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
|
|
|
|
|
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
|
|
|
|
|
|
|
|
|
|
// mul dst, dst, literal_reg
|
|
|
|
|
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
|
|
|
|
@ -751,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
if (src == dst)
|
|
|
|
|
{
|
|
|
|
|
src = 18;
|
|
|
|
|
src = 20;
|
|
|
|
|
emitMovImmediate(src, instr.getImm32(), code, k);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -769,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
|
|
|
|
|
|
|
|
|
// eor dst, dst, tmp_reg
|
|
|
|
@ -807,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
if (src != dst)
|
|
|
|
|
{
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
|
|
|
|
|
// sub tmp_reg, xzr, src
|
|
|
|
|
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
|
|
|
|
@ -835,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
uint32_t k = codePos;
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
|
|
|
|
|
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
|
|
|
|
|
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
|
|
|
|
@ -984,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
constexpr uint32_t fpcr_tmp_reg = 8;
|
|
|
|
|
|
|
|
|
|
// ror tmp_reg, src, imm
|
|
|
|
@ -1008,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
|
|
|
|
|
|
|
|
|
|
const uint32_t src = IntRegMap[instr.src];
|
|
|
|
|
const uint32_t dst = IntRegMap[instr.dst];
|
|
|
|
|
constexpr uint32_t tmp_reg = 18;
|
|
|
|
|
constexpr uint32_t tmp_reg = 20;
|
|
|
|
|
|
|
|
|
|
uint32_t imm = instr.getImm32();
|
|
|
|
|
|
|
|
|
|