Added IADD_M instruction

armv8-a-jit
SChernykh 5 years ago
parent 87bb70b4da
commit 3a1c58946d

@ -83,7 +83,7 @@ Total sum of frequencies must be 256
//Integer instructions
// Each RANDOMX_FREQ_* value is the instruction's share of the 256 opcode slots;
// the total over all instructions must be 256.
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 0
#define RANDOMX_FREQ_ISUB_M 0
#define RANDOMX_FREQ_IMUL_R 0
@ -119,7 +119,7 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_ISTORE 0
//No-op instruction
// NOP takes whatever share is left so that all frequencies add up to 256.
#define RANDOMX_FREQ_NOP 233
/* ------
256
*/

@ -66,7 +66,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// REP<n>(x) expands to n consecutive copies of x, built from smaller REP blocks
// (64 = 32+32, 128 = 4*32, 232 = 128+40+40+24, 240 = 128+64+32+16,
// 233 = 128+64+32+9, 256 = 128+128).
// The smaller helpers used here (REP9, REP16, REP24, REP32, REP40) are defined outside this excerpt.
#define REP64(x) REP32(x) REP32(x)
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
#define REP240(x) REP128(x) REP64(x) REP32(x) REP16(x)
#define REP233(x) REP128(x) REP64(x) REP32(x) REP9(x)
#define REP256(x) REP128(x) REP128(x)
// REPN goes through REPNX so that N is macro-expanded before it is pasted onto "REP"
// (e.g. N may itself be a RANDOMX_FREQ_* macro).
#define REPNX(x,N) REP##N(x)
#define REPN(x,N) REPNX(x,N)

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
All rights reserved.
@ -32,13 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Base AArch64 instruction encodings used by the JIT. Callers OR the register
// numbers and immediate fields into these values before emitting them.
namespace ARMV8A {
	constexpr uint32_t B          = 0x14000000;
	constexpr uint32_t EOR        = 0xCA000000;
	constexpr uint32_t EOR32      = 0x4A000000;
	constexpr uint32_t ADD        = 0x8B000000;
	constexpr uint32_t MOVZ       = 0xD2800000;
	constexpr uint32_t MOVN       = 0x92800000;
	constexpr uint32_t MOVK       = 0xF2800000;
	// ADD with a 12-bit immediate; emitAddImmediate uses LO for bits 0-11 of the
	// addend and HI for bits 12-23.
	constexpr uint32_t ADD_IMM_LO = 0x91000000;
	constexpr uint32_t ADD_IMM_HI = 0x91400000;
}
@ -49,6 +52,8 @@ static const size_t PrologueSize = ((uint8_t*)randomx_program_aarch64_vm_instruc
// Machine register number for each of the 8 integer registers addressed by an
// instruction's src/dst field; the handlers OR these numbers straight into the
// instruction encodings (presumably x4-x7 and x12-x15 - confirm against the assembly prologue).
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
// Compile-time integer base-2 logarithm, rounded down.
// Halves `value` recursively until it is no longer greater than 1, counting the
// steps; any value that is not greater than 1 yields 0.
template<typename T>
static constexpr size_t Log2(T value)
{
	return !(value > 1) ? 0 : (1 + Log2(value / 2));
}
JitCompilerA64::JitCompilerA64()
: code((uint8_t*) allocMemoryPages(CodeSize))
{
@ -101,6 +106,70 @@ size_t JitCompilerA64::getCodeSize()
return CodeSize;
}
// Emits the instruction(s) that load the 32-bit immediate `imm` into register `dst`.
//
//   dst     - destination register number (OR-ed into the low bits of each opcode)
//   imm     - value to load
//   code    - buffer the instructions are written to
//   codePos - in/out write offset into `code`; advanced past everything emitted
//
// One instruction is emitted when `imm` fits in 16 bits, otherwise two.
void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos)
{
	uint32_t pos = codePos;

	const uint32_t low16 = imm & 0xFFFF;

	if ((imm >> 16) == 0)
	{
		// movz dst, imm (whole value fits in the low 16 bits)
		emit32(ARMV8A::MOVZ | dst | (imm << 5), code, pos);
		codePos = pos;
		return;
	}

	// The high half goes in first (bit 21 selects the upper 16-bit slot).
	// Values with the sign bit set use movn with the inverted high half,
	// all others use movz.
	const bool signBitSet = static_cast<int32_t>(imm) < 0;
	const uint32_t highOpcode = signBitSet ? ARMV8A::MOVN : ARMV8A::MOVZ;
	const uint32_t high16 = (signBitSet ? ~imm : imm) >> 16;
	emit32(highOpcode | dst | (1 << 21) | (high16 << 5), code, pos);

	// movk dst, imm (16 low bits)
	emit32(ARMV8A::MOVK | dst | (low16 << 5), code, pos);

	codePos = pos;
}
// Emits code computing dst = src + imm.
//
//   dst, src - destination / source register numbers
//   imm      - 32-bit addend
//   code     - buffer the instructions are written to
//   codePos  - in/out write offset into `code`; advanced past everything emitted
//
// Addends below 2^24 are split into two 12-bit halves and added with one or two
// add-immediate instructions. Larger addends are first materialised in register 21
// via emitMovImmediate and then added with a register-register ADD.
void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos)
{
	uint32_t pos = codePos;

	if (imm >= (1 << 24))
	{
		constexpr uint32_t scratch = 21;
		emitMovImmediate(scratch, imm, code, pos);

		// add dst, src, scratch
		emit32(ARMV8A::ADD | dst | (src << 5) | (scratch << 16), code, pos);

		codePos = pos;
		return;
	}

	const uint32_t low12 = imm & ((1 << 12) - 1);
	const uint32_t high12 = imm >> 12;

	// Register the next add reads from: src at first, dst once the low half has been added.
	uint32_t base = src;

	if (low12)
	{
		emit32(ARMV8A::ADD_IMM_LO | dst | (base << 5) | (low12 << 10), code, pos);
		base = dst;
	}

	// The high-half add is also emitted when the low half is zero (even for imm == 0),
	// so that dst always receives a value derived from src.
	if (high12 || !low12)
	{
		emit32(ARMV8A::ADD_IMM_HI | dst | (base << 5) | (high12 << 10), code, pos);
	}

	codePos = pos;
}
void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos)
{
uint32_t k = codePos;
@ -113,23 +182,46 @@ void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos)
emit32(ARMV8A::ADD | dst | (dst << 5) | (shift << 10) | (src << 16), code, k);
if (instr.dst == RegisterNeedsDisplacement)
emitAddImmediate(dst, dst, instr.getImm32(), code, k);
codePos = k;
}
// Generates code for IADD_M: dst += 64-bit value loaded from the scratchpad.
//
//   instr   - instruction being compiled (src/dst register indices, imm32, mod.mem flag)
//   i       - instruction index (unused here)
//   codePos - in/out write offset into the member buffer `code`; advanced past everything emitted
//
// Register 21 is used as a temporary for the address and the loaded value.
// x2 is the base register of both loads (presumably the scratchpad pointer - confirm
// against the assembly prologue).
void JitCompilerA64::h_IADD_M(Instruction& instr, int i, uint32_t& codePos)
{
	uint32_t k = codePos;

	const uint32_t src = IntRegMap[instr.src];
	const uint32_t dst = IntRegMap[instr.dst];

	uint32_t imm = instr.getImm32();
	constexpr uint32_t tmp_reg = 21;

	if (src != dst)
	{
		// Address = (src + imm) masked to the L1 or L2 scratchpad size, chosen by mod.mem.
		// The immediate is pre-masked; the sum is masked again below, so the
		// resulting address is unaffected (presumably this just keeps the addend small).
		imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
		emitAddImmediate(tmp_reg, src, imm, code, k);

		// and tmp_reg, tmp_reg, <mask>: the mask width is derived from the scratchpad size
		// (NOTE(review): encoding looks like an 8-byte-aligned (size - 1) mask - confirm).
		constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
		constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10);
		constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10);

		emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);

		// ldr tmp_reg, [x2, tmp_reg]
		emit32(0xf8606840 | tmp_reg | (tmp_reg << 16), code, k);
	}
	else
	{
		// src == dst: the address is the immediate alone, masked with ScratchpadL3Mask.
		// It is stored divided by 8 and scaled back by the load's "lsl 3".
		imm = (imm & ScratchpadL3Mask) >> 3;
		emitMovImmediate(tmp_reg, imm, code, k);

		// ldr tmp_reg, [x2, tmp_reg, lsl 3]
		emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
	}

	// add dst, dst, tmp_reg
	emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k);

	codePos = k;
}

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
All rights reserved.
@ -85,7 +86,11 @@ namespace randomx {
codePos += sizeof(val);
}
void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos);
void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos);
void h_IADD_RS(Instruction&, int, uint32_t&);
void h_IADD_M(Instruction&, int, uint32_t&);
void h_NOP(Instruction&, int, uint32_t&);
};
}

@ -1,3 +1,30 @@
# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
# Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.arch armv8-a
.text
.global randomx_program_aarch64

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
All rights reserved.

Loading…
Cancel
Save