diff --git a/src/configuration.h b/src/configuration.h index 7c56232..59545fd 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -83,7 +83,7 @@ Total sum of frequencies must be 256 //Integer instructions #define RANDOMX_FREQ_IADD_RS 16 -#define RANDOMX_FREQ_IADD_M 0 +#define RANDOMX_FREQ_IADD_M 7 #define RANDOMX_FREQ_ISUB_R 0 #define RANDOMX_FREQ_ISUB_M 0 #define RANDOMX_FREQ_IMUL_R 0 @@ -119,7 +119,7 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_ISTORE 0 //No-op instruction -#define RANDOMX_FREQ_NOP 240 +#define RANDOMX_FREQ_NOP 233 /* ------ 256 */ diff --git a/src/instruction_weights.hpp b/src/instruction_weights.hpp index 5c97843..b6642e3 100644 --- a/src/instruction_weights.hpp +++ b/src/instruction_weights.hpp @@ -66,7 +66,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define REP64(x) REP32(x) REP32(x) #define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) #define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) -#define REP240(x) REP128(x) REP64(x) REP32(x) REP16(x) +#define REP233(x) REP128(x) REP64(x) REP32(x) REP9(x) #define REP256(x) REP128(x) REP128(x) #define REPNX(x,N) REP##N(x) #define REPN(x,N) REPNX(x,N) diff --git a/src/jit_compiler_a64.cpp b/src/jit_compiler_a64.cpp index 0e48436..72f7bdc 100644 --- a/src/jit_compiler_a64.cpp +++ b/src/jit_compiler_a64.cpp @@ -1,5 +1,6 @@ /* Copyright (c) 2018-2019, tevador +Copyright (c) 2019, SChernykh All rights reserved. @@ -32,13 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace ARMV8A { -constexpr uint32_t B = 0x14000000; -constexpr uint32_t EOR = 0xCA000000; -constexpr uint32_t EOR32 = 0x4A000000; -constexpr uint32_t ADD = 0x8B000000; -constexpr uint32_t MOVZ = 0xD2800000; -constexpr uint32_t MOVN = 0x92800000; -constexpr uint32_t MOVK = 0xF2800000; +constexpr uint32_t B = 0x14000000; +constexpr uint32_t EOR = 0xCA000000; +constexpr uint32_t EOR32 = 0x4A000000; +constexpr uint32_t ADD = 0x8B000000; +constexpr uint32_t MOVZ = 0xD2800000; +constexpr uint32_t MOVN = 0x92800000; +constexpr uint32_t MOVK = 0xF2800000; +constexpr uint32_t ADD_IMM_LO = 0x91000000; +constexpr uint32_t ADD_IMM_HI = 0x91400000; } @@ -49,6 +52,8 @@ static const size_t PrologueSize = ((uint8_t*)randomx_program_aarch64_vm_instruc constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; +template<typename T> static constexpr size_t Log2(T value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } + JitCompilerA64::JitCompilerA64() : code((uint8_t*) allocMemoryPages(CodeSize)) { @@ -101,6 +106,70 @@ size_t JitCompilerA64::getCodeSize() return CodeSize; } +void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos) +{ + uint32_t k = codePos; + + if (imm < (1 << 16)) + { + // movz tmp_reg, imm32 (16 low bits) + emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k); + } + else + { + if (static_cast<int32_t>(imm) < 0) + { + // movn tmp_reg, ~imm32 (16 high bits) + emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k); + } + else + { + // movz tmp_reg, imm32 (16 high bits) + emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k); + } + + // movk tmp_reg, imm32 (16 low bits) + emit32(ARMV8A::MOVK | dst | ((imm & 0xFFFF) << 5), code, k); + } + + codePos = k; +} + +void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos) +{ + uint32_t k = codePos; + + if (imm < (1 << 24)) + { + const uint32_t imm_lo = imm & ((1 << 12) - 1); + const uint32_t imm_hi = imm >> 
12; + + if (imm_lo && imm_hi) + { + emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k); + emit32(ARMV8A::ADD_IMM_HI | dst | (dst << 5) | (imm_hi << 10), code, k); + } + else if (imm_lo) + { + emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k); + } + else + { + emit32(ARMV8A::ADD_IMM_HI | dst | (src << 5) | (imm_hi << 10), code, k); + } + } + else + { + constexpr uint32_t tmp_reg = 21; + emitMovImmediate(tmp_reg, imm, code, k); + + // add dst, src, tmp_reg + emit32(ARMV8A::ADD | dst | (src << 5) | (tmp_reg << 16), code, k); + } + + codePos = k; +} + void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos) { uint32_t k = codePos; @@ -113,23 +182,46 @@ void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos) emit32(ARMV8A::ADD | dst | (dst << 5) | (shift << 10) | (src << 16), code, k); if (instr.dst == RegisterNeedsDisplacement) + emitAddImmediate(dst, dst, instr.getImm32(), code, k); + + codePos = k; +} + +void JitCompilerA64::h_IADD_M(Instruction& instr, int i, uint32_t& codePos) +{ + uint32_t k = codePos; + + const uint32_t src = IntRegMap[instr.src]; + const uint32_t dst = IntRegMap[instr.dst]; + uint32_t imm = instr.getImm32(); + + constexpr uint32_t tmp_reg = 21; + + if (src != dst) { - const uint32_t imm32 = instr.getImm32(); - if (static_cast<int32_t>(imm32) < 0) - { - // movn x21, ~imm32 (16 high bits) - emit32(ARMV8A::MOVN | 21 | (1 << 21) | ((~imm32 >> 16) << 5), code, k); - } - else - { - // movz x21, imm32 (16 high bits) - emit32(ARMV8A::MOVZ | 21 | (1 << 21) | ((imm32 >> 16) << 5), code, k); - } - // movk x21, imm32 (16 low bits) - emit32(ARMV8A::MOVK | 21 | ((imm32 & 0xFFFF) << 5), code, k); - // add dst, x21 - emit32(ARMV8A::ADD | dst | (dst << 5) | (21 << 16), code, k); + imm &= instr.getModMem() ? 
(RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); + emitAddImmediate(tmp_reg, src, imm, code, k); + + constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); + constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10); + constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10); + + emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k); + + // ldr tmp_reg, [x2, tmp_reg] + emit32(0xf8606840 | tmp_reg | (tmp_reg << 16), code, k); } + else + { + imm = (imm & ScratchpadL3Mask) >> 3; + emitMovImmediate(tmp_reg, imm, code, k); + + // ldr tmp_reg, [x2, tmp_reg, lsl 3] + emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); + } + + // add dst, dst, tmp_reg + emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k); codePos = k; } diff --git a/src/jit_compiler_a64.hpp b/src/jit_compiler_a64.hpp index 80a295e..2fedac3 100644 --- a/src/jit_compiler_a64.hpp +++ b/src/jit_compiler_a64.hpp @@ -1,5 +1,6 @@ /* Copyright (c) 2018-2019, tevador +Copyright (c) 2019, SChernykh All rights reserved. @@ -85,7 +86,11 @@ namespace randomx { codePos += sizeof(val); } + void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos); + void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos); + void h_IADD_RS(Instruction&, int, uint32_t&); + void h_IADD_M(Instruction&, int, uint32_t&); void h_NOP(Instruction&, int, uint32_t&); }; } diff --git a/src/jit_compiler_a64_static.S b/src/jit_compiler_a64_static.S index bf31440..904a149 100644 --- a/src/jit_compiler_a64_static.S +++ b/src/jit_compiler_a64_static.S @@ -1,3 +1,30 @@ +# Copyright (c) 2018-2019, tevador +# Copyright (c) 2019, SChernykh +# +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holder nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + .arch armv8-a .text .global randomx_program_aarch64 diff --git a/src/jit_compiler_a64_static.hpp b/src/jit_compiler_a64_static.hpp index 099e330..4f29a26 100644 --- a/src/jit_compiler_a64_static.hpp +++ b/src/jit_compiler_a64_static.hpp @@ -1,5 +1,6 @@ /* Copyright (c) 2018-2019, tevador +Copyright (c) 2019, SChernykh All rights reserved.