Added IADD_M instruction

5 years ago · 3a1c58946d
parent 87bb70b4da
commit 3a1c58946d
6 changed files with 150 additions and 25 deletions
--- a/src/configuration.h
+++ b/src/configuration.h
@ -83,7 +83,7 @@ Total sum of frequencies must be 256

 //Integer instructions
 #define RANDOMX_FREQ_IADD_RS       16
-#define RANDOMX_FREQ_IADD_M         0
+#define RANDOMX_FREQ_IADD_M         7 // IADD_M enabled (was 0); RANDOMX_FREQ_NOP drops by the same 7 so the total stays 256
 #define RANDOMX_FREQ_ISUB_R         0
 #define RANDOMX_FREQ_ISUB_M         0
 #define RANDOMX_FREQ_IMUL_R         0
@ -119,7 +119,7 @@ Total sum of frequencies must be 256
 #define RANDOMX_FREQ_ISTORE         0

 //No-op instruction
-#define RANDOMX_FREQ_NOP          240
+#define RANDOMX_FREQ_NOP          233 // 240 - 7: gives up the weight now assigned to RANDOMX_FREQ_IADD_M
 /*                               ------
                                  256
 */
--- a/src/instruction_weights.hpp
+++ b/src/instruction_weights.hpp
@ -66,7 +66,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define REP64(x) REP32(x) REP32(x)
 #define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
 #define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
-#define REP240(x) REP128(x) REP64(x) REP32(x) REP16(x)
+#define REP233(x) REP128(x) REP64(x) REP32(x) REP9(x) // 128 + 64 + 32 + 9 = 233 repetitions; NOTE(review): REP9 is not among the lines shown here - confirm it is defined in this header
 #define REP256(x) REP128(x) REP128(x)
 #define REPNX(x,N) REP##N(x)
 #define REPN(x,N) REPNX(x,N)
--- a/src/jit_compiler_a64.cpp
+++ b/src/jit_compiler_a64.cpp
@ -1,5 +1,6 @@
 /*
 Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>

 All rights reserved.

@ -32,13 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 namespace ARMV8A { // Base opcodes of the AArch64 instructions emitted by the JIT; register and immediate fields are OR-ed in at the emit32() call sites.

-constexpr uint32_t B     = 0x14000000;
-constexpr uint32_t EOR   = 0xCA000000;
-constexpr uint32_t EOR32 = 0x4A000000;
-constexpr uint32_t ADD   = 0x8B000000;
-constexpr uint32_t MOVZ  = 0xD2800000;
-constexpr uint32_t MOVN  = 0x92800000;
-constexpr uint32_t MOVK  = 0xF2800000;
+constexpr uint32_t B          = 0x14000000;
+constexpr uint32_t EOR        = 0xCA000000;
+constexpr uint32_t EOR32      = 0x4A000000;
+constexpr uint32_t ADD        = 0x8B000000; // add (register form): Rd at bit 0, Rn at bit 5, shift amount at bit 10, Rm at bit 16
+constexpr uint32_t MOVZ       = 0xD2800000; // imm16 goes at bit 5; setting bit 21 places it in bits 16-31 of the register
+constexpr uint32_t MOVN       = 0x92800000; // same operand fields as MOVZ
+constexpr uint32_t MOVK       = 0xF2800000; // same operand fields as MOVZ
+constexpr uint32_t ADD_IMM_LO = 0x91000000; // add (immediate form): Rd at bit 0, Rn at bit 5, imm12 at bit 10
+constexpr uint32_t ADD_IMM_HI = 0x91400000; // ADD_IMM_LO with bit 22 set: the imm12 is applied shifted left by 12

 }

@ -49,6 +52,8 @@ static const size_t PrologueSize = ((uint8_t*)randomx_program_aarch64_vm_instruc

 constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; // AArch64 register number for each index found in instr.src / instr.dst

+template<typename T> static constexpr size_t Log2(T value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } // compile-time floor(log2(value)); yields 0 for value <= 1
+
 JitCompilerA64::JitCompilerA64()
 	: code((uint8_t*) allocMemoryPages(CodeSize))
 {
@ -101,6 +106,70 @@ size_t JitCompilerA64::getCodeSize()
 	return CodeSize;
 }

+void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos) // Emits one or two instructions that load imm, sign-extended from 32 to 64 bits, into register dst; codePos is advanced past them.
+{
+	uint32_t k = codePos; // local cursor, written back to codePos at the end
+
+	if (imm < (1 << 16))
+	{
+		// imm fits in 16 bits, a single instruction is enough: movz dst, imm
+		emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k);
+	}
+	else
+	{
+		if (static_cast<int32_t>(imm) < 0)
+		{
+			// negative imm32: movn dst, (~imm32 >> 16), lsl 16 - bits 16-31 get imm32's high half, every other bit becomes 1
+			emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k);
+		}
+		else
+		{
+			// movz dst, (imm32 >> 16), lsl 16 - bits 16-31 get imm32's high half, every other bit becomes 0
+			emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k);
+		}
+
+		// movk dst, (imm32 & 0xFFFF) - fills in the 16 low bits and keeps the rest of dst
+		emit32(ARMV8A::MOVK | dst | ((imm & 0xFFFF) << 5), code, k);
+	}
+
+	codePos = k;
+}
+
+void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos) // Emits code for dst = src + imm and advances codePos. Values below 2^24 use one or two add-immediate instructions; larger values are loaded (sign-extended) into register 21 first.
+{
+	uint32_t k = codePos; // local cursor, written back to codePos at the end
+
+	if (imm < (1 << 24))
+	{
+		const uint32_t imm_lo = imm & ((1 << 12) - 1); // bits 0-11
+		const uint32_t imm_hi = imm >> 12; // bits 12-23
+
+		if (imm_lo && imm_hi) // both halves are non-zero: two adds, the second one accumulating into dst
+		{
+			emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k);
+			emit32(ARMV8A::ADD_IMM_HI | dst | (dst << 5) | (imm_hi << 10), code, k);
+		}
+		else if (imm_lo)
+		{
+			emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k);
+		}
+		else // imm_lo == 0; this also handles imm == 0, where the emitted add of zero just copies src into dst
+		{
+			emit32(ARMV8A::ADD_IMM_HI | dst | (src << 5) | (imm_hi << 10), code, k);
+		}
+	}
+	else
+	{
+		constexpr uint32_t tmp_reg = 21; // scratch register, overwritten on this path
+		emitMovImmediate(tmp_reg, imm, code, k);
+
+		// add dst, src, tmp_reg
+		emit32(ARMV8A::ADD | dst | (src << 5) | (tmp_reg << 16), code, k);
+	}
+
+	codePos = k;
+}
+
 void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos)
 {
 	uint32_t k = codePos;
@ -113,23 +182,46 @@ void JitCompilerA64::h_IADD_RS(Instruction& instr, int i, uint32_t& codePos)
 	emit32(ARMV8A::ADD | dst | (dst << 5) | (shift << 10) | (src << 16), code, k);

 	if (instr.dst == RegisterNeedsDisplacement)
+		emitAddImmediate(dst, dst, instr.getImm32(), code, k);
+
+	codePos = k;
+}
+
+void JitCompilerA64::h_IADD_M(Instruction& instr, int i, uint32_t& codePos) // Emits code for IADD_M: loads a 64-bit value from memory addressed relative to x2 (presumably the scratchpad base - confirm in the static assembly) and adds it to dst. Register 21 is used as scratch.
+{
+	uint32_t k = codePos; // local cursor, written back to codePos at the end
+
+	const uint32_t src = IntRegMap[instr.src];
+	const uint32_t dst = IntRegMap[instr.dst];
+	uint32_t imm = instr.getImm32();
+
+	constexpr uint32_t tmp_reg = 21;
+
+	if (src != dst) // offset = (src + imm), limited to the L1 or L2 range depending on getModMem()
 	{
-		const uint32_t imm32 = instr.getImm32();
-		if (static_cast<int32_t>(imm32) < 0)
-		{
-			// movn x21, ~imm32 (16 high bits)
-			emit32(ARMV8A::MOVN | 21 | (1 << 21) | ((~imm32 >> 16) << 5), code, k);
-		}
-		else
-		{
-			// movz x21, imm32 (16 high bits)
-			emit32(ARMV8A::MOVZ | 21 | (1 << 21) | ((imm32 >> 16) << 5), code, k);
-		}
-		// movk x21, imm32 (16 low bits)
-		emit32(ARMV8A::MOVK | 21 | ((imm32 & 0xFFFF) << 5), code, k);
-		// add dst, x21
-		emit32(ARMV8A::ADD | dst | (dst << 5) | (21 << 16), code, k);
+		imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); // keep only the offset bits of imm; the sum is masked again by the AND emitted below
+		emitAddImmediate(tmp_reg, src, imm, code, k);
+
+		constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); // and tmp_reg, tmp_reg, #mask - the mask length field at bit 10 is filled in per scratchpad level below
+		constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10); // NOTE(review): mask is presumably (L1 size - 1) & ~7, i.e. an 8-byte aligned offset - verify against the bitmask-immediate encoding
+		constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10); // same as above for the L2 size
+
+		emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
+
+		// ldr tmp_reg, [x2, tmp_reg]
+		emit32(0xf8606840 | tmp_reg | (tmp_reg << 16), code, k);
 	}
+	else // src == dst: the offset is the constant imm & ScratchpadL3Mask and does not depend on any register
+	{
+		imm = (imm & ScratchpadL3Mask) >> 3; // kept as an index of 8-byte words; the load below scales it back with lsl 3
+		emitMovImmediate(tmp_reg, imm, code, k);
+
+		// ldr tmp_reg, [x2, tmp_reg, lsl 3]
+		emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
+	}
+
+	// add dst, dst, tmp_reg
+	emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k);
 
 	codePos = k;
 }
--- a/src/jit_compiler_a64.hpp
+++ b/src/jit_compiler_a64.hpp
@ -1,5 +1,6 @@
 /*
 Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>

 All rights reserved.

@ -85,7 +86,11 @@ namespace randomx {
 			codePos += sizeof(val);
 		}

+		void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos); // emits code that loads the 32-bit immediate imm into register dst and advances codePos
+		void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos); // emits code for dst = src + imm and advances codePos
+
 		void h_IADD_RS(Instruction&, int, uint32_t&);
+		void h_IADD_M(Instruction&, int, uint32_t&); // code generator for the IADD_M instruction
 		void h_NOP(Instruction&, int, uint32_t&);
 	};
 }
--- a/src/jit_compiler_a64_static.S
+++ b/src/jit_compiler_a64_static.S
@ -1,3 +1,30 @@
+# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+# Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 	* Redistributions of source code must retain the above copyright
+# 	  notice, this list of conditions and the following disclaimer.
+# 	* Redistributions in binary form must reproduce the above copyright
+# 	  notice, this list of conditions and the following disclaimer in the
+# 	  documentation and/or other materials provided with the distribution.
+# 	* Neither the name of the copyright holder nor the
+# 	  names of its contributors may be used to endorse or promote products
+# 	  derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 	.arch armv8-a
 	.text
 	.global	randomx_program_aarch64
--- a/src/jit_compiler_a64_static.hpp
+++ b/src/jit_compiler_a64_static.hpp
@ -1,5 +1,6 @@
 /*
 Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>

 All rights reserved.