Code cleanup

5 years ago · 258fa170fe
parent ffdeca4f67
commit 258fa170fe
3 changed files with 34 additions and 16 deletions
--- a/src/jit_compiler_a64.cpp
+++ b/src/jit_compiler_a64.cpp
@ -85,7 +85,6 @@ JitCompilerA64::JitCompilerA64()
 {
 	memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
 	memcpy(code, (void*) randomx_program_aarch64, CodeSize);
-	enableAll();
 }

 JitCompilerA64::~JitCompilerA64()
@ -93,6 +92,16 @@ JitCompilerA64::~JitCompilerA64()
 	freePagedMemory(code, CodeSize + CalcDatasetItemSize);
 }

+void JitCompilerA64::enableWriting()  // Applies setPagesRW to the entire buffer pointed to by `code`.
+{
+	setPagesRW(code, CodeSize + CalcDatasetItemSize);  // same pointer/size pair the destructor hands to freePagedMemory; setPagesRW is defined elsewhere (presumably read+write without execute -- confirm)
+}
+
+void JitCompilerA64::enableExecution()  // Applies setPagesRX to the entire buffer pointed to by `code`.
+{
+	setPagesRX(code, CodeSize + CalcDatasetItemSize);  // same pointer/size pair the destructor hands to freePagedMemory; setPagesRX is defined elsewhere (presumably read+execute without write -- confirm)
+}
+
 void JitCompilerA64::enableAll()
 {
 	setPagesRWX(code, CodeSize + CalcDatasetItemSize);
@ -211,18 +220,23 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
 {
 	uint32_t codePos = CodeSize;

-	ptrdiff_t nBytes = ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch) - ((uint8_t*)randomx_calc_dataset_item_aarch64);
-	memcpy(code + codePos, (uint8_t*)randomx_calc_dataset_item_aarch64, nBytes);
-	codePos += nBytes;
+	uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64;
+	uint8_t* p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_prefetch;
+	memcpy(code + codePos, p1, p2 - p1);
+	codePos += p2 - p1;

 	num32bitLiterals = 64;
 	constexpr uint32_t tmp_reg = 12;

 	for (size_t i = 0; i < N; ++i)
 	{
-		nBytes = ((uint8_t*)randomx_calc_dataset_item_aarch64_mix) - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch);
-		memcpy(code + codePos, (uint8_t*)randomx_calc_dataset_item_aarch64_prefetch, nBytes);
-		codePos += nBytes;
+		// and x11, x10, CacheSize / CacheLineSize - 1
+		emit32(0x92400000 | 11 | (10 << 5) | ((Log2(CacheSize / CacheLineSize) - 1) << 10), code, codePos);
+
+		p1 = ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch) + 4;
+		p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix;
+		memcpy(code + codePos, p1, p2 - p1);
+		codePos += p2 - p1;

 		SuperscalarProgram& prog = programs[i];
 		const size_t progSize = prog.getSize();
@ -298,17 +312,19 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
 			}
 		}

-		nBytes = ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result) - ((uint8_t*)randomx_calc_dataset_item_aarch64_mix);
-		memcpy(code + codePos, (uint8_t*)randomx_calc_dataset_item_aarch64_mix, nBytes);
-		codePos += nBytes;
+		p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix;
+		p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result;
+		memcpy(code + codePos, p1, p2 - p1);
+		codePos += p2 - p1;

 		// Update registerValue
 		emit32(ARMV8A::MOV_REG | 10 | (prog.getAddressRegister() << 16), code, codePos);
 	}

-	nBytes = ((uint8_t*)randomx_calc_dataset_item_aarch64_end) - ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
-	memcpy(code + codePos, (uint8_t*)randomx_calc_dataset_item_aarch64_store_result, nBytes);
-	codePos += nBytes;
+	p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result;
+	p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_end;
+	memcpy(code + codePos, p1, p2 - p1);
+	codePos += p2 - p1;

 #ifdef __GNUC__
 	__builtin___clear_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
--- a/src/jit_compiler_a64.hpp
+++ b/src/jit_compiler_a64.hpp
@ -62,8 +62,8 @@ namespace randomx {
 		uint8_t* getCode() { return code; }
 		size_t getCodeSize();

-		void enableWriting() {}
-		void enableExecution() {}
+		void enableWriting();
+		void enableExecution();
 		void enableAll();

 	private:
--- a/src/jit_compiler_a64_static.S
+++ b/src/jit_compiler_a64_static.S
@ -337,6 +337,7 @@ randomx_program_aarch64_xor_with_dataset_line:
 	eor	x15, x15, x19

 randomx_program_aarch64_update_spMix1:
+	# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1"
 	eor	x10, x0, x0

 	# Store integer registers to scratchpad (spAddr1)
@ -534,7 +535,8 @@ superscalarAdd7: .quad 9549104520008361294
 # Prefetch -> SuperScalar hash -> Mix will be repeated N times

 randomx_calc_dataset_item_aarch64_prefetch:
-	and	x11, x10, 4194303
+	# Actual mask will be inserted by JIT compiler
+	and	x11, x10, 1
 	add	x11, x8, x11, lsl 6
 	prfm	pldl2strm, [x11]