Implemented cache shift

Fixed assembly code generator. Fixed an error in the interpreter. Updated specification: sign-extended immediates.
6 years ago · 6332831ec1
parent 4fc4b840f5
commit 6332831ec1
11 changed files with 121 additions and 69 deletions
--- a/doc/isa.md
+++ b/doc/isa.md
@ -33,7 +33,7 @@ The first operand is read from memory. The location is determined by the `loc(a)

 Flag `reg(a)` encodes an integer register `r0`-`r7`.  The read address is calculated as:
 ```
-reg(a)[31:0] = reg(a)[31:0] XOR addr0
+reg(a) = reg(a) XOR signExtend(addr0)
 addr(a) = reg(a)[W-1:0]
 ```
 `W` is the address width from the above table. For reading from the scratchpad, `addr(a)` is multiplied by 8 for 8-byte aligned access.
@ -54,7 +54,7 @@ The second operand is loaded either from a register or from an immediate value e

 `imm0` is an 8-bit immediate value, which is used for shift and rotate ALU operations.

-`imm1` is a 32-bit immediate value which is used for most operations. For operands larger than 32 bits, the value is zero-extended for unsigned instructions and sign-extended for signed instructions. For FPU instructions, the value is considered a signed 32-bit integer and then converted to a double precision floating point format.
+`imm1` is a 32-bit immediate value which is used for most operations. For operands larger than 32 bits, the value is sign-extended. For FPU instructions, the value is considered a signed 32-bit integer and then converted to a double precision floating point format.

 #### Operand C
 The third operand is the location where the result is stored.
@ -80,7 +80,7 @@ addr(c) = 8 * (addr1 XOR reg(c)[31:0])[W-1:0]
 An 8-bit immediate value that is used as the shift/rotate count by some ALU instructions and as the jump offset of the CALL instruction.

 #### addr0
-A 32-bit address mask that is used to calculate the read address for the A operand.
+A 32-bit address mask that is used to calculate the read address for the A operand. It's sign-extended to 64 bits.

 #### addr1
 A 32-bit address mask that is used to calculate the write address for the C operand. `addr1` is equal to `imm1`.
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@ -16,7 +16,7 @@ GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
-
+//#define TRACE
 #include "AssemblyGeneratorX86.hpp"
 #include "Pcg32.hpp"
 #include "common.hpp"
@ -164,6 +164,9 @@ namespace RandomX {
 			asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
 			asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
 			asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
+			if (trace) {
+				asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
+			}
 			return;

 		case 1:
@ -174,10 +177,16 @@ namespace RandomX {
 			asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
 			asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
 			asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
+			if (trace) {
+				asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
+			}
 			return;

 		default:
 			asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
+			if (trace) {
+				asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
+			}
 		}
 	}

@ -189,7 +198,7 @@ namespace RandomX {
 			asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
 			asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
 			asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
-			return;
+			break;

 		case 1:
 		case 2:
@ -198,10 +207,14 @@ namespace RandomX {
 			asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
 			asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
 			asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
-			return;
+			break;

 		default:
 			asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
+			break;
+		}
+		if (trace) {
+			asmCode << "\tmovd qword ptr [rsi + rdi * 8 + 262144], xmm0" << std::endl;
 		}
 	}

@ -466,8 +479,11 @@ namespace RandomX {
 			asmCode << "\tjmp rx_i_" << wrapi(i + 1) << std::endl;
 			asmCode << "taken_call_" << i << ":" << std::endl;
 		}
+		if (trace) {
+			asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
+		}
 		asmCode << "\tpush rax" << std::endl;
-		asmCode << "\tcall rx_i_" << wrapi(i + (instr.imm0 & 127) + 1) << std::endl;
+		asmCode << "\tcall rx_i_" << wrapi(i + (instr.imm0 & 127) + 2) << std::endl;
 	}

 	void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@ -43,5 +43,11 @@ namespace RandomX {
 	void CompiledVirtualMachine::execute() {
 		FPINIT();
 		executeProgram(reg, mem, readDataset, scratchpad);
+#ifdef TRACE
+		for (int32_t i = InstructionCount - 1; i >= 0; --i) {
+			std::cout << std::hex << tracepad[i].u64 << std::endl;
+		}
+#endif
+
 	}
 }
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@ -18,7 +18,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */

 #pragma once
-
+//#define TRACE
 #include "VirtualMachine.hpp"
 #include "Program.hpp"
 #include <sstream>
@ -30,5 +30,9 @@ namespace RandomX {
 		CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {}
 		virtual void initializeProgram(const void* seed) override;
 		virtual void execute() override;
+	private:
+#ifdef TRACE
+		convertible_t tracepad[InstructionCount];
+#endif
 	};
 }
--- a/src/Instruction.hpp
+++ b/src/Instruction.hpp
@ -34,7 +34,7 @@ namespace RandomX {
 		uint8_t locc;
 		uint8_t regc;
 		uint8_t imm0;
-		uint32_t addr0;
+		int32_t addr0;
 		union {
 			uint32_t addr1;
 			int32_t imm1;
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@ -65,7 +65,7 @@ namespace RandomX {

 	convertible_t InterpretedVirtualMachine::loada(Instruction& inst) {
 		convertible_t& rega = reg.r[inst.rega % RegistersCount];
-		rega.u64 ^= inst.addr0;
+		rega.i64 ^= inst.addr0; //sign-extend addr0
 		addr_t addr = rega.u32;
 		switch (inst.loca & 7)
 		{
@ -86,7 +86,7 @@ namespace RandomX {
 	}

 	convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
-		switch (inst.loca & 7)
+		switch (inst.locb & 7)
 		{
 		case 0:
 		case 1:
@ -98,7 +98,7 @@ namespace RandomX {
 		case 6:
 		case 7:
 			convertible_t temp;
-			temp.i64 = inst.imm1;
+			temp.i64 = inst.imm1; //sign-extend imm1
 			return temp;
 		}
 	}
@ -182,13 +182,13 @@ namespace RandomX {
 	}

 #define ALU_RETIRE(x) x(a, b, c); \
-	if(trace) std::cout << std::hex << a.u64 << " " << b.u64 << " " << c.u64 << std::endl;
+	if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;

 #define FPU_RETIRE(x) x(a, b, c); \
 	if(trace) { \
 		convertible_t bc; \
 		bc.f64 = b; \
-		std::cout << std::hex << a.u64 << " " << bc.u64 << " " << c.u64 << std::endl; \
+		std::cout << std::hex << /*a.u64 << " " << bc.u64 << " " <<*/ c.u64 << std::endl; \
 	} \
 	if(fpuCheck) { \
 		convertible_t bc; \
@ -206,7 +206,7 @@ namespace RandomX {
 	}

 #define FPU_RETIRE_NB(x) x(a, b, c); \
-	if(trace) std::cout << std::hex << a.u64 << " " << c.u64 << std::endl;
+	if(trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;

 #define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
 	convertible_t a = loada(inst); \
@ -277,9 +277,11 @@ namespace RandomX {
 			stackPush(pc);
 			pc += (inst.imm0 & 127) + 1;
 			pc = pc % ProgramLength;
+			if (trace) std::cout << std::hex << a.u64 << std::endl;
 		}
 		else {
 			c.u64 = a.u64;
+			if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
 		}
 	}

@ -296,6 +298,7 @@ namespace RandomX {
 		else {
 			c.u64 = a.u64;
 		}
+		if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
 	}

 #include "instructionWeights.hpp"
--- a/src/VirtualMachine.cpp
+++ b/src/VirtualMachine.cpp
@ -58,16 +58,14 @@ namespace RandomX {
 	void VirtualMachine::initializeScratchpad(uint32_t index) {
 		if (lightClient) {
 			if (softAes) {
-				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys);
-				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys);
-				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys);
-				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys);
+				for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
+					initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
+				}
 			}
 			else {
-				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys);
-				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys);
-				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys);
-				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys);
+				for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
+					initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
+				}
 			}
 		}
 		else {
--- a/src/common.hpp
+++ b/src/common.hpp
@ -35,6 +35,7 @@ namespace RandomX {
 	constexpr int SeedSize = 32;

 	constexpr int CacheBlockSize = 1024;
+	constexpr int CacheShift = CacheBlockSize / 2;
 	constexpr int BlockExpansionRatio = 64;
 	constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize;
 	constexpr uint32_t DatasetBlockCount = 65536;
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@ -26,6 +26,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <new>
 #include <algorithm>
 #include <stdexcept>
+#include <cstring>

 #if defined(_MSC_VER)
 #if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
@ -237,7 +238,7 @@ namespace RandomX {
 		convertible_t data;
 		auto blockNumber = memory.ma / DatasetBlockSize;
 		if (memory.lcm->blockNumber != blockNumber) {
-			initBlock<softAes>(memory.lcm->cache, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
+			initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
 			memory.lcm->blockNumber = blockNumber;
 		}
 		data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
@ -263,15 +264,16 @@ namespace RandomX {
 		if (dataset == nullptr) {
 			throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
 		}
-		uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i));
-		if (dataset == nullptr) {
+		uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
+		if (cache == nullptr) {
 			throw std::bad_alloc();
 		}
 		initializeCache(seed, SeedSize, cache);
+		memcpy(cache + CacheSize, cache, CacheShift);
 		alignas(16) __m128i keys[10];
 		expandAesKeys<softAes>((const __m128i*)seed, keys);
 		for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
-			initBlock<softAes>(cache, dataset + i * DatasetBlockSize, i, keys);
+			initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
 		}
 		_mm_free(cache);
 	}
@ -285,11 +287,12 @@ namespace RandomX {
 	template<bool softAes>
 	void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
 		lcm = new LightClientMemory();
-		lcm->cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i));
+		lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
 		if (lcm->cache == nullptr) {
 			throw std::bad_alloc();
 		}
 		initializeCache(seed, SeedSize, lcm->cache);
+		memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
 		expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
 		lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
 		if (lcm->block == nullptr) {
@ -303,4 +306,4 @@ namespace RandomX {

 	template
 		void datasetInitLight<true>(const void*, LightClientMemory*&);
-}
+}
--- a/src/executeProgram-win64.asm
+++ b/src/executeProgram-win64.asm
@ -151,19 +151,40 @@ rx_finish:
 	; return
 	ret	0

-rx_read_dataset:
+rx_read_dataset_light:
 	push rdx
 	push r9
 	push r10
 	push r11
-	sub rsp, 32
+	movd qword ptr [rsp - 8], xmm1
+	movd qword ptr [rsp - 16], xmm2
+	sub rsp, 48
 	call qword ptr [rbp]
-	add	rsp, 32
+	add rsp, 48
+	movd xmm2, qword ptr [rsp - 16]
+	movd xmm1, qword ptr [rsp - 8]
 	pop r11
 	pop r10
 	pop r9
 	pop rdx
 	ret 0
+
+rx_read_dataset:
+	mov r8d, dword ptr [rdx]	; ma
+	mov rax, qword ptr [rdx+8]	; dataset
+	mov rax, qword ptr [rax+r8]
+	add dword ptr [rdx], 8
+	mov r8d, dword ptr [rdx+4]	; mx
+	xor ecx, r8d
+	mov dword ptr [rdx+4], ecx
+	test ecx, 0FFF8h
+	jne short rx_read_dataset_full_ret
+	and ecx, -8
+	mov dword ptr [rdx], ecx
+	mov r8, qword ptr [rdx+8]
+	prefetcht0 byte ptr [r8+rcx]
+rx_read_dataset_full_ret:
+	ret 0
 executeProgram ENDP

 END
--- a/src/program.inc
+++ b/src/program.inc
@ -123,7 +123,7 @@ rx_i_6: ;CALL
 	jmp rx_i_7
 taken_call_6:
 	push rax
-	call rx_i_50
+	call rx_i_51

 rx_i_7: ;FPDIV
 	dec edi
@ -538,7 +538,7 @@ rx_i_38: ;CALL
 	jmp rx_i_39
 taken_call_38:
 	push rax
-	call rx_i_111
+	call rx_i_112

 rx_i_39: ;CALL
 	dec edi
@ -553,7 +553,7 @@ rx_i_39: ;CALL
 	jmp rx_i_40
 taken_call_39:
 	push rax
-	call rx_i_61
+	call rx_i_62

 rx_i_40: ;FPMUL
 	dec edi
@ -621,7 +621,7 @@ rx_i_44: ;CALL
 	jmp rx_i_45
 taken_call_44:
 	push rax
-	call rx_i_93
+	call rx_i_94

 rx_i_45: ;FPROUND
 	dec edi
@ -726,7 +726,7 @@ rx_i_51: ;CALL
 	jmp rx_i_52
 taken_call_51:
 	push rax
-	call rx_i_134
+	call rx_i_135

 rx_i_52: ;FPDIV
 	dec edi
@ -943,7 +943,7 @@ rx_i_65: ;CALL
 	jmp rx_i_66
 taken_call_65:
 	push rax
-	call rx_i_123
+	call rx_i_124

 rx_i_66: ;FPSUB
 	dec edi
@ -996,7 +996,7 @@ rx_i_69: ;CALL
 	jmp rx_i_70
 taken_call_69:
 	push rax
-	call rx_i_132
+	call rx_i_133

 rx_i_70: ;FPDIV
 	dec edi
@ -1022,7 +1022,7 @@ rx_i_71: ;CALL
 	jmp rx_i_72
 taken_call_71:
 	push rax
-	call rx_i_82
+	call rx_i_83

 rx_i_72: ;FPADD
 	dec edi
@ -1093,7 +1093,7 @@ rx_i_76: ;CALL
 	jmp rx_i_77
 taken_call_76:
 	push rax
-	call rx_i_194
+	call rx_i_195

 rx_i_77: ;FPDIV
 	dec edi
@ -1138,7 +1138,7 @@ rx_i_79: ;CALL
 	jmp rx_i_80
 taken_call_79:
 	push rax
-	call rx_i_205
+	call rx_i_206

 rx_i_80: ;FPADD
 	dec edi
@ -1208,7 +1208,7 @@ rx_i_83: ;CALL
 	jmp rx_i_84
 taken_call_83:
 	push rax
-	call rx_i_96
+	call rx_i_97

 rx_i_84: ;ROR_64
 	dec edi
@ -1249,7 +1249,7 @@ rx_i_86: ;CALL
 	jmp rx_i_87
 taken_call_86:
 	push rax
-	call rx_i_148
+	call rx_i_149

 rx_i_87: ;DIV_64
 	dec edi
@ -1376,7 +1376,7 @@ rx_i_96: ;CALL
 	mov ecx, ebx
 	call rx_read_dataset
 	push rax
-	call rx_i_173
+	call rx_i_174

 rx_i_97: ;ROR_64
 	dec edi
@ -1402,7 +1402,7 @@ rx_i_98: ;CALL
 	jmp rx_i_99
 taken_call_98:
 	push rax
-	call rx_i_160
+	call rx_i_161

 rx_i_99: ;MUL_64
 	dec edi
@ -1567,7 +1567,7 @@ rx_i_111: ;CALL
 	and eax, 2047
 	mov rax, qword ptr [rsi + rax * 8]
 	push rax
-	call rx_i_146
+	call rx_i_147

 rx_i_112: ;FPMUL
 	dec edi
@ -1617,7 +1617,7 @@ rx_i_115: ;CALL
 	mov ecx, ebx
 	call rx_read_dataset
 	push rax
-	call rx_i_215
+	call rx_i_216

 rx_i_116: ;ADD_32
 	dec edi
@ -1778,7 +1778,7 @@ rx_i_126: ;CALL
 	jmp rx_i_127
 taken_call_126:
 	push rax
-	call rx_i_195
+	call rx_i_196

 rx_i_127: ;ADD_64
 	dec edi
@ -1806,7 +1806,7 @@ rx_i_128: ;CALL
 	jmp rx_i_129
 taken_call_128:
 	push rax
-	call rx_i_240
+	call rx_i_241

 rx_i_129: ;MUL_32
 	dec edi
@ -1863,7 +1863,7 @@ rx_i_133: ;CALL
 	and eax, 2047
 	mov rax, qword ptr [rsi + rax * 8]
 	push rax
-	call rx_i_157
+	call rx_i_158

 rx_i_134: ;AND_64
 	dec edi
@ -2049,7 +2049,7 @@ rx_i_146: ;CALL
 	jmp rx_i_147
 taken_call_146:
 	push rax
-	call rx_i_260
+	call rx_i_261

 rx_i_147: ;IMUL_32
 	dec edi
@ -2277,7 +2277,7 @@ rx_i_163: ;CALL
 	jmp rx_i_164
 taken_call_163:
 	push rax
-	call rx_i_184
+	call rx_i_185

 rx_i_164: ;ADD_32
 	dec edi
@ -2430,7 +2430,7 @@ rx_i_173: ;CALL
 	jmp rx_i_174
 taken_call_173:
 	push rax
-	call rx_i_200
+	call rx_i_201

 rx_i_174: ;FPSQRT
 	dec edi
@ -2593,7 +2593,7 @@ rx_i_185: ;CALL
 	jmp rx_i_186
 taken_call_185:
 	push rax
-	call rx_i_214
+	call rx_i_215

 rx_i_186: ;FPADD
 	dec edi
@ -2647,7 +2647,7 @@ rx_i_189: ;CALL
 	jmp rx_i_190
 taken_call_189:
 	push rax
-	call rx_i_249
+	call rx_i_250

 rx_i_190: ;XOR_64
 	dec edi
@ -3209,7 +3209,7 @@ rx_i_230: ;CALL
 	jmp rx_i_231
 taken_call_230:
 	push rax
-	call rx_i_331
+	call rx_i_332

 rx_i_231: ;FPMUL
 	dec edi
@ -3323,7 +3323,7 @@ rx_i_237: ;CALL
 	jmp rx_i_238
 taken_call_237:
 	push rax
-	call rx_i_271
+	call rx_i_272

 rx_i_238: ;FPDIV
 	dec edi
@ -3379,7 +3379,7 @@ rx_i_241: ;CALL
 	mov ecx, r15d
 	call rx_read_dataset
 	push rax
-	call rx_i_298
+	call rx_i_299

 rx_i_242: ;ROR_64
 	dec edi
@ -3597,7 +3597,7 @@ rx_i_257: ;CALL
 	jmp rx_i_258
 taken_call_257:
 	push rax
-	call rx_i_370
+	call rx_i_371

 rx_i_258: ;FPADD
 	dec edi
@ -3776,7 +3776,7 @@ rx_i_270: ;CALL
 	jmp rx_i_271
 taken_call_270:
 	push rax
-	call rx_i_298
+	call rx_i_299

 rx_i_271: ;ROL_64
 	dec edi
@ -3868,7 +3868,7 @@ rx_i_277: ;CALL
 	and eax, 2047
 	mov rax, qword ptr [rsi + rax * 8]
 	push rax
-	call rx_i_375
+	call rx_i_376

 rx_i_278: ;FPADD
 	dec edi
@ -4548,7 +4548,7 @@ rx_i_326: ;CALL
 	jmp rx_i_327
 taken_call_326:
 	push rax
-	call rx_i_346
+	call rx_i_347

 rx_i_327: ;MUL_64
 	dec edi
@ -4922,7 +4922,7 @@ rx_i_354: ;CALL
 	jmp rx_i_355
 taken_call_354:
 	push rax
-	call rx_i_355
+	call rx_i_356

 rx_i_355: ;MUL_64
 	dec edi
@ -5659,7 +5659,7 @@ rx_i_409: ;CALL
 	jmp rx_i_410
 taken_call_409:
 	push rax
-	call rx_i_497
+	call rx_i_498

 rx_i_410: ;FPDIV
 	dec edi
@ -5866,7 +5866,7 @@ rx_i_425: ;CALL
 	jmp rx_i_426
 taken_call_425:
 	push rax
-	call rx_i_34
+	call rx_i_35

 rx_i_426: ;IMUL_32
 	dec edi
@ -6556,7 +6556,7 @@ rx_i_476: ;CALL
 	and eax, 2047
 	mov rax, qword ptr [rsi + rax * 8]
 	push rax
-	call rx_i_11
+	call rx_i_12

 rx_i_477: ;MUL_64
 	dec edi
@ -6580,7 +6580,7 @@ rx_i_478: ;CALL
 	jmp rx_i_479
 taken_call_478:
 	push rax
-	call rx_i_72
+	call rx_i_73

 rx_i_479: ;FPSUB
 	dec edi
@ -6721,7 +6721,7 @@ rx_i_489: ;CALL
 	jmp rx_i_490
 taken_call_489:
 	push rax
-	call rx_i_61
+	call rx_i_62

 rx_i_490: ;ADD_64
 	dec edi