diff --git a/src/main.cpp b/src/main.cpp
index ac63dce..b6efceb 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,6 +22,7 @@ along with RandomX. If not, see.
#include "AssemblyGeneratorX86.hpp"
#include "Stopwatch.hpp"
#include "blake2/blake2.h"
+#include "blake2/endian.h"
#include
#include
#include
@@ -125,12 +126,11 @@ void printUsage(const char* executable) {
}
template
-void generateAsm(int nonce) {
+void generateAsm(uint32_t nonce) {
alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
- int* noncePtr = (int*)(blockTemplate + 39);
- *noncePtr = nonce;
+ store32(blockTemplate + 39, nonce);
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad);
@@ -142,12 +142,11 @@ void generateAsm(int nonce) {
}
template
-void generateNative(int nonce) {
+void generateNative(uint32_t nonce) {
alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
- int* noncePtr = (int*)(blockTemplate + 39);
- *noncePtr = nonce;
+ store32(blockTemplate + 39, nonce);
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad);
@@ -161,16 +160,16 @@ void generateNative(int nonce) {
}
template
-void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
+void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, uint8_t* scratchpad) {
alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
- int* noncePtr = (int*)(blockTemplate + 39);
- int nonce = atomicNonce.fetch_add(1);
+ void* noncePtr = blockTemplate + 39;
+ auto nonce = atomicNonce.fetch_add(1);
while (nonce < noncesCount) {
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
- *noncePtr = nonce;
+ store32(noncePtr, nonce);
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad);
vm->resetRoundingMode();
@@ -242,7 +241,7 @@ int main(int argc, char** argv) {
if (softAes)
std::cout << "Using software AES." << std::endl;
- std::atomic atomicNonce(0);
+ std::atomic atomicNonce(0);
AtomicHash result;
std::vector vms;
std::vector threads;
diff --git a/src/program.inc b/src/program.inc
index 8a18fe4..4171a54 100644
--- a/src/program.inc
+++ b/src/program.inc
@@ -471,9 +471,9 @@
mov eax, r13d
and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm14
+ andps xmm12, xmm13
+ orps xmm12, xmm14
divpd xmm5, xmm12
- maxpd xmm5, xmm13
; IXOR_M r2, L1[r5]
mov eax, r13d
and eax, 16376
@@ -685,9 +685,9 @@
mov eax, r10d
and eax, 262136
cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm14
+ andps xmm12, xmm13
+ orps xmm12, xmm14
divpd xmm4, xmm12
- maxpd xmm4, xmm13
; IMUL_9C r4, -1849458799
lea r12, [r12+r12*8-1849458799]
; IADD_RC r1, r4, -651820510
@@ -707,9 +707,9 @@
mov eax, r8d
and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm14
+ andps xmm12, xmm13
+ orps xmm12, xmm14
divpd xmm7, xmm12
- maxpd xmm7, xmm13
; IADD_M r3, L1[r7]
mov eax, r15d
and eax, 16376