Random dataset accesses - asm only

Initial support for large pages
pull/20/head
tevador 6 years ago
parent bf8397b08d
commit 619bee5418

@ -28,6 +28,11 @@ namespace RandomX {
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regMx = "edi";
static const char* regIc = "ebp";
static const char* regStackBeginAddr = "rbx";
static const char* regScratchpadAddr = "rsi";
void AssemblyGeneratorX86::generateProgram(const void* seed) { void AssemblyGeneratorX86::generateProgram(const void* seed) {
asmCode.str(std::string()); //clear asmCode.str(std::string()); //clear
Pcg32 gen(seed); Pcg32 gen(seed);
@ -48,7 +53,7 @@ namespace RandomX {
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl; asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
asmCode << "\tdec edi" << std::endl; asmCode << "\tdec " << regIc << std::endl;
asmCode << "\tjz rx_finish" << std::endl; asmCode << "\tjz rx_finish" << std::endl;
auto generator = engine[instr.opcode]; auto generator = engine[instr.opcode];
(this->*generator)(instr, i); (this->*generator)(instr, i);
@ -56,54 +61,34 @@ namespace RandomX {
void AssemblyGeneratorX86::genar(Instruction& instr) { void AssemblyGeneratorX86::genar(Instruction& instr) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
switch (instr.loca & 7) asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
switch (instr.loca & 3)
{ {
case 0: case 0:
case 1: case 1:
case 2: case 2:
case 3: asmCode << "\tcall rx_readint_l1" << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; return;
asmCode << "\tcall rx_read_dataset_r" << std::endl; default: //3
return; asmCode << "\tcall rx_readint_l2" << std::endl;
return;
case 4:
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
return;
default:
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
return;
} }
} }
void AssemblyGeneratorX86::genaf(Instruction& instr) { void AssemblyGeneratorX86::genaf(Instruction& instr) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
switch (instr.loca & 7) asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
switch (instr.loca & 3)
{ {
case 0: case 0:
case 1: case 1:
case 2: case 2:
case 3: asmCode << "\tcall rx_readfloat_l1" << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; return;
asmCode << "\tcall rx_read_dataset_f" << std::endl; default: //3
return; asmCode << "\tcall rx_readfloat_l2" << std::endl;
return;
case 4:
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
return;
default:
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
return;
} }
} }
@ -169,9 +154,9 @@ namespace RandomX {
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
if (trace) { if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
} }
return; return;
@ -182,31 +167,31 @@ namespace RandomX {
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
if (trace) { if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
} }
return; return;
default: default:
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl; asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
if (trace) { if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
} }
return;
} }
} }
void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) { void AssemblyGeneratorX86::gencf(Instruction& instr) {
if(!alwaysLow) asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl; const char* store = (instr.locc & 8) ? "movhpd" : "movlpd";
const char* store = (!alwaysLow && (instr.locc & 8)) ? "movhpd" : "movlpd";
switch (instr.locc & 7) switch (instr.locc & 7)
{ {
case 4: case 4:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break; break;
case 5: case 5:
@ -215,11 +200,11 @@ namespace RandomX {
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break; break;
} }
if (trace) { if (trace) {
asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl; asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
} }
} }
@ -454,15 +439,14 @@ namespace RandomX {
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) { void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
genar(instr); genar(instr);
asmCode << "\tmov rcx, rax" << std::endl; //asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tshl eax, 13" << std::endl; asmCode << "\tshl eax, 13" << std::endl;
asmCode << "\tand rcx, -2048" << std::endl; //asmCode << "\tand rcx, -2048" << std::endl;
asmCode << "\tand eax, 24576" << std::endl; asmCode << "\tand eax, 24576" << std::endl;
asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl; //asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
asmCode << "\tor eax, 40896" << std::endl; asmCode << "\tor eax, 40896" << std::endl;
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl; asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl; asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
gencf(instr, true);
} }
static inline const char* jumpCondition(Instruction& instr, bool invert = false) { static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
@ -496,7 +480,7 @@ namespace RandomX {
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl; asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
asmCode << "taken_call_" << i << ":" << std::endl; asmCode << "taken_call_" << i << ":" << std::endl;
if (trace) { if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
} }
asmCode << "\tpush rax" << std::endl; asmCode << "\tpush rax" << std::endl;
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl; asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
@ -504,7 +488,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) { void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
genar(instr); genar(instr);
asmCode << "\tcmp rsp, rbp" << std::endl; asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
asmCode << "\tje short not_taken_ret_" << i << std::endl; asmCode << "\tje short not_taken_ret_" << i << std::endl;
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl; asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
gencr(instr); gencr(instr);

@ -45,7 +45,7 @@ namespace RandomX {
void genbr132(Instruction&); void genbr132(Instruction&);
void genbf(Instruction&, const char*); void genbf(Instruction&, const char*);
void gencr(Instruction&); void gencr(Instruction&);
void gencf(Instruction&, bool); void gencf(Instruction&);
void generateCode(Instruction&, int); void generateCode(Instruction&, int);

@ -47,8 +47,8 @@ namespace RandomX {
} }
void CompiledVirtualMachine::execute() { void CompiledVirtualMachine::execute() {
//executeProgram(reg, mem, scratchpad, readDataset); executeProgram(reg, mem, scratchpad, readDataset);
compiler.getProgramFunc()(reg, mem, scratchpad); //compiler.getProgramFunc()(reg, mem, scratchpad);
#ifdef TRACEVM #ifdef TRACEVM
for (int32_t i = InstructionCount - 1; i >= 0; --i) { for (int32_t i = InstructionCount - 1; i >= 0; --i) {
std::cout << std::hex << tracepad[i].u64 << std::endl; std::cout << std::hex << tracepad[i].u64 << std::endl;

@ -40,6 +40,6 @@ namespace RandomX {
DatasetReadFunc readDataset; DatasetReadFunc readDataset;
alignas(16) RegisterFile reg; alignas(16) RegisterFile reg;
MemoryRegisters mem; MemoryRegisters mem;
alignas(16) convertible_t scratchpad[ScratchpadLength]; alignas(64) convertible_t scratchpad[ScratchpadLength];
}; };
} }

@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "dataset.hpp" #include "dataset.hpp"
#include "Pcg32.hpp" #include "Pcg32.hpp"
#include "Cache.hpp" #include "Cache.hpp"
#include "virtualMemory.hpp"
#if defined(__SSE2__) #if defined(__SSE2__)
#include <wmmintrin.h> #include <wmmintrin.h>
@ -161,12 +162,17 @@ namespace RandomX {
template template
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory); convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
void datasetAlloc(dataset_t& ds) { void datasetAlloc(dataset_t& ds, bool largePages) {
if (sizeof(size_t) <= 4) if (sizeof(size_t) <= 4)
throw std::runtime_error("Platform doesn't support enough memory for the dataset"); throw std::runtime_error("Platform doesn't support enough memory for the dataset");
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64); if (largePages) {
if (ds.dataset == nullptr) { ds.dataset = (uint8_t*)allocLargePagesMemory(DatasetSize);
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed."); }
else {
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, 64);
if (ds.dataset == nullptr) {
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
}
} }
} }

@ -35,7 +35,7 @@ namespace RandomX {
template<bool softAes> template<bool softAes>
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys); void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
void datasetAlloc(dataset_t& ds); void datasetAlloc(dataset_t& ds, bool largePages);
template<bool softAes> template<bool softAes>
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount); void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);

@ -15,19 +15,19 @@
;# You should have received a copy of the GNU General Public License ;# You should have received a copy of the GNU General Public License
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>. ;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
PUBLIC executeProgram _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
.code PUBLIC executeProgram
executeProgram PROC executeProgram PROC
; REGISTER ALLOCATION: ; REGISTER ALLOCATION:
; rax -> temporary ; rax -> temporary
; rbx -> MemoryRegisters& memory ; rbx -> beginning of VM stack
; rcx -> temporary ; rcx -> temporary
; rdx -> temporary ; rdx -> temporary
; rsi -> convertible_t& scratchpad ; rsi -> convertible_t& scratchpad
; rdi -> "ic" (instruction counter) ; rdi -> "mx"
; rbp -> beginning of VM stack ; rbp -> "ic"
; rsp -> end of VM stack ; rsp -> end of VM stack
; r8 -> "r0" ; r8 -> "r0"
; r9 -> "r1" ; r9 -> "r1"
@ -55,7 +55,8 @@ executeProgram PROC
; | saved registers ; | saved registers
; | ; |
; v ; v
; [rbp] RegisterFile& registerFile ; [rbx+8] RegisterFile& registerFile
; [rbx+0] uint8_t* dataset
; | ; |
; | ; |
; | VM stack ; | VM stack
@ -80,17 +81,18 @@ executeProgram PROC
movdqu xmmword ptr [rsp+0], xmm10 movdqu xmmword ptr [rsp+0], xmm10
; function arguments ; function arguments
push rcx ; RegisterFile& registerFile push rcx ; RegisterFile& registerFile
mov rbx, rdx ; MemoryRegisters& memory mov edi, dword ptr [rdx] ; "mx"
mov rsi, r8 ; convertible_t& scratchpad mov rax, qword ptr [rdx+8] ; uint8_t* dataset
push r9 push rax
mov rsi, r8 ; convertible_t* scratchpad
mov rbp, rsp ; beginning of VM stack mov rbx, rsp ; beginning of VM stack
mov rdi, 1048577 ; number of VM instructions to execute + 1 mov ebp, 524289 ; number of VM instructions to execute + 1
xorps xmm10, xmm10 xorps xmm10, xmm10
cmpeqpd xmm10, xmm10 cmpeqpd xmm10, xmm10
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
; reset rounding mode ; reset rounding mode
mov dword ptr [rsp-8], 40896 mov dword ptr [rsp-8], 40896
@ -162,7 +164,7 @@ executeProgram PROC
rx_finish: rx_finish:
; unroll the stack ; unroll the stack
mov rsp, rbp mov rsp, rbx
; save VM register values ; save VM register values
pop rcx pop rcx
@ -202,57 +204,103 @@ rx_finish:
pop rbx pop rbx
; return ; return
ret 0 ret
TransformAddress MACRO reg32, reg64
;# Transforms the address in the register so that the transformed address
;# lies in a different cache line than the original address (mod 2^N).
;# This is done to prevent a load-store dependency.
;# There are 3 different transformations that can be used: x -> 9*x+C, x -> x+C, x -> x^C
lea reg32, [reg64+reg64*8+127] ;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
;lea reg32, [reg64-128] ;# C = all except -7 to +7
;xor reg32, -8 ;# C = all except 0 to 7
ENDM
ReadMemoryRandom MACRO spmask, float
;# IN ecx = random 32-bit address
;# OUT rax = 64-bit integer return value
;# OUT xmm0 = 128-bit floating point return value
;# GLOBAL rbp = "ic" number of instructions until the end of the program
;# GLOBAL rbx = address of the dataset address
;# GLOBAL rsi = address of the scratchpad
;# GLOBAL rdi = "mx" random 32-bit dataset address
;# MODIFY rcx, rdx
LOCAL L_prefetch, L_read, L_return
mov eax, ebp
and al, 63
jz short L_prefetch ;# "ic" divisible by 64 -> prefetch
xor edx, edx
cmp al, 14
je short L_read ;# "ic" = 14 (mod 64) -> random read
cmovb edx, ecx ;# "ic" < 14 (mod 64) -> modify random read address
xor edi, edx
L_return:
and ecx, spmask ;# limit address to the specified scratchpad size
IF float
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
ELSE
mov rax, qword ptr [rsi+rcx*8]
ENDIF
ret
L_prefetch:
mov rax, qword ptr [rbx] ;# load the dataset address
and edi, -64 ;# align "mx" to the start of a cache line
prefetchnta byte ptr [rax+rdi]
jmp short L_return
L_read:
push rcx
TransformAddress ecx, rcx ;# TransformAddress function
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
call rx_read_dataset
pop rcx
jmp short L_return
ENDM
ALIGN 64
rx_readint_l1:
ReadMemoryRandom 2047, 0
ALIGN 64
rx_readint_l2:
ReadMemoryRandom 32767, 0
ALIGN 64
rx_readfloat_l1:
ReadMemoryRandom 2047, 1
ALIGN 64
rx_readfloat_l2:
ReadMemoryRandom 32767, 1
ALIGN 64
rx_read_dataset: rx_read_dataset:
push r8 ;# IN rcx = scratchpad index - must be divisible by 8
push r9 ;# GLOBAL rbx = address of the dataset address
push r10 ;# GLOBAL rsi = address of the scratchpad
push r11 ;# GLOBAL rdi = "mx" random 32-bit dataset address
mov rdx, rbx ;# MODIFY rax, rcx, rdx
movd qword ptr [rsp - 8], xmm1 mov rax, qword ptr [rbx] ;# load the dataset address
movd qword ptr [rsp - 16], xmm2 lea rcx, [rsi+rcx*8] ;# scratchpad cache line
sub rsp, 48 lea rax, [rax+rdi] ;# dataset cache line
call qword ptr [rbp] mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
add rsp, 48 xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
movd xmm2, qword ptr [rsp - 16] mov rdx, qword ptr [rax+8]
movd xmm1, qword ptr [rsp - 8] xor qword ptr [rcx+8], rdx
pop r11 mov rdx, qword ptr [rax+16]
pop r10 xor qword ptr [rcx+16], rdx
pop r9 mov rdx, qword ptr [rax+24]
pop r8 xor qword ptr [rcx+24], rdx
ret 0 mov rdx, qword ptr [rax+32]
xor qword ptr [rcx+32], rdx
rx_read_dataset_r: mov rdx, qword ptr [rax+40]
mov edx, dword ptr [rbx] ; ma xor qword ptr [rcx+40], rdx
mov rax, qword ptr [rbx+8] ; dataset mov rdx, qword ptr [rax+48]
mov rax, qword ptr [rax+rdx] xor qword ptr [rcx+48], rdx
add dword ptr [rbx], 8 mov rdx, qword ptr [rax+56]
xor ecx, dword ptr [rbx+4] ; mx xor qword ptr [rcx+56], rdx
mov dword ptr [rbx+4], ecx ret
test ecx, 0FFF8h
jne short rx_read_dataset_r_ret
and ecx, -8
mov dword ptr [rbx], ecx
mov rdx, qword ptr [rbx+8]
prefetcht0 byte ptr [rdx+rcx]
rx_read_dataset_r_ret:
ret 0
rx_read_dataset_f:
mov edx, dword ptr [rbx] ; ma
mov rax, qword ptr [rbx+8] ; dataset
cvtdq2pd xmm0, qword ptr [rax+rdx]
add dword ptr [rbx], 8
xor ecx, dword ptr [rbx+4] ; mx
mov dword ptr [rbx+4], ecx
test ecx, 0FFF8h
jne short rx_read_dataset_f_ret
and ecx, -8
mov dword ptr [rbx], ecx
prefetcht0 byte ptr [rax+rcx]
rx_read_dataset_f_ret:
ret 0
executeProgram ENDP executeProgram ENDP
_RANDOMX_EXECUTE_PROGRAM ENDS
END END

@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
bool softAes, lightClient, genAsm, compiled, help; bool softAes, lightClient, genAsm, compiled, help, largePages;
int programCount, threadCount; int programCount, threadCount;
readOption("--help", argc, argv, help); readOption("--help", argc, argv, help);
@ -177,6 +177,7 @@ int main(int argc, char** argv) {
readOption("--compiled", argc, argv, compiled); readOption("--compiled", argc, argv, compiled);
readIntOption("--threads", argc, argv, threadCount, 1); readIntOption("--threads", argc, argv, threadCount, 1);
readIntOption("--nonces", argc, argv, programCount, 1000); readIntOption("--nonces", argc, argv, programCount, 1000);
readOption("--largePages", argc, argv, largePages);
if (genAsm) { if (genAsm) {
generateAsm(programCount); generateAsm(programCount);
@ -216,7 +217,7 @@ int main(int argc, char** argv) {
} }
else { else {
RandomX::Cache* cache = dataset.cache; RandomX::Cache* cache = dataset.cache;
RandomX::datasetAlloc(dataset); RandomX::datasetAlloc(dataset, largePages);
if (threadCount > 1) { if (threadCount > 1) {
auto perThread = RandomX::DatasetBlockCount / threadCount; auto perThread = RandomX::DatasetBlockCount / threadCount;
auto remainder = RandomX::DatasetBlockCount % threadCount; auto remainder = RandomX::DatasetBlockCount % threadCount;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,108 @@
/*
Copyright (c) 2018 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "virtualMemory.hpp"
#include <stdexcept>
#include <string>
#ifdef _WIN32
#include <windows.h>
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#endif
#include <sys/types.h>
#include <sys/mman.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#ifdef _WIN32
// Builds a "<function>: <description>" string for the calling thread's last
// Win32 error (GetLastError).
//
// function - label prepended to the message, typically the name of the API
//            call that failed.
// Returns the system-provided description of the error; when the system has
// no text for the code, the numeric code is reported instead so the message
// is never left empty.
std::string getErrorMessage(const char* function) {
	// Read the error code first: any later API call may overwrite it.
	const DWORD errorCode = GetLastError();
	LPSTR messageBuffer = nullptr;
	// With FORMAT_MESSAGE_ALLOCATE_BUFFER the lpBuffer argument is really a
	// pointer to the pointer that receives the LocalAlloc'ed text.
	const DWORD size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
		NULL, errorCode, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), reinterpret_cast<LPSTR>(&messageBuffer), 0, NULL);
	std::string message;
	if (size != 0 && messageBuffer != nullptr) {
		message.assign(messageBuffer, size);
	}
	else {
		// FormatMessageA failed (e.g. unknown code) - keep the raw code visible.
		message = "error code " + std::to_string(errorCode);
	}
	if (messageBuffer != nullptr) {
		LocalFree(messageBuffer);
	}
	return std::string(function) + std::string(": ") + message;
}
// Enables or disables a named privilege on the current process token.
//
// pszPrivilege - ANSI privilege name, e.g. "SeLockMemoryPrivilege".
// bEnable      - non-zero to enable the privilege, zero to disable it.
// Throws std::runtime_error (with the failing API name and the system error
// text) if the token cannot be opened, the privilege cannot be looked up or
// adjusted, or the token handle cannot be closed.
void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
	HANDLE hToken;
	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
		throw std::runtime_error(getErrorMessage("OpenProcessToken"));

	// From here on the token must be closed on every path. The failure text is
	// captured immediately (while the last-error value is still intact) and
	// thrown only after the handle has been released.
	std::string failure;
	TOKEN_PRIVILEGES tp;
	// Explicit ANSI variant: the argument is a narrow string regardless of
	// whether the translation unit is built with UNICODE defined.
	if (!LookupPrivilegeValueA(nullptr, pszPrivilege, &tp.Privileges[0].Luid)) {
		failure = getErrorMessage("LookupPrivilegeValue");
	}
	else {
		tp.PrivilegeCount = 1;
		tp.Privileges[0].Attributes = bEnable ? SE_PRIVILEGE_ENABLED : 0;
		const BOOL status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, nullptr, 0);
		// AdjustTokenPrivileges can return success while leaving the last error
		// set to a non-success code, so both must be checked.
		if (!status || GetLastError() != ERROR_SUCCESS)
			failure = getErrorMessage("AdjustTokenPrivileges");
	}

	// Report a CloseHandle failure only if nothing went wrong earlier.
	if (!CloseHandle(hToken) && failure.empty())
		failure = getErrorMessage("CloseHandle");

	if (!failure.empty())
		throw std::runtime_error(failure);
}
#endif
// Allocates a region of `bytes` bytes that is readable, writable and
// executable.
//
// bytes - size of the region to allocate.
// Returns a pointer to the start of the region.
// Throws std::runtime_error if the operating system refuses the request.
void* allocExecutableMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
#else
	// Map exactly the size the caller asked for, as the Windows branch does.
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocExecutableMemory - mmap failed");
#endif
	return mem;
}
// Allocates at least `bytes` bytes of read/write memory, requesting that the
// operating system back the region with large (huge) pages.
//
// bytes - requested size. On Windows it is rounded up to a multiple of the
//         large-page minimum, which VirtualAlloc requires for MEM_LARGE_PAGES.
// Returns a pointer to the start of the region.
// Throws std::runtime_error if the privilege cannot be acquired (Windows),
// large pages are not supported (Windows), or the allocation itself fails.
void* allocLargePagesMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	// Large-page allocations need SeLockMemoryPrivilege enabled on the token.
	setPrivilege("SeLockMemoryPrivilege", 1);
	const SIZE_T pageMinimum = GetLargePageMinimum();
	// GetLargePageMinimum reports 0 when the processor has no large-page support.
	if (pageMinimum == 0)
		throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
	const SIZE_T roundedBytes = ((bytes + pageMinimum - 1) / pageMinimum) * pageMinimum;
	mem = VirtualAlloc(nullptr, roundedBytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else
#ifdef __APPLE__
	// On this platform the superpage request is passed in the fd argument of
	// an anonymous mapping.
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#else
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
#endif
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
	return mem;
}

@ -0,0 +1,23 @@
/*
Copyright (c) 2018 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstddef>

// Allocates a readable, writable and executable memory region of the given
// size in bytes. Throws std::runtime_error if the allocation fails.
void* allocExecutableMemory(std::size_t);

// Allocates a readable and writable memory region of the given size in bytes,
// requesting large-page backing from the operating system.
// Throws std::runtime_error if the allocation fails.
void* allocLargePagesMemory(std::size_t);
Loading…
Cancel
Save