Interpreter + async mode

feature/branches
tevador 5 years ago
parent a7ffe8c19a
commit 8b1102ee05

@ -0,0 +1,292 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "common.hpp"
#include "InterpretedVirtualMachine.hpp"
#include <iostream>
#include <iomanip>
#include <sstream>
namespace RandomX {
// Address transformation x -> 9 * x + c, evaluated with 32-bit wrap-around
// (the same result the 32-bit `lea` printed by printAsm() would produce).
class Mul9Transform : public ITransform {
public:
	// cc is the additive constant. The object's name is "mul9_" followed by the
	// low byte of cc in hexadecimal.
	Mul9Transform(int32_t cc) : c(cc) {
		std::ostringstream oss;
		oss << "mul9_" << std::hex << (cc & 255);
		name = oss.str();
	}
	// Returns 9 * x + c reduced modulo 2^32.
	int32_t apply(int32_t x) const override {
		// The input is an arbitrary 32-bit pattern, so the signed expression
		// 9 * x + c would overflow (undefined behavior) for most inputs.
		// Unsigned arithmetic is defined to wrap.
		const uint32_t wrapped = static_cast<uint32_t>(9) * static_cast<uint32_t>(x) + static_cast<uint32_t>(c);
		return static_cast<int32_t>(wrapped);
	}
	// Returns the name built in the constructor; the pointer stays valid for
	// the lifetime of this object.
	const char* getName() const override {
		return name.c_str();
	}
	// Writes the x86 instruction for this transform; the constant is printed
	// with an explicit sign so it can be appended to the address expression.
	std::ostream& printAsm(std::ostream& os) const override {
		os << "lea ecx, [rcx+rcx*8" << std::showpos << c << "]" << std::noshowpos << std::endl;
		return os;
	}
	// Writes a C++ definition that recreates this object.
	std::ostream& printCxx(std::ostream& os) const override {
		os << "static const Mul9Transform " << name << "(" << c << ");" << std::endl;
		return os;
	}
private:
	int32_t c;        // additive constant
	std::string name; // "mul9_<low byte of c in hex>"
};
// Address transformation x -> x + c, evaluated with 32-bit wrap-around.
class AddTransform : public ITransform {
public:
	// cc is the additive constant. The object's name is "add_" followed by the
	// low byte of cc in hexadecimal.
	AddTransform(int32_t cc) : c(cc) {
		std::ostringstream oss;
		oss << "add_" << std::hex << (cc & 255);
		name = oss.str();
	}
	// Returns x + c reduced modulo 2^32.
	int32_t apply(int32_t x) const override {
		// x is an arbitrary 32-bit pattern; adding in the signed type could
		// overflow (undefined behavior), so the sum is formed in uint32_t,
		// where wrap-around is well defined.
		const uint32_t wrapped = static_cast<uint32_t>(x) + static_cast<uint32_t>(c);
		return static_cast<int32_t>(wrapped);
	}
	// Returns the name built in the constructor; the pointer stays valid for
	// the lifetime of this object.
	const char* getName() const override {
		return name.c_str();
	}
	// Writes the assembly lines for this transform ("db 64" followed by the add).
	std::ostream& printAsm(std::ostream& os) const override {
		os << "db 64" << std::endl;
		os << "add ecx, " << c << std::endl;
		return os;
	}
	// Writes a C++ definition that recreates this object.
	std::ostream& printCxx(std::ostream& os) const override {
		os << "static const AddTransform " << name << "(" << c << ");" << std::endl;
		return os;
	}
private:
	int32_t c;        // additive constant
	std::string name; // "add_<low byte of c in hex>"
};
// Address transformation x -> x ^ c.
class XorTransform : public ITransform {
public:
	// Stores the constant and derives the name "xor_<low byte of cc in hex>".
	XorTransform(int32_t cc) : c(cc) {
		std::ostringstream label;
		label << "xor_";
		label << std::hex << (cc & 255);
		name = label.str();
	}
	// Returns x XOR-ed with the stored constant.
	int32_t apply(int32_t x) const override {
		return x ^ c;
	}
	// Returns the name built in the constructor.
	const char* getName() const override {
		return name.c_str();
	}
	// Writes the assembly lines for this transform ("db 64" followed by the xor).
	std::ostream& printAsm(std::ostream& os) const override {
		return os << "db 64" << std::endl
		          << "xor ecx, " << c << std::endl;
	}
	// Writes a C++ definition that recreates this object.
	std::ostream& printCxx(std::ostream& os) const override {
		return os << "static const XorTransform " << name << "(" << c << ");" << std::endl;
	}
private:
	int32_t c;        // XOR constant
	std::string name; // "xor_<low byte of c in hex>"
};
// Pool of address-transform instances referenced by
// InterpretedVirtualMachine::addressTransformations below.
// Each variable is named like the string its constructor builds: the transform
// kind followed by the low byte of the constant in hex (e.g. 109 == 0x6d,
// -19 & 255 == 0xed). The lines have the exact shape emitted by printCxx(),
// so this list was presumably generated -- confirm before editing by hand.
static const Mul9Transform mul9_6d(109);
static const XorTransform xor_60(96);
static const Mul9Transform mul9_ed(-19);
static const AddTransform add_9e(-98);
static const AddTransform add_eb(-21);
static const XorTransform xor_b0(-80);
static const Mul9Transform mul9_a4(-92);
static const AddTransform add_71(113);
static const Mul9Transform mul9_64(100);
static const AddTransform add_d9(-39);
static const XorTransform xor_78(120);
static const Mul9Transform mul9_89(-119);
static const AddTransform add_8f(-113);
static const AddTransform add_6f(111);
static const XorTransform xor_68(104);
static const Mul9Transform mul9_ad(-83);
static const Mul9Transform mul9_7f(127);
static const XorTransform xor_90(-112);
static const AddTransform add_59(89);
static const AddTransform add_e0(-32);
static const AddTransform add_68(104);
static const XorTransform xor_88(-120);
static const XorTransform xor_18(24);
static const Mul9Transform mul9_9(9);
static const AddTransform add_e1(-31);
static const XorTransform xor_f0(-16);
static const AddTransform add_44(68);
static const Mul9Transform mul9_92(-110);
static const XorTransform xor_40(64);
static const XorTransform xor_d8(-40);
static const XorTransform xor_f8(-8);
static const AddTransform add_f6(-10);
static const XorTransform xor_e0(-32);
static const AddTransform add_e(14);
static const Mul9Transform mul9_d2(-46);
static const XorTransform xor_98(-104);
static const Mul9Transform mul9_24(36);
static const AddTransform add_64(100);
static const Mul9Transform mul9_bf(-65);
static const Mul9Transform mul9_1b(27);
static const Mul9Transform mul9_5b(91);
static const AddTransform add_9b(-101);
static const AddTransform add_a2(-94);
static const Mul9Transform mul9_f6(-10);
static const XorTransform xor_50(80);
static const AddTransform add_94(-108);
static const AddTransform add_c6(-58);
static const XorTransform xor_30(48);
static const Mul9Transform mul9_49(73);
static const XorTransform xor_d0(-48);
static const XorTransform xor_20(32);
static const XorTransform xor_a0(-96);
static const AddTransform add_76(118);
static const AddTransform add_5b(91);
static const Mul9Transform mul9_12(18);
static const AddTransform add_f5(-11);
static const Mul9Transform mul9_3f(63);
static const AddTransform add_72(114);
static const Mul9Transform mul9_2d(45);
static const AddTransform add_bd(-67);
static const AddTransform add_35(53);
static const Mul9Transform mul9_9b(-101);
static const Mul9Transform mul9_ff(-1);
static const XorTransform xor_10(16);
static const Mul9Transform mul9_db(-37);
static const Mul9Transform mul9_e4(-28);
static const Mul9Transform mul9_c9(-55);
static const XorTransform xor_a8(-88);
static const XorTransform xor_b8(-72);
static const AddTransform add_24(36);
static const XorTransform xor_c8(-56);
static const AddTransform add_74(116);
static const XorTransform xor_58(88);
static const XorTransform xor_80(-128);
static const AddTransform add_32(50);
static const AddTransform add_69(105);
static const AddTransform add_db(-37);
static const XorTransform xor_70(112);
static const XorTransform xor_8(8);
static const XorTransform xor_e8(-24);
static const Mul9Transform mul9_76(118);
static const XorTransform xor_48(72);
static const XorTransform xor_c0(-64);
static const AddTransform add_28(40);
static const Mul9Transform mul9_b6(-74);
static const Mul9Transform mul9_52(82);
static const Mul9Transform mul9_36(54);
static const XorTransform xor_38(56);
static const XorTransform xor_28(40);
static const AddTransform add_57(87);
// Table of the available address transforms (90 entries listed here;
// TransformationCount is declared elsewhere and is expected to match).
// The entries point at const objects, so they are converted with the implicit
// derived-to-base pointer conversion; a C-style (ITransform*) cast would
// silently strip the const qualifier first.
const ITransform* InterpretedVirtualMachine::addressTransformations[TransformationCount] = {
	&mul9_6d,
	&xor_60,
	&mul9_ed,
	&add_9e,
	&add_eb,
	&xor_b0,
	&mul9_a4,
	&add_71,
	&mul9_64,
	&add_d9,
	&xor_78,
	&mul9_89,
	&add_8f,
	&add_6f,
	&xor_68,
	&mul9_ad,
	&mul9_7f,
	&xor_90,
	&add_59,
	&add_e0,
	&add_68,
	&xor_88,
	&xor_18,
	&mul9_9,
	&add_e1,
	&xor_f0,
	&add_44,
	&mul9_92,
	&xor_40,
	&xor_d8,
	&xor_f8,
	&add_f6,
	&xor_e0,
	&add_e,
	&mul9_d2,
	&xor_98,
	&mul9_24,
	&add_64,
	&mul9_bf,
	&mul9_1b,
	&mul9_5b,
	&add_9b,
	&add_a2,
	&mul9_f6,
	&xor_50,
	&add_94,
	&add_c6,
	&xor_30,
	&mul9_49,
	&xor_d0,
	&xor_20,
	&xor_a0,
	&add_76,
	&add_5b,
	&mul9_12,
	&add_f5,
	&mul9_3f,
	&add_72,
	&mul9_2d,
	&add_bd,
	&add_35,
	&mul9_9b,
	&mul9_ff,
	&xor_10,
	&mul9_db,
	&mul9_e4,
	&mul9_c9,
	&xor_a8,
	&xor_b8,
	&add_24,
	&xor_c8,
	&add_74,
	&xor_58,
	&xor_80,
	&add_32,
	&add_69,
	&add_db,
	&xor_70,
	&xor_8,
	&xor_e8,
	&mul9_76,
	&xor_48,
	&xor_c0,
	&add_28,
	&mul9_b6,
	&mul9_52,
	&mul9_36,
	&xor_38,
	&xor_28,
	&add_57,
};
}

@ -67,12 +67,12 @@ namespace RandomX {
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rax" << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
asmCode << "\tcall rx_read" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rax" << std::endl;
if (instr.loca & 3) {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
}

@ -46,7 +46,7 @@ namespace RandomX {
return keys;
}
const uint8_t* getCache() {
const uint8_t* getCache() const {
return memory;
}
private:

@ -25,15 +25,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
CompiledVirtualMachine::CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {
CompiledVirtualMachine::CompiledVirtualMachine() {
totalSize = 0;
}
void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
if (lightClient) {
throw std::runtime_error("Compiled VM does not support light-client mode");
}
VirtualMachine::setDataset(ds, lightClient);
void CompiledVirtualMachine::setDataset(dataset_t ds) {
mem.ds = ds;
}
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) {
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
}
void CompiledVirtualMachine::initializeProgram(const void* seed) {

@ -37,8 +37,9 @@ namespace RandomX {
void operator delete(void* ptr) {
_mm_free(ptr);
}
CompiledVirtualMachine(bool softAes);
void setDataset(dataset_t ds, bool light = false) override;
CompiledVirtualMachine();
void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override;
void initializeProgram(const void* seed) override;
virtual void execute() override;
void* getProgram() {

@ -21,11 +21,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "InterpretedVirtualMachine.hpp"
#include "Pcg32.hpp"
#include "instructions.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
#include "LightClientAsyncWorker.hpp"
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <sstream>
#include <cmath>
#include <thread>
#ifdef STATS
#include <algorithm>
#endif
@ -38,6 +42,57 @@ constexpr bool fpuCheck = false;
namespace RandomX {
// Releases the async worker stored in mem.ds; nothing is freed when the VM
// was constructed without async mode.
InterpretedVirtualMachine::~InterpretedVirtualMachine() {
	if (!asyncWorker) {
		return;
	}
	delete mem.ds.asyncWorker;
}
// Binds the VM to its data source and selects the matching dataset read routine.
// Without async mode, ds is stored as-is and blocks are read through
// datasetReadLight. With async mode, a LightClientAsyncWorker is created
// around ds.cache and reads go through datasetReadLightAsync.
// The softAes flag picks the template variant in both cases.
void InterpretedVirtualMachine::setDataset(dataset_t ds) {
	if (!asyncWorker) {
		mem.ds = ds;
		if (softAes) {
			readDataset = &datasetReadLight<true>;
		}
		else {
			readDataset = &datasetReadLight<false>;
		}
		return;
	}
	if (softAes) {
		mem.ds.asyncWorker = new LightClientAsyncWorker<true>(ds.cache);
	}
	else {
		mem.ds.asyncWorker = new LightClientAsyncWorker<false>(ds.cache);
	}
	readDataset = &datasetReadLightAsync;
}
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) {
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
if (asyncWorker) {
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;
const uint32_t blocksPerThread = (ScratchpadSize / CacheLineSize) / 2;
worker->prepareBlocks(scratchpad, startingBlock, blocksPerThread); //async first half
worker->getBlocks(scratchpad + ScratchpadLength / 2, startingBlock + blocksPerThread, blocksPerThread); //sync second half
worker->sync();
}
else {
auto cache = mem.ds.cache;
if (softAes) {
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
}
}
else {
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
}
}
}
}
void InterpretedVirtualMachine::initializeProgram(const void* seed) {
Pcg32 gen(seed);
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
@ -50,6 +105,7 @@ namespace RandomX {
}
//std::cout << reg;
p.initialize(gen);
currentTransform = addressTransformations[gen.getUniform(0, TransformationCount - 1)];
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.mx = *(((uint32_t*)seed) + 5);
pc = 0;
@ -74,61 +130,61 @@ namespace RandomX {
#endif
}
convertible_t InterpretedVirtualMachine::loada(Instruction& inst) {
convertible_t& rega = reg.r[inst.rega % RegistersCount];
rega.i64 ^= inst.addra; //sign-extend addra
convertible_t InterpretedVirtualMachine::loada(Instruction& instr) {
convertible_t& rega = reg.r[instr.rega % RegistersCount];
rega.i64 ^= instr.addra; //sign-extend addra
addr_t addr = rega.u32;
switch (inst.loca & 7)
{
case 0:
case 1:
case 2:
case 3:
return readDataset(addr, mem);
case 4:
return scratchpad[addr % ScratchpadL2];
if ((ic % 64) == 0) {
addr = currentTransform->apply(addr);
#ifdef STATS
datasetAccess[mem.ma / (DatasetBlockCount / 256) / CacheLineSize]++;
#endif
readDataset(addr, mem, reg);
}
case 5:
case 6:
case 7:
return scratchpad[addr % ScratchpadL1];
if ((instr.loca & 192) == 0) {
mem.mx ^= addr;
}
if (instr.loca & 3) {
return scratchpad[addr % ScratchpadL1];
}
else {
return scratchpad[addr % ScratchpadL2];
}
}
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
switch (inst.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
return reg.r[inst.regb % RegistersCount];
case 6:
case 7:
convertible_t temp;
temp.i64 = inst.imm32; //sign-extend imm32
return temp;
// Loads operand B: register regb when either of the two low bits of locb is
// set, otherwise the immediate imm32 stored through the signed 64-bit member.
convertible_t InterpretedVirtualMachine::loadbia(Instruction& instr) {
	if ((instr.locb & 3) != 0) {
		return reg.r[instr.regb % RegistersCount];
	}
	convertible_t imm;
	imm.i64 = instr.imm32; //sign-extend imm32
	return imm;
}
convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) {
switch (inst.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
return reg.r[inst.regb % RegistersCount];
case 4:
case 5:
case 6:
case 7:
convertible_t temp;
temp.u64 = inst.imm8;
return temp;
// Loads operand B for the ALU_INST_SR handlers: register regb when the lowest
// bit of locb is set, otherwise the immediate imm8 stored through the
// unsigned 64-bit member.
convertible_t InterpretedVirtualMachine::loadbiashift(Instruction& instr) {
	if ((instr.locb & 1) != 0) {
		return reg.r[instr.regb % RegistersCount];
	}
	convertible_t imm;
	imm.u64 = instr.imm8;
	return imm;
}
// Loads operand B for the ALU_INST_DIV handlers. The selection is the reverse
// of loadbia: the immediate imm32 (stored through the unsigned 64-bit member)
// is used when either low bit of locb is set, register regb otherwise.
convertible_t InterpretedVirtualMachine::loadbiadiv(Instruction& instr) {
	if ((instr.locb & 3) == 0) {
		return reg.r[instr.regb % RegistersCount];
	}
	convertible_t imm;
	imm.u64 = instr.imm32;
	return imm;
}
@ -174,26 +230,6 @@ namespace RandomX {
}
}
void InterpretedVirtualMachine::writecflo(Instruction& inst, fpu_reg_t& regc) {
addr_t addr;
switch (inst.locc & 7)
{
case 4:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
scratchpad[addr % ScratchpadL2] = regc.lo;
break;
case 5:
case 6:
case 7:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
scratchpad[addr % ScratchpadL1] = regc.lo;
default:
break;
}
}
#define ALU_RETIRE(x) x(a, b, c); \
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
@ -242,7 +278,7 @@ namespace RandomX {
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
convertible_t b = loadbr1(inst); \
convertible_t b = loadbia(inst); \
convertible_t& c = getcr(inst); \
ALU_RETIRE(x) \
}
@ -250,7 +286,15 @@ namespace RandomX {
#define ALU_INST_SR(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
convertible_t b = loadbr0(inst); \
convertible_t b = loadbiashift(inst); \
convertible_t& c = getcr(inst); \
ALU_RETIRE(x) \
}
#define ALU_INST_DIV(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
convertible_t b = loadbiadiv(inst); \
convertible_t& c = getcr(inst); \
ALU_RETIRE(x) \
}
@ -282,8 +326,8 @@ namespace RandomX {
ALU_INST(MUL_32)
ALU_INST(IMUL_32)
ALU_INST(IMULH_64)
ALU_INST(DIV_64)
ALU_INST(IDIV_64)
ALU_INST_DIV(DIV_64)
ALU_INST_DIV(IDIV_64)
ALU_INST(AND_64)
ALU_INST(AND_32)
ALU_INST(OR_64)
@ -301,42 +345,68 @@ namespace RandomX {
FPU_INST(FPSUB)
FPU_INST(FPMUL)
FPU_INST(FPDIV)
FPU_INST_NB(FPSQRT)
FPU_INST_NB(FPROUND)
// FPROUND handler: loads operand A, copies it into the destination register,
// optionally traces it, then calls FPROUND with A and the instruction's imm8.
void InterpretedVirtualMachine::h_FPROUND(Instruction& inst) {
	convertible_t a = loada(inst);
	getcr(inst).u64 = a.u64;
	if (trace) {
		std::cout << std::hex << a.u64 << std::endl;
	}
	FPROUND(a, inst.imm8);
}
// JUMP handler: loads operand A and copies it into the destination register,
// then, if JMP_COND holds for (locb, register regb, imm32), advances pc by
// (imm8 & 127) + 1 and wraps it modulo ProgramLength.
// With STATS defined, taken / not-taken counters are updated as well.
void InterpretedVirtualMachine::h_JUMP(Instruction& inst) {
convertible_t a = loada(inst);
convertible_t& c = getcr(inst);
c.u64 = a.u64;
if (trace) std::cout << std::hex << a.u64 << std::endl;
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
#ifdef STATS
count_JUMP_taken++;
count_jump_taken[inst.locb & 7]++;
#endif
// forward jump of 1..128 instructions
pc += (inst.imm8 & 127) + 1;
pc = pc % ProgramLength;
}
#ifdef STATS
// the else branch exists only in STATS builds; it just records the miss
else {
count_JUMP_not_taken++;
count_jump_not_taken[inst.locb & 7]++;
}
#endif
}
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
convertible_t a = loada(inst);
convertible_t& c = getcr(inst);
c.u64 = a.u64;
if (trace) std::cout << std::hex << a.u64 << std::endl;
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
#ifdef STATS
count_CALL_taken++;
count_jump_taken[inst.locb & 7]++;
count_retdepth = std::max(0, count_retdepth - 1);
#endif
stackPush(a);
stackPush(pc);
#ifdef STATS
count_max_stack = std::max(count_max_stack, (int)stack.size());
#endif
pc += (inst.imm8 & 127) + 1;
pc = pc % ProgramLength;
if (trace) std::cout << std::hex << a.u64 << std::endl;
}
else {
convertible_t& c = getcr(inst);
#ifdef STATS
else {
count_CALL_not_taken++;
count_jump_not_taken[inst.locb & 7]++;
#endif
c.u64 = a.u64;
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
}
#endif
}
void InterpretedVirtualMachine::h_RET(Instruction& inst) {
convertible_t a = loada(inst);
convertible_t b = loadbr1(inst);
convertible_t& c = getcr(inst);
c.u64 = a.u64;
if (trace) std::cout << std::hex << a.u64 << std::endl;
if (stack.size() > 0) {
#ifdef STATS
count_RET_taken++;
@ -344,22 +414,13 @@ namespace RandomX {
count_retdepth_max = std::max(count_retdepth_max, count_retdepth);
#endif
auto raddr = stackPopAddress();
auto retval = stackPopValue();
c.u64 = a.u64 ^ retval.u64;
pc = raddr;
}
else {
#ifdef STATS
if (stack.size() == 0)
count_RET_stack_empty++;
else {
count_RET_not_taken++;
count_jump_not_taken[inst.locb & 7]++;
}
#endif
c.u64 = a.u64;
else {
count_RET_stack_empty++;
}
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
#endif
}
#include "instructionWeights.hpp"
@ -394,6 +455,7 @@ namespace RandomX {
INST_HANDLE(FPDIV)
INST_HANDLE(FPSQRT)
INST_HANDLE(FPROUND)
INST_HANDLE(JUMP)
INST_HANDLE(CALL)
INST_HANDLE(RET)
};

@ -25,23 +25,37 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
class ITransform {
public:
virtual int32_t apply(int32_t) const = 0;
virtual const char* getName() const = 0;
virtual std::ostream& printAsm(std::ostream&) const = 0;
virtual std::ostream& printCxx(std::ostream&) const = 0;
};
class InterpretedVirtualMachine;
typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
class InterpretedVirtualMachine : public VirtualMachine {
public:
InterpretedVirtualMachine(bool softAes) : VirtualMachine(softAes) {}
virtual void initializeProgram(const void* seed) override;
virtual void execute() override;
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
~InterpretedVirtualMachine();
void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override;
void initializeProgram(const void* seed) override;
void execute() override;
const Program& getProgam() {
return p;
}
private:
static InstructionHandler engine[256];
static const ITransform* addressTransformations[TransformationCount];
bool softAes, asyncWorker;
Program p;
std::vector<convertible_t> stack;
uint64_t pc, ic;
const ITransform* currentTransform;
#ifdef STATS
int count_ADD_64 = 0;
int count_ADD_32 = 0;
@ -71,11 +85,12 @@ namespace RandomX {
int count_FPDIV = 0;
int count_FPSQRT = 0;
int count_FPROUND = 0;
int count_JUMP_taken = 0;
int count_JUMP_not_taken = 0;
int count_CALL_taken = 0;
int count_CALL_not_taken = 0;
int count_RET_stack_empty = 0;
int count_RET_taken = 0;
int count_RET_not_taken = 0;
int count_jump_taken[8] = { 0 };
int count_jump_not_taken[8] = { 0 };
int count_max_stack = 0;
@ -89,14 +104,15 @@ namespace RandomX {
int count_FPSUB_nop2 = 0;
int count_FPMUL_nop = 0;
int count_FPMUL_nop2 = 0;
int datasetAccess[256] = { 0 };
#endif
convertible_t loada(Instruction&);
convertible_t loadbr0(Instruction&);
convertible_t loadbr1(Instruction&);
convertible_t loadbiashift(Instruction&);
convertible_t loadbiadiv(Instruction&);
convertible_t loadbia(Instruction&);
convertible_t& getcr(Instruction&);
void writecf(Instruction&, fpu_reg_t&);
void writecflo(Instruction&, fpu_reg_t&);
void stackPush(convertible_t& c) {
stack.push_back(c);
@ -148,6 +164,7 @@ namespace RandomX {
void h_FPDIV(Instruction&);
void h_FPSQRT(Instruction&);
void h_FPROUND(Instruction&);
void h_JUMP(Instruction&);
void h_CALL(Instruction&);
void h_RET(Instruction&);
};

@ -170,13 +170,13 @@ namespace RandomX {
emit(instr.addra);
emit(uint16_t(0x8b41)); //mov
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
emit(0x753fc3f6); //test bl,0x3f; jne
emit(uint16_t(0xe805));
emit(readDatasetOffset - (codePos + 4));
if ((instr.loca & 192) == 0) { //A.LOC.X
emit(uint16_t(0x3348));
emitByte(0xe8); //xor rbp, rax
}
emit(0x753fc3f6); //test bl,0x3f; jne
emit(uint16_t(0xe805));
emit(readDatasetOffset - (codePos + 4));
emitByte(0x25); //and eax,
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad

@ -0,0 +1,94 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "LightClientAsyncWorker.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
namespace RandomX {
// Creates the worker with no pending work and starts its thread running
// runWorker(). startBlock and blockCount are zero-initialized because
// getBlock() compares against startBlock before any prepareBlock() call has
// assigned it; reading it uninitialized would be undefined behavior.
// The thread is started from the initializer list, so workerThread must remain
// the last-declared member (members are initialized in declaration order).
template<bool softAes>
LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), startBlock(0), blockCount(0), hasWork(false), workerThread(&LightClientAsyncWorker::runWorker, this) {
}
// Schedules the single block containing addr to be computed by the worker
// thread into currentLine, then wakes the worker.
template<bool softAes>
void LightClientAsyncWorker<softAes>::prepareBlock(addr_t addr) {
	std::unique_lock<std::mutex> guard(mutex);
	output = currentLine.data();
	blockCount = 1;
	startBlock = addr / CacheLineSize;
	hasWork = true;
	guard.unlock(); //release before notifying, as the scoped lock did
	notifier.notify_all();
}
// Returns the block containing addr in currentLine.
// If that block is the one last scheduled with prepareBlock(), this only
// waits for the worker to finish. Otherwise the block is computed here.
template<bool softAes>
const uint64_t* LightClientAsyncWorker<softAes>::getBlock(addr_t addr) {
	const uint32_t wanted = addr / CacheLineSize;
	const bool alreadyScheduled = (wanted == startBlock) && (output == currentLine.data());
	if (alreadyScheduled) {
		sync();
	}
	else {
		initBlock<softAes>(cache->getCache(), (uint8_t*)currentLine.data(), wanted, cache->getKeys());
	}
	return currentLine.data();
}
// Schedules blockCount blocks starting at startBlock to be computed by the
// worker thread into out, then wakes the worker. Call sync() to wait for
// completion.
template<bool softAes>
void LightClientAsyncWorker<softAes>::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
	{
		std::lock_guard<std::mutex> lk(mutex);
		// The parameters shadow the members of the same name; without the
		// explicit this-> the assignments are self-assignments and the worker
		// would run with a stale block range.
		this->startBlock = startBlock;
		this->blockCount = blockCount;
		output = out;
		hasWork = true;
	}
	notifier.notify_all();
}
// Computes blockCount consecutive blocks, starting at startBlock, on the
// calling thread and writes them back to back (CacheLineSize bytes each) to out.
template<bool softAes>
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
	uint8_t* dst = (uint8_t*)out;
	uint32_t block = startBlock;
	for (uint32_t remaining = blockCount; remaining != 0; --remaining) {
		initBlock<softAes>(cache->getCache(), dst, block, cache->getKeys());
		dst += CacheLineSize;
		++block;
	}
}
// Blocks the caller until the worker has no pending work (hasWork is false).
template<bool softAes>
void LightClientAsyncWorker<softAes>::sync() {
	std::unique_lock<std::mutex> guard(mutex);
	while (hasWork) {
		notifier.wait(guard);
	}
}
// Body of the worker thread. Repeatedly waits until hasWork is set, computes
// the requested blocks into output, clears hasWork and wakes any thread
// blocked in sync().
// NOTE(review): the loop has no exit condition, so the thread can never be
// joined -- confirm how the worker is expected to shut down.
template<bool softAes>
void LightClientAsyncWorker<softAes>::runWorker() {
for (;;) {
std::unique_lock<std::mutex> lk(mutex);
notifier.wait(lk, [this] { return hasWork; });
// the mutex stays locked while the blocks are being computed
getBlocks(output, startBlock, blockCount);
hasWork = false;
// unlock before notifying so a woken waiter can take the mutex immediately
lk.unlock();
notifier.notify_all();
}
}
template class LightClientAsyncWorker<true>;
template class LightClientAsyncWorker<false>;
}

@ -0,0 +1,52 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "common.hpp"
#include <thread>
#include <mutex>
#include <condition_variable>
#include <array>
namespace RandomX {
class Cache;
using DatasetLine = std::array<uint64_t, CacheLineSize / sizeof(uint64_t)>;
// ILightClientAsyncWorker implementation that computes dataset blocks from the
// cache on a dedicated thread; softAes selects the initBlock variant used.
// NOTE(review): no destructor is declared and runWorker() never returns, so
// destroying an instance would destroy a joinable std::thread -- confirm the
// intended shutdown behavior.
template<bool softAes>
class LightClientAsyncWorker : public ILightClientAsyncWorker {
public:
// Starts the worker thread.
LightClientAsyncWorker(const Cache*);
// Schedules one block (the one containing the address) for background computation.
void prepareBlock(addr_t) final;
// Schedules blockCount blocks starting at startBlock for background computation into out.
void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
// Returns the block containing the address, waiting for or computing it as needed.
const uint64_t* getBlock(addr_t) final;
// Computes blockCount blocks starting at startBlock into out on the calling thread.
void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
// Waits until the scheduled background work has finished.
void sync() final;
private:
// Worker thread main loop.
void runWorker();
// notifier and mutex guard hasWork and the work description below.
std::condition_variable notifier;
std::mutex mutex;
// Buffer for the single block handled by prepareBlock()/getBlock().
DatasetLine currentLine;
// Work description: destination buffer and block range.
void* output;
uint32_t startBlock, blockCount;
// True while a scheduled request has not been completed yet.
bool hasWork;
// Declared last: the constructor starts the thread in its initializer list,
// and members are initialized in declaration order.
std::thread workerThread;
};
}

@ -19,8 +19,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "VirtualMachine.hpp"
#include "common.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
#include "t1ha/t1ha.h"
#include "blake2/blake2.h"
#include <cstring>
@ -37,62 +35,10 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
namespace RandomX {
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
VirtualMachine::VirtualMachine() {
mem.ds.dataset = nullptr;
}
VirtualMachine::~VirtualMachine() {
if (lightClient) {
delete mem.ds.lightDataset->block;
delete mem.ds.lightDataset;
}
}
void VirtualMachine::setDataset(dataset_t ds, bool light) {
if (mem.ds.dataset != nullptr) {
throw std::runtime_error("Dataset is already initialized");
}
lightClient = light;
if (light) {
auto lds = mem.ds.lightDataset = new LightClientDataset();
lds->cache = ds.cache;
//lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
lds->blockNumber = -1;
if (lds->block == nullptr) {
throw std::bad_alloc();
}
if (softAes) {
readDataset = &datasetReadLight<true>;
}
else {
readDataset = &datasetReadLight<false>;
}
}
else {
mem.ds = ds;
readDataset = &datasetRead;
}
}
void VirtualMachine::initializeScratchpad(uint32_t index) {
if (lightClient) {
auto cache = mem.ds.lightDataset->cache;
if (softAes) {
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
}
}
else {
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
}
}
}
else {
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
}
}
void VirtualMachine::getResult(void* out) {
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2;
uint64_t smallState[smallStateLength];

@ -25,10 +25,10 @@ namespace RandomX {
class VirtualMachine {
public:
VirtualMachine(bool softAes);
virtual ~VirtualMachine();
virtual void setDataset(dataset_t ds, bool light = false);
void initializeScratchpad(uint32_t index);
VirtualMachine();
virtual ~VirtualMachine() {}
virtual void setDataset(dataset_t ds) = 0;
virtual void initializeScratchpad(uint32_t index) = 0;
virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0;
void getResult(void*);
@ -36,7 +36,6 @@ namespace RandomX {
return reg;
}
protected:
bool softAes, lightClient;
DatasetReadFunc readDataset;
alignas(16) RegisterFile reg;
MemoryRegisters mem;

@ -38,7 +38,7 @@ namespace RandomX {
constexpr int CacheLineSize = 64;
constexpr int BlockExpansionRatio = 64;
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
constexpr int DatasetIterations = 64;
constexpr int DatasetIterations = 16;
constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
@ -86,16 +86,25 @@ namespace RandomX {
return i % RandomX::ProgramLength;
}
struct LightClientDataset {
Cache* cache;
uint8_t* block;
uint32_t blockNumber;
// Interface of a helper that produces dataset blocks from a Cache, either in
// the background (prepareBlock / prepareBlocks followed by sync or getBlock)
// or on the calling thread (getBlocks).
class ILightClientAsyncWorker {
public:
	// Instances are deleted through a pointer to this interface, which is
	// only well defined when the destructor is virtual.
	virtual ~ILightClientAsyncWorker() {}
	// Schedules the block containing the given address for background computation.
	virtual void prepareBlock(addr_t) = 0;
	// Schedules blockCount blocks starting at startBlock for background computation into out.
	virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
	// Returns the data of the block containing the given address.
	virtual const uint64_t* getBlock(addr_t) = 0;
	// Computes blockCount blocks starting at startBlock into out.
	virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
	// Waits for scheduled background work to finish.
	virtual void sync() = 0;
	// Returns the cache passed to the constructor.
	const Cache* getCache() {
		return cache;
	}
protected:
	ILightClientAsyncWorker(const Cache* c) : cache(c) {}
	const Cache* cache;
};
union dataset_t {
uint8_t* dataset;
Cache* cache;
LightClientDataset* lightDataset;
ILightClientAsyncWorker* asyncWorker;
};
struct MemoryRegisters {
@ -112,7 +121,7 @@ namespace RandomX {
static_assert(sizeof(RegisterFile) == 3 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile");
typedef convertible_t(*DatasetReadFunc)(addr_t, MemoryRegisters&);
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);

@ -30,7 +30,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#if defined(__SSE2__)
#include <wmmintrin.h>
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_NTA)
#else
#define PREFETCH(memory)
#endif
@ -106,32 +106,44 @@ namespace RandomX {
template
void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
convertible_t data;
data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
memory.ma += 8;
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset + memory.ma);
memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) {
memory.ma = memory.mx & ~7;
PREFETCH(memory);
}
return data;
memory.mx &= -64; //align to cache line
std::swap(memory.mx, memory.ma);
PREFETCH(memory);
for (int i = 0; i < RegistersCount; ++i)
reg.r[i].u64 ^= datasetLine[i];
}
//Dataset read for light-client mode; softAes is forwarded to initBlock.
//NOTE(review): two versions of datasetReadLight are interleaved in this span (one
//returning convertible_t, one returning void and taking a RegisterFile&) and the braces
//do not balance; it reads like the before/after lines of a change shown together.
//Confirm which version is current before relying on either.
//The void version calls initBlock to fill a local line for block number
//memory.ma / CacheLineSize from memory.ds.cache, XORs RegistersCount 64-bit words of it
//into reg.r[], then XORs addr into memory.mx, masks mx with -64 and swaps mx with ma.
template<bool softAes>
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
convertible_t data;
LightClientDataset* lds = memory.ds.lightDataset;
auto blockNumber = memory.ma / CacheLineSize;
return data;
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
Cache* cache = memory.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
initBlock<softAes>(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
for (int i = 0; i < RegistersCount; ++i)
reg.r[i].u64 ^= datasetLine[i];
memory.mx ^= addr;
memory.mx &= -64; //align to cache line
std::swap(memory.mx, memory.ma);
}
//Explicit instantiations of datasetReadLight for softAes = false and softAes = true.
//NOTE(review): each instantiation appears with two return types (convertible_t and void);
//this reads like the before/after lines of a change shown together - confirm which is current.
template
convertible_t datasetReadLight<false>(addr_t addr, MemoryRegisters& memory);
void datasetReadLight<false>(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
template
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
void datasetReadLight<true>(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
//Light-client dataset read that goes through memory.ds.asyncWorker.
//XORs the block the worker returns for the current memory.ma into the registers,
//then computes the next address and calls prepareBlock for it.
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
	ILightClientAsyncWorker* const worker = memory.ds.asyncWorker;
	const uint64_t* const blockWords = worker->getBlock(memory.ma);
	//mix one 64-bit word of the block into each of the RegistersCount entries of reg.r
	int regIndex = 0;
	while (regIndex < RegistersCount) {
		reg.r[regIndex].u64 ^= blockWords[regIndex];
		++regIndex;
	}
	memory.mx ^= addr;
	memory.mx &= -64; //align to cache line
	std::swap(memory.mx, memory.ma);
	//memory.ma now holds the freshly computed, aligned address
	worker->prepareBlock(memory.ma);
}
void datasetAlloc(dataset_t& ds, bool largePages) {
if (sizeof(size_t) <= 4)

@ -40,12 +40,14 @@ namespace RandomX {
//Declarations of the dataset initialisation and read functions.
//NOTE(review): datasetRead and datasetReadLight are each declared twice below with
//different signatures (returning convertible_t vs. returning void and taking a
//RegisterFile&). This reads like the before/after lines of a change shown together -
//confirm which prototypes are current.
template<bool softAes>
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& dataset);
template<bool softAes>
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&);
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
}

@ -22,12 +22,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
//Zeroes the 11 lowest bits of x and converts the result to double.
//With those bits cleared, the remaining value needs no more significant bits
//than a double's mantissa provides, so the conversion is exact (no rounding).
inline double convertSigned52(int64_t x) {
	const int64_t lowBitsCleared = x & ~static_cast<int64_t>(2047);
	return static_cast<double>(lowBitsCleared);
}
extern "C" {
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c);
@ -53,11 +47,11 @@ namespace RandomX {
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
bool JMP_COND(uint8_t, convertible_t&, int32_t);
void FPINIT();
void FPROUND(convertible_t, uint8_t);
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
}
}

@ -370,9 +370,9 @@ namespace RandomX {
#endif
}
void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
c.lo.f64 = convertSigned52(a.i64);
switch (a.u64 & 3) {
void FPROUND(convertible_t a, uint8_t rot) {
uint64_t flag = ror64(a.u64, rot);
switch (flag & 3) {
case RoundDown:
#ifdef DEBUG
std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " <<

@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
}
int main(int argc, char** argv) {
bool softAes, lightClient, genAsm, compiled, help, largePages;
bool softAes, lightClient, genAsm, compiled, help, largePages, async;
int programCount, threadCount;
readOption("--help", argc, argv, help);
@ -178,6 +178,7 @@ int main(int argc, char** argv) {
readIntOption("--threads", argc, argv, threadCount, 1);
readIntOption("--nonces", argc, argv, programCount, 1000);
readOption("--largePages", argc, argv, largePages);
readOption("--async", argc, argv, async);
if (genAsm) {
generateAsm(programCount);
@ -250,12 +251,12 @@ int main(int argc, char** argv) {
for (int i = 0; i < threadCount; ++i) {
RandomX::VirtualMachine* vm;
if (compiled) {
vm = new RandomX::CompiledVirtualMachine(softAes);
vm = new RandomX::CompiledVirtualMachine();
}
else {
vm = new RandomX::InterpretedVirtualMachine(softAes);
vm = new RandomX::InterpretedVirtualMachine(softAes, async);
}
vm->setDataset(dataset, lightClient);
vm->setDataset(dataset);
vms.push_back(vm);
}
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
@ -278,7 +279,12 @@ int main(int argc, char** argv) {
result.print(std::cout);
if(programCount == 1000)
std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
if (lightClient) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per program" << std::endl;
}
else {
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
}
}
catch (std::exception& e) {
std::cout << "ERROR: " << e.what() << std::endl;

Loading…
Cancel
Save