- void generateCode(P& prog) {
- for (unsigned i = 0; i < prog.getSize(); ++i) {
- Instruction& instr = prog(i);
- instr.src %= RegistersCount;
- instr.dst %= RegistersCount;
- generateCode(instr, i);
- }
- }
-
- void generateDatasetInitCode();
-
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&);
int getConditionRegister();
@@ -84,8 +72,8 @@ namespace RandomX {
void handleCondition(Instruction&, int);
- template
void generateCode(Instruction&, int);
+ void generateSuperscalarCode(Instruction &, std::vector &);
void emitByte(uint8_t val) {
code[codePos] = val;
diff --git a/src/Program.hpp b/src/Program.hpp
index 854a557..c7ba0f9 100644
--- a/src/Program.hpp
+++ b/src/Program.hpp
@@ -25,7 +25,7 @@ along with RandomX. If not, see.
#include "Instruction.hpp"
#include "blake2/endian.h"
-namespace RandomX {
+namespace randomx {
struct ProgramConfiguration {
uint64_t eMask[2];
@@ -59,46 +59,4 @@ namespace RandomX {
};
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
-
- class SuperscalarProgram {
- public:
- Instruction& operator()(int pc) {
- return programBuffer[pc];
- }
- friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
- p.print(os);
- return os;
- }
- uint32_t getSize() {
- return size;
- }
- void setSize(uint32_t val) {
- size = val;
- }
- int getAddressRegister() {
- return addrReg;
- }
- void setAddressRegister(uint32_t val) {
- addrReg = val;
- }
- double ipc;
- int codeSize;
- int macroOps;
- int decodeCycles;
- int cpuLatency;
- int asicLatency;
- int mulCount;
- int cpuLatencies[8];
- int asicLatencies[8];
- private:
- void print(std::ostream& os) const {
- for (unsigned i = 0; i < size; ++i) {
- auto instr = programBuffer[i];
- os << instr;
- }
- }
- Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE];
- uint32_t size;
- int addrReg;
- };
}
diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp
index 4af0374..05bf79b 100644
--- a/src/VirtualMachine.cpp
+++ b/src/VirtualMachine.cpp
@@ -24,9 +24,60 @@ along with RandomX. If not, see.
#include
#include
#include "intrinPortable.h"
+#include "allocator.hpp"
-std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
- for (int i = 0; i < RandomX::RegistersCount; ++i)
+randomx_vm::~randomx_vm() {
+
+}
+
+void randomx_vm::resetRoundingMode() {
+ initFpu();
+}
+
+constexpr int mantissaSize = 52;
+constexpr int exponentSize = 11;
+constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
+constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
+constexpr int exponentBias = 1023;
+
+static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
+ auto exponent = entropy >> 59; //0..31
+ auto mantissa = entropy & mantissaMask;
+ exponent += exponentBias;
+ exponent &= exponentMask;
+ exponent <<= mantissaSize;
+ return exponent | mantissa;
+}
+
+void randomx_vm::initialize() { // derives the per-program VM state from the program's entropy words
+	store64(&reg.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0))); // entropy words 0..7 fill the lo/hi halves of a[0]..a[3]
+	store64(&reg.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1)));
+	store64(&reg.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2)));
+	store64(&reg.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3)));
+	store64(&reg.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4)));
+	store64(&reg.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5)));
+	store64(&reg.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6)));
+	store64(&reg.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7)));
+	mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask; // entropy word 8, restricted by CacheLineAlignMask
+	mem.mx = program.getEntropy(10);
+	auto addressRegisters = program.getEntropy(12); // consumed one bit at a time, least significant bit first
+	config.readReg0 = 0 + (addressRegisters & 1); // index 0 or 1
+	addressRegisters >>= 1;
+	config.readReg1 = 2 + (addressRegisters & 1); // index 2 or 3
+	addressRegisters >>= 1;
+	config.readReg2 = 4 + (addressRegisters & 1); // index 4 or 5
+	addressRegisters >>= 1;
+	config.readReg3 = 6 + (addressRegisters & 1); // index 6 or 7
+	//datasetBase = program.getEntropy(13) % datasetRange;
+	constexpr uint64_t mask22bit = (1ULL << 22) - 1;
+	constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
+	store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); // low 22 entropy bits OR'ed with the exponent mask
+	store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
+}
+
+//TODO
+std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
+ for (int i = 0; i < randomx::RegistersCount; ++i)
os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
for (int i = 0; i < 4; ++i)
os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
@@ -40,66 +91,32 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
return os;
}
-namespace RandomX {
-
- constexpr int mantissaSize = 52;
- constexpr int exponentSize = 11;
- constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
- constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
- constexpr int exponentBias = 1023;
-
- static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
- auto exponent = entropy >> 59; //0..31
- auto mantissa = entropy & mantissaMask;
- exponent += exponentBias;
- exponent &= exponentMask;
- exponent <<= mantissaSize;
- return exponent | mantissa;
- }
+namespace randomx {
- VirtualMachine::VirtualMachine() {
- mem.ds.dataset.memory = nullptr;
+ template
+ VmBase::~VmBase() {
+ Allocator::freeMemory(scratchpad, ScratchpadSize);
}
- void VirtualMachine::resetRoundingMode() {
- initFpu();
+ template
+ bool VmBase::allocate() {
+ scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize);
+ return scratchpad != nullptr;
}
- void VirtualMachine::initialize() {
- store64(®.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0)));
- store64(®.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1)));
- store64(®.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2)));
- store64(®.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3)));
- store64(®.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4)));
- store64(®.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5)));
- store64(®.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6)));
- store64(®.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7)));
- mem.ma = program.getEntropy(8) & CacheLineAlignMask;
- mem.mx = program.getEntropy(10);
- auto addressRegisters = program.getEntropy(12);
- config.readReg0 = 0 + (addressRegisters & 1);
- addressRegisters >>= 1;
- config.readReg1 = 2 + (addressRegisters & 1);
- addressRegisters >>= 1;
- config.readReg2 = 4 + (addressRegisters & 1);
- addressRegisters >>= 1;
- config.readReg3 = 6 + (addressRegisters & 1);
- datasetBase = program.getEntropy(13) % datasetRange;
- constexpr uint64_t mask22bit = (1ULL << 22) - 1;
- constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
- store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
- store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
+ template
+ void VmBase::generate(void* seed, void* buffer, size_t bufferSize) {
+ fillAes1Rx4(seed, bufferSize, buffer);
}
- template
- void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash) {
- if (scratchpadSize > 0) {
- hashAes1Rx4(scratchpad, scratchpadSize, ®.a);
- }
- blake2b(outHash, ResultSize, ®, sizeof(RegisterFile), nullptr, 0);
+ template
+ void VmBase::getFinalResult(void* out, size_t outSize) {
+ hashAes1Rx4(scratchpad, ScratchpadSize, ®.a);
+ blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
}
- template void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash);
- template void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash);
-
+ template class VmBase, false>;
+ template class VmBase, true>;
+ template class VmBase;
+ template class VmBase;
}
\ No newline at end of file
diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp
index b8382f6..15aa6b9 100644
--- a/src/VirtualMachine.hpp
+++ b/src/VirtualMachine.hpp
@@ -18,38 +18,40 @@ along with RandomX. If not, see.
*/
#pragma once
+
#include
#include "common.hpp"
+#include "dataset.hpp"
#include "Program.hpp"
-namespace RandomX {
+/* Global namespace for C binding */
+struct randomx_vm {
+	virtual ~randomx_vm() = 0; // pure virtual, but still defined out of line in VirtualMachine.cpp
+	virtual bool allocate() = 0; // returns false when the scratchpad could not be allocated (see VmBase::allocate)
+	virtual void generate(void* seed, void* buffer, size_t bufferSize) = 0; // fills buffer with bufferSize bytes derived from seed
+	void resetRoundingMode(); // calls initFpu()
+	virtual void initialize(); // sets registers, memory registers and config from the program's entropy
+	virtual void execute() = 0;
+	virtual void getFinalResult(void* out, size_t outSize) = 0; // writes outSize bytes of result to out
+	virtual void setDataset(randomx_dataset* dataset) { } // no-op unless overridden
+	virtual void setCache(randomx_cache* cache) { } // no-op unless overridden
+
+	alignas(64) randomx::Program program;
+	alignas(64) randomx::RegisterFile reg;
+	alignas(16) randomx::ProgramConfiguration config;
+	randomx::MemoryRegisters mem;
+	uint8_t* scratchpad = nullptr; // null until allocate() succeeds, so ~VmBase() never hands an indeterminate pointer to Allocator::freeMemory
+};
- class VirtualMachine {
+namespace randomx {
+
+ template
+ class VmBase : public randomx_vm {
public:
- VirtualMachine();
- virtual ~VirtualMachine() {}
- virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
- void setScratchpad(void* ptr) {
- scratchpad = (uint8_t*)ptr;
- }
- void resetRoundingMode();
- virtual void initialize();
- virtual void execute() = 0;
- template
- void getResult(void* scratchpad, size_t scratchpadSize, void* outHash);
- const RegisterFile& getRegisterFile() {
- return reg;
- }
- Program* getProgramBuffer() {
- return &program;
- }
- protected:
- alignas(64) Program program;
- alignas(64) RegisterFile reg;
- alignas(16) ProgramConfiguration config;
- MemoryRegisters mem;
- uint8_t* scratchpad;
- uint32_t datasetRange;
- uint32_t datasetBase;
+ ~VmBase() override;
+ bool allocate() override;
+ void generate(void* seed, void* buffer, size_t bufferSize) override;
+ void getFinalResult(void* out, size_t outSize) override;
};
+
}
\ No newline at end of file
diff --git a/src/allocator.cpp b/src/allocator.cpp
new file mode 100644
index 0000000..f5a9a06
--- /dev/null
+++ b/src/allocator.cpp
@@ -0,0 +1,52 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see.
+*/
+
+#pragma once
+
+#include "allocator.hpp"
+#include "virtualMemory.hpp"
+#include "intrinPortable.h"
+#include "common.hpp"
+
+namespace randomx {
+
+ template
+ void* AlignedAllocator::allocMemory(size_t count) {
+ return _mm_malloc(count, alignment);
+ }
+
+ template
+ void AlignedAllocator::freeMemory(void* ptr, size_t count) {
+ _mm_free(ptr);
+ }
+
+ template void* AlignedAllocator::allocMemory(size_t count);
+ template void AlignedAllocator::freeMemory(void* ptr, size_t count);
+ template void* AlignedAllocator::allocMemory(size_t count);
+ template void AlignedAllocator::freeMemory(void* ptr, size_t count);
+
+ void* LargePageAllocator::allocMemory(size_t count) {
+ return allocLargePagesMemory(count);
+ }
+
+	void LargePageAllocator::freeMemory(void* ptr, size_t count) { // releases a block of count bytes through freePagedMemory
+		freePagedMemory(ptr, count);
+	}
+
+}
\ No newline at end of file
diff --git a/src/allocator.hpp b/src/allocator.hpp
new file mode 100644
index 0000000..8d7402c
--- /dev/null
+++ b/src/allocator.hpp
@@ -0,0 +1,37 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see.
+*/
+
+#pragma once
+
+#include
+
+namespace randomx {
+
+ template
+ struct AlignedAllocator {
+ static void* allocMemory(size_t);
+ static void freeMemory(void*, size_t);
+ };
+
+ struct LargePageAllocator {
+ static void* allocMemory(size_t);
+ static void freeMemory(void*, size_t);
+ };
+
+}
\ No newline at end of file
diff --git a/src/common.hpp b/src/common.hpp
index ade8abc..4de76b2 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -23,8 +23,9 @@ along with RandomX. If not, see.
#include
#include "blake2/endian.h"
#include "configuration.h"
+#include "randomx.h"
-namespace RandomX {
+namespace randomx {
static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2.");
@@ -58,6 +59,7 @@ namespace RandomX {
constexpr int ArgonBlockSize = 1024;
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
constexpr int CacheLineSize = 64;
+ constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1);
constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024;
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
@@ -98,39 +100,9 @@ namespace RandomX {
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
constexpr int RegisterNeedsSib = 4; //x86 r12 register
- struct Cache {
- uint8_t* memory;
- uint64_t size;
- };
-
- struct Dataset : public Cache {
- };
-
- class ILightClientAsyncWorker {
- public:
- virtual ~ILightClientAsyncWorker() {}
- virtual void prepareBlock(addr_t) = 0;
- virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
- virtual const uint64_t* getBlock(addr_t) = 0;
- virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
- virtual void sync() = 0;
- const Cache& getCache() {
- return cache;
- }
- protected:
- ILightClientAsyncWorker(const Cache& c) : cache(c) {}
- const Cache& cache;
- };
-
- union dataset_t {
- Dataset dataset;
- Cache cache;
- ILightClientAsyncWorker* asyncWorker;
- };
-
struct MemoryRegisters {
addr_t mx, ma;
- dataset_t ds;
+ uint8_t* memory = nullptr;
};
struct RegisterFile {
@@ -141,9 +113,8 @@ namespace RandomX {
};
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(®)[RegistersCount]);
-
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
- typedef void(*DatasetInitFunc)(uint8_t* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+ typedef void(*DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
}
-std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf);
+std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);
diff --git a/src/dataset.cpp b/src/dataset.cpp
index 40e72b1..e632547 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -22,14 +22,17 @@ along with RandomX. If not, see.
#include
#include
#include
+#include
#include "common.hpp"
#include "dataset.hpp"
-#include "Cache.hpp"
#include "virtualMemory.hpp"
-#include "softAes.h"
-#include "squareHash.h"
+#include "superscalarGenerator.hpp"
+#include "Blake2Generator.hpp"
+#include "reciprocal.h"
#include "blake2/endian.h"
+#include "argon2.h"
+#include "argon2_core.h"
#if defined(__SSE2__)
#include
@@ -38,113 +41,174 @@ along with RandomX. If not, see.
#define PREFETCH(memory)
#endif
-namespace RandomX {
+randomx_dataset::~randomx_dataset() {
-#if true //RANDOMX_ARGON_GROWTH != 0 || (!defined(_M_X64) && !defined(__x86_64__))
- static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) {
- uint8_t* mixBlock;
- if (RANDOMX_ARGON_GROWTH == 0) {
- constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
- mixBlock = cache.memory + (currentIndex & mask) * CacheLineSize;
- }
- else {
- const uint32_t modulus = cache.size / CacheLineSize;
- mixBlock = cache.memory + (currentIndex % modulus) * CacheLineSize;
- }
- PREFETCHNTA(mixBlock);
- nextIndex = squareHash(currentIndex + nextIndex);
- return mixBlock;
- }
+}
- static FORCE_INLINE void mixCache(uint8_t* mixBlock, uint64_t& c0, uint64_t& c1, uint64_t& c2, uint64_t& c3, uint64_t& c4, uint64_t& c5, uint64_t& c6, uint64_t& c7) {
- c0 ^= load64(mixBlock + 0);
- c1 ^= load64(mixBlock + 8);
- c2 ^= load64(mixBlock + 16);
- c3 ^= load64(mixBlock + 24);
- c4 ^= load64(mixBlock + 32);
- c5 ^= load64(mixBlock + 40);
- c6 ^= load64(mixBlock + 48);
- c7 ^= load64(mixBlock + 56);
+static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
+
+void randomx_cache::initialize(const void *seed, size_t seedSize) {
+ uint32_t memory_blocks, segment_length;
+ argon2_instance_t instance;
+ argon2_context context;
+
+ context.out = nullptr;
+ context.outlen = 0;
+ context.pwd = CONST_CAST(uint8_t *)seed;
+ context.pwdlen = (uint32_t)seedSize;
+ context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
+ context.saltlen = (uint32_t)randomx::ArgonSaltSize;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.t_cost = RANDOMX_ARGON_ITERATIONS;
+ context.m_cost = RANDOMX_ARGON_MEMORY;
+ context.lanes = RANDOMX_ARGON_LANES;
+ context.threads = 1;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = ARGON2_DEFAULT_FLAGS;
+ context.version = ARGON2_VERSION_NUMBER;
+
+ /* 2. Align memory size */
+ /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
+ memory_blocks = context.m_cost;
+
+ segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
+
+ instance.version = context.version;
+ instance.memory = NULL;
+ instance.passes = context.t_cost;
+ instance.memory_blocks = memory_blocks;
+ instance.segment_length = segment_length;
+ instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
+ instance.lanes = context.lanes;
+ instance.threads = context.threads;
+ instance.type = Argon2_d;
+ instance.memory = (block*)memory;
+
+ if (instance.threads > instance.lanes) {
+ instance.threads = instance.lanes;
}
- void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations) {
- uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
-
- c0 = blockNumber;
- c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
-
- uint8_t* mixBlock;
-
- for (auto i = 0; i < iterations; ++i) {
- mixBlock = selectMixBlock(cache, c0, c1);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
-
- mixBlock = selectMixBlock(cache, c1, c2);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+ /* 3. Initialization: Hashing inputs, allocating memory, filling first
+ * blocks
+ */
+ argon_initialize(&instance, &context);
+
+ fill_memory_blocks(&instance);
+
+ reciprocalCache.clear();
+ randomx::Blake2Generator gen(seed, 1000);
+ for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+ randomx::generateSuperscalar(programs[i], gen);
+ for (unsigned j = 0; j < programs[i].getSize(); ++j) {
+ auto& instr = programs[i](j);
+ if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) {
+ auto rcp = reciprocal(instr.getImm32());
+ instr.setImm32(reciprocalCache.size());
+ reciprocalCache.push_back(rcp);
+ }
+ }
+ }
+}
- mixBlock = selectMixBlock(cache, c2, c3);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+namespace randomx {
- mixBlock = selectMixBlock(cache, c3, c4);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+	template<class Allocator>
+	bool Dataset<Allocator>::allocate() { // allocates RANDOMX_DATASET_SIZE bytes for the dataset through Allocator
+		memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE); // AlignedAllocator forwards to _mm_malloc, which can return nullptr
+		return memory != nullptr; // report allocation failure instead of always claiming success, as VmBase::allocate() does
+	}
- mixBlock = selectMixBlock(cache, c4, c5);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+ template
+ Dataset::~Dataset() {
+ Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE);
+ }
- mixBlock = selectMixBlock(cache, c5, c6);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+	template<class Allocator>
+	bool Cache<Allocator>::allocate() { // allocates RANDOMX_ARGON_MEMORY blocks of ARGON2_BLOCK_SIZE bytes for the cache through Allocator
+		memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); // AlignedAllocator forwards to _mm_malloc, which can return nullptr
+		return memory != nullptr; // report allocation failure instead of always claiming success, as VmBase::allocate() does
+	}
- mixBlock = selectMixBlock(cache, c6, c7);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
+ template
+ Cache::~Cache() {
+ Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE);
+ }
- mixBlock = selectMixBlock(cache, c7, c0);
- mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
- }
+ template
+ DatasetInitFunc Cache::getInitFunc() {
+ return &initDataset;
+ }
- store64(out + 0, c0);
- store64(out + 8, c1);
- store64(out + 16, c2);
- store64(out + 24, c3);
- store64(out + 32, c4);
- store64(out + 40, c5);
- store64(out + 48, c6);
- store64(out + 56, c7);
+ template
+ DatasetInitFunc CacheWithJit::getInitFunc() {
+ return jit.getDatasetInitFunc();
}
-#endif
- void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
- uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset.memory + memory.ma);
- memory.mx ^= addr;
- memory.mx &= -64; //align to cache line
- std::swap(memory.mx, memory.ma);
- PREFETCHNTA(memory.ds.dataset.memory + memory.ma);
- for (int i = 0; i < RegistersCount; ++i)
- reg.r[i] ^= datasetLine[i];
+ template
+ void CacheWithJit::initialize(const void *seed, size_t seedSize) {
+ randomx_cache::initialize(seed, seedSize);
+ jit.generateSuperscalarHash(programs, reciprocalCache);
+ jit.generateDatasetInitCode();
}
- void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t (®)[RegistersCount]) {
- memory.mx ^= addr;
- memory.mx &= CacheLineAlignMask; //align to cache line
- Cache& cache = memory.ds.cache;
- uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
- initBlock(cache, (uint8_t*)datasetLine, memory.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
- for (int i = 0; i < RegistersCount; ++i)
- reg[i] ^= datasetLine[i];
- std::swap(memory.mx, memory.ma);
+ template class Dataset>;
+ template class Dataset;
+ template class Cache>;
+ template class Cache;
+ template class CacheWithJit>;
+ template class CacheWithJit;
+
+ constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
+ constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL;
+ constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
+ constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
+ constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
+ constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
+ constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
+ constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
+
+ static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
+ constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
+ return memory + (registerValue & mask) * CacheLineSize;
}
- void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]) {
- ILightClientAsyncWorker* aw = memory.ds.asyncWorker;
- const uint64_t* datasetLine = aw->getBlock(memory.ma);
- for (int i = 0; i < RegistersCount; ++i)
- reg[i] ^= datasetLine[i];
- memory.mx ^= addr;
- memory.mx &= CacheLineAlignMask; //align to cache line
- std::swap(memory.mx, memory.ma);
- aw->prepareBlock(memory.ma);
+ void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber) {
+ int_reg_t rl[8];
+ uint8_t* mixBlock;
+ uint64_t registerValue = blockNumber;
+ rl[0] = (blockNumber + 1) * superscalarMul0;
+ rl[1] = rl[0] ^ superscalarAdd1;
+ rl[2] = rl[0] ^ superscalarAdd2;
+ rl[3] = rl[0] ^ superscalarAdd3;
+ rl[4] = rl[0] ^ superscalarAdd4;
+ rl[5] = rl[0] ^ superscalarAdd5;
+ rl[6] = rl[0] ^ superscalarAdd6;
+ rl[7] = rl[0] ^ superscalarAdd7;
+ for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+ mixBlock = getMixBlock(registerValue, cache->memory);
+ SuperscalarProgram& prog = cache->programs[i];
+
+ executeSuperscalar(rl, prog, &cache->reciprocalCache);
+
+ for (unsigned q = 0; q < 8; ++q)
+ rl[q] ^= load64(mixBlock + 8 * q);
+
+ registerValue = rl[prog.getAddressRegister()];
+ }
+
+ memcpy(out, &rl, CacheLineSize);
}
- void datasetAlloc(dataset_t& ds, bool largePages) {
+ void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock) {
+ for (uint32_t blockNumber = startBlock; blockNumber < endBlock; ++blockNumber, dataset += CacheLineSize)
+ initDatasetBlock(cache, dataset, blockNumber);
+ }
+
+ /*void datasetAlloc(dataset_t& ds, bool largePages) {
if (std::numeric_limits::max() < RANDOMX_DATASET_SIZE)
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
if (largePages) {
@@ -158,14 +222,8 @@ namespace RandomX {
}
}
- void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount) {
- for (uint64_t i = startBlock; i < startBlock + blockCount; ++i) {
- initBlock(cache, ds.memory + i * CacheLineSize, i, RANDOMX_CACHE_ACCESSES / 8);
- }
- }
-
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
ds.cache.memory = allocCache(ds.cache.size, largePages);
argonFill(ds.cache, seed, SeedSize);
- }
+ }*/
}
diff --git a/src/dataset.hpp b/src/dataset.hpp
index fcc863b..12c2021 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -20,26 +20,62 @@ along with RandomX. If not, see.
#pragma once
#include
+#include
#include "intrinPortable.h"
#include "common.hpp"
+#include "randomx.h"
+#include "Program.hpp"
+#include "superscalar_program.hpp"
+#include "JitCompilerX86.hpp"
+#include "allocator.hpp"
-namespace RandomX {
+struct randomx_dataset {
+ virtual ~randomx_dataset() = 0;
+ virtual bool allocate() = 0;
+ uint8_t* memory = nullptr;
+};
-#if false //RANDOMX_ARGON_GROWTH == 0 && (defined(_M_X64) || defined(__x86_64__))
- extern "C"
-#endif
- void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations);
+struct randomx_cache : public randomx_dataset {
+ virtual randomx::DatasetInitFunc getInitFunc() = 0;
+ virtual void initialize(const void *seed, size_t seedSize); //argon2
+ randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
+ std::vector reciprocalCache;
+};
- void datasetAlloc(dataset_t& ds, bool largePages);
- void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount);
- void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
+namespace randomx {
- void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
+ template
+ struct Dataset : public randomx_dataset {
+ ~Dataset() override;
+ bool allocate() override;
+ };
- void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
+ using DatasetDefault = Dataset>;
+ using DatasetLargePage = Dataset;
- void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
-}
+ template
+ struct Cache : public randomx_cache {
+ ~Cache() override;
+ bool allocate() override;
+ DatasetInitFunc getInitFunc() override;
+ };
+
+ template
+ struct CacheWithJit : public Cache {
+ using Cache::programs;
+ using Cache::reciprocalCache;
+ void initialize(const void *seed, size_t seedSize) override;
+ DatasetInitFunc getInitFunc() override;
+ JitCompilerX86 jit;
+ };
+ using CacheDefault = Cache>;
+ using CacheWithJitDefault = CacheWithJit>;
+ using CacheLargePage = Cache;
+ using CacheWithJitLargePage = CacheWithJit;
+
+ void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
+ void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+}
diff --git a/src/main.cpp b/src/main.cpp
index 2b653ae..cbf2a61 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -17,31 +17,28 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
//#define TRACE
-#include "InterpretedVirtualMachine.hpp"
-#include "CompiledVirtualMachine.hpp"
-#include "CompiledLightVirtualMachine.hpp"
-#include "AssemblyGeneratorX86.hpp"
+
+//#include "AssemblyGeneratorX86.hpp"
#include "Stopwatch.hpp"
-#include "blake2/blake2.h"
+//#include "blake2/blake2.h"
#include "blake2/endian.h"
#include
#include
#include
#include
#include
-#include "Program.hpp"
+//#include "Program.hpp"
#include
+#include
#include