Dataset size increased to 2080 MiB

Implemented dataset base offset
Tweaked SuperscalarHash constants to prevent register collisions
pull/40/head
tevador 5 years ago
parent f66da3911e
commit 270a4f97fe

@ -13,4 +13,5 @@
mov ebx, ebp ;# ebx = ma
and ebx, 2147483584 ;# align "ma" to the start of a cache line
shr ebx, 6 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

@ -2,14 +2,14 @@ r0_mul:
;#/ 6364136223846793005
db 45, 127, 149, 76, 45, 244, 81, 88
r1_add:
;#/ 9298410992540426748
db 252, 161, 245, 89, 136, 151, 10, 129
;#/ 9298411001130361340
db 252, 161, 245, 89, 138, 151, 10, 129
r2_add:
;#/ 12065312585734608966
db 70, 216, 194, 56, 223, 153, 112, 167
r3_add:
;#/ 9306329213124610396
db 92, 9, 34, 191, 28, 185, 38, 129
;#/ 9306329213124626780
db 92, 73, 34, 191, 28, 185, 38, 129
r4_add:
;#/ 5281919268842080866
db 98, 138, 159, 23, 151, 37, 77, 73

@ -28,8 +28,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace randomx {
static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2.");
static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296.");
static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
@ -56,8 +57,10 @@ namespace randomx {
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1);
constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024;
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1);
constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
#ifdef TRACE
constexpr bool trace = true;

@ -37,8 +37,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define RANDOMX_SUPERSCALAR_LATENCY 170
#define RANDOMX_SUPERSCALAR_MAX_SIZE 512
//Dataset size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024)
//Dataset base size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE (2ULL * 1024 * 1024 * 1024)
//Dataset extra size. Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368
//Number of instructions in a RandomX program
#define RANDOMX_PROGRAM_SIZE 256

@ -45,6 +45,7 @@ randomx_dataset::~randomx_dataset() {
}
// Compile-time checks on the Argon2 configuration:
//  - RANDOMX_ARGON_MEMORY must be a multiple of (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS);
//  - ARGON2_BLOCK_SIZE must equal randomx::ArgonBlockSize, which other code uses for size calculations.
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
void randomx_cache::initialize(const void *seed, size_t seedSize) {
uint32_t memory_blocks, segment_length;
@ -117,22 +118,22 @@ namespace randomx {
template<class Allocator>
void Dataset<Allocator>::allocate() {
memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE);
memory = (uint8_t*)Allocator::allocMemory(DatasetSize);
}
template<class Allocator>
Dataset<Allocator>::~Dataset() {
Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE);
Allocator::freeMemory(memory, DatasetSize);
}
template<class Allocator>
void Cache<Allocator>::allocate() {
memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE);
memory = (uint8_t*)Allocator::allocMemory(CacheSize);
}
template<class Allocator>
Cache<Allocator>::~Cache() {
Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE);
Allocator::freeMemory(memory, CacheSize);
}
template<class Allocator>
@ -160,16 +161,16 @@ namespace randomx {
template class CacheWithJit<LargePageAllocator>;
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL;
constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
constexpr uint32_t mask = CacheSize / CacheLineSize - 1;
return memory + (registerValue & mask) * CacheLineSize;
}

@ -217,6 +217,7 @@ namespace randomx {
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
static const uint8_t NOP1[] = { 0x90 };
static const uint8_t NOP2[] = { 0x66, 0x90 };
@ -250,9 +251,11 @@ namespace randomx {
generateProgramEpilogue(prog);
}
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
generateProgramPrologue(prog, pcfg);
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emit(ADD_EBX_I);
emit32(datasetOffset / CacheLineSize);
emitByte(CALL);
emit32(superScalarHashOffset - (codePos + 4));
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);

@ -41,7 +41,7 @@ namespace randomx {
JitCompilerX86();
~JitCompilerX86();
void generateProgram(Program&, ProgramConfiguration&);
void generateProgramLight(Program&, ProgramConfiguration&);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
void generateDatasetInitCode();

@ -91,7 +91,7 @@ extern "C" {
}
unsigned long randomx_dataset_item_count() {
return RANDOMX_DATASET_SIZE / RANDOMX_DATASET_ITEM_SIZE;
return randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE;
}
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {

@ -225,7 +225,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
std::cout << "Reference result: 918a8bc3ce0e537eec9d3c5e1a8bb3204ae3954f14c50c14810b38e49588a9e0" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
}

@ -24,27 +24,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "../superscalar.hpp"
#include "../common.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
int main() {
std::cout << "THIS PROGRAM REQUIRES MORE THAN 10 GB OF RAM TO COMPLETE" << std::endl;
std::cout << "THIS PROGRAM REQUIRES MORE THAN 16 GB OF RAM TO COMPLETE" << std::endl;
std::vector<uint64_t> dummy;
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; //9298410992540426048ULL
constexpr uint64_t superscalarAdd1 = 0x810A978A59F5A1FC; //9298410992540426748ULL; //9298410992540426048ULL
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
constexpr uint64_t superscalarAdd3 = 0x8126B91CBF22495C; //9306329213124610396ULL;
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
constexpr uint32_t totalBlocks = RANDOMX_DATASET_SIZE / randomx::CacheLineSize;
constexpr uint32_t totalItems = randomx::DatasetSize / randomx::CacheLineSize;
std::unordered_set<uint64_t> registerValues;
registerValues.reserve(totalBlocks);
registerValues.rehash(totalBlocks);
registerValues.reserve(totalItems);
registerValues.rehash(totalItems);
int collisionCount[9] = { 0 };
for (uint32_t blockNumber = 0; blockNumber < totalBlocks; ++blockNumber) {
for (uint32_t itemNumber = 0; itemNumber < totalItems; ++itemNumber) {
uint64_t rl[8];
rl[0] = (blockNumber + 1) * superscalarMul0;
rl[0] = (itemNumber + 1) * superscalarMul0;
rl[1] = rl[0] ^ superscalarAdd1;
rl[2] = rl[0] ^ superscalarAdd2;
rl[3] = rl[0] ^ superscalarAdd3;
@ -57,19 +55,19 @@ int main() {
uint64_t reducedValue = rl[i] & 0x3FFFFFFFFFFFF8; //bits 3-53 only
if (registerValues.find(reducedValue) != registerValues.end()) {
blockCollisions++;
std::cout << "Block " << blockNumber << ": collision of register r" << i << std::endl;
std::cout << "Item " << itemNumber << ": collision of register r" << i << std::endl;
}
else {
registerValues.insert(reducedValue);
}
}
collisionCount[blockCollisions]++;
if ((blockNumber % (320 * 1024)) == 0)
std::cout << "Block " << blockNumber << " processed" << std::endl;
if ((itemNumber % (320 * 1024)) == 0)
std::cout << "Item " << itemNumber << " processed" << std::endl;
}
for (int i = 0; i < 9; ++i) {
std::cout << i << " register(s) collide in " << collisionCount[i] << " blocks" << std::endl;
std::cout << i << " register(s) collide in " << collisionCount[i] << " items" << std::endl;
}
return 0;

@ -69,7 +69,7 @@ void randomx_vm::initialize() {
config.readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1;
config.readReg3 = 6 + (addressRegisters & 1);
//datasetBase = program.getEntropy(13) % datasetRange;
datasetOffset = (program.getEntropy(13) & randomx::DatasetExtraItems) * randomx::CacheLineSize;
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);

@ -44,6 +44,8 @@ protected:
alignas(16) randomx::ProgramConfiguration config;
randomx::MemoryRegisters mem;
uint8_t* scratchpad;
uint8_t* datasetBasePtr;
uint32_t datasetOffset;
};
namespace randomx {

@ -28,7 +28,7 @@ namespace randomx {
// Attaches a dataset to this VM: both mem.memory and datasetBasePtr are set to dataset->memory.
// The base pointer is stored separately from mem.memory; another fragment of this file later
// assigns mem.memory = datasetBasePtr + datasetOffset before executing a program.
template<class Allocator, bool softAes>
void CompiledVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
mem.memory = dataset->memory;
//datasetBasePtr = dataset.memory;
datasetBasePtr = dataset->memory;
}
template<class Allocator, bool softAes>
@ -36,7 +36,7 @@ namespace randomx {
VmBase<Allocator, softAes>::generateProgram(seed);
randomx_vm::initialize();
compiler.generateProgram(program, config);
//mem.memory = datasetBasePtr + (datasetBase * CacheLineSize);
mem.memory = datasetBasePtr + datasetOffset;
execute();
}

@ -48,11 +48,12 @@ namespace randomx {
using VmBase<Allocator, softAes>::config;
using VmBase<Allocator, softAes>::reg;
using VmBase<Allocator, softAes>::scratchpad;
using VmBase<Allocator, softAes>::datasetBasePtr;
using VmBase<Allocator, softAes>::datasetOffset;
protected:
void execute();
JitCompilerX86 compiler;
uint8_t* datasetBasePtr;
};
using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true>;

@ -27,15 +27,13 @@ namespace randomx {
void CompiledLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
mem.memory = cache->memory;
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
//datasetBasePtr = ds.dataset.memory;
}
template<class Allocator, bool softAes>
void CompiledLightVm<Allocator, softAes>::run(void* seed) {
VmBase<Allocator, softAes>::generateProgram(seed);
randomx_vm::initialize();
compiler.generateProgramLight(program, config);
//mem.memory = datasetBasePtr + (datasetBase * CacheLineSize);
compiler.generateProgramLight(program, config, datasetOffset);
CompiledVm<Allocator, softAes>::execute();
}

@ -44,6 +44,7 @@ namespace randomx {
using CompiledVm<Allocator, softAes>::compiler;
using CompiledVm<Allocator, softAes>::program;
using CompiledVm<Allocator, softAes>::config;
using CompiledVm<Allocator, softAes>::datasetOffset;
};
using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;

@ -331,7 +331,7 @@ namespace randomx {
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
mem.mx &= CacheLineAlignMask;
datasetRead(mem.ma, r);
datasetRead(datasetOffset + mem.ma, r);
std::swap(mem.mx, mem.ma);
if (trace) {

@ -57,6 +57,8 @@ namespace randomx {
using VmBase<Allocator, softAes>::program;
using VmBase<Allocator, softAes>::config;
using VmBase<Allocator, softAes>::reg;
using VmBase<Allocator, softAes>::datasetBasePtr;
using VmBase<Allocator, softAes>::datasetOffset;
void* operator new(size_t size) {
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
if (ptr == nullptr)

@ -25,7 +25,6 @@ namespace randomx {
// Attaches a cache to this VM: mem.memory is pointed at cache->memory and the
// cache object itself is remembered in cachePtr.
template<class Allocator, bool softAes>
void InterpretedLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
mem.memory = cache->memory;
//datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
cachePtr = cache;
}

Loading…
Cancel
Save