From 270a4f97fe68881effeea56ac9fd449df16d904a Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 26 Apr 2019 16:05:30 +0200 Subject: [PATCH] Dataset size increased to 2080 MiB Implemented dataset base offset Tweaked SuperscalarHash constants to prevent register collisions --- src/asm/program_read_dataset_sshash_init.inc | 1 + src/asm/program_sshash_constants.inc | 8 +++--- src/common.hpp | 11 ++++++--- src/configuration.h | 7 ++++-- src/dataset.cpp | 15 +++++------ src/jit_compiler_x86.cpp | 5 +++- src/jit_compiler_x86.hpp | 2 +- src/randomx.cpp | 2 +- src/tests/benchmark.cpp | 2 +- src/tests/superscalar-init.cpp | 26 +++++++++----------- src/virtual_machine.cpp | 2 +- src/virtual_machine.hpp | 2 ++ src/vm_compiled.cpp | 4 +-- src/vm_compiled.hpp | 3 ++- src/vm_compiled_light.cpp | 4 +-- src/vm_compiled_light.hpp | 1 + src/vm_interpreted.cpp | 2 +- src/vm_interpreted.hpp | 2 ++ src/vm_interpreted_light.cpp | 1 - 19 files changed, 56 insertions(+), 44 deletions(-) diff --git a/src/asm/program_read_dataset_sshash_init.inc b/src/asm/program_read_dataset_sshash_init.inc index a186d2e..0145f08 100644 --- a/src/asm/program_read_dataset_sshash_init.inc +++ b/src/asm/program_read_dataset_sshash_init.inc @@ -13,4 +13,5 @@ mov ebx, ebp ;# ecx = ma and ebx, 2147483584 ;# align "ma" to the start of a cache line shr ebx, 6 ;# ebx = Dataset block number + ;# add ebx, datasetOffset / 64 ;# call 32768 \ No newline at end of file diff --git a/src/asm/program_sshash_constants.inc b/src/asm/program_sshash_constants.inc index 2044a0e..53dc175 100644 --- a/src/asm/program_sshash_constants.inc +++ b/src/asm/program_sshash_constants.inc @@ -2,14 +2,14 @@ r0_mul: ;#/ 6364136223846793005 db 45, 127, 149, 76, 45, 244, 81, 88 r1_add: - ;#/ 9298410992540426748 - db 252, 161, 245, 89, 136, 151, 10, 129 + ;#/ 9298411001130361340 + db 252, 161, 245, 89, 138, 151, 10, 129 r2_add: ;#/ 12065312585734608966 db 70, 216, 194, 56, 223, 153, 112, 167 r3_add: - ;#/ 9306329213124610396 - db 92, 9, 34, 191, 
28, 185, 38, 129 + ;#/ 9306329213124626780 + db 92, 73, 34, 191, 28, 185, 38, 129 r4_add: ;#/ 5281919268842080866 db 98, 138, 159, 23, 151, 37, 77, 73 diff --git a/src/common.hpp b/src/common.hpp index 84d0e26..1cdc347 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -28,8 +28,9 @@ along with RandomX. If not, see. namespace randomx { static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); - static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); - static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296."); + static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2."); + static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296."); + static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); @@ -56,8 +57,10 @@ namespace randomx { constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; - constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); - constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024; + constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); + constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; + constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; + constexpr uint32_t DatasetExtraItems = 
RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; #ifdef TRACE constexpr bool trace = true; diff --git a/src/configuration.h b/src/configuration.h index bf10f51..32b41d0 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -37,8 +37,11 @@ along with RandomX. If not, see. #define RANDOMX_SUPERSCALAR_LATENCY 170 #define RANDOMX_SUPERSCALAR_MAX_SIZE 512 -//Dataset size in bytes. Must be a power of 2. -#define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024) +//Dataset base size in bytes. Must be a power of 2. +#define RANDOMX_DATASET_BASE_SIZE (2ULL * 1024 * 1024 * 1024) + +//Dataset extra size in bytes. Must be divisible by 64. +#define RANDOMX_DATASET_EXTRA_SIZE 33554368 //Number of instructions in a RandomX program #define RANDOMX_PROGRAM_SIZE 256 diff --git a/src/dataset.cpp b/src/dataset.cpp index ee0958f..0e292b8 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -45,6 +45,7 @@ randomx_dataset::~randomx_dataset() { } static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); +static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE"); void randomx_cache::initialize(const void *seed, size_t seedSize) { uint32_t memory_blocks, segment_length; @@ -117,22 +118,22 @@ namespace randomx { template void Dataset::allocate() { - memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE); + memory = (uint8_t*)Allocator::allocMemory(DatasetSize); } template Dataset::~Dataset() { - Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE); + Allocator::freeMemory(memory, DatasetSize); } template void Cache::allocate() { - memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); + memory = (uint8_t*)Allocator::allocMemory(CacheSize); } template Cache::~Cache() { - Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); + Allocator::freeMemory(memory, CacheSize); } template @@ -160,16 +161,16 @@ namespace randomx { 
template class CacheWithJit; constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; + constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL; constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; - constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; + constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL; constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) { - constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); + constexpr uint32_t mask = CacheSize / CacheLineSize - 1; return memory + (registerValue & mask) * CacheLineSize; } diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index b24eba3..1e295bd 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -217,6 +217,7 @@ namespace randomx { static const uint8_t RET = 0xc3; static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; + static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; static const uint8_t NOP1[] = { 0x90 }; static const uint8_t NOP2[] = { 0x66, 0x90 }; @@ -250,9 +251,11 @@ namespace randomx { generateProgramEpilogue(prog); } - void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { + void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { generateProgramPrologue(prog, pcfg); emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); + emit(ADD_EBX_I); + emit32(datasetOffset / CacheLineSize); emitByte(CALL); emit32(superScalarHashOffset - (codePos + 4)); emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); diff --git 
a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index fec2747..8bccb1f 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -41,7 +41,7 @@ namespace randomx { JitCompilerX86(); ~JitCompilerX86(); void generateProgram(Program&, ProgramConfiguration&); - void generateProgramLight(Program&, ProgramConfiguration&); + void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); template void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); void generateDatasetInitCode(); diff --git a/src/randomx.cpp b/src/randomx.cpp index c75afcd..e126687 100644 --- a/src/randomx.cpp +++ b/src/randomx.cpp @@ -91,7 +91,7 @@ extern "C" { } unsigned long randomx_dataset_item_count() { - return RANDOMX_DATASET_SIZE / RANDOMX_DATASET_ITEM_SIZE; + return randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; } void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index c422702..d10d25e 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -225,7 +225,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl; + std::cout << "Reference result: 918a8bc3ce0e537eec9d3c5e1a8bb3204ae3954f14c50c14810b38e49588a9e0" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } diff --git a/src/tests/superscalar-init.cpp b/src/tests/superscalar-init.cpp index d7eea75..d92fbde 100644 --- a/src/tests/superscalar-init.cpp +++ b/src/tests/superscalar-init.cpp @@ -24,27 +24,25 @@ along with RandomX. If not, see. 
#include "../superscalar.hpp" #include "../common.hpp" -const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; - int main() { - std::cout << "THIS PROGRAM REQUIRES MORE THAN 10 GB OF RAM TO COMPLETE" << std::endl; + std::cout << "THIS PROGRAM REQUIRES MORE THAN 16 GB OF RAM TO COMPLETE" << std::endl; std::vector dummy; constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; //9298410992540426048ULL + constexpr uint64_t superscalarAdd1 = 0x810A978A59F5A1FC; //9298410992540426748ULL; //9298410992540426048ULL constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; - constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; + constexpr uint64_t superscalarAdd3 = 0x8126B91CBF22495C; //9306329213124610396ULL; constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; - constexpr uint32_t totalBlocks = RANDOMX_DATASET_SIZE / randomx::CacheLineSize; + constexpr uint32_t totalItems = randomx::DatasetSize / randomx::CacheLineSize; std::unordered_set registerValues; - registerValues.reserve(totalBlocks); - registerValues.rehash(totalBlocks); + registerValues.reserve(totalItems); + registerValues.rehash(totalItems); int collisionCount[9] = { 0 }; - for (uint32_t blockNumber = 0; blockNumber < totalBlocks; ++blockNumber) { + for (uint32_t itemNumber = 0; itemNumber < totalItems; ++itemNumber) { uint64_t rl[8]; - rl[0] = (blockNumber + 1) * superscalarMul0; + rl[0] = (itemNumber + 1) * superscalarMul0; rl[1] = rl[0] ^ superscalarAdd1; rl[2] = rl[0] ^ superscalarAdd2; rl[3] = rl[0] ^ superscalarAdd3; @@ -57,19 +55,19 @@ int main() { uint64_t reducedValue = rl[i] & 0x3FFFFFFFFFFFF8; //bits 3-53 only 
if (registerValues.find(reducedValue) != registerValues.end()) { blockCollisions++; - std::cout << "Block " << blockNumber << ": collision of register r" << i << std::endl; + std::cout << "Item " << itemNumber << ": collision of register r" << i << std::endl; } else { registerValues.insert(reducedValue); } } collisionCount[blockCollisions]++; - if ((blockNumber % (320 * 1024)) == 0) - std::cout << "Block " << blockNumber << " processed" << std::endl; + if ((itemNumber % (320 * 1024)) == 0) + std::cout << "Item " << itemNumber << " processed" << std::endl; } for (int i = 0; i < 9; ++i) { - std::cout << i << " register(s) collide in " << collisionCount[i] << " blocks" << std::endl; + std::cout << i << " register(s) collide in " << collisionCount[i] << " items" << std::endl; } return 0; diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp index a0e60cb..3881685 100644 --- a/src/virtual_machine.cpp +++ b/src/virtual_machine.cpp @@ -69,7 +69,7 @@ void randomx_vm::initialize() { config.readReg2 = 4 + (addressRegisters & 1); addressRegisters >>= 1; config.readReg3 = 6 + (addressRegisters & 1); - //datasetBase = program.getEntropy(13) % datasetRange; + datasetOffset = (program.getEntropy(13) % (randomx::DatasetExtraItems + 1)) * randomx::CacheLineSize; /* item offset in [0, DatasetExtraItems]; modulo stays uniform even when the extra item count is not 2^k-1 */ constexpr uint64_t mask22bit = (1ULL << 22) - 1; constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>(); store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); diff --git a/src/virtual_machine.hpp b/src/virtual_machine.hpp index f18230c..69910b7 100644 --- a/src/virtual_machine.hpp +++ b/src/virtual_machine.hpp @@ -44,6 +44,8 @@ protected: alignas(16) randomx::ProgramConfiguration config; randomx::MemoryRegisters mem; uint8_t* scratchpad; + uint8_t* datasetBasePtr; + uint32_t datasetOffset; }; namespace randomx { diff --git a/src/vm_compiled.cpp b/src/vm_compiled.cpp index 7e4ef92..25232cb 100644 --- a/src/vm_compiled.cpp +++ b/src/vm_compiled.cpp @@ -28,7 +28,7 @@ namespace randomx { 
template void CompiledVm::setDataset(randomx_dataset* dataset) { mem.memory = dataset->memory; - //datasetBasePtr = dataset.memory; + datasetBasePtr = dataset->memory; } template @@ -36,7 +36,7 @@ namespace randomx { VmBase::generateProgram(seed); randomx_vm::initialize(); compiler.generateProgram(program, config); - //mem.memory = datasetBasePtr + (datasetBase * CacheLineSize); + mem.memory = datasetBasePtr + datasetOffset; execute(); } diff --git a/src/vm_compiled.hpp b/src/vm_compiled.hpp index 5dcf2ae..f39b90a 100644 --- a/src/vm_compiled.hpp +++ b/src/vm_compiled.hpp @@ -48,11 +48,12 @@ namespace randomx { using VmBase::config; using VmBase::reg; using VmBase::scratchpad; + using VmBase::datasetBasePtr; + using VmBase::datasetOffset; protected: void execute(); JitCompilerX86 compiler; - uint8_t* datasetBasePtr; }; using CompiledVmDefault = CompiledVm, true>; diff --git a/src/vm_compiled_light.cpp b/src/vm_compiled_light.cpp index c817191..2cec985 100644 --- a/src/vm_compiled_light.cpp +++ b/src/vm_compiled_light.cpp @@ -27,15 +27,13 @@ namespace randomx { void CompiledLightVm::setCache(randomx_cache* cache) { mem.memory = cache->memory; compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); - //datasetBasePtr = ds.dataset.memory; } template void CompiledLightVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); - compiler.generateProgramLight(program, config); - //mem.memory = datasetBasePtr + (datasetBase * CacheLineSize); + compiler.generateProgramLight(program, config, datasetOffset); CompiledVm::execute(); } diff --git a/src/vm_compiled_light.hpp b/src/vm_compiled_light.hpp index 242680d..1ac3629 100644 --- a/src/vm_compiled_light.hpp +++ b/src/vm_compiled_light.hpp @@ -44,6 +44,7 @@ namespace randomx { using CompiledVm::compiler; using CompiledVm::program; using CompiledVm::config; + using CompiledVm::datasetOffset; }; using CompiledLightVmDefault = CompiledLightVm, true>; diff --git a/src/vm_interpreted.cpp 
b/src/vm_interpreted.cpp index 3d82b18..9656220 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -331,7 +331,7 @@ namespace randomx { mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; mem.mx &= CacheLineAlignMask; - datasetRead(mem.ma, r); + datasetRead(datasetOffset + mem.ma, r); std::swap(mem.mx, mem.ma); if (trace) { diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index e45b4fd..4b08b18 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -57,6 +57,8 @@ namespace randomx { using VmBase::program; using VmBase::config; using VmBase::reg; + using VmBase::datasetBasePtr; + using VmBase::datasetOffset; void* operator new(size_t size) { void* ptr = AlignedAllocator::allocMemory(size); if (ptr == nullptr) diff --git a/src/vm_interpreted_light.cpp b/src/vm_interpreted_light.cpp index 64b0bf8..06757d5 100644 --- a/src/vm_interpreted_light.cpp +++ b/src/vm_interpreted_light.cpp @@ -25,7 +25,6 @@ namespace randomx { template void InterpretedLightVm::setCache(randomx_cache* cache) { mem.memory = cache->memory; - //datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; cachePtr = cache; }