Merge remote-tracking branch 'upstream/master'

master
SChernykh 4 years ago
commit 1ce204fb80

@ -157,6 +157,21 @@ void rx_set_rounding_mode(uint32_t mode) {
}
}
uint32_t rx_get_rounding_mode() {
switch (fegetround()) {
case FE_DOWNWARD:
return RoundDown;
case FE_UPWARD:
return RoundUp;
case FE_TOWARDZERO:
return RoundToZero;
case FE_TONEAREST:
return RoundToNearest;
default:
UNREACHABLE;
}
}
#endif
#ifdef RANDOMX_USE_X87

@ -173,6 +173,10 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
_mm_setcsr(rx_mxcsr_default | (mode << 13));
}
FORCE_INLINE uint32_t rx_get_rounding_mode() {
return (_mm_getcsr() >> 13) & 3;
}
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
#include <cstdint>
#include <stdexcept>
@ -736,6 +740,8 @@ void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
uint32_t rx_get_rounding_mode();
#endif
double loadDoublePortable(const void* addr);

@ -35,3 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__)
#define RANDOMX_FORCE_SECURE
#endif

@ -338,7 +338,7 @@ namespace randomx {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}

@ -36,13 +36,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#include <cfenv>
extern "C" {
randomx_flags randomx_get_flags() {
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
randomx::Cpu cpu;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
if (flags == RANDOMX_FLAG_JIT) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -328,7 +329,7 @@ extern "C" {
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
if (machine->cacheKey != cache->cacheKey) {
if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) {
machine->setCache(cache);
machine->cacheKey = cache->cacheKey;
}
@ -349,6 +350,8 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
fenv_t fpstate;
fegetenv(&fpstate);
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -361,6 +364,7 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
fesetenv(&fpstate);
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
@ -380,4 +384,14 @@ extern "C" {
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
machine->resetRoundingMode();
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
}

@ -240,9 +240,13 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
/**
* Paired functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first is called for the first input value.
* randomx_calculate_hash_next will output the hash value of the previous input.
* Set of functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first will begin a hash calculation.
* randomx_calculate_hash_next will output the hash value of the previous input
* and begin the calculation of the next hash.
* randomx_calculate_hash_last will output the hash value of the previous input.
*
* WARNING: These functions may alter the floating point rounding mode of the calling thread.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param input is a pointer to memory to be hashed. Must not be NULL.
@ -254,6 +258,7 @@ RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *inpu
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
#if defined(__cplusplus)
}

@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
(thread_policy_t)&policy, 1);
#elif defined(_WIN32) || defined(__CYGWIN__)
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__)
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__)
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(cpuid, &cs);

@ -40,9 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "../dataset.hpp"
#include "../blake2/endian.h"
#include "../common.hpp"
#include "../jit_compiler.hpp"
#ifdef _WIN32
#include <windows.h>
#include <VersionHelpers.h>
#include <versionhelpers.h>
#endif
#include "affinity.hpp"
@ -94,6 +95,7 @@ void printUsage(const char* executable) {
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
}
struct MemoryException : public std::exception {
@ -109,11 +111,14 @@ struct DatasetAllocException : public MemoryException {
}
};
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid=-1) {
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
if (rc) {
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
}
}
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
@ -122,19 +127,27 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
if (batch) {
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
}
while (nonce < noncesCount) {
nonce = atomicNonce.fetch_add(1);
if (batch) {
nonce = atomicNonce.fetch_add(1);
}
store32(noncePtr, nonce);
randomx_calculate_hash_next(vm, blockTemplate, sizeof(blockTemplate), &hash);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
}
}
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool ssse3, avx2, autoFlags, noBatch;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
int32_t seedValue;
@ -158,10 +171,11 @@ int main(int argc, char** argv) {
readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.7" << std::endl;
std::cout << "RandomX benchmark v1.1.8" << std::endl;
if (help) {
printUsage(argv[0]);
@ -199,7 +213,7 @@ int main(int argc, char** argv) {
}
if (jit) {
flags |= RANDOMX_FLAG_JIT;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
flags |= RANDOMX_FLAG_SECURE;
#endif
}
@ -211,7 +225,7 @@ int main(int argc, char** argv) {
if (miningMode) {
flags |= RANDOMX_FLAG_FULL_MEM;
}
#ifndef __OpenBSD__
#ifndef RANDOMX_FORCE_SECURE
if (secure) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -263,6 +277,16 @@ int main(int argc, char** argv) {
std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl;
}
MineFunc* func;
if (noBatch) {
func = &mine<false>;
}
else {
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
}
std::cout << "Initializing";
if (miningMode)
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
@ -333,14 +357,14 @@ int main(int argc, char** argv) {
int cpuid = -1;
if (threadAffinity)
cpuid = cpuid_from_mask(threadAffinity, i);
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1);
}
double elapsed = sw.getElapsed();

@ -143,7 +143,7 @@ int main() {
randomx::JitCompiler jit;
jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
jit.generateDatasetInitCode();
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
jit.enableExecution();
#else
jit.enableAll();
@ -954,7 +954,7 @@ int main() {
assert(ibc.memMask == randomx::ScratchpadL3Mask);
});
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
@ -1009,10 +1009,10 @@ int main() {
vm = nullptr;
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
initCache("test key 000");
#ifdef __OpenBSD__
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
#endif
}
@ -1026,9 +1026,6 @@ int main() {
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
randomx_destroy_vm(vm);
vm = nullptr;
auto flags = randomx_get_flags();
randomx_release_cache(cache);
@ -1054,6 +1051,40 @@ int main() {
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
});
if (cache != nullptr)
randomx_release_cache(cache);
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash1[RANDOMX_HASH_SIZE];
char hash2[RANDOMX_HASH_SIZE];
char hash3[RANDOMX_HASH_SIZE];
initCache("test key 000");
char input1[] = "This is a test";
char input2[] = "Lorem ipsum dolor sit amet";
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
randomx_calculate_hash_last(vm, &hash3);
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
});
runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() {
rx_set_rounding_mode(RoundToNearest);
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(rx_get_rounding_mode() == RoundToNearest);
});
randomx_destroy_vm(vm);
vm = nullptr;
if (cache != nullptr)
randomx_release_cache(cache);

@ -54,6 +54,9 @@ public:
{
return program;
}
const uint8_t* getMemory() const {
return mem.memory;
}
protected:
void initialize();
alignas(64) randomx::Program program;

@ -94,7 +94,12 @@ void* allocMemoryPages(std::size_t bytes) {
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
#else
mem = mmap(nullptr, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#if defined(__NetBSD__)
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
#else
#define RESERVED_FLAGS 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#endif
@ -141,7 +146,7 @@ void* allocLargePagesMemory(std::size_t bytes) {
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__)
#elif defined(__OpenBSD__) || defined(__NetBSD__)
mem = MAP_FAILED; // OpenBSD does not support huge pages
#else
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);

@ -54,12 +54,17 @@
<ItemGroup>
<ClCompile Include="..\src\aes_hash.cpp" />
<ClCompile Include="..\src\allocator.cpp" />
<ClCompile Include="..\src\argon2_avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_ssse3.c" />
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\cpu.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
<ClCompile Include="..\src\instruction.cpp" />
<ClCompile Include="..\src\instructions_portable.cpp" />

@ -172,5 +172,14 @@
<ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_avx2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>
Loading…
Cancel
Save