Compare commits

..

3 Commits

Author SHA1 Message Date
SChernykh d5fe2268a5 Fixed light mode hashing
3 years ago
SChernykh 65d0997181 ARMv8: optimized dataset read
3 years ago
SChernykh 7f670bc1a8 Optimized dataset read
3 years ago

@ -153,7 +153,7 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
src/jit_compiler_a64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# not sure if this check is needed
include(CheckIncludeFile)

@ -53,15 +53,8 @@ namespace randomx {
int info[4];
cpuid(info, 0);
int nIds = info[0];
manufacturer_string[0] = info[1];
manufacturer_string[1] = info[3];
manufacturer_string[2] = info[2];
manufacturer_string[3] = 0;
if (nIds >= 0x00000001) {
cpuid(info, 0x00000001);
processor_info_data = info[0];
ssse3_ = (info[2] & (1 << 9)) != 0;
aes_ = (info[2] & (1 << 25)) != 0;
}
@ -69,13 +62,9 @@ namespace randomx {
cpuid(info, 0x00000007);
avx2_ = (info[1] & (1 << 5)) != 0;
}
#elif defined(__aarch64__)
#if defined(HWCAP_AES)
#elif defined(__aarch64__) && defined(HWCAP_AES)
long hwcaps = getauxval(AT_HWCAP);
aes_ = (hwcaps & HWCAP_AES) != 0;
#elif defined(__APPLE__)
aes_ = true;
#endif
#endif
//TODO POWER8 AES
}

@ -42,34 +42,8 @@ namespace randomx {
bool hasAvx2() const {
return avx2_;
}
const char* manufacturer() const {
return (const char*) manufacturer_string;
}
struct ProcessorInfo
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
};
ProcessorInfo processorInfo() const {
return processor_info;
}
private:
bool aes_, ssse3_, avx2_;
int manufacturer_string[4];
union
{
ProcessorInfo processor_info;
int processor_info_data;
};
};
}

@ -36,6 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
#if defined(__OpenBSD__) || defined(__NetBSD__)
#define RANDOMX_FORCE_SECURE
#endif

@ -95,10 +95,6 @@ JitCompilerA64::JitCompilerA64()
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
#endif
}
JitCompilerA64::~JitCompilerA64()

@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "cpu.hpp"
namespace randomx {
/*
@ -198,7 +197,6 @@ namespace randomx {
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t JZ_SHORT = 0x74;
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@ -215,52 +213,11 @@ namespace randomx {
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{},
{0x2E},
{0x2E, 0x2E},
{0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
bool JitCompilerX86::BranchesWithin32B = false;
size_t JitCompilerX86::getCodeSize() {
return CodeSize;
}
JitCompilerX86::JitCompilerX86() {
// CPU-specific tweaks
Cpu cpu;
if (strcmp(cpu.manufacturer(), "GenuineIntel") == 0) {
Cpu::ProcessorInfo info = cpu.processorInfo();
// Intel JCC erratum mitigation
if (info.family == 6) {
const uint32_t model = info.model | (info.ext_model << 4);
const uint32_t stepping = info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
code = (uint8_t*)allocMemoryPages(CodeSize);
memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
@ -366,22 +323,6 @@ namespace randomx {
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
uint32_t alignment_size = 32 - (branch_begin & 31);
if (alignment_size > 8) {
emit(NOPX[alignment_size - 9], alignment_size - 8);
alignment_size = 8;
}
emit(NOPX[alignment_size - 1], alignment_size);
}
}
emit(SUB_EBX);
emit(JNZ);
emit32(prologueSize - codePos - 4);
@ -830,42 +771,18 @@ namespace randomx {
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
int target = registerUsage[reg] + 1;
int32_t jmp_offset = instructionOffsets[target] - (codePos + 16);
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
const uint32_t alignment_size = 32 - (branch_begin & 31);
jmp_offset -= alignment_size;
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size);
}
}
emit(REX_ADD_I);
emitByte(0xc0 + reg);
const int shift = instr.getModCond() + ConditionOffset;
int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
if (jmp_offset >= -128) {
emitByte(JZ_SHORT);
emitByte(jmp_offset);
}
else {
emit(JZ);
emit32(jmp_offset - 4);
}
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;

@ -65,9 +65,6 @@ namespace randomx {
void enableWriting();
void enableExecution();
void enableAll();
static bool BranchesWithin32B;
private:
static InstructionGeneratorX86 engine[256];
std::vector<int32_t> instructionOffsets;

@ -35,11 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
# if defined(__aarch64__) && TARGET_OS_OSX
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
@ -114,17 +109,9 @@ void* allocMemoryPages(std::size_t bytes) {
#else
#define MEXTRA 0
#endif
#ifdef USE_PTHREAD_JIT_WP
#define PEXTRA PROT_EXEC
#else
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#endif
#endif
return mem;
}
@ -142,19 +129,11 @@ static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
}
void setPagesRW(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#else
pageProtect(ptr, bytes, PAGE_READWRITE);
#endif
}
void setPagesRX(void* ptr, std::size_t bytes) {
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(true);
#else
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {

Loading…
Cancel
Save