Compare commits

...

38 Commits

Author SHA1 Message Date
SChernykh 0ff83a2a81 Merge remote-tracking branch 'upstream/master'
3 years ago
tevador fe4324e8c0
Merge pull request #213 from hyc/m1typo
3 years ago
SChernykh 3c8c7ee097
Optimized dataset read (#211)
3 years ago
Howard Chu 1b3db29fb8 Fix typo for M1 Mac build
3 years ago
SChernykh a44c5a47ec Merge remote-tracking branch 'upstream/master'
3 years ago
tevador c12097400b
Merge pull request #212 from hyc/m1prot
3 years ago
tevador 0db4c57823
Merge pull request #198 from tevador/pr-applem1
3 years ago
tevador 4aae0d834d
Merge pull request #202 from SChernykh/fix-crash
3 years ago
Howard Chu d9b7e8c25d Faster W^X policy for apple silicon macs
3 years ago
SChernykh 2ba7df2844 Fix illegal instruction crash on some ARM systems
3 years ago
SChernykh 862556f239 Fix illegal instruction crash on some ARM systems
3 years ago
SChernykh f50635efd6 Merge remote-tracking branch 'upstream/master'
3 years ago
tevador a38ce601fd Apple silicon: force W^X, enable hardware AES
4 years ago
SChernykh 1ce204fb80 Merge remote-tracking branch 'upstream/master'
4 years ago
SChernykh bbca8b1ea4 Merge remote-tracking branch 'origin/dev'
4 years ago
SChernykh cdeb06ab3f Merge remote-tracking branch 'upstream/master'
4 years ago
SChernykh f7f821631b Merge remote-tracking branch 'upstream/master'
5 years ago
SChernykh 3eaff21eaf Added Intel JCC bug detection
5 years ago
SChernykh 1537612d53 Mitigation for Intel JCC erratum
5 years ago
SChernykh 82e5aa4817 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 42fa6f74a3 Enabled CFROUND
5 years ago
SChernykh d5627e9f0a Enabled FP instructions
5 years ago
SChernykh 1a431a8853 Enabled FSWAP_R and FSCAL_R
5 years ago
SChernykh 1bb8957c2c Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 4b42969a91 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 0db8631b73 Enabled CBRANCH
5 years ago
SChernykh 93afb248bf Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh e431502f95 Update instruction_weights.hpp
5 years ago
SChernykh 4a546761b9 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 930ea8ff20 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh ea81a4d854 Enabled ISTORE
5 years ago
SChernykh b165774643 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 9548422379 Enabled all integer instructions
5 years ago
SChernykh d6512a3a33 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh 37cd162b17 Enabled IADD_RS
5 years ago
SChernykh dec384eaed Temporarily disabled all instructions
5 years ago
SChernykh 2d05741f07 Merge remote-tracking branch 'upstream/dev' into dev
5 years ago
SChernykh e5347497e9 Disabled code execution in interpreted VM
5 years ago

@ -153,7 +153,7 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
src/jit_compiler_a64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# not sure if this check is needed
include(CheckIncludeFile)

@ -15,6 +15,7 @@
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -28,6 +28,7 @@
mov rbx, r9 ;# loop counter
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8

@ -1,17 +1,16 @@
mov ecx, ebp ;# ecx = ma
and ecx, RANDOMX_DATASET_BASE_MASK
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
and edx, RANDOMX_DATASET_BASE_MASK
lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]

@ -8,10 +8,10 @@
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
mov ebx, ebp ;# ecx = ma
and ebx, RANDOMX_DATASET_BASE_MASK
shr ebx, 6 ;# ebx = Dataset block number
xor rbp, rax ;# modify "mx"
mov rbx, rbp ;# ebx = ma
shr rbx, 38
and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

@ -53,8 +53,15 @@ namespace randomx {
int info[4];
cpuid(info, 0);
int nIds = info[0];
manufacturer_string[0] = info[1];
manufacturer_string[1] = info[3];
manufacturer_string[2] = info[2];
manufacturer_string[3] = 0;
if (nIds >= 0x00000001) {
cpuid(info, 0x00000001);
processor_info_data = info[0];
ssse3_ = (info[2] & (1 << 9)) != 0;
aes_ = (info[2] & (1 << 25)) != 0;
}
@ -62,9 +69,13 @@ namespace randomx {
cpuid(info, 0x00000007);
avx2_ = (info[1] & (1 << 5)) != 0;
}
#elif defined(__aarch64__) && defined(HWCAP_AES)
#elif defined(__aarch64__)
#if defined(HWCAP_AES)
long hwcaps = getauxval(AT_HWCAP);
aes_ = (hwcaps & HWCAP_AES) != 0;
#elif defined(__APPLE__)
aes_ = true;
#endif
#endif
//TODO POWER8 AES
}

@ -42,8 +42,34 @@ namespace randomx {
bool hasAvx2() const {
return avx2_;
}
// Returns the vendor identification as a C string, e.g. "GenuineIntel".
// The bytes are the three CPUID leaf-0 words stored in manufacturer_string[0..2];
// the fourth word is written as 0 by the x86 detection path and acts as the
// terminator. NOTE(review): what the buffer holds on non-x86 builds is not
// visible from here - confirm before relying on it there.
const char* manufacturer() const {
	const void* vendor_words = manufacturer_string;
	return static_cast<const char*>(vendor_words);
}
// Bit-field view of the 32-bit processor signature that the x86 detection code
// obtains from CPUID leaf 1 (info[0]) and stores in processor_info_data; this
// struct is overlaid on that integer through the anonymous union in the private
// section. Field order and widths must therefore not be changed.
// NOTE(review): the bit positions given below assume the compiler allocates
// bit-fields starting at the least significant bit (implementation-defined;
// true for the usual x86 ABIs) - confirm for any new toolchain.
struct ProcessorInfo
{
unsigned int stepping : 4; // bits 3:0
unsigned int model : 4; // bits 7:4, low nibble of the model number
unsigned int family : 4; // bits 11:8
unsigned int processor_type : 2; // bits 13:12
unsigned int reserved1 : 2; // bits 15:14, unused
unsigned int ext_model : 4; // bits 19:16, high nibble of the model number (callers combine it as model | ext_model << 4)
unsigned int ext_family : 8; // bits 27:20
unsigned int reserved2 : 4; // bits 31:28, unused
};
// Returns a copy of the decoded processor signature (stepping/model/family
// fields). The value is read through the ProcessorInfo member of the union
// whose integer member is filled from CPUID leaf 1 on x86.
ProcessorInfo processorInfo() const {
return processor_info;
}
private:
// Detected instruction-set features (hardware AES, SSSE3, AVX2).
bool aes_, ssse3_, avx2_;
// Vendor identification words from CPUID leaf 0; element 3 is set to 0 so the
// array can be read as a NUL-terminated string by manufacturer().
int manufacturer_string[4];
// Raw CPUID leaf-1 signature and its bit-field decoding share storage.
// NOTE(review): the detection code writes processor_info_data and
// processorInfo() reads processor_info, i.e. union type punning. Mainstream
// compilers support this, but standard C++ does not guarantee it - confirm
// this is acceptable for all supported toolchains.
union
{
ProcessorInfo processor_info;
int processor_info_data;
};
};
}

@ -36,6 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__)
#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
#define RANDOMX_FORCE_SECURE
#endif

@ -95,6 +95,10 @@ JitCompilerA64::JitCompilerA64()
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
#endif
}
JitCompilerA64::~JitCompilerA64()

@ -307,6 +307,9 @@ literal_v14: .fill 2,8,0
literal_v15: .fill 2,8,0
DECL(randomx_program_aarch64_vm_instructions_end):
# Calculate dataset pointer for dataset read
# Do it here to break false dependency from readReg2 and readReg3 (see next line)
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
@ -324,8 +327,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# mx <-> ma
ror x9, x9, 32
# Calculate dataset pointer for dataset read
mov w10, w9
DECL(randomx_program_aarch64_cacheline_align_mask2):
# Actual mask will be inserted by JIT compiler
and x10, x10, 1

@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "cpu.hpp"
namespace randomx {
/*
@ -197,6 +198,7 @@ namespace randomx {
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t JZ_SHORT = 0x74;
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@ -213,11 +215,52 @@ namespace randomx {
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
// Padding sequences used by h_CBRANCH when BranchesWithin32B is set, to move
// the test+jump sequence to the next 32-byte boundary. Row N holds exactly N
// bytes of padding (N = 0..13); the caller emits JMP_ALIGN_PREFIX[N] with
// length N, so the zero-initialized tail of each 16-byte row is never emitted.
// Rows 1-8 consist only of 0x2E bytes; rows 9-13 begin with a 1- to 5-byte
// sequence (0x90, 0x66 0x90, 0x66 0x66 0x90, 0x0F 0x1F ...) followed by eight
// 0x2E bytes.
// NOTE(review): 0x2E is presumably used as a redundant segment-override prefix
// that lengthens the following instruction without adding a separate
// instruction, and the leading sequences in rows 9-13 look like standard
// NOP encodings - confirm against the Intel SDM before changing any byte.
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{},
{0x2E},
{0x2E, 0x2E},
{0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
bool JitCompilerX86::BranchesWithin32B = false;
// Returns CodeSize, the number of bytes the constructor requests from
// allocMemoryPages for the JIT code buffer.
size_t JitCompilerX86::getCodeSize() {
return CodeSize;
}
JitCompilerX86::JitCompilerX86() {
// CPU-specific tweaks
Cpu cpu;
if (strcmp(cpu.manufacturer(), "GenuineIntel") == 0) {
Cpu::ProcessorInfo info = cpu.processorInfo();
// Intel JCC erratum mitigation
if (info.family == 6) {
const uint32_t model = info.model | (info.ext_model << 4);
const uint32_t stepping = info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
code = (uint8_t*)allocMemoryPages(CodeSize);
memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
@ -323,6 +366,22 @@ namespace randomx {
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
uint32_t alignment_size = 32 - (branch_begin & 31);
if (alignment_size > 8) {
emit(NOPX[alignment_size - 9], alignment_size - 8);
alignment_size = 8;
}
emit(NOPX[alignment_size - 1], alignment_size);
}
}
emit(SUB_EBX);
emit(JNZ);
emit32(prologueSize - codePos - 4);
@ -771,18 +830,42 @@ namespace randomx {
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
int target = registerUsage[reg] + 1;
int32_t jmp_offset = instructionOffsets[target] - (codePos + 16);
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
const uint32_t alignment_size = 32 - (branch_begin & 31);
jmp_offset -= alignment_size;
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size);
}
}
emit(REX_ADD_I);
emitByte(0xc0 + reg);
int shift = instr.getModCond() + ConditionOffset;
const int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
if (jmp_offset >= -128) {
emitByte(JZ_SHORT);
emitByte(jmp_offset);
}
else {
emit(JZ);
emit32(jmp_offset - 4);
}
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;

@ -65,6 +65,9 @@ namespace randomx {
void enableWriting();
void enableExecution();
void enableAll();
static bool BranchesWithin32B;
private:
static InstructionGeneratorX86 engine[256];
std::vector<int32_t> instructionOffsets;

@ -35,6 +35,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
# if defined(__aarch64__) && TARGET_OS_OSX
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
@ -109,9 +114,17 @@ void* allocMemoryPages(std::size_t bytes) {
#else
#define MEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
#ifdef USE_PTHREAD_JIT_WP
#define PEXTRA PROT_EXEC
#else
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#ifdef USE_PTHREAD_JIT_WP
pthread_jit_write_protect_np(false);
#endif
#endif
return mem;
}
@ -129,11 +142,19 @@ static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
}
// Switches JIT memory to the writable (non-executable) state.
// USE_PTHREAD_JIT_WP is defined only for aarch64 macOS builds; every other
// build changes the protection of the given range via pageProtect().
void setPagesRW(void* ptr, std::size_t bytes) {
#ifndef USE_PTHREAD_JIT_WP
	// Generic path: re-protect the given range as read/write.
	pageProtect(ptr, bytes, PAGE_READWRITE);
#else
	// Apple-silicon path: ptr and bytes are not used here.
	// NOTE(review): per Apple's documentation this toggle applies to the
	// calling thread rather than to one address range - confirm that callers
	// invoke it on the thread that will write the code.
	pthread_jit_write_protect_np(false);
#endif
}
// Switches JIT memory to the executable (non-writable) state.
// USE_PTHREAD_JIT_WP is defined only for aarch64 macOS builds; every other
// build changes the protection of the given range via pageProtect().
void setPagesRX(void* ptr, std::size_t bytes) {
#ifndef USE_PTHREAD_JIT_WP
	// Generic path: re-protect the given range as read/execute.
	pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
#else
	// Apple-silicon path: ptr and bytes are not used here.
	// NOTE(review): per Apple's documentation this toggle applies to the
	// calling thread rather than to one address range - confirm that callers
	// invoke it on the thread that will execute the code.
	pthread_jit_write_protect_np(true);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {

Loading…
Cancel
Save