Configurable parameters, part 2

feature/configurable
SChernykh 5 years ago
parent 020958ed9c
commit c28144e54e

@ -27,6 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "soft_aes.h"
#include "randomx.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
@ -157,15 +158,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609
template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
@ -174,14 +166,14 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0);
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
key0 = RandomX_CurrentConfig.fillAes4Rx4_Key[0];
key1 = RandomX_CurrentConfig.fillAes4Rx4_Key[1];
key2 = RandomX_CurrentConfig.fillAes4Rx4_Key[2];
key3 = RandomX_CurrentConfig.fillAes4Rx4_Key[3];
key4 = RandomX_CurrentConfig.fillAes4Rx4_Key[4];
key5 = RandomX_CurrentConfig.fillAes4Rx4_Key[5];
key6 = RandomX_CurrentConfig.fillAes4Rx4_Key[6];
key7 = RandomX_CurrentConfig.fillAes4Rx4_Key[7];
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);

@ -1,41 +0,0 @@
; File start: ..\src\configuration.h
RANDOMX_CACHE_ACCESSES EQU 8t
RANDOMX_SUPERSCALAR_LATENCY EQU 170t
RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
RANDOMX_SCRATCHPAD_L3 EQU 2097152t
RANDOMX_SCRATCHPAD_L2 EQU 262144t
RANDOMX_SCRATCHPAD_L1 EQU 16384t
RANDOMX_JUMP_BITS EQU 8t
RANDOMX_JUMP_OFFSET EQU 8t
RANDOMX_FREQ_IADD_RS EQU 25t
RANDOMX_FREQ_IADD_M EQU 7t
RANDOMX_FREQ_ISUB_R EQU 16t
RANDOMX_FREQ_ISUB_M EQU 7t
RANDOMX_FREQ_IMUL_R EQU 16t
RANDOMX_FREQ_IMUL_M EQU 4t
RANDOMX_FREQ_IMULH_R EQU 4t
RANDOMX_FREQ_IMULH_M EQU 1t
RANDOMX_FREQ_ISMULH_R EQU 4t
RANDOMX_FREQ_ISMULH_M EQU 1t
RANDOMX_FREQ_IMUL_RCP EQU 8t
RANDOMX_FREQ_INEG_R EQU 2t
RANDOMX_FREQ_IXOR_R EQU 15t
RANDOMX_FREQ_IXOR_M EQU 5t
RANDOMX_FREQ_IROR_R EQU 8t
RANDOMX_FREQ_IROL_R EQU 2t
RANDOMX_FREQ_ISWAP_R EQU 4t
RANDOMX_FREQ_FSWAP_R EQU 4t
RANDOMX_FREQ_FADD_R EQU 16t
RANDOMX_FREQ_FADD_M EQU 5t
RANDOMX_FREQ_FSUB_R EQU 16t
RANDOMX_FREQ_FSUB_M EQU 5t
RANDOMX_FREQ_FSCAL_R EQU 6t
RANDOMX_FREQ_FMUL_R EQU 32t
RANDOMX_FREQ_FDIV_M EQU 4t
RANDOMX_FREQ_FSQRT_R EQU 6t
RANDOMX_FREQ_CBRANCH EQU 16t
RANDOMX_FREQ_CFROUND EQU 1t
RANDOMX_FREQ_ISTORE EQU 16t
RANDOMX_FREQ_NOP EQU 0t
; File end: ..\src\configuration.h

@ -549,12 +549,12 @@ namespace randomx {
void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
int target = registerUsage[reg] + 1;
int shift = instr.getModCond() + ConditionOffset;
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
int32_t imm = instr.getImm32() | (1L << shift);
if (ConditionOffset > 0 || shift > 0)
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
imm &= ~(1L << (shift - 1));
asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
asmCode << "\ttest " << regR[reg] << ", " << (RandomX_CurrentConfig.ConditionMask_Calculated << shift) << std::endl;
asmCode << "\tjz randomx_isn_" << target << std::endl;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
@ -573,39 +573,5 @@ namespace randomx {
tracenop(instr);
}
#include "instruction_weights.hpp"
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
}
InstructionGenerator AssemblyGeneratorX86::engine[256] = {};
}

@ -48,7 +48,7 @@ namespace randomx {
// Streams the accumulated assembly text (the contents of asmCode) to `os`.
void printCode(std::ostream& os) {
	auto* generated = asmCode.rdbuf();
	os << generated;
}
private:
void genAddressReg(Instruction&, const char*);
void genAddressRegDst(Instruction&, int);
int32_t genAddressImm(Instruction&);

@ -81,7 +81,7 @@ namespace randomx {
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
int opcode = instr.opcode;
if (opcode < ceil_IADD_RS) {
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS;
@ -100,7 +100,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IADD_M) {
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M;
@ -118,7 +118,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISUB_R) {
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_R;
@ -134,7 +134,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISUB_M) {
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M;
@ -152,7 +152,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IMUL_R) {
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_R;
@ -168,7 +168,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IMUL_M) {
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M;
@ -186,7 +186,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IMULH_R) {
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_R;
@ -196,7 +196,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IMULH_M) {
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M;
@ -214,7 +214,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISMULH_R) {
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_R;
@ -224,7 +224,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISMULH_M) {
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M;
@ -242,7 +242,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IMUL_RCP) {
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!isPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
@ -258,7 +258,7 @@ namespace randomx {
return;
}
if (opcode < ceil_INEG_R) {
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &nreg->r[dst];
@ -266,7 +266,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IXOR_R) {
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_R;
@ -282,7 +282,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IXOR_M) {
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M;
@ -300,7 +300,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IROR_R) {
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROR_R;
@ -316,7 +316,7 @@ namespace randomx {
return;
}
if (opcode < ceil_IROL_R) {
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROL_R;
@ -332,7 +332,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISWAP_R) {
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
if (src != dst) {
@ -348,7 +348,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FSWAP_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
if (dst < RegisterCountFlt)
@ -358,7 +358,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FADD_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
@ -367,7 +367,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FADD_M) {
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
@ -378,7 +378,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FSUB_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
@ -387,7 +387,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FSUB_M) {
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
@ -398,14 +398,14 @@ namespace randomx {
return;
}
if (opcode < ceil_FSCAL_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &nreg->f[dst];
ibc.type = InstructionType::FSCAL_R;
return;
}
if (opcode < ceil_FMUL_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
@ -414,7 +414,7 @@ namespace randomx {
return;
}
if (opcode < ceil_FDIV_M) {
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
@ -425,24 +425,24 @@ namespace randomx {
return;
}
if (opcode < ceil_FSQRT_R) {
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &nreg->e[dst];
return;
}
if (opcode < ceil_CBRANCH) {
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) {
ibc.type = InstructionType::CBRANCH;
//jump condition
int creg = instr.dst % RegistersCount;
ibc.idst = &nreg->r[creg];
ibc.target = registerUsage[creg];
int shift = instr.getModCond() + ConditionOffset;
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = ConditionMask << shift;
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
@ -450,7 +450,7 @@ namespace randomx {
return;
}
if (opcode < ceil_CFROUND) {
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) {
auto src = instr.src % RegistersCount;
ibc.isrc = &nreg->r[src];
ibc.type = InstructionType::CFROUND;
@ -458,7 +458,7 @@ namespace randomx {
return;
}
if (opcode < ceil_ISTORE) {
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISTORE;
@ -472,7 +472,7 @@ namespace randomx {
return;
}
if (opcode < ceil_NOP) {
if (opcode < RandomX_CurrentConfig.CEIL_NOP) {
ibc.type = InstructionType::NOP;
return;
}

@ -64,40 +64,6 @@ namespace randomx {
uint32_t memMask;
};
#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
constexpr int ceil_NULL = 0;
OPCODE_CEIL_DECLARE(IADD_RS, NULL);
OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
OPCODE_CEIL_DECLARE(NOP, ISTORE);
#undef OPCODE_CEIL_DECLARE
#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc

@ -39,46 +39,43 @@ namespace randomx {
//static_assert(RANDOMX_ARGON_MEMORY > 0, "RANDOMX_ARGON_MEMORY must be greater than 0.");
//static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
//static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
//static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
//static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
//static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
//static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
//static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
//static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
//static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
//static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
//static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
//static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
//static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
//static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
//static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
//static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
//static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
//static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
//static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
//static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
//constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
// RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
// RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
// RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
// RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
// RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
// RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
//static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
constexpr uint32_t ArgonBlockSize = 1024;
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1);
constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated
constexpr int StoreL3Condition = 14;
//Prevent some unsafe configurations.
@ -129,15 +126,12 @@ namespace randomx {
double hi;
};
constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t);
constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
#define ScratchpadL1Mask RandomX_CurrentConfig.ScratchpadL1Mask_Calculated
#define ScratchpadL1Mask16 RandomX_CurrentConfig.ScratchpadL1Mask16_Calculated
#define ScratchpadL2Mask RandomX_CurrentConfig.ScratchpadL2Mask_Calculated
#define ScratchpadL2Mask16 RandomX_CurrentConfig.ScratchpadL2Mask16_Calculated
#define ScratchpadL3Mask RandomX_CurrentConfig.ScratchpadL3Mask_Calculated
#define ScratchpadL3Mask64 RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated
constexpr int RegistersCount = 8;
constexpr int RegisterCountFlt = RegistersCount / 2;
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register

@ -28,98 +28,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
// Increase it if some configs use more cache accesses
#define RANDOMX_CACHE_MAX_ACCESSES 16
// Set of RandomX parameters that can be chosen at run time instead of being
// fixed by preprocessor constants. A mutable instance named
// RandomX_CurrentConfig is declared right after this struct.
struct RandomX_Configuration
{
// Cache size expressed in Argon blocks: code elsewhere in this change
// multiplies it by randomx::ArgonBlockSize to obtain a byte count.
int ArgonMemory;
// NOTE(review): presumably the remaining Argon2 parameters (passes, lanes,
// salt string) — confirm against the cache initialization code.
int ArgonIterations;
int ArgonLanes;
const char* ArgonSalt;
// Increase it if some configs use larger superscalar latency
#define RANDOMX_SUPERSCALAR_MAX_LATENCY 256
// NOTE(review): presumably program length, iterations per program and
// programs per hash — confirm against the VM code.
int ProgramSize;
int ProgramIterations;
int ProgramCount;
// Increase it if some configs use larger cache
#define RANDOMX_CACHE_MAX_SIZE 268435456
// 64-byte code buffer; the x86 JIT emits it between consecutive
// superscalar programs. NOTE(review): presumably filled by initCode() — confirm.
uint8_t codeShhPrefetchTweaked[64];
// Increase it if some configs use larger dataset
#define RANDOMX_DATASET_MAX_SIZE 2181038080
// Declared here, defined elsewhere (body not visible in this listing).
void initCode();
};
extern const RandomX_Configuration RandomX_DefaultConfig;
extern RandomX_Configuration RandomX_CurrentConfig;
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 170
//Dataset base size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE 2147483648
//Dataset extra size. Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384
//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8
//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
#define RANDOMX_JUMP_OFFSET 8
/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256
*/
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 25
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
//Control instructions
#define RANDOMX_FREQ_CBRANCH 16
#define RANDOMX_FREQ_CFROUND 1
//Store instruction
#define RANDOMX_FREQ_ISTORE 16
//No-op instruction
#define RANDOMX_FREQ_NOP 0
/* ------
256
*/
// Increase it if some configs use larger scratchpad
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152

@ -59,7 +59,7 @@ namespace randomx {
// Releases the resources owned by a cache: its memory buffer and its JIT compiler.
// Allocator must provide a static freeMemory(pointer, sizeInBytes).
template<class Allocator>
void deallocCache(randomx_cache* cache) {
	if (cache->memory != nullptr) {
		// Free the buffer exactly once, and only when it exists.
		// NOTE(review): assumes the buffer was allocated with
		// RANDOMX_CACHE_MAX_SIZE bytes — confirm against the allocation site.
		Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE);
	}
	// delete on a null pointer is a no-op, so no explicit guard is needed.
	delete cache->jit;
}
@ -121,7 +121,7 @@ namespace randomx {
cache->reciprocalCache.clear();
randomx::Blake2Generator gen(key, keySize);
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
for (int i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
randomx::generateSuperscalar(cache->programs[i], gen);
for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
auto& instr = cache->programs[i](j);
@ -166,7 +166,7 @@ namespace randomx {
rl[5] = rl[0] ^ superscalarAdd5;
rl[6] = rl[0] ^ superscalarAdd6;
rl[7] = rl[0] ^ superscalarAdd7;
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
mixBlock = getMixBlock(registerValue, cache->memory);
rx_prefetch_nta(mixBlock);
SuperscalarProgram& prog = cache->programs[i];

@ -48,7 +48,7 @@ struct randomx_cache {
randomx::JitCompiler* jit;
randomx::CacheInitializeFunc* initialize;
randomx::DatasetInitFunc* datasetInit;
randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES];
std::vector<uint64_t> reciprocalCache;
bool isInitialized() {
@ -67,7 +67,7 @@ namespace randomx {
// Releases the memory buffer owned by a dataset.
// Allocator must provide a static freeMemory(pointer, sizeInBytes).
template<class Allocator>
void deallocDataset(randomx_dataset* dataset) {
	if (dataset->memory != nullptr) {
		// Free the buffer exactly once, and only when it exists.
		// NOTE(review): assumes the buffer was allocated with
		// RANDOMX_DATASET_MAX_SIZE bytes — confirm against the allocation site.
		Allocator::freeMemory(dataset->memory, RANDOMX_DATASET_MAX_SIZE);
	}
}
template<class Allocator>

@ -317,74 +317,7 @@ namespace randomx {
os << std::endl;
}
#include "instruction_weights.hpp"
#define INST_NAME(x) REPN(#x, WT(x))
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
const char* Instruction::names[256] = {
INST_NAME(IADD_RS)
INST_NAME(IADD_M)
INST_NAME(ISUB_R)
INST_NAME(ISUB_M)
INST_NAME(IMUL_R)
INST_NAME(IMUL_M)
INST_NAME(IMULH_R)
INST_NAME(IMULH_M)
INST_NAME(ISMULH_R)
INST_NAME(ISMULH_M)
INST_NAME(IMUL_RCP)
INST_NAME(INEG_R)
INST_NAME(IXOR_R)
INST_NAME(IXOR_M)
INST_NAME(IROR_R)
INST_NAME(IROL_R)
INST_NAME(ISWAP_R)
INST_NAME(FSWAP_R)
INST_NAME(FADD_R)
INST_NAME(FADD_M)
INST_NAME(FSUB_R)
INST_NAME(FSUB_M)
INST_NAME(FSCAL_R)
INST_NAME(FMUL_R)
INST_NAME(FDIV_M)
INST_NAME(FSQRT_R)
INST_NAME(CBRANCH)
INST_NAME(CFROUND)
INST_NAME(ISTORE)
INST_NAME(NOP)
};
InstructionFormatter Instruction::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
const char* Instruction::names[256] = {};
InstructionFormatter Instruction::engine[256] = {};
}

@ -212,14 +212,14 @@ namespace randomx {
// Generates the code for one program: the prologue, then the dataset-read
// stub held in the current configuration, then the epilogue.
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
	generateProgramPrologue(prog, pcfg);
	// Copy only the configuration-specific stub. A preceding copy of the stock
	// codeReadDataset to the same address and length was fully overwritten by
	// this one, so it was dropped as dead work.
	memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize);
	codePos += readDatasetSize;
	generateProgramEpilogue(prog);
}
}
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
generateProgramPrologue(prog, pcfg);
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize);
emit(ADD_EBX_I);
emit32(datasetOffset / CacheLineSize);
emitByte(CALL);
@ -232,14 +232,14 @@ namespace randomx {
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
codePos = superScalarHashOffset + codeSshInitSize;
for (unsigned j = 0; j < N; ++j) {
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
SuperscalarProgram& prog = programs[j];
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
generateSuperscalarCode(instr, reciprocalCache);
}
emit(codeShhLoad, codeSshLoadSize);
if (j < N - 1) {
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
emit(REX_MOV_RR64);
emitByte(0xd8 + prog.getAddressRegister());
emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize);
@ -258,7 +258,7 @@ namespace randomx {
}
template
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector<uint64_t> &reciprocalCache);
void JitCompilerX86::generateDatasetInitCode() {
memcpy(code, codeDatasetInit, datasetInitSize);
@ -275,7 +275,7 @@ namespace randomx {
emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + pcfg.readReg1);
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
memcpy(code + codePos, RandomX_CurrentConfig.codeLoopLoadTweaked, loopLoadSize);
codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
@ -742,14 +742,14 @@ namespace randomx {
int target = registerUsage[reg] + 1;
emit(REX_ADD_I);
emitByte(0xc0 + reg);
int shift = instr.getModCond() + ConditionOffset;
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
//mark all registers as used
@ -769,40 +769,6 @@ namespace randomx {
emit(NOP1);
}
#include "instruction_weights.hpp"
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
InstructionGeneratorX86 JitCompilerX86::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
}

@ -64,7 +64,7 @@ namespace randomx {
return code;
}
size_t getCodeSize();
private:
static InstructionGeneratorX86 engine[256];
std::vector<int32_t> instructionOffsets;
int registerUsage[RegistersCount];

@ -55,11 +55,9 @@
.global DECL(randomx_program_end)
.global DECL(randomx_reciprocal_fast)
#include "configuration.h"
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
#define RANDOMX_SCRATCHPAD_MASK 2097088
#define RANDOMX_DATASET_BASE_MASK 2147483584
#define RANDOMX_CACHE_MASK 4194303
#define db .byte

@ -46,10 +46,8 @@ PUBLIC randomx_sshash_init
PUBLIC randomx_program_end
PUBLIC randomx_reciprocal_fast
include asm/configuration.asm
RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
RANDOMX_SCRATCHPAD_MASK EQU 2097088
RANDOMX_DATASET_BASE_MASK EQU 2147483584
RANDOMX_CACHE_MASK EQU 4194303
ALIGN 64

@ -34,36 +34,207 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "vm_compiled_light.hpp"
#include "blake2/blake2.h"
#include "jit_compiler_x86_static.hpp"
#include "assembly_generator_x86.hpp"
#include <cassert>
const RandomX_Configuration RandomX_DefaultConfig = {
/* ArgonMemory */ 262144,
/* ArgonIterations */ 3,
/* ArgonLanes */ 1,
/* ArgonSalt */ "RandomX\x03",
/* ProgramSize */ 256,
/* ProgramIterations */ 2048,
/* ProgramCount */ 8,
/* codeShhPrefetchTweaked */ {},
};
// Wownero parameter set.
// The RandomX_ConfigurationBase constructor has already filled in the defaults
// by the time this body runs; only the fields that differ are overridden here.
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
{
	// Salt and program schedule.
	ArgonSalt = "RandomWOW\x01";
	ProgramCount = 16;
	ProgramIterations = 1024;

	// Scratchpad L2/L3 sizes (L1 keeps the base default).
	ScratchpadL3_Size = 1048576;
	ScratchpadL2_Size = 131072;

	// Instruction frequency overrides. Relative to the base defaults the
	// changes cancel out (+2 -2 +4 +4 +4 -12), so the total stays the same.
	RANDOMX_FREQ_FMUL_R = 20;
	RANDOMX_FREQ_FSUB_R = 20;
	RANDOMX_FREQ_FADD_R = 20;
	RANDOMX_FREQ_FSWAP_R = 8;
	RANDOMX_FREQ_IROL_R = 0;
	RANDOMX_FREQ_IROR_R = 10;

	// Four distinct fillAes4Rx4 keys...
	fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d);
	fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450);
	fillAes4Rx4_Key[2] = rx_set_int_vec_i128(0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4);
	fillAes4Rx4_Key[3] = rx_set_int_vec_i128(0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a);

	// ...and slots 4..7 repeat slots 0..3.
	for (int slot = 0; slot < 4; ++slot)
		fillAes4Rx4_Key[slot + 4] = fillAes4Rx4_Key[slot];
}
RandomX_Configuration RandomX_CurrentConfig = RandomX_DefaultConfig;
// Loki parameter set.
// Starts from the RandomX_ConfigurationBase defaults and overrides five fields.
RandomX_ConfigurationLoki::RandomX_ConfigurationLoki()
{
	// Argon parameters (base defaults: 3 iterations, 1 lane, salt "RandomX\x03").
	ArgonSalt = "RandomXL\x12";
	ArgonLanes = 2;
	ArgonIterations = 4;

	// Program shape (base defaults: size 256, count 8).
	ProgramCount = 7;
	ProgramSize = 320;
}
void RandomX_Configuration::initCode()
// Default constructor: fills every tunable field with the baseline parameter
// set. RandomX_ConfigurationMonero uses these values unchanged; the Wownero and
// Loki constructors override a subset afterwards.
// The *_Calculated and CEIL_* members are not initialised here; Apply() computes them.
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
: ArgonMemory(262144)
, ArgonIterations(3)
, ArgonLanes(1)
, ArgonSalt("RandomX\x03")
, CacheAccesses(8)
, SuperscalarLatency(170)
, DatasetBaseSize(2147483648)
, DatasetExtraSize(33554368)
, ScratchpadL1_Size(16384)
, ScratchpadL2_Size(262144)
, ScratchpadL3_Size(2097152)
, ProgramSize(256)
, ProgramIterations(2048)
, ProgramCount(8)
, JumpBits(8)
, JumpOffset(8)
// Instruction frequencies: the 30 values below add up to 256.
, RANDOMX_FREQ_IADD_RS(25)
, RANDOMX_FREQ_IADD_M(7)
, RANDOMX_FREQ_ISUB_R(16)
, RANDOMX_FREQ_ISUB_M(7)
, RANDOMX_FREQ_IMUL_R(16)
, RANDOMX_FREQ_IMUL_M(4)
, RANDOMX_FREQ_IMULH_R(4)
, RANDOMX_FREQ_IMULH_M(1)
, RANDOMX_FREQ_ISMULH_R(4)
, RANDOMX_FREQ_ISMULH_M(1)
, RANDOMX_FREQ_IMUL_RCP(8)
, RANDOMX_FREQ_INEG_R(2)
, RANDOMX_FREQ_IXOR_R(15)
, RANDOMX_FREQ_IXOR_M(5)
, RANDOMX_FREQ_IROR_R(8)
, RANDOMX_FREQ_IROL_R(2)
, RANDOMX_FREQ_ISWAP_R(4)
, RANDOMX_FREQ_FSWAP_R(4)
, RANDOMX_FREQ_FADD_R(16)
, RANDOMX_FREQ_FADD_M(5)
, RANDOMX_FREQ_FSUB_R(16)
, RANDOMX_FREQ_FSUB_M(5)
, RANDOMX_FREQ_FSCAL_R(6)
, RANDOMX_FREQ_FMUL_R(32)
, RANDOMX_FREQ_FDIV_M(4)
, RANDOMX_FREQ_FSQRT_R(6)
, RANDOMX_FREQ_CBRANCH(16)
, RANDOMX_FREQ_CFROUND(1)
, RANDOMX_FREQ_ISTORE(16)
, RANDOMX_FREQ_NOP(0)
{
// NOTE(review): the next four statements duplicate the prefetch-template patch
// that Apply() performs and look like the body of the removed initCode() left
// over from the diff rendering - confirm whether they belong in this constructor.
const uint8_t* a = (const uint8_t*) &randomx_sshash_prefetch;
const uint8_t* b = (const uint8_t*) &randomx_sshash_end;
memcpy(codeShhPrefetchTweaked, a, b - a);
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
// Default keys read by fillAes4Rx4 through RandomX_CurrentConfig.fillAes4Rx4_Key.
fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd);
fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450);
fillAes4Rx4_Key[2] = rx_set_int_vec_i128(0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904);
fillAes4Rx4_Key[3] = rx_set_int_vec_i128(0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763);
fillAes4Rx4_Key[4] = rx_set_int_vec_i128(0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73);
fillAes4Rx4_Key[5] = rx_set_int_vec_i128(0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3);
fillAes4Rx4_Key[6] = rx_set_int_vec_i128(0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7);
fillAes4Rx4_Key[7] = rx_set_int_vec_i128(0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609);
}
extern "C" {
// Recomputes everything that is derived from the tunable fields:
//  - on x86-64, copies assembly code templates into the *Tweaked buffers and
//    overwrites 32-bit values inside them with masks computed from this config;
//  - fills the *_Calculated mask members;
//  - fills the CEIL_* running totals and the instruction dispatch tables
//    according to the RANDOMX_FREQ_* values.
// NOTE(review): none of the memcpy calls below checks the template length
// (b - a) against the size of the destination buffer - confirm the buffers are
// large enough for the assembled templates.
void RandomX_ConfigurationBase::Apply()
{
#if defined(_M_X64) || defined(__x86_64__)
{
// Superscalar-hash prefetch template; the value at byte offset 3 is replaced.
// NOTE(review): the patch offsets in this function presumably match the
// instruction encodings in the static assembly file - keep them in sync.
const uint8_t* a = (const uint8_t*)&randomx_sshash_prefetch;
const uint8_t* b = (const uint8_t*)&randomx_sshash_end;
memcpy(codeShhPrefetchTweaked, a, b - a);
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
}
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
{
// Dataset read template; DatasetBaseMask is written at byte offsets 7 and 23.
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
memcpy(codeReadDatasetTweaked, a, b - a);
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
}
{
// Light-mode dataset read init template; DatasetBaseMask is written at byte offset 59.
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_fin;
memcpy(codeReadDatasetLightSshInitTweaked, a, b - a);
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
}
#endif
// Masks derived from the dataset and scratchpad sizes.
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
ScratchpadL2Mask16_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) / 2 - 1) * 16;
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
#if defined(_M_X64) || defined(__x86_64__)
{
// Loop-load template; the L3 64-byte mask is written at byte offsets 4 and 50.
const uint8_t* a = (const uint8_t*)&randomx_program_loop_load;
const uint8_t* b = (const uint8_t*)&randomx_program_start;
memcpy(codeLoopLoadTweaked, a, b - a);
*(uint32_t*)(codeLoopLoadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codeLoopLoadTweaked + 50) = ScratchpadL3Mask64_Calculated;
}
#endif
// Mask with the low JumpBits bits set.
ConditionMask_Calculated = (1 << JumpBits) - 1;
// CEIL_NULL seeds the running total used by the first INST_HANDLE below.
constexpr int CEIL_NULL = 0;
// k is the next free slot in the dispatch tables.
int k = 0;
// NOTE(review): the next three lines appear to be removed-side diff lines (the
// old C entry point) interleaved by the diff rendering - confirm they are not
// part of this function.
void randomx_apply_config(const RandomX_Configuration* config) {
RandomX_CurrentConfig = *config;
RandomX_CurrentConfig.initCode();
// JIT_HANDLE fills the JIT compiler's table on x86-64 and expands to nothing elsewhere.
// NOTE(review): JIT_HANDLE is never #undef'd, unlike INST_HANDLE.
#if defined(_M_X64) || defined(__x86_64__)
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
#else
#define JIT_HANDLE(x, prev)
#endif
// INST_HANDLE(x, prev): sets CEIL_x to CEIL_prev + RANDOMX_FREQ_x and assigns
// the handler for x to the next RANDOMX_FREQ_x table slots.
// NOTE(review): k is not bounds-checked; JitCompilerX86::engine is declared
// with 256 entries, so the frequencies presumably must not sum to more than
// 256 - nothing here enforces it.
#define INST_HANDLE(x, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (int i = 0; i < RANDOMX_FREQ_##x; ++i, ++k) \
{ \
randomx::AssemblyGeneratorX86::engine[k] = &randomx::AssemblyGeneratorX86::h_##x; \
JIT_HANDLE(x, prev); \
}
// Order matters: each entry names its predecessor so the CEIL_* totals chain.
INST_HANDLE(IADD_RS, NULL);
INST_HANDLE(IADD_M, IADD_RS);
INST_HANDLE(ISUB_R, IADD_M);
INST_HANDLE(ISUB_M, ISUB_R);
INST_HANDLE(IMUL_R, ISUB_M);
INST_HANDLE(IMUL_M, IMUL_R);
INST_HANDLE(IMULH_R, IMUL_M);
INST_HANDLE(IMULH_M, IMULH_R);
INST_HANDLE(ISMULH_R, IMULH_M);
INST_HANDLE(ISMULH_M, ISMULH_R);
INST_HANDLE(IMUL_RCP, ISMULH_M);
INST_HANDLE(INEG_R, IMUL_RCP);
INST_HANDLE(IXOR_R, INEG_R);
INST_HANDLE(IXOR_M, IXOR_R);
INST_HANDLE(IROR_R, IXOR_M);
INST_HANDLE(IROL_R, IROR_R);
INST_HANDLE(ISWAP_R, IROL_R);
INST_HANDLE(FSWAP_R, ISWAP_R);
INST_HANDLE(FADD_R, FSWAP_R);
INST_HANDLE(FADD_M, FADD_R);
INST_HANDLE(FSUB_R, FADD_M);
INST_HANDLE(FSUB_M, FSUB_R);
INST_HANDLE(FSCAL_R, FSUB_M);
INST_HANDLE(FMUL_R, FSCAL_R);
INST_HANDLE(FDIV_M, FMUL_R);
INST_HANDLE(FSQRT_R, FDIV_M);
INST_HANDLE(CBRANCH, FSQRT_R);
INST_HANDLE(CFROUND, CBRANCH);
INST_HANDLE(ISTORE, CFROUND);
INST_HANDLE(NOP, ISTORE);
#undef INST_HANDLE
}
RandomX_ConfigurationMonero RandomX_MoneroConfig;
RandomX_ConfigurationWownero RandomX_WowneroConfig;
RandomX_ConfigurationLoki RandomX_LokiConfig;
RandomX_ConfigurationBase RandomX_CurrentConfig;
extern "C" {
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
randomx_cache *cache;
@ -75,7 +246,7 @@ extern "C" {
cache->jit = nullptr;
cache->initialize = &randomx::initCache;
cache->datasetInit = &randomx::initDataset;
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize);
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
case RANDOMX_FLAG_JIT:
@ -83,7 +254,7 @@ extern "C" {
cache->jit = new randomx::JitCompiler();
cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize);
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
case RANDOMX_FLAG_LARGE_PAGES:
@ -91,7 +262,7 @@ extern "C" {
cache->jit = nullptr;
cache->initialize = &randomx::initCache;
cache->datasetInit = &randomx::initDataset;
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize);
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
@ -99,7 +270,7 @@ extern "C" {
cache->jit = new randomx::JitCompiler();
cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize);
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
default:
@ -135,11 +306,11 @@ extern "C" {
dataset = new randomx_dataset();
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize);
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
else {
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize);
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
}
catch (std::exception &ex) {
@ -152,7 +323,7 @@ extern "C" {
return dataset;
}
constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE;
#define DatasetItemCount ((RandomX_CurrentConfig.DatasetBaseSize + RandomX_CurrentConfig.DatasetExtraSize) / RANDOMX_DATASET_ITEM_SIZE)
unsigned long randomx_dataset_item_count() {
return DatasetItemCount;

@ -30,6 +30,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_H
#include <stddef.h>
#include <stdint.h>
#include <type_traits>
#include "intrin_portable.h"
#define RANDOMX_HASH_SIZE 32
#define RANDOMX_DATASET_ITEM_SIZE 64
@ -49,14 +52,139 @@ typedef enum {
typedef struct randomx_dataset randomx_dataset;
typedef struct randomx_cache randomx_cache;
typedef struct randomx_vm randomx_vm;
typedef struct RandomX_Configuration;
// Runtime-selectable RandomX parameter set.
// The constructor fills the tunable fields with defaults; Apply() then derives
// the *Tweaked code buffers, the *_Calculated masks and the CEIL_* totals from
// them. Member order is significant: it fixes the object layout and is the
// order used by the constructor's initializer list.
struct RandomX_ConfigurationBase
{
RandomX_ConfigurationBase();
// Recomputes all derived members from the tunable fields below.
void Apply();
// --- Tunable fields: Argon / cache ---
uint32_t ArgonMemory;
uint32_t ArgonIterations;
uint32_t ArgonLanes;
// Points at a string literal in the provided configurations; not owned.
const char* ArgonSalt;
uint32_t CacheAccesses;
uint32_t SuperscalarLatency;
// --- Tunable fields: dataset and scratchpad sizes ---
uint32_t DatasetBaseSize;
uint32_t DatasetExtraSize;
uint32_t ScratchpadL1_Size;
uint32_t ScratchpadL2_Size;
uint32_t ScratchpadL3_Size;
// --- Tunable fields: program shape and branch parameters ---
uint32_t ProgramSize;
uint32_t ProgramIterations;
uint32_t ProgramCount;
uint32_t JumpBits;
uint32_t JumpOffset;
// --- Tunable fields: instruction frequencies ---
// Each value is the number of consecutive dispatch-table slots Apply()
// assigns to that instruction's handler.
uint32_t RANDOMX_FREQ_IADD_RS;
uint32_t RANDOMX_FREQ_IADD_M;
uint32_t RANDOMX_FREQ_ISUB_R;
uint32_t RANDOMX_FREQ_ISUB_M;
uint32_t RANDOMX_FREQ_IMUL_R;
uint32_t RANDOMX_FREQ_IMUL_M;
uint32_t RANDOMX_FREQ_IMULH_R;
uint32_t RANDOMX_FREQ_IMULH_M;
uint32_t RANDOMX_FREQ_ISMULH_R;
uint32_t RANDOMX_FREQ_ISMULH_M;
uint32_t RANDOMX_FREQ_IMUL_RCP;
uint32_t RANDOMX_FREQ_INEG_R;
uint32_t RANDOMX_FREQ_IXOR_R;
uint32_t RANDOMX_FREQ_IXOR_M;
uint32_t RANDOMX_FREQ_IROR_R;
uint32_t RANDOMX_FREQ_IROL_R;
uint32_t RANDOMX_FREQ_ISWAP_R;
uint32_t RANDOMX_FREQ_FSWAP_R;
uint32_t RANDOMX_FREQ_FADD_R;
uint32_t RANDOMX_FREQ_FADD_M;
uint32_t RANDOMX_FREQ_FSUB_R;
uint32_t RANDOMX_FREQ_FSUB_M;
uint32_t RANDOMX_FREQ_FSCAL_R;
uint32_t RANDOMX_FREQ_FMUL_R;
uint32_t RANDOMX_FREQ_FDIV_M;
uint32_t RANDOMX_FREQ_FSQRT_R;
uint32_t RANDOMX_FREQ_CBRANCH;
uint32_t RANDOMX_FREQ_CFROUND;
uint32_t RANDOMX_FREQ_ISTORE;
uint32_t RANDOMX_FREQ_NOP;
// Keys read by fillAes4Rx4.
rx_vec_i128 fillAes4Rx4_Key[8];
// --- Derived: patched copies of assembly templates (written by Apply() on x86-64) ---
// NOTE(review): Apply() copies into these without checking the template
// length against the array sizes below.
uint8_t codeShhPrefetchTweaked[20];
uint8_t codeReadDatasetTweaked[64];
uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codeLoopLoadTweaked[140];
// --- Derived: masks and counts computed by Apply() ---
uint32_t CacheLineAlignMask_Calculated;
uint32_t DatasetExtraItems_Calculated;
uint32_t ScratchpadL1Mask_Calculated;
uint32_t ScratchpadL1Mask16_Calculated;
uint32_t ScratchpadL2Mask_Calculated;
uint32_t ScratchpadL2Mask16_Calculated;
uint32_t ScratchpadL3Mask_Calculated;
uint32_t ScratchpadL3Mask64_Calculated;
uint32_t ConditionMask_Calculated;
// --- Derived: running totals of the frequencies, in the order Apply() processes them ---
int CEIL_IADD_RS;
int CEIL_IADD_M;
int CEIL_ISUB_R;
int CEIL_ISUB_M;
int CEIL_IMUL_R;
int CEIL_IMUL_M;
int CEIL_IMULH_R;
int CEIL_IMULH_M;
int CEIL_ISMULH_R;
int CEIL_ISMULH_M;
int CEIL_IMUL_RCP;
int CEIL_INEG_R;
int CEIL_IXOR_R;
int CEIL_IXOR_M;
int CEIL_IROR_R;
int CEIL_IROL_R;
int CEIL_ISWAP_R;
int CEIL_FSWAP_R;
int CEIL_FADD_R;
int CEIL_FADD_M;
int CEIL_FSUB_R;
int CEIL_FSUB_M;
int CEIL_FSCAL_R;
int CEIL_FMUL_R;
int CEIL_FDIV_M;
int CEIL_FSQRT_R;
int CEIL_CBRANCH;
int CEIL_CFROUND;
int CEIL_ISTORE;
int CEIL_NOP;
};
// Monero: the base defaults, unchanged.
struct RandomX_ConfigurationMonero : RandomX_ConfigurationBase
{
};

// Wownero: base defaults plus the overrides applied in its constructor.
struct RandomX_ConfigurationWownero : RandomX_ConfigurationBase
{
	RandomX_ConfigurationWownero();
};

// Loki: base defaults plus the overrides applied in its constructor.
struct RandomX_ConfigurationLoki : RandomX_ConfigurationBase
{
	RandomX_ConfigurationLoki();
};
extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
extern RandomX_ConfigurationLoki RandomX_LokiConfig;
extern RandomX_ConfigurationBase RandomX_CurrentConfig;
template<typename T>
void randomx_apply_config(const T& config)
{
static_assert(sizeof(T) == sizeof(RandomX_ConfigurationBase), "Invalid RandomX configuration struct size");
static_assert(std::is_base_of<RandomX_ConfigurationBase, T>::value, "Incompatible RandomX configuration struct");
RandomX_CurrentConfig = config;
RandomX_CurrentConfig.Apply();
}
#if defined(__cplusplus)
extern "C" {
#endif
RANDOMX_EXPORT void randomx_apply_config(const RandomX_Configuration* config);
/**
* Creates a randomx_cache structure and allocates memory for RandomX Cache.
*

@ -578,7 +578,7 @@ namespace randomx {
const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4;
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_MAX_LATENCY + 4;
constexpr int LOOK_FORWARD_CYCLES = 4;
constexpr int MAX_THROWAWAY_COUNT = 256;
@ -586,7 +586,7 @@ namespace randomx {
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
//port P1 (multiplication) by instructions that can go to any port.
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
for (; cycle < RandomX_CurrentConfig.SuperscalarLatency + 4; ++cycle) {
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
if (commit) {
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
@ -631,7 +631,7 @@ namespace randomx {
}
else {
//macro-ops with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
for (; cycle < RandomX_CurrentConfig.SuperscalarLatency + 4; ++cycle) {
int cycle1 = scheduleUop<false>(mop.getUop1(), portBusy, cycle);
int cycle2 = scheduleUop<false>(mop.getUop2(), portBusy, cycle);
@ -674,7 +674,7 @@ namespace randomx {
//Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always
//saturated first. The cycle limit is present only to guarantee loop termination.
//Program size is limited to SuperscalarMaxSize instructions.
for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < SuperscalarMaxSize; ++decodeCycle) {
for (decodeCycle = 0; decodeCycle < RandomX_CurrentConfig.SuperscalarLatency && !portsSaturated && programSize < 3 * RandomX_CurrentConfig.SuperscalarLatency + 2; ++decodeCycle) {
//select a decode configuration
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
@ -688,7 +688,7 @@ namespace randomx {
//if we have issued all macro-ops for the current RandomX instruction, create a new instruction
if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
if (portsSaturated || programSize >= SuperscalarMaxSize)
if (portsSaturated || programSize >= 3 * RandomX_CurrentConfig.SuperscalarLatency + 2)
break;
//select an instruction so that the first macro-op fits into the current slot
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
@ -782,7 +782,7 @@ namespace randomx {
macroOpCount++;
//terminating condition
if (scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY) {
if (scheduleCycle >= RandomX_CurrentConfig.SuperscalarLatency) {
portsSaturated = true;
}
cycle = topCycle;
@ -836,7 +836,7 @@ namespace randomx {
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
int portCycles = 0;
for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
for (int i = 0; i < RandomX_Config.SuperscalarLatency + 4; ++i) {
std::cout << "; " << std::setw(3) << i << " ";
for (int j = 0; j < 3; ++j) {
std::cout << (portBusy[i][j] ? '*' : '_');

@ -157,8 +157,8 @@ int main(int argc, char** argv) {
AtomicHash result;
std::vector<randomx_vm*> vms;
std::vector<std::thread> threads;
randomx_dataset* dataset;
randomx_cache* cache;
randomx_dataset* dataset = nullptr;
randomx_cache* cache = nullptr;
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
if (miningMode) {
@ -202,7 +202,9 @@ int main(int argc, char** argv) {
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
std::cout << " ..." << std::endl;
randomx_apply_config(&RandomX_DefaultConfig);
randomx_apply_config(RandomX_MoneroConfig);
//randomx_apply_config(RandomX_WowneroConfig);
//randomx_apply_config(RandomX_LokiConfig);
try {
if (jit && !RANDOMX_HAVE_COMPILER) {

@ -78,7 +78,7 @@ void randomx_vm::initialize() {
store64(&reg.a[2].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(5)));
store64(&reg.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6)));
store64(&reg.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7)));
mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask;
mem.ma = program.getEntropy(8) & CacheLineAlignMask;
mem.mx = program.getEntropy(10);
auto addressRegisters = program.getEntropy(12);
config.readReg0 = 0 + (addressRegisters & 1);
@ -88,7 +88,7 @@ void randomx_vm::initialize() {
config.readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1;
config.readReg3 = 6 + (addressRegisters & 1);
datasetOffset = (program.getEntropy(13) % (randomx::DatasetExtraItems + 1)) * randomx::CacheLineSize;
datasetOffset = (program.getEntropy(13) % (DatasetExtraItems + 1)) * randomx::CacheLineSize;
store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14)));
store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15)));
}
@ -99,7 +99,7 @@ namespace randomx {
template<class Allocator, bool softAes>
VmBase<Allocator, softAes>::~VmBase() {
Allocator::freeMemory(scratchpad, ScratchpadSize);
Allocator::freeMemory(scratchpad, RANDOMX_SCRATCHPAD_L3_MAX_SIZE);
}
template<class Allocator, bool softAes>
@ -111,7 +111,7 @@ namespace randomx {
tmp = rx_aesenc_vec_i128(tmp, tmp);
rx_store_vec_i128((rx_vec_i128*)&aesDummy, tmp);
}
scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize);
scratchpad = (uint8_t*)Allocator::allocMemory(RANDOMX_SCRATCHPAD_L3_MAX_SIZE);
}
template<class Allocator, bool softAes>

@ -124,10 +124,7 @@
</AdditionalOptions>
<StackReserveSize>4194304</StackReserveSize>
</Link>
<PreBuildEvent>
<Command>powershell -ExecutionPolicy Bypass -File .\h2inc.ps1 ..\src\configuration.h &gt; ..\src\asm\configuration.asm
SET ERRORLEVEL = 0</Command>
</PreBuildEvent>
<PreBuildEvent />
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\src\allocator.cpp" />
@ -154,7 +151,10 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\virtual_memory.cpp" />
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\jit_compiler_x86_static.asm" />
<MASM Include="..\src\jit_compiler_x86_static.asm">
<EnableAssemblyGeneratedCodeListing Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</EnableAssemblyGeneratedCodeListing>
<AssembledCodeListingFile Condition="'$(Configuration)|$(Platform)'=='Release|x64'">jit_compiler_x86_static.lst</AssembledCodeListingFile>
</MASM>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\allocator.hpp" />

Loading…
Cancel
Save