Formatting & refactoring

pull/40/head
tevador 5 years ago
parent 87b8253374
commit 4c66b2305a

@ -34,8 +34,6 @@ namespace randomx {
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
static const char* fsumInstr[4] = { "paddb", "paddw", "paddd", "paddq" };
static const char* regA4 = "xmm12";
static const char* dblMin = "xmm13";
static const char* absMask = "xmm14";
@ -58,7 +56,6 @@ namespace randomx {
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
generateCode(instr, i);
//asmCode << std::endl;
}
}
@ -494,7 +491,6 @@ namespace randomx {
//2 uOPs
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
if (instr.src != instr.dst) {
//std::swap(registerUsage[instr.dst], registerUsage[instr.src]);
registerUsage[instr.dst] = i;
registerUsage[instr.src] = i;
asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
@ -516,7 +512,6 @@ namespace randomx {
instr.dst %= 4;
instr.src %= 4;
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
traceflt(instr);
}
@ -534,7 +529,6 @@ namespace randomx {
instr.dst %= 4;
instr.src %= 4;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
traceflt(instr);
}

@ -40,12 +40,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Dataset size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024)
//Number of blocks per epoch
#define RANDOMX_EPOCH_BLOCKS 2048
//Number of blocks between the seed block and the start of new epoch
#define RANDOMX_EPOCH_LAG 64
//Number of instructions in a RandomX program
#define RANDOMX_PROGRAM_SIZE 256

@ -40,13 +40,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "argon2.h"
#include "argon2_core.h"
#if defined(__SSE2__)
#include <wmmintrin.h>
#define PREFETCHNTA(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#else
#define PREFETCH(memory)
#endif
// Out-of-line destructor definition; the body is intentionally empty here.
randomx_dataset::~randomx_dataset() {
}

@ -43,30 +43,27 @@ namespace randomx {
constexpr int ISMULH_R = 10;
constexpr int ISMULH_M = 11;
constexpr int IMUL_RCP = 12;
//constexpr int ISDIV_C = 13;
constexpr int INEG_R = 14;
constexpr int IXOR_R = 15;
constexpr int IXOR_M = 16;
constexpr int IROR_R = 17;
constexpr int IROL_R = 18;
constexpr int ISWAP_R = 19;
constexpr int FSWAP_R = 20;
constexpr int FADD_R = 21;
constexpr int FADD_M = 22;
constexpr int FSUB_R = 23;
constexpr int FSUB_M = 24;
constexpr int FSCAL_R = 25;
constexpr int FMUL_R = 26;
constexpr int FMUL_M = 27;
constexpr int FDIV_R = 28;
constexpr int FDIV_M = 29;
constexpr int FSQRT_R = 30;
constexpr int COND_R = 31;
constexpr int COND_M = 32;
constexpr int CFROUND = 33;
constexpr int ISTORE = 34;
constexpr int FSTORE = 35;
constexpr int NOP = 36;
constexpr int INEG_R = 13;
constexpr int IXOR_R = 14;
constexpr int IXOR_M = 15;
constexpr int IROR_R = 16;
constexpr int IROL_R = 17;
constexpr int ISWAP_R = 18;
constexpr int FSWAP_R = 19;
constexpr int FADD_R = 20;
constexpr int FADD_M = 21;
constexpr int FSUB_R = 22;
constexpr int FSUB_M = 23;
constexpr int FSCAL_R = 24;
constexpr int FMUL_R = 25;
constexpr int FDIV_M = 26;
constexpr int FSQRT_R = 27;
constexpr int COND_R = 28;
constexpr int COND_M = 29;
constexpr int CFROUND = 30;
constexpr int ISTORE = 31;
constexpr int FSTORE = 32;
constexpr int NOP = 33;
}
class Instruction {

@ -73,14 +73,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define HAVE_SMULH
#endif
static void setRoundMode__(uint32_t mode) {
static void setRoundMode_(uint32_t mode) {
_controlfp(mode, _MCW_RC);
}
#define HAVE_SETROUNDMODE_IMPL
#endif
#ifndef HAVE_SETROUNDMODE_IMPL
static void setRoundMode__(uint32_t mode) {
static void setRoundMode_(uint32_t mode) {
fesetround(mode);
}
#endif
@ -135,7 +135,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#if defined(__has_builtin)
#if __has_builtin(__builtin_sub_overflow)
static inline bool subOverflow__(uint32_t a, uint32_t b) {
static inline bool subOverflow_(uint32_t a, uint32_t b) {
int32_t temp;
return __builtin_sub_overflow(unsigned32ToSigned2sCompl(a), unsigned32ToSigned2sCompl(b), &temp);
}
@ -144,7 +144,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#endif
#ifndef HAVE_SUB_OVERFLOW
static inline bool subOverflow__(uint32_t a, uint32_t b) {
static inline bool subOverflow_(uint32_t a, uint32_t b) {
auto c = unsigned32ToSigned2sCompl(a - b);
return (c < unsigned32ToSigned2sCompl(a)) != (unsigned32ToSigned2sCompl(b) > 0);
}
@ -166,16 +166,16 @@ static inline double FlushNaN(double x) {
void setRoundMode(uint32_t rcflag) {
switch (rcflag & 3) {
case RoundDown:
setRoundMode__(FE_DOWNWARD);
setRoundMode_(FE_DOWNWARD);
break;
case RoundUp:
setRoundMode__(FE_UPWARD);
setRoundMode_(FE_UPWARD);
break;
case RoundToZero:
setRoundMode__(FE_TOWARDZERO);
setRoundMode_(FE_TOWARDZERO);
break;
case RoundToNearest:
setRoundMode__(FE_TONEAREST);
setRoundMode_(FE_TONEAREST);
break;
default:
UNREACHABLE;
@ -194,9 +194,9 @@ bool condition(uint32_t type, uint32_t value, uint32_t imm32) {
case 3:
return unsigned32ToSigned2sCompl(value - imm32) >= 0;
case 4:
return subOverflow__(value, imm32);
return subOverflow_(value, imm32);
case 5:
return !subOverflow__(value, imm32);
return !subOverflow_(value, imm32);
case 6:
return unsigned32ToSigned2sCompl(value) < unsigned32ToSigned2sCompl(imm32);
case 7:

@ -103,13 +103,11 @@ namespace randomx {
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm14 -> exponent 2**-240 = 0x30f00000000xxxxx30f00000000xxxxx
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/
#define NOP_TEST true
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
@ -254,18 +252,10 @@ namespace randomx {
// Emits a complete program for the "light" code path, in this order:
//   1. the program prologue (generateProgramPrologue),
//   2. the codeReadDatasetLightSshInit stub,
//   3. a CALL whose 32-bit operand is derived from superScalarHashOffset,
//   4. the codeReadDatasetLightSshFin stub,
//   5. the program epilogue (generateProgramEpilogue).
// The commented-out lines are the remains of an older conditional that
// selected between this sequence and a codeReadDatasetLight variant.
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
generateProgramPrologue(prog, pcfg);
//if (superscalar) {
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emitByte(CALL);
// The operand is the distance from the end of the 4-byte value being written
// (codePos + 4) to superScalarHashOffset, i.e. a relative displacement.
emit32(superScalarHashOffset - (codePos + 4));
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
/*}
else {
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
codePos += readDatasetLightSize;
emitByte(CALL);
emit32(readDatasetLightSubOffset - (codePos + 4));
}*/
generateProgramEpilogue(prog);
}
@ -483,10 +473,6 @@ namespace randomx {
emitByte(0xc0 + instr.dst);
emit32(instr.getImm32());
}*/
if (false && NOP_TEST) {
emit(NOP4);
return;
}
emit(REX_LEA);
if (instr.dst == RegisterNeedsDisplacement)
emitByte(0xac);
@ -527,18 +513,10 @@ namespace randomx {
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
if (false && NOP_TEST) {
emit(NOP3);
return;
}
emit(REX_SUB_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
if (false && NOP_TEST) {
emit(NOP7);
return;
}
emit(REX_81);
emitByte(0xe8 + instr.dst);
emit32(instr.getImm32());
@ -571,18 +549,10 @@ namespace randomx {
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
if (false && NOP_TEST) {
emit(NOP4);
return;
}
emit(REX_IMUL_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
if (false && NOP_TEST) {
emit(NOP7);
return;
}
emit(REX_IMUL_RRI);
emitByte(0xc0 + 9 * instr.dst);
emit32(instr.getImm32());
@ -606,12 +576,6 @@ namespace randomx {
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (false && NOP_TEST) {
emit(NOP3);
emit(NOP3);
emit(NOP3);
return;
}
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
@ -641,12 +605,6 @@ namespace randomx {
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (false && NOP_TEST) {
emit(NOP3);
emit(NOP3);
emit(NOP3);
return;
}
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
@ -676,13 +634,6 @@ namespace randomx {
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
if (instr.getImm32() != 0) {
if (false && NOP_TEST) {
emitByte(0x66);
emitByte(0x66);
emit(NOP8);
emit(NOP4);
return;
}
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);
emit64(randomx_reciprocal(instr.getImm32()));
@ -704,18 +655,10 @@ namespace randomx {
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
if (false && NOP_TEST) {
emit(NOP3);
return;
}
emit(REX_XOR_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
if (false && NOP_TEST) {
emit(NOP7);
return;
}
emit(REX_XOR_RI);
emitByte(0xf0 + instr.dst);
emit32(instr.getImm32());
@ -740,21 +683,12 @@ namespace randomx {
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
if (false && NOP_TEST) {
emit(NOP3);
emit(NOP3);
return;
}
emit(REX_MOV_RR);
emitByte(0xc8 + instr.src);
emit(REX_ROT_CL);
emitByte(0xc8 + instr.dst);
}
else {
if (false && NOP_TEST) {
emit(NOP4);
return;
}
emit(REX_ROT_I8);
emitByte(0xc8 + instr.dst);
emitByte(instr.getImm32() & 63);
@ -949,21 +883,14 @@ namespace randomx {
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
if (false && NOP_TEST) {
emit(NOP7);
emit(NOP7);
emit(NOP6);
}
else {
emit(REX_ADD_I);
emitByte(0xc0 + reg);
emit32(1 << shift);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(conditionMask);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
}
emit(REX_ADD_I);
emitByte(0xc0 + reg);
emit32(1 << shift);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(conditionMask);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
registerUsage[j] = i;
}
@ -973,13 +900,6 @@ namespace randomx {
#ifdef RANDOMX_JUMP
handleCondition(instr, i);
#endif
if (false && NOP_TEST) {
emit(NOP3);
emit(NOP7);
emit(NOP3);
emit(NOP3);
return;
}
emit(XOR_ECX_ECX);
emit(REX_CMP_R32I);
emitByte(0xf8 + instr.src);

@ -50,8 +50,8 @@ extern "C" {
* subsequent Dataset initialization faster
*
* @return Pointer to an allocated randomx_cache structure.
NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT
is set and JIT compilation is not supported on the current platform.
* NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT
* is set and JIT compilation is not supported on the current platform.
*/
randomx_cache *randomx_alloc_cache(randomx_flags flags);
@ -76,9 +76,9 @@ void randomx_release_cache(randomx_cache* cache);
*
* @param flags is the initialization flags. Only one flag is supported (can be set or not set):
* RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
*
* @return Pointer to an allocated randomx_dataset structure.
NULL is returned if memory allocation fails.
* NULL is returned if memory allocation fails.
*/
randomx_dataset *randomx_alloc_dataset(randomx_flags flags);

@ -700,7 +700,7 @@ namespace randomx {
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
if (scheduleCycle < 0) {
/*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
//__debugbreak();
portsSaturated = true;
break;
@ -725,7 +725,7 @@ namespace randomx {
continue;
}
//abort this decode buffer
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
currentInstruction = SuperscalarInstruction::Null;
break;
}
@ -748,7 +748,7 @@ namespace randomx {
continue;
}
//abort this decode buffer
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
currentInstruction = SuperscalarInstruction::Null;
break;
}

@ -42,7 +42,6 @@ namespace randomx {
// Runs the program by invoking the function object returned by
// compiler.getProgramFunc(), passing reg, mem, scratchpad and
// RANDOMX_PROGRAM_ITERATIONS as its arguments.
template<class Allocator, bool softAes>
void CompiledVm<Allocator, softAes>::execute() {
//executeProgram(reg, mem, scratchpad, InstructionCount);
compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS);
}

@ -28,10 +28,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace randomx {
extern "C" {
void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
}
template<class Allocator, bool softAes>
class CompiledVm : public VmBase<Allocator, softAes> {
public:

@ -115,7 +115,6 @@ namespace randomx {
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
//if(trace) printState(r, f, e, a);
switch (ibc.type)
{
case InstructionType::IADD_RS: {
@ -237,15 +236,9 @@ namespace randomx {
*ibc.creg += (1 << ibc.shift);
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
if ((*ibc.creg & conditionMask) == 0) {
#ifdef STATS
count_JUMP_taken++;
#endif
ic = ibc.target;
break;
}
#ifdef STATS
count_JUMP_not_taken++;
#endif
#endif
*ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0;
} break;
@ -255,15 +248,9 @@ namespace randomx {
*ibc.creg += (1uLL << ibc.shift);
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
if ((*ibc.creg & conditionMask) == 0) {
#ifdef STATS
count_JUMP_taken++;
#endif
ic = ibc.target;
break;
}
#ifdef STATS
count_JUMP_not_taken++;
#endif
#endif
*ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0;
} break;
@ -328,7 +315,6 @@ namespace randomx {
}
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
//std::cout << "Iteration " << iter << std::endl;
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
spAddr0 ^= spMix;
spAddr0 &= ScratchpadL3Mask64;
@ -366,7 +352,6 @@ namespace randomx {
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
mem.mx &= CacheLineAlignMask;
datasetRead(mem.ma, r);
//executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
std::swap(mem.mx, mem.ma);
if (trace) {
@ -450,22 +435,6 @@ namespace randomx {
r[i] ^= datasetLine[i];
}
/*template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(SuperscalarProgram* programs) {
memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
reciprocals.clear();
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
for (unsigned j = 0; j < superScalarPrograms[i].getSize(); ++j) {
Instruction& instr = superScalarPrograms[i](j);
if (instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
auto rcp = reciprocal(instr.getImm32());
instr.setImm32(reciprocals.size());
reciprocals.push_back(rcp);
}
}
}
}*/
#include "instruction_weights.hpp"
template<class Allocator, bool softAes>

@ -49,8 +49,6 @@ namespace randomx {
uint16_t shift;
};
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
template<class Allocator, bool softAes>
class InterpretedVm : public VmBase<Allocator, softAes> {
public:

Loading…
Cancel
Save