Compare commits

...

7 Commits

Author SHA1 Message Date
tevador 102f8acf90 bump benchmark version to 1.2.1
7 months ago
tevador 73d70fe82a
Merge pull request #286 from tevador/pr-zext
7 months ago
tevador e895d451a3 Avoid `zext.b`
7 months ago
tevador e5b9c02417
Merge pull request #285 from tevador/pr-120
7 months ago
tevador 06a7cc1c33 Update README and benchmark version
7 months ago
SChernykh 7db92b73f7
Merge pull request #284 from SChernykh/opt-rcp
7 months ago
SChernykh 5c49ab12a0 Optimized randomx_reciprocal
7 months ago

@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by
### Linux
Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).
To build optimized binaries for your machine, run:
```
@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)|
Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|
Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower.
### GPU performance
@ -129,6 +129,7 @@ The reference implementation has been validated on the following platforms:
* ARMv7+VFPv3 (32-bit, little-endian)
* ARMv8 (64-bit, little-endian)
* PPC64 (64-bit, big-endian)
* RISCV64 (64-bit, little-endian)
### Can FPGAs mine RandomX?

@ -445,7 +445,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

@ -243,7 +243,7 @@ namespace randomx {
}
if (opcode < ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;

@ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
{
const uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (isZeroOrPowerOf2(divisor))
return;
@ -695,22 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
const uint64_t q = N / divisor;
const uint64_t r = N % divisor;
#ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll(divisor);
#else
uint64_t shift = 32;
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
literalPos -= sizeof(uint64_t);
const uint64_t randomx_reciprocal = (q << shift) + ((r << shift) / divisor);
memcpy(code + literalPos, &randomx_reciprocal, sizeof(randomx_reciprocal));
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
if (literal_id < 12)
{

@ -776,7 +776,7 @@ namespace randomx {
}
static void v1_IMUL_RCP(HANDLER_ARGS) {
uint64_t divisor = isn.getImm32();
const uint32_t divisor = isn.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
state.registerUsage[isn.dst] = i;
if (state.rcpCount < 4) {

@ -745,7 +745,7 @@ softaes_enc:
#endif
/* byte 0 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
addi x12, x13, -2048
#ifdef __riscv_zba
@ -757,7 +757,7 @@ softaes_enc:
lwu x14, -2048(x14)
/* byte 1 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -769,7 +769,7 @@ softaes_enc:
xor x8, x8, x14
/* byte 2 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -781,7 +781,7 @@ softaes_enc:
xor x11, x11, x15
/* byte 3 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -793,7 +793,7 @@ softaes_enc:
xor x10, x10, x14
/* byte 4 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -805,7 +805,7 @@ softaes_enc:
xor x9, x9, x15
/* byte 5 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -817,7 +817,7 @@ softaes_enc:
xor x9, x9, x14
/* byte 6 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -829,7 +829,7 @@ softaes_enc:
xor x8, x8, x15
/* byte 7 */
zext.b x15, x30
andi x15, x30, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -840,7 +840,7 @@ softaes_enc:
xor x11, x11, x14
/* byte 8 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -852,7 +852,7 @@ softaes_enc:
xor x10, x10, x15
/* byte 9 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -864,7 +864,7 @@ softaes_enc:
xor x10, x10, x14
/* byte 10 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -876,7 +876,7 @@ softaes_enc:
xor x9, x9, x15
/* byte 11 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -888,7 +888,7 @@ softaes_enc:
xor x8, x8, x14
/* byte 12 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -900,7 +900,7 @@ softaes_enc:
xor x11, x11, x15
/* byte 13 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -912,7 +912,7 @@ softaes_enc:
xor x11, x11, x14
/* byte 14 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -924,7 +924,7 @@ softaes_enc:
xor x10, x10, x15
/* byte 15 */
zext.b x15, x31
andi x15, x31, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -960,7 +960,7 @@ softaes_dec:
#endif
/* byte 0 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
addi x12, x13, -2048
#ifdef __riscv_zba
@ -972,7 +972,7 @@ softaes_dec:
lwu x14, -2048(x14)
/* byte 1 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -984,7 +984,7 @@ softaes_dec:
xor x8, x8, x14
/* byte 2 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -996,7 +996,7 @@ softaes_dec:
xor x9, x9, x15
/* byte 3 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -1008,7 +1008,7 @@ softaes_dec:
xor x10, x10, x14
/* byte 4 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1020,7 +1020,7 @@ softaes_dec:
xor x11, x11, x15
/* byte 5 */
zext.b x15, x30
andi x15, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1032,7 +1032,7 @@ softaes_dec:
xor x9, x9, x14
/* byte 6 */
zext.b x14, x30
andi x14, x30, 255
srli x30, x30, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1044,7 +1044,7 @@ softaes_dec:
xor x10, x10, x15
/* byte 7 */
zext.b x15, x30
andi x15, x30, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else
@ -1055,7 +1055,7 @@ softaes_dec:
xor x11, x11, x14
/* byte 8 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1067,7 +1067,7 @@ softaes_dec:
xor x8, x8, x15
/* byte 9 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1079,7 +1079,7 @@ softaes_dec:
xor x10, x10, x14
/* byte 10 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1091,7 +1091,7 @@ softaes_dec:
xor x11, x11, x15
/* byte 11 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x13
@ -1103,7 +1103,7 @@ softaes_dec:
xor x8, x8, x14
/* byte 12 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x12
@ -1115,7 +1115,7 @@ softaes_dec:
xor x9, x9, x15
/* byte 13 */
zext.b x15, x31
andi x15, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x15, x15, x12
@ -1127,7 +1127,7 @@ softaes_dec:
xor x11, x11, x14
/* byte 14 */
zext.b x14, x31
andi x14, x31, 255
srli x31, x31, 8
#ifdef __riscv_zba
sh2add x14, x14, x13
@ -1139,7 +1139,7 @@ softaes_dec:
xor x8, x8, x15
/* byte 15 */
zext.b x15, x31
andi x15, x31, 255
#ifdef __riscv_zba
sh2add x15, x15, x13
#else

@ -618,7 +618,7 @@ namespace randomx {
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);

@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ret
*/
/*
	Computes rcp = 2**x / divisor for the highest x such that rcp < 2**64.
	Used by the IMUL_RCP instruction: multiplying by rcp and taking the high
	64 bits approximates division by `divisor`.

	divisor: 32-bit constant from the instruction's immediate field.
	         Must not be 0; callers also filter out powers of 2
	         (see isZeroOrPowerOf2 at the call sites).
	returns: the 64-bit fixed-point reciprocal.
*/
uint64_t randomx_reciprocal(uint32_t divisor) {
	assert(divisor != 0);

	const uint64_t p2exp63 = 1ULL << 63;

	const uint64_t q = p2exp63 / divisor;
	const uint64_t r = p2exp63 % divisor;

#ifdef __GNUC__
	/* shift = bit width of divisor (position of its highest set bit + 1) */
	const uint32_t shift = 64 - __builtin_clzll(divisor);
#else
	/* portable fallback: scan down from bit 31 for the highest set bit */
	uint32_t shift = 32;
	for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
		--shift;
#endif

	/* (q << shift) + ((r << shift) / divisor) == floor(2**(63+shift) / divisor);
	   r < divisor <= 2**32, so (r << shift) cannot overflow (shift <= 32). */
	return (q << shift) + ((r << shift) / divisor);
}
#if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
return randomx_reciprocal(divisor);
}

@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
uint64_t randomx_reciprocal(uint64_t);
uint64_t randomx_reciprocal_fast(uint64_t);
uint64_t randomx_reciprocal(uint32_t);
uint64_t randomx_reciprocal_fast(uint32_t);
#if defined(__cplusplus)
}

@ -180,7 +180,7 @@ int main(int argc, char** argv) {
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.12" << std::endl;
std::cout << "RandomX benchmark v1.2.1" << std::endl;
if (help) {
printUsage(argv[0]);

@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
}
if (opcode < randomx::ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!randomx::isZeroOrPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;

Loading…
Cancel
Save