ALU and FPU tests

random-access
tevador 6 years ago
parent ec2d378fce
commit f19995d4c5

@ -205,7 +205,7 @@ The shift/rotate instructions use just the bottom 6 bits of the `B` operand (`im
|22|FSUB|A - B| |22|FSUB|A - B|
|22|FMUL|A * B| |22|FMUL|A * B|
|8|FDIV|A / B| |8|FDIV|A / B|
|6|FSQRT|sqrt(A)| |6|FABSQRT|sqrt(A)|
|2|FROUND|A| |2|FROUND|A|
FPU instructions conform to the IEEE-754 specification, so they must give correctly rounded results. Initial rounding mode is RN (Round to Nearest). Denormal values may not be produced by any operation. FPU instructions conform to the IEEE-754 specification, so they must give correctly rounded results. Initial rounding mode is RN (Round to Nearest). Denormal values may not be produced by any operation.
@ -214,8 +214,8 @@ FPU instructions conform to the IEEE-754 specification, so they must give correc
Operands loaded from memory are treated as signed 64-bit integers and converted to double precision floating point format. Operands loaded from floating point registers are used directly. Operands loaded from memory are treated as signed 64-bit integers and converted to double precision floating point format. Operands loaded from floating point registers are used directly.
##### FSQRT ##### FABSQRT
The sign bit of the FSQRT operand is always cleared first, so only non-negative values are used. The sign bit of the FABSQRT operand is always cleared first, so only non-negative values are used.
*In x86, the `SQRTSD` instruction must be used. The legacy `FSQRT` instruction doesn't produce correctly rounded results in all cases.* *In x86, the `SQRTSD` instruction must be used. The legacy `FSQRT` instruction doesn't produce correctly rounded results in all cases.*
@ -225,11 +225,11 @@ The FROUND instruction changes the rounding mode for all subsequent FPU operatio
|A[1:0]|rounding mode| |A[1:0]|rounding mode|
|-------|------------| |-------|------------|
|00|Round to Nearest (RN) mode| |00|Round to Nearest (RN) mode|
|01|Round towards Plus Infinity (RP) mode |01|Round towards Minus Infinity (RM) mode
|10|Round towards Minus Infinity (RM) mode |10|Round towards Plus Infinity (RP) mode
|11|Round towards Zero (RZ) mode |11|Round towards Zero (RZ) mode
*The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 22-23 of the ARM `FPSCR` register.* *The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 23 and 22 (reversed) of the ARM `FPSCR` register.*
### Control flow instructions ### Control flow instructions
The following 2 control flow instructions are supported: The following 2 control flow instructions are supported:

@ -0,0 +1,69 @@
//RandomX ALU + FPU test
//https://github.com/tevador/RandomX
//License: GPL v3
#include <cstdint>
namespace RandomX {
// Rounding-mode selector values used by FROUND (low 2 bits of operand A).
// The encoding matches bits 13-14 of the x86 MXCSR register:
// 00 = round to nearest, 01 = round down, 10 = round up, 11 = toward zero.
constexpr int RoundToNearest = 0;
constexpr int RoundDown = 1;
constexpr int RoundUp = 2;
constexpr int RoundToZero = 3;
// Type-punning view of one 64-bit machine word: the same bits can be
// read as a double, as a signed/unsigned 64-bit integer, or as the
// low 32 bits (signed/unsigned). Used as the universal operand type
// for all ALU/FPU test primitives.
typedef union {
double f64;
int64_t i64;
uint64_t u64;
int32_t i32;
uint32_t u32;
} convertible_t;
extern "C" {
// ALU primitives: compute c = a OP b. Each has two implementations —
// a portable one in InstructionsPortable.cpp and a hand-written x64
// assembly reference. The *_32 variants operate on the low 32 bits
// and zero-extend the result into c.
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c);
void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c);
void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c);
void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c);
void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c);
void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c);
// Division uses only the low 32 bits of b as the divisor; a zero
// divisor is replaced by 1 (see implementations).
void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c);
void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c);
void AND_64(convertible_t& a, convertible_t& b, convertible_t& c);
void AND_32(convertible_t& a, convertible_t& b, convertible_t& c);
void OR_64(convertible_t& a, convertible_t& b, convertible_t& c);
void OR_32(convertible_t& a, convertible_t& b, convertible_t& c);
void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c);
void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c);
// Shifts and rotates use the bottom 6 bits of b as the count.
void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c);
void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c);
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
// FPINIT sets the initial FPU state (round-to-nearest; see implementations).
void FPINIT();
// FPU primitives: operand A is reinterpreted as int64 and converted to
// double; operand B is already a double.
void FADD_64(convertible_t& a, double b, convertible_t& c);
void FSUB_64(convertible_t& a, double b, convertible_t& c);
void FMUL_64(convertible_t& a, double b, convertible_t& c);
void FDIV_64(convertible_t& a, double b, convertible_t& c);
// FABSQRT: sqrt(|A|); FROUND: converts A and reprograms the rounding
// mode from A's low 2 bits.
void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c);
void FROUND(convertible_t& a, convertible_t& b, convertible_t& c);
// Convenience wrappers giving the FPU ops the same 3-operand signature
// as the ALU ops: b is treated as a signed 64-bit integer and converted
// to double before the operation.
inline void FADD(convertible_t& a, convertible_t& b, convertible_t& c) {
FADD_64(a, (double)b.i64, c);
}
inline void FSUB(convertible_t& a, convertible_t& b, convertible_t& c) {
FSUB_64(a, (double)b.i64, c);
}
inline void FMUL(convertible_t& a, convertible_t& b, convertible_t& c) {
FMUL_64(a, (double)b.i64, c);
}
inline void FDIV(convertible_t& a, convertible_t& b, convertible_t& c) {
FDIV_64(a, (double)b.i64, c);
}
}
}

@ -0,0 +1,248 @@
//RandomX ALU + FPU test
//https://github.com/tevador/RandomX
//License: GPL v3
#include "Instructions.h"
#include <cfenv>
#include <cmath>
#if defined(__SIZEOF_INT128__)
// Compilers with a native 128-bit integer type (gcc/clang): obtain the
// high 64 bits of a 64x64 multiplication directly from the 128-bit product.
typedef unsigned __int128 uint128_t;
typedef __int128 int128_t;
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
return ((uint128_t)a * b) >> 64;
}
static inline uint64_t __imulhi64(int64_t a, int64_t b) {
return ((int128_t)a * b) >> 64;
}
#define umulhi64 __umulhi64
#define imulhi64 __imulhi64
#endif
#if defined(_MSC_VER)
// MSVC: map the primitives to compiler intrinsics where available.
#include <intrin.h>
#include <stdlib.h>
#define ror64 _rotr64
#define rol64 _rotl64
// NOTE(review): __MACHINEARM64_X64 / __MACHINEX64 / __MACHINEX86_X64 are
// internal <intrin.h> helper macros, not documented feature-test macros —
// confirm they are actually defined in this TU. Also, __umulh is available
// on native x64 MSVC too; guarding it with __MACHINEARM64_X64 only looks
// suspicious — verify plain x64 builds don't silently fall back to the
// portable umulhi64.
#ifdef __MACHINEARM64_X64
#define umulhi64 __umulh
#endif
#ifdef __MACHINEX64
// _mul128 returns the low half and stores the high half through the
// out-parameter; we want the high half.
static inline uint64_t __imulhi64(int64_t a, int64_t b) {
int64_t hi;
_mul128(a, b, &hi);
return hi;
}
#define imulhi64 __imulhi64
#endif
#ifdef __MACHINEX86_X64
#define sar64 __ll_rshift
#endif
#endif
#ifndef ror64
// Portable rotate-right fallback. Callers pass a count already reduced
// mod 64, which includes 0 — the original `a << (64 - b)` would then
// shift by 64 (undefined behavior). Masking the complementary count
// with 63 makes b == 0 well-defined (returns a unchanged) without
// altering the result for 1..63.
static inline uint64_t __ror64(uint64_t a, int b) {
	return (a >> b) | (a << ((64 - b) & 63));
}
#define ror64 __ror64
#endif
#ifndef rol64
// Portable rotate-left fallback. As with __ror64, a count of 0 made the
// original `a >> (64 - b)` shift by 64 (undefined behavior); masking the
// complementary count with 63 fixes b == 0 and is identical for 1..63.
static inline uint64_t __rol64(uint64_t a, int b) {
	return (a << b) | (a >> ((64 - b) & 63));
}
#define rol64 __rol64
#endif
#ifndef sar64
#include <type_traits>
// Compile-time probe: does this compiler implement `>>` on a negative
// int64_t as an arithmetic (sign-preserving) shift? The behavior is
// implementation-defined, so we test it with a constexpr evaluation.
constexpr int64_t shiftRight64(int64_t value, int count) noexcept {
	return value >> count;
}
using hasArithmeticShift = std::integral_constant<bool, shiftRight64(-1LL, 1) == -1LL>;
// Arithmetic shift right: use the native operator when the probe says it
// is arithmetic, otherwise emulate it by shifting the one's complement so
// the vacated bits are filled with copies of the sign bit.
static inline int64_t __sar64(int64_t a, int b) {
	if (hasArithmeticShift::value)
		return shiftRight64(a, b);
	return a < 0 ? ~(~a >> b) : a >> b;
}
#define sar64 __sar64
#endif
#ifndef umulhi64
// Portable high-half 64x64 unsigned multiply: schoolbook multiplication
// with four 32x32->64 partial products.
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
uint64_t ah = HI(a), al = LO(a);
uint64_t bh = HI(b), bl = LO(b);
uint64_t x00 = al * bl;
uint64_t x01 = al * bh;
uint64_t x10 = ah * bl;
uint64_t x11 = ah * bh;
// m1/m2/m3 accumulate the middle and top 32-bit columns, propagating
// carries via HI(...) of the previous column.
uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
uint64_t m3 = HI(x11) + HI(m2);
// Bits 127..64 of the product: top column in the high word, carry-adjusted
// middle column in the low word.
return (m3 << 32) + LO(m2);
}
#define umulhi64 __umulhi64
#endif
#ifndef imulhi64
// Signed high-half multiply derived from the unsigned one. In two's
// complement, a signed value equals its unsigned bits minus 2^64 when
// negative, so the signed high half is the unsigned high half minus b
// (when a < 0) and minus a (when b < 0).
static inline int64_t __imulhi64(int64_t a, int64_t b) {
int64_t hi = umulhi64(a, b);
if (a < 0LL) hi -= b;
if (b < 0LL) hi -= a;
return hi;
}
#define imulhi64 __imulhi64
#endif
// Replaces a subnormal (denormal) result with zero; every other value —
// including zero, infinities and NaN — passes through unchanged. Mirrors
// the FTZ behavior the assembly version gets from the MXCSR FTZ bit.
static double FlushDenormal(double x) {
	return std::fpclassify(x) == FP_SUBNORMAL ? 0 : x;
}
#define FTZ(x) FlushDenormal(x)
namespace RandomX {

	extern "C" {

		// c = a + b, 64-bit wrap-around.
		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 + b.u64;
		}
		// 32-bit add; the 32-bit sum is zero-extended into c.
		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 + b.u32;
		}
		// c = a - b, 64-bit wrap-around.
		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 - b.u64;
		}
		// 32-bit subtract; the 32-bit difference is zero-extended into c.
		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 - b.u32;
		}
		// Low 64 bits of the 64x64 product (same for signed/unsigned).
		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 * b.u64;
		}
		// High 64 bits of the unsigned 64x64 product.
		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = umulhi64(a.u64, b.u64);
		}
		// Full 64-bit product of the two unsigned low 32-bit halves.
		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = (uint64_t)a.u32 * b.u32;
		}
		// Full 64-bit product of the two signed low 32-bit halves.
		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.i64 = (int64_t)a.i32 * b.i32;
		}
		// High 64 bits of the signed 64x64 product.
		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.i64 = imulhi64(a.i64, b.i64);
		}
		// Unsigned a.u64 / b.u32; a zero divisor is replaced by 1 so the
		// operation is total.
		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
		}
		// Signed a.i64 / b.i32 with the same divisor-zero substitution.
		// The divisor actually used is b.i32, so the INT64_MIN / -1
		// overflow guard must test b.i32 (the previous b.i64 == -1 check
		// missed divisors like 0x00000000FFFFFFFF, leaving UB overflow).
		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			if (a.i64 == INT64_MIN && b.i32 == -1)
				c.i64 = INT64_MIN;
			else
				c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
		}
		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 & b.u64;
		}
		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 & b.u32;
		}
		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 | b.u64;
		}
		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 | b.u32;
		}
		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 ^ b.u64;
		}
		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 ^ b.u32;
		}
		// Shifts/rotates use the bottom 6 bits of b, matching x86 behavior.
		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 << (b.u64 & 63);
		}
		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 >> (b.u64 & 63);
		}
		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = sar64(a.i64, b.u64 & 63);
		}
		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = rol64(a.u64, (b.u64 & 63));
		}
		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = ror64(a.u64, (b.u64 & 63));
		}
		// Initial FPU state: round to nearest (denormal flushing is done in
		// software via FTZ in this portable build).
		void FPINIT() {
			fesetround(FE_TONEAREST);
		}
		// FPU ops: A is reinterpreted as int64 and converted to double under
		// the current rounding mode; results have denormals flushed to zero.
		void FADD_64(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ((double)a.i64 + b);
		}
		void FSUB_64(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ((double)a.i64 - b);
		}
		void FMUL_64(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ((double)a.i64 * b);
		}
		void FDIV_64(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ((double)a.i64 / b);
		}
		// sqrt of |A|: the sign is cleared first so the operand is always
		// non-negative, per the spec.
		void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
			double d = fabs((double)a.i64);
			c.f64 = FTZ(sqrt(d));
		}
		// Converts A under the *current* rounding mode first (matching the
		// assembly version, which issues cvtsi2sd before ldmxcsr), then
		// reprograms the rounding mode from A's low 2 bits.
		void FROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.f64 = (double)a.i64;
			switch (a.u64 & 3) {
			case RoundDown:
				fesetround(FE_DOWNWARD);
				break;
			case RoundUp:
				fesetround(FE_UPWARD);
				break;
			case RoundToZero:
				fesetround(FE_TOWARDZERO);
				break;
			default:
				fesetround(FE_TONEAREST);
				break;
			}
		}
	}
}

@ -0,0 +1,276 @@
;RandomX ALU + FPU test
;https://github.com/tevador/RandomX
;License: GPL v3
PUBLIC ADD_64
PUBLIC ADD_32
PUBLIC SUB_64
PUBLIC SUB_32
PUBLIC MUL_64
PUBLIC MULH_64
PUBLIC MUL_32
PUBLIC IMUL_32
PUBLIC IMULH_64
PUBLIC DIV_64
PUBLIC IDIV_64
PUBLIC AND_64
PUBLIC AND_32
PUBLIC OR_64
PUBLIC OR_32
PUBLIC XOR_64
PUBLIC XOR_32
PUBLIC SHL_64
PUBLIC SHR_64
PUBLIC SAR_64
PUBLIC ROL_64
PUBLIC ROR_64
PUBLIC FPINIT
PUBLIC FADD_64
PUBLIC FSUB_64
PUBLIC FMUL_64
PUBLIC FDIV_64
PUBLIC FABSQRT
PUBLIC FROUND
CONST SEGMENT
; 128-bit mask 0x7FFFFFFFFFFFFFFF:7FFFFFFFFFFFFFFF (little-endian bytes):
; the sign bit of each 64-bit lane is clear, so ANDing a double with it
; computes fabs(). Used by FABSQRT.
__XMMABS DB 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 07fH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 07fH
CONST ENDS
.code
; Windows x64 calling convention: rcx = &a, rdx = &b, r8 = &c.
; c = a + b (64-bit).
ADD_64 PROC
mov rax, QWORD PTR [rcx]
add rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
ADD_64 ENDP
; 32-bit add; writing eax zeroes the upper half of rax, so the stored
; 64-bit result is the zero-extended 32-bit sum.
ADD_32 PROC
mov eax, DWORD PTR [rcx]
add eax, DWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
ADD_32 ENDP
; c = a - b (64-bit).
SUB_64 PROC
mov rax, QWORD PTR [rcx]
sub rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
SUB_64 ENDP
; 32-bit subtract, zero-extended (same eax trick as ADD_32).
SUB_32 PROC
mov eax, DWORD PTR [rcx]
sub eax, DWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
SUB_32 ENDP
; Low 64 bits of the product (identical for signed/unsigned).
MUL_64 PROC
mov rax, QWORD PTR [rcx]
imul rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
MUL_64 ENDP
; Unsigned high half: one-operand MUL leaves rdx:rax = product; b is
; loaded into rax first because MUL clobbers rdx.
MULH_64 PROC
mov rax, QWORD PTR [rdx]
mul QWORD PTR [rcx]
mov QWORD PTR [r8], rdx
ret 0
MULH_64 ENDP
; Full 64-bit product of the two zero-extended low 32-bit halves.
MUL_32 PROC
mov r9d, DWORD PTR [rcx]
mov eax, DWORD PTR [rdx]
imul r9, rax
mov QWORD PTR [r8], r9
ret 0
MUL_32 ENDP
; Full 64-bit product of the two sign-extended low 32-bit halves.
IMUL_32 PROC
movsxd r9, DWORD PTR [rcx]
movsxd rax, DWORD PTR [rdx]
imul r9, rax
mov QWORD PTR [r8], r9
ret 0
IMUL_32 ENDP
; Signed high half: one-operand IMUL leaves rdx:rax = signed product.
IMULH_64 PROC
mov rax, QWORD PTR [rdx]
imul QWORD PTR [rcx]
mov QWORD PTR [r8], rdx
ret 0
IMULH_64 ENDP
; Unsigned a.u64 / b.u32. A zero divisor is replaced by 1 via cmovne so
; the operation never faults. The divisor is widened to r9 and a full
; 64-bit DIV is used because the quotient can exceed 32 bits.
DIV_64 PROC
mov r9d, DWORD PTR [rdx]
mov eax, 1
test r9d, r9d
cmovne eax, r9d
xor edx, edx
mov r9d, eax
mov rax, QWORD PTR [rcx]
div r9
mov QWORD PTR [r8], rax
ret 0
DIV_64 ENDP
; Signed a.i64 / b.i32 with divisor-zero -> 1 substitution and an
; explicit INT64_MIN / -1 guard (hardware IDIV raises #DE on overflow).
; NOTE(review): the guard compares the full QWORD of b against -1, but
; the divisor actually used below is only the sign-extended low DWORD
; (ecx). A value like b = 0x00000000FFFFFFFF has b.i32 == -1 yet fails
; this compare, so INT64_MIN / such b would still raise #DE — confirm
; and consider comparing DWORD PTR [rdx] instead.
IDIV_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, -9223372036854775808
cmp rax, rcx
jne SHORT SAFE_IDIV_64
cmp QWORD PTR [rdx], -1
jne SHORT SAFE_IDIV_64
mov QWORD PTR [r8], rcx
ret 0
SAFE_IDIV_64:
mov ecx, DWORD PTR [rdx]
test ecx, ecx
mov edx, 1
cmovne edx, ecx
movsxd rcx, edx
; cqo sign-extends rax into rdx:rax before the signed divide.
cqo
idiv rcx
mov QWORD PTR [r8], rax
ret 0
IDIV_64 ENDP
; Bitwise operations; the 32-bit variants zero-extend via the eax write.
AND_64 PROC
mov rax, QWORD PTR [rcx]
and rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
AND_64 ENDP
AND_32 PROC
mov eax, DWORD PTR [rcx]
and eax, DWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
AND_32 ENDP
OR_64 PROC
mov rax, QWORD PTR [rcx]
or rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
OR_64 ENDP
OR_32 PROC
mov eax, DWORD PTR [rcx]
or eax, DWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
OR_32 ENDP
XOR_64 PROC
mov rax, QWORD PTR [rcx]
xor rax, QWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
XOR_64 ENDP
XOR_32 PROC
mov eax, DWORD PTR [rcx]
xor eax, DWORD PTR [rdx]
mov QWORD PTR [r8], rax
ret 0
XOR_32 ENDP
; Shifts and rotates: the hardware masks cl to its low 6 bits for 64-bit
; operands, which implements the spec's "bottom 6 bits of B" for free.
SHL_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, QWORD PTR [rdx]
shl rax, cl
mov QWORD PTR [r8], rax
ret 0
SHL_64 ENDP
SHR_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, QWORD PTR [rdx]
shr rax, cl
mov QWORD PTR [r8], rax
ret 0
SHR_64 ENDP
SAR_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, QWORD PTR [rdx]
sar rax, cl
mov QWORD PTR [r8], rax
ret 0
SAR_64 ENDP
ROL_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, QWORD PTR [rdx]
rol rax, cl
mov QWORD PTR [r8], rax
ret 0
ROL_64 ENDP
ROR_64 PROC
mov rax, QWORD PTR [rcx]
mov rcx, QWORD PTR [rdx]
ror rax, cl
mov QWORD PTR [r8], rax
ret 0
ROR_64 ENDP
; 40896 = 9FC0h: all exception masks set (bits 7-12), DAZ (bit 6) and
; FTZ (bit 15) enabled, rounding control (bits 13-14) = 00 (nearest).
; [rsp+8] is the caller-provided shadow space, used as scratch.
FPINIT PROC
mov DWORD PTR [rsp+8], 40896
ldmxcsr DWORD PTR [rsp+8]
ret 0
FPINIT ENDP
; FPU ops: A (int64 at [rcx]) is converted to double; B arrives in xmm1.
FADD_64 PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
addsd xmm0, xmm1
movsd QWORD PTR [r8], xmm0
ret 0
FADD_64 ENDP
FSUB_64 PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
subsd xmm0, xmm1
movsd QWORD PTR [r8], xmm0
ret 0
FSUB_64 ENDP
FMUL_64 PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
mulsd xmm0, xmm1
movsd QWORD PTR [r8], xmm0
ret 0
FMUL_64 ENDP
FDIV_64 PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
divsd xmm0, xmm1
movsd QWORD PTR [r8], xmm0
ret 0
FDIV_64 ENDP
; sqrt(|A|): the AND with __XMMABS clears the sign bit before SQRTSD
; (SQRTSD is required — legacy FSQRT is not correctly rounded).
FABSQRT PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
andps xmm0, XMMWORD PTR __XMMABS
sqrtsd xmm1, xmm0
movsd QWORD PTR [r8], xmm1
ret 0
FABSQRT ENDP
; Converts and stores A under the *current* rounding mode, then moves
; A[1:0] into MXCSR bits 13-14 (shl 13; and 6000h) and ORs in the base
; state 9FC0h before loading the new control word.
FROUND PROC
cvtsi2sd xmm0, QWORD PTR [rcx]
movsd QWORD PTR [r8], xmm0
mov rax, QWORD PTR [rcx]
shl rax, 13
and eax, 24576
or eax, 40896
mov DWORD PTR [rsp+8], eax
ldmxcsr DWORD PTR [rsp+8]
ret 0
FROUND ENDP
END

@ -0,0 +1,283 @@
//RandomX ALU + FPU test
//https://github.com/tevador/RandomX
//License: GPL v3
#include <iostream>
#include <iomanip>
#include <limits>
#include "Instructions.h"
using namespace RandomX;
typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&);
double rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) {
convertible_t a, b, c;
a.u64 = mode;
FROUND(a, b, c);
a.i64 = x;
b.i64 = y;
op(a, b, c);
return c.f64;
}
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
// Test drivers: load the operands into a/b (as unsigned or signed 64-bit
// values) and invoke the instruction; the result is left in c. Expects
// convertible_t variables named a, b, c in the enclosing scope.
#define RX_EXECUTE_U64(va, vb, INST) do { \
a.u64 = va; \
b.u64 = vb; \
INST(a, b, c); \
} while(false)
#define RX_EXECUTE_I64(va, vb, INST) do { \
a.i64 = va; \
b.i64 = vb; \
INST(a, b, c); \
} while(false)
// Arithmetic ALU cases: each drives one extern "C" primitive through the
// RX_EXECUTE_* macros and pins the exact 64-bit result, including
// wrap-around and zero-extension of the *_32 variants.
TEST_CASE("Integer addition (64-bit)", "[ADD_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_64);
REQUIRE(c.u64 == 0x100000000);
// Overflow wraps modulo 2^64.
RX_EXECUTE_U64(0x8000000000000000, 0x8000000000000000, ADD_64);
REQUIRE(c.u64 == 0x0);
}
TEST_CASE("Integer addition (32-bit)", "[ADD_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_32);
REQUIRE(c.u64 == 0);
// Upper 32 bits of the inputs are ignored; result is zero-extended.
RX_EXECUTE_U64(0xFF00000000000001, 0x0000000100000001, ADD_32);
REQUIRE(c.u64 == 2);
}
TEST_CASE("Integer subtraction (64-bit)", "[SUB_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_64);
REQUIRE(c.u64 == 0xFFFFFFFF00000002);
}
TEST_CASE("Integer subtraction (32-bit)", "[SUB_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_32);
REQUIRE(c.u64 == 2);
}
TEST_CASE("Unsigned multiplication (64-bit, low half)", "[MUL_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_64);
REQUIRE(c.u64 == 0x28723424A9108E51);
}
TEST_CASE("Unsigned multiplication (64-bit, high half)", "[MULH_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MULH_64);
REQUIRE(c.u64 == 0xB4676D31D2B34883);
}
TEST_CASE("Unsigned multiplication (32-bit x 32-bit -> 64-bit)", "[MUL_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_32);
REQUIRE(c.u64 == 0xB001AA5FA9108E51);
}
TEST_CASE("Signed multiplication (32-bit x 32-bit -> 64-bit)", "[IMUL_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMUL_32);
REQUIRE(c.u64 == 0x03EBA0C1A9108E51);
}
TEST_CASE("Signed multiplication (64-bit, high half)", "[IMULH_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMULH_64);
REQUIRE(c.u64 == 0x02D93EF1269D3EE5);
}
TEST_CASE("Unsigned division (64-bit / 32-bit -> 32-bit)", "[DIV_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(8774217225983458895, 3014068202, DIV_64);
REQUIRE(c.u64 == 2911087818);
// Division by zero substitutes divisor 1, returning the dividend.
RX_EXECUTE_U64(8774217225983458895, 0, DIV_64);
REQUIRE(c.u64 == 8774217225983458895);
// Only the low 32 bits of b are used as the divisor.
RX_EXECUTE_U64(3014068202, 8774217225983458895, DIV_64);
REQUIRE(c.u64 == 2);
}
TEST_CASE("Signed division (64-bit / 32-bit -> 32-bit)", "[IDIV_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(8774217225983458895, 3014068202, IDIV_64);
REQUIRE(c.u64 == 0xFFFFFFFE67B4994E);
RX_EXECUTE_U64(8774217225983458895, 0, IDIV_64);
REQUIRE(c.u64 == 8774217225983458895);
// INT64_MIN / -1 must not trap/overflow; result is defined as INT64_MIN.
RX_EXECUTE_U64(0x8000000000000000, 0xFFFFFFFFFFFFFFFF, IDIV_64);
REQUIRE(c.u64 == 0x8000000000000000);
RX_EXECUTE_U64(0xFFFFFFFFB3A707EA, 8774217225983458895, IDIV_64);
REQUIRE(c.u64 == 0xFFFFFFFFFFFFFFFF);
}
// Bitwise and shift/rotate cases. The *_32 variants must zero-extend;
// the shift/rotate count is the bottom 6 bits of b.
TEST_CASE("Bitwise AND (64-bit)", "[AND_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_64);
REQUIRE(c.u64 == 0x8888888888888888);
}
TEST_CASE("Bitwise AND (32-bit)", "[AND_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_32);
REQUIRE(c.u64 == 0x88888888);
}
TEST_CASE("Bitwise OR (64-bit)", "[OR_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_64);
REQUIRE(c.u64 == 0xEEEEEEEEEEEEEEEE);
}
TEST_CASE("Bitwise OR (32-bit)", "[OR_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_32);
REQUIRE(c.u64 == 0xEEEEEEEE);
}
TEST_CASE("Bitwise XOR (64-bit)", "[XOR_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_64);
REQUIRE(c.u64 == 0x2222222222222222);
}
TEST_CASE("Bitwise XOR (32-bit)", "[XOR_32]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_32);
REQUIRE(c.u64 == 0x22222222);
}
TEST_CASE("Logical left shift (64-bit)", "[SHL_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x3, 52, SHL_64);
REQUIRE(c.u64 == 0x30000000000000);
// Large counts are reduced mod 64.
RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHL_64);
REQUIRE(c.u64 == 6978065200108797952);
// Bits shifted out are discarded (logical shift).
RX_EXECUTE_U64(0x8000000000000000, 1, SHL_64);
REQUIRE(c.u64 == 0);
}
TEST_CASE("Logical right shift (64-bit)", "[SHR_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x3, 52, SHR_64);
REQUIRE(c.u64 == 0);
RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHR_64);
REQUIRE(c.u64 == 110985711);
RX_EXECUTE_U64(0x8000000000000000, 1, SHR_64);
REQUIRE(c.u64 == 0x4000000000000000);
}
TEST_CASE("Arithmetic right shift (64-bit)", "[SAR_64]") {
convertible_t a, b, c;
// Sign bit is replicated: -9 >> 2 rounds toward minus infinity.
RX_EXECUTE_I64(-9, 2, SAR_64);
REQUIRE(c.i64 == -3);
RX_EXECUTE_I64(INT64_MIN, 63, SAR_64);
REQUIRE(c.i64 == -1);
RX_EXECUTE_I64(INT64_MAX, 163768499474606398, SAR_64);
REQUIRE(c.i64 == 1);
}
TEST_CASE("Circular left shift (64-bit)", "[ROL_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x3, 52, ROL_64);
REQUIRE(c.u64 == 0x30000000000000);
RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROL_64);
REQUIRE(c.u64 == 6978065200552740799);
// Rotation preserves all bits: the top bit wraps to bit 0.
RX_EXECUTE_U64(0x8000000000000000, 1, ROL_64);
REQUIRE(c.u64 == 1);
}
TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") {
convertible_t a, b, c;
RX_EXECUTE_U64(0x3, 52, ROR_64);
REQUIRE(c.u64 == 12288);
RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROR_64);
REQUIRE(c.u64 == 0xD835C455069D81EF);
RX_EXECUTE_U64(0x8000000000000000, 1, ROR_64);
REQUIRE(c.u64 == 0x4000000000000000);
}
// FPU cases: denormal flushing, signed infinities from division by zero,
// and correctly rounded results (per IEEE-754) in all four rounding modes.
// Expected values are the exact correctly rounded doubles for each mode.
TEST_CASE("Denormal numbers are flushed to zero", "[FTZ]") {
FPINIT();
convertible_t a, c;
a.i64 = 1;
// 1 / DBL_MAX is subnormal, so the FTZ behavior must yield exactly 0.
FDIV_64(a, std::numeric_limits<double>::max(), c);
REQUIRE(c.f64 == 0.0);
}
TEST_CASE("IEEE-754 compliance", "[FPU]") {
FPINIT();
convertible_t a, c;
a.i64 = 1;
FDIV_64(a, 0, c);
REQUIRE(c.f64 == std::numeric_limits<double>::infinity());
a.i64 = -1;
FDIV_64(a, 0, c);
REQUIRE(c.f64 == -std::numeric_limits<double>::infinity());
REQUIRE(rxRound(RoundToNearest, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
REQUIRE(rxRound(RoundDown, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
REQUIRE(rxRound(RoundUp, 33073499373184121, -37713516328519941, &FADD) == -4640016955335812.0);
REQUIRE(rxRound(RoundToZero, 33073499373184121, -37713516328519941, &FADD) == -4640016955335816.0);
REQUIRE(rxRound(RoundToNearest, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107858e+18);
REQUIRE(rxRound(RoundDown, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107868e+18);
REQUIRE(rxRound(RoundUp, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);
REQUIRE(rxRound(RoundToZero, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);
REQUIRE(rxRound(RoundToNearest, 1, -10, &FDIV) == -0.10000000000000001);
REQUIRE(rxRound(RoundDown, 1, -10, &FDIV) == -0.10000000000000001);
REQUIRE(rxRound(RoundUp, 1, -10, &FDIV) == -0.099999999999999992);
REQUIRE(rxRound(RoundToZero, 1, -10, &FDIV) == -0.099999999999999992);
// FABSQRT clears the sign bit first, so sqrt(-2) is sqrt(2).
REQUIRE(rxRound(RoundToNearest, -2, 0, &FABSQRT) == 1.4142135623730951);
REQUIRE(rxRound(RoundDown, -2, 0, &FABSQRT) == 1.4142135623730949);
REQUIRE(rxRound(RoundUp, -2, 0, &FABSQRT) == 1.4142135623730951);
REQUIRE(rxRound(RoundToZero, -2, 0, &FABSQRT) == 1.4142135623730949);
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,10 @@
# Build for the RandomX ALU/FPU test (portable implementation).
CXXFLAGS=-Wall -std=c++17 -O0

# Link step now also passes $(CXXFLAGS) so flags stay consistent
# between compile and link.
TestAluFpu: TestAluFpu.o InstructionsPortable.o
	$(CXX) $(CXXFLAGS) TestAluFpu.o InstructionsPortable.o -o $@

# Object files build via the implicit .cpp -> .o rule.
TestAluFpu.o: TestAluFpu.cpp
InstructionsPortable.o: InstructionsPortable.cpp

# Declare clean phony so a file named "clean" cannot shadow the target.
.PHONY: clean
clean:
	rm -f TestAluFpu TestAluFpu.o InstructionsPortable.o
Loading…
Cancel
Save