Use cmake for building (#90)

* Use cmake for building
* CMakeLists.txt modified to support full standalone build
* added support for ARCH=native builds
* added PowerPC flags
* added ARMv8 flags
* check for x86 AES-NI at compile time
pr-arm-intrin
tevador 5 years ago committed by GitHub
parent 4a4b06e44b
commit b91882be42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

3
.gitignore vendored

@ -5,4 +5,5 @@ obj/
.vs
x64/
Release/
Debug/
Debug/
build/

@ -51,31 +51,126 @@ src/virtual_machine.cpp
src/vm_compiled_light.cpp
src/blake2/blake2b.c)
if (NOT ARCH_ID)
set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR})
if(NOT ARCH_ID)
# allow cross compiling
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "")
set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH_ID)
endif()
if(NOT ARM_ID)
set(ARM_ID "${ARCH_ID}")
endif()
if(NOT ARCH)
set(ARCH "default")
endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}")
endif()
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
# add_flag(<flag>)
#
# Appends <flag> to CMAKE_CXX_FLAGS and/or CMAKE_C_FLAGS in the caller's scope,
# but only for the language(s) whose compiler accepts it. The C++ and C probes
# are independent, so a flag may end up enabled for one language only.
#
# Arguments:
#   flag - a single compiler option, e.g. "-maes" or "-march=native".
#
# Example:
#   add_flag("-march=native")
function(add_flag flag)
# Name of the variable that receives the C++ probe result: "<flag>_cxx" with
# every "-" replaced by "_". Other characters (such as "=") are kept as-is.
string(REPLACE "-" "_" supported_cxx ${flag}_cxx)
check_cxx_compiler_flag(${flag} ${supported_cxx})
# supported_cxx holds the NAME of the result variable, so the double
# dereference below evaluates the probe result itself.
if(${${supported_cxx}})
message(STATUS "Setting CXX flag ${flag}")
# PARENT_SCOPE: a function has its own variable scope, so the caller's
# CMAKE_CXX_FLAGS has to be updated explicitly.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE)
endif()
# Same procedure for the C compiler, with a separate "<flag>_c" result variable.
string(REPLACE "-" "_" supported_c ${flag}_c)
check_c_compiler_flag(${flag} ${supported_c})
if(${${supported_c}})
message(STATUS "Setting C flag ${flag}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE)
endif()
endfunction()
# x86-64
if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
list(APPEND randomx_sources
src/jit_compiler_x86_static.S
src/jit_compiler_x86.cpp)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
if(ARCH STREQUAL "native")
add_flag("-march=native")
else()
# default build has hardware AES enabled (software AES can be selected at runtime)
add_flag("-maes")
endif()
endif()
# PowerPC
# Applies to 64-bit PowerPC in either byte order (ARCH_ID "ppc64" or "ppc64le").
# Only an ARCH=native build adds a flag here; the default build adds none.
if (ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
if(ARCH STREQUAL "native")
# add_flag only applies the option if the C/C++ compiler accepts it
add_flag("-mcpu=native")
endif()
# PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build
endif()
# ARMv8
# Note: this block keys on ARM_ID rather than ARCH_ID; ARM_ID falls back to
# ARCH_ID when it has not been set beforehand.
if (ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a")
if(ARCH STREQUAL "native")
# tune for the build machine; add_flag skips the option if the compiler rejects it
add_flag("-march=native")
else()
# default build has hardware AES enabled (software AES can be selected at runtime)
add_flag("-march=armv8-a+crypto")
endif()
endif()
set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")
add_library(randomx
${randomx_sources})
target_link_libraries(randomx
PRIVATE
${CMAKE_THREAD_LIBS_INIT})
set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET randomx PROPERTY CXX_STANDARD 11)
set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON)
add_executable(randomx-tests
src/tests/tests.cpp)
target_link_libraries(randomx-tests
PRIVATE randomx)
set_property(TARGET randomx-tests PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET randomx-tests PROPERTY CXX_STANDARD 11)
add_executable(randomx-codegen
src/tests/code-generator.cpp)
target_link_libraries(randomx-codegen
PRIVATE randomx)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
set_property(TARGET randomx-codegen PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET randomx-codegen PROPERTY CXX_STANDARD 11)
if (NOT Threads_FOUND AND UNIX AND NOT APPLE)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
endif()
add_executable(randomx-benchmark
src/tests/benchmark.cpp
src/tests/affinity.cpp)
target_link_libraries(randomx-benchmark
PRIVATE randomx
PRIVATE ${CMAKE_THREAD_LIBS_INIT})
# Probe whether a program using std::atomic<uint64_t> builds with the default
# toolchain settings. The result is stored in HAVE_CXX_ATOMICS.
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <cstdint>
#include <atomic>
int main() {
std::atomic<uint64_t> a;
a.is_lock_free();
}" HAVE_CXX_ATOMICS)
# If the probe failed, link randomx-benchmark against the "atomic" library as well
# (presumably libatomic, for targets without native 64-bit atomics - TODO confirm).
if(NOT HAVE_CXX_ATOMICS)
target_link_libraries(randomx-benchmark
PRIVATE "atomic")
endif()
set_property(TARGET randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET randomx-benchmark PROPERTY CXX_STANDARD 11)

@ -20,23 +20,28 @@ Design description and analysis is available in [design.md](doc/design.md).
## Build
RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `benchmark` executable for testing.
RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes the `randomx-benchmark` and `randomx-tests` executables for testing.
### Linux
Build dependencies: `make` and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build dependencies: `cmake` (minimum version 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build using the provided makefile.
To build optimized binaries for your machine, run:
```
git clone https://github.com/tevador/RandomX.git
cd RandomX
mkdir build && cd build
cmake -DARCH=native ..
make
```
### Windows
Build dependencies: Visual Studio 2017.
A solution file is provided.
On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio 2017 (a solution file is provided).
### Precompiled binaries
Precompiled `benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases).
Precompiled `randomx-benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases).
## Proof of work

@ -1,200 +0,0 @@
#CXX=g++-8
#CC=gcc-8
AR=gcc-ar
PLATFORM=$(shell uname -m)
OS=$(shell uname -s)
CXXFLAGS=-std=c++11
CCFLAGS=-std=c99
ARFLAGS=rcs
BINDIR=bin
SRCDIR=src
TESTDIR=src/tests
OBJDIR=obj
LDFLAGS=-lpthread
RXA=$(BINDIR)/librandomx.a
BINARIES=$(RXA) $(BINDIR)/randomx-benchmark $(BINDIR)/randomx-generator $(BINDIR)/randomx-tests
RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o bytecode_machine.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o)
ifeq ($(PLATFORM),amd64)
RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o)
CXXFLAGS += -maes
endif
ifeq ($(PLATFORM),x86_64)
RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o)
CXXFLAGS += -maes
endif
ifeq ($(OS),Darwin)
AR=ar
endif
ifeq ($(PLATFORM),ppc64)
CXXFLAGS += -mcpu=native
endif
ifeq ($(PLATFORM),ppc64le)
CXXFLAGS += -mcpu=native
endif
release: CXXFLAGS += -O3 -flto
release: CCFLAGS += -O3 -flto
release: LDFLAGS += -flto
release: $(BINARIES)
native: CXXFLAGS += -march=native -O3 -flto
native: CCFLAGS += -march=native -O3 -flto
native: $(BINARIES)
nolto: CXXFLAGS += -O3
nolto: CCFLAGS += -O3
nolto: $(BINARIES)
debug: CXXFLAGS += -g
debug: CCFLAGS += -g
debug: LDFLAGS += -g
debug: $(BINARIES)
profile: CXXFLAGS += -pg
profile: CCFLAGS += -pg
profile: LDFLAGS += -pg
profile: $(BINDIR)/randomx-benchmark
test: CXXFLAGS += -O0
$(RXA): $(RXOBJS) | $(BINDIR)
$(AR) $(ARFLAGS) $@ $(RXOBJS)
$(OBJDIR):
mkdir $(OBJDIR)
$(BINDIR):
mkdir $(BINDIR)
$(OBJDIR)/affinity.o: $(TESTDIR)/affinity.cpp $(TESTDIR)/affinity.hpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \
$(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h $(TESTDIR)/affinity.hpp
$(CXX) $(CXXFLAGS) -pthread -c $< -o $@
$(BINDIR)/randomx-benchmark: $(OBJDIR)/benchmark.o $(OBJDIR)/affinity.o $(RXA)
$(CXX) $(LDFLAGS) -pthread $< $(OBJDIR)/affinity.o $(RXA) -o $@
$(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \
$(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(BINDIR)/randomx-generator: $(OBJDIR)/code-generator.o $(RXA)
$(CXX) $(LDFLAGS) $< $(RXA) -o $@
$(OBJDIR)/tests.o: $(TESTDIR)/tests.cpp $(TESTDIR)/utility.hpp \
$(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h \
$(SRCDIR)/randomx.h $(SRCDIR)/intrin_portable.h \
$(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp \
$(SRCDIR)/allocator.hpp $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/reciprocal.h $(SRCDIR)/jit_compiler.hpp \
$(SRCDIR)/jit_compiler_x86.hpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(BINDIR)/randomx-tests: $(OBJDIR)/tests.o $(RXA)
$(CXX) $(LDFLAGS) $< $(RXA) -o $@
$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR)
$(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
$(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \
$(SRCDIR)/blake2/blake2.h
$(OBJDIR)/bytecode_machine.o: $(SRCDIR)/bytecode_machine.cpp $(SRCDIR)/bytecode_machine.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/reciprocal.h
$(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h
$(CC) $(CCFLAGS) -c $< -o $@
$(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
$(SRCDIR)/intrin_portable.h
$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/blake2_generator.hpp \
$(SRCDIR)/program.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/virtual_memory.hpp \
$(SRCDIR)/instruction_weights.hpp
$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDIR)/configuration.h \
$(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
$(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
$(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \
$(SRCDIR)/asm/program_epilogue_store.inc $(SRCDIR)/asm/program_sshash_load.inc \
$(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc \
$(SRCDIR)/asm/randomx_reciprocal.inc
$(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h
$(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp
$(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \
$(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/virtual_machine.hpp \
$(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/instruction_weights.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h
$(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \
$(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h
$(OBJDIR)/assembly_generator_x86.o: $(SRCDIR)/assembly_generator_x86.cpp \
$(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/reciprocal.h $(SRCDIR)/program.hpp \
$(SRCDIR)/instruction.hpp $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp \
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/instruction_weights.hpp
$(OBJDIR)/instruction.o: $(SRCDIR)/instruction.cpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/common.hpp $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/instruction_weights.hpp
$(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/superscalar_program.hpp \
$(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \
$(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \
$(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h $(SRCDIR)/bytecode_machine.hpp
$(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h $(SRCDIR)/program.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \
$(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/reciprocal.h
$(OBJDIR)/vm_compiled.o: $(SRCDIR)/vm_compiled.cpp $(SRCDIR)/vm_compiled.hpp \
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp
$(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \
$(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/bytecode_machine.hpp
$(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \
$(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h
$(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2_generator.hpp
$(OBJDIR)/instructions_portable.o: $(SRCDIR)/instructions_portable.cpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/intrin_portable.h
$(OBJDIR)/reciprocal.o: $(SRCDIR)/reciprocal.c $(SRCDIR)/reciprocal.h
$(OBJDIR)/virtual_machine.o: $(SRCDIR)/virtual_machine.cpp $(SRCDIR)/virtual_machine.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/aes_hash.hpp $(SRCDIR)/blake2/blake2.h \
$(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp
$(OBJDIR)/vm_compiled_light.o: $(SRCDIR)/vm_compiled_light.cpp $(SRCDIR)/vm_compiled_light.hpp \
$(SRCDIR)/vm_compiled.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp \
$(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp \
$(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \
$(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp
$(OBJDIR)/%.o: $(SRCDIR)/%.c
$(CC) $(CCFLAGS) -c $< -o $@
$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
$(CXX) $(CXXFLAGS) -c $< -o $@
$(OBJDIR)/%.o: $(SRCDIR)/%.S
$(CXX) -x assembler-with-cpp -c $< -o $@
clean:
rm -f $(BINARIES) $(OBJDIR)/*.o

@ -53,6 +53,11 @@ constexpr int RoundToZero = 3;
#define __SSE2__ 1
#endif
//MSVC doesn't define __AES__
#if defined(_MSC_VER) && defined(__SSE2__)
#define __AES__
#endif
//the library "sqrt" function provided by MSVC for x86 targets doesn't give
//the correct results, so we have to use inline assembly to call x87 fsqrt directly
#if !defined(__SSE2__)
@ -121,9 +126,16 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
#define rx_xor_vec_f128 _mm_xor_pd
#define rx_and_vec_f128 _mm_and_pd
#define rx_or_vec_f128 _mm_or_pd
#ifdef __AES__
#define rx_aesenc_vec_i128 _mm_aesenc_si128
#define rx_aesdec_vec_i128 _mm_aesdec_si128
#define HAVE_AES
#endif //__AES__
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return _mm_cvtsi128_si32(a);
}
@ -164,7 +176,7 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
#include <cstdint>
#include <stdexcept>
#include <cstdlib>
#include<altivec.h>
#include <altivec.h>
#undef vector
#undef pixel
#undef bool
@ -190,7 +202,6 @@ typedef union{
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
{ return (__m128i) vec_splats (scalar); }
@ -268,6 +279,7 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_or(a,b);
}
#if defined(__CRYPTO__)
FORCE_INLINE __m128ll vrev(__m128i v){
@ -291,18 +303,9 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
__m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
return (rx_vec_i128)vec_xor((__m128i)out,rkey);
}
#else
static const char* platformError = "Platform doesn't support hardware AES";
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
#endif
#define HAVE_AES
#endif //__CRYPTO__
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
vec_u _a;
@ -506,16 +509,6 @@ FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return x;
}
static const char* platformError = "Platform doesn't support hardware AES";
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return a.u32[0];
}
@ -591,6 +584,20 @@ void rx_set_rounding_mode(uint32_t mode);
#endif
//Fallback for builds where HAVE_AES was not defined: both AES round
//functions are still declared (presumably so that code referencing them keeps
//compiling), but calling either one throws std::runtime_error with the
//platformError message.
#ifndef HAVE_AES
static const char* platformError = "Platform doesn't support hardware AES";
#include <stdexcept>
//Stub for the AES encryption round: ignores both arguments and always throws
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
//Stub for the AES decryption round: ignores both arguments and always throws
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
#endif
double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t);

Loading…
Cancel
Save