@@ -39,6 +39,11 @@
 #include "hash-ops.h"
 #include "oaes_lib.h"
 #include "variant2_int_sqrt.h"
+#include "variant4_random_math.h"
+#include "CryptonightR_JIT.h"
+
+#include <errno.h>
+#include <string.h>
 
 #define MEMORY         (1 << 21) // 2MB scratchpad
 #define ITER           (1 << 20)
@@ -47,9 +52,19 @@
 #define INIT_SIZE_BLK   8
 #define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
 extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
 extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
 
+static void local_abort(const char *msg)
+{
+  fprintf(stderr, "%s\n", msg);
+#ifdef NDEBUG
+  _exit(1);
+#else
+  abort();
+#endif
+}
+
 #define VARIANT1_1(p) \
   do if (variant == 1) \
   { \
@@ -109,69 +124,96 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
     memcpy(b + AES_BLOCK_SIZE, state.hs.b + 64, AES_BLOCK_SIZE); \
     xor64(b + AES_BLOCK_SIZE, state.hs.b + 80); \
     xor64(b + AES_BLOCK_SIZE + 8, state.hs.b + 88); \
-    division_result = state.hs.w[12]; \
-    sqrt_result = state.hs.w[13]; \
+    division_result = SWAP64LE(state.hs.w[12]); \
+    sqrt_result = SWAP64LE(state.hs.w[13]); \
   } while (0)
 
 #define VARIANT2_SHUFFLE_ADD_SSE2(base_ptr, offset) \
   do if (variant >= 2) \
   { \
-    const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
+    __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
     const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
     const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
     _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
     _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
     _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
+    if (variant >= 4) \
+    { \
+      chunk1 = _mm_xor_si128(chunk1, chunk2); \
+      _c = _mm_xor_si128(_c, chunk3); \
+      _c = _mm_xor_si128(_c, chunk1); \
+    } \
   } while (0)
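/* [Illustrative sketch, not part of the patch] The new variant >= 4 branch above folds
 * the pre-shuffle chunk values back into the AES result _c. In scalar terms, modeling
 * each 128-bit value as a pair of uint64_t, the fold is c ^= chunk1 ^ chunk2 ^ chunk3: */
#if 0
static void v4_shuffle_fold_scalar(uint64_t c[2], const uint64_t chunk1[2],
                                   const uint64_t chunk2[2], const uint64_t chunk3[2])
{
  for (int i = 0; i < 2; ++i)
    c[i] ^= chunk1[i] ^ chunk2[i] ^ chunk3[i]; /* same as chunk1^=chunk2; _c^=chunk3; _c^=chunk1 */
}
#endif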
 #define VARIANT2_SHUFFLE_ADD_NEON(base_ptr, offset) \
   do if (variant >= 2) \
   { \
-    const uint64x2_t chunk1 = vld1q_u64(U64((base_ptr) + ((offset) ^ 0x10))); \
+    uint64x2_t chunk1 = vld1q_u64(U64((base_ptr) + ((offset) ^ 0x10))); \
     const uint64x2_t chunk2 = vld1q_u64(U64((base_ptr) + ((offset) ^ 0x20))); \
     const uint64x2_t chunk3 = vld1q_u64(U64((base_ptr) + ((offset) ^ 0x30))); \
     vst1q_u64(U64((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
     vst1q_u64(U64((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
     vst1q_u64(U64((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
+    if (variant >= 4) \
+    { \
+      chunk1 = veorq_u64(chunk1, chunk2); \
+      _c = vreinterpretq_u8_u64(veorq_u64(vreinterpretq_u64_u8(_c), chunk3)); \
+      _c = vreinterpretq_u8_u64(veorq_u64(vreinterpretq_u64_u8(_c), chunk1)); \
+    } \
   } while (0)
-#define VARIANT2_PORTABLE_SHUFFLE_ADD(base_ptr, offset) \
+#define VARIANT2_PORTABLE_SHUFFLE_ADD(out, a_, base_ptr, offset) \
   do if (variant >= 2) \
   { \
     uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
     uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
     uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
     \
-    const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+    uint64_t chunk1_old[2] = { SWAP64LE(chunk1[0]), SWAP64LE(chunk1[1]) }; \
+    const uint64_t chunk2_old[2] = { SWAP64LE(chunk2[0]), SWAP64LE(chunk2[1]) }; \
+    const uint64_t chunk3_old[2] = { SWAP64LE(chunk3[0]), SWAP64LE(chunk3[1]) }; \
     \
     uint64_t b1[2]; \
-    memcpy(b1, b + 16, 16); \
-    chunk1[0] = chunk3[0] + b1[0]; \
-    chunk1[1] = chunk3[1] + b1[1]; \
+    memcpy_swap64le(b1, b + 16, 2); \
+    chunk1[0] = SWAP64LE(chunk3_old[0] + b1[0]); \
+    chunk1[1] = SWAP64LE(chunk3_old[1] + b1[1]); \
     \
     uint64_t a0[2]; \
-    memcpy(a0, a, 16); \
-    chunk3[0] = chunk2[0] + a0[0]; \
-    chunk3[1] = chunk2[1] + a0[1]; \
+    memcpy_swap64le(a0, a_, 2); \
+    chunk3[0] = SWAP64LE(chunk2_old[0] + a0[0]); \
+    chunk3[1] = SWAP64LE(chunk2_old[1] + a0[1]); \
     \
     uint64_t b0[2]; \
-    memcpy(b0, b, 16); \
-    chunk2[0] = chunk1_old[0] + b0[0]; \
-    chunk2[1] = chunk1_old[1] + b0[1]; \
+    memcpy_swap64le(b0, b, 2); \
+    chunk2[0] = SWAP64LE(chunk1_old[0] + b0[0]); \
+    chunk2[1] = SWAP64LE(chunk1_old[1] + b0[1]); \
+    if (variant >= 4) \
+    { \
+      uint64_t out_copy[2]; \
+      memcpy_swap64le(out_copy, out, 2); \
+      chunk1_old[0] ^= chunk2_old[0]; \
+      chunk1_old[1] ^= chunk2_old[1]; \
+      out_copy[0] ^= chunk3_old[0]; \
+      out_copy[1] ^= chunk3_old[1]; \
+      out_copy[0] ^= chunk1_old[0]; \
+      out_copy[1] ^= chunk1_old[1]; \
+      memcpy_swap64le(out, out_copy, 2); \
+    } \
   } while (0)
 #define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
-  ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+  uint64_t tmpx = division_result ^ (sqrt_result << 32); \
+  ((uint64_t*)(b))[0] ^= SWAP64LE(tmpx); \
   { \
-    const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
-    const uint32_t divisor = (((uint64_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+    const uint64_t dividend = SWAP64LE(((uint64_t*)(ptr))[1]); \
+    const uint32_t divisor = (SWAP64LE(((uint64_t*)(ptr))[0]) + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
     division_result = ((uint32_t)(dividend / divisor)) + \
                       (((uint64_t)(dividend % divisor)) << 32); \
   } \
-  const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+  const uint64_t sqrt_input = SWAP64LE(((uint64_t*)(ptr))[0]) + division_result
 
 #define VARIANT2_INTEGER_MATH_SSE2(b, ptr) \
-  do if (variant >= 2) \
+  do if ((variant == 2) || (variant == 3)) \
   { \
     VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
     VARIANT2_INTEGER_MATH_SQRT_STEP_SSE2(); \
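/* [Illustrative sketch, not part of the patch] Restating the division step above as a
 * standalone function (inputs already little-endian-decoded, which is what the added
 * SWAP64LE calls ensure): OR-ing with 0x80000001 makes the divisor odd and at least
 * 2^31, and the result packs the truncated 32-bit quotient with the remainder in the
 * high half. */
#if 0
#include <stdint.h>
static uint64_t v2_division_step(uint64_t c0, uint64_t c1, uint64_t sqrt_result)
{
  const uint64_t dividend = c1;
  const uint32_t divisor = ((uint32_t)c0 + (uint32_t)(sqrt_result << 1)) | 0x80000001UL;
  return ((uint32_t)(dividend / divisor)) + (((uint64_t)(dividend % divisor)) << 32);
}
#endif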
@@ -181,7 +223,7 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 #if defined DBL_MANT_DIG && (DBL_MANT_DIG >= 50)
 // double precision floating point type has enough bits of precision on current platform
 #define VARIANT2_PORTABLE_INTEGER_MATH(b, ptr) \
-  do if (variant >= 2) \
+  do if ((variant == 2) || (variant == 3)) \
   { \
     VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
     VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
@@ -191,7 +233,7 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 // double precision floating point type is not good enough on current platform
 // fall back to the reference code (integer only)
 #define VARIANT2_PORTABLE_INTEGER_MATH(b, ptr) \
-  do if (variant >= 2) \
+  do if ((variant == 2) || (variant == 3)) \
   { \
     VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
     VARIANT2_INTEGER_MATH_SQRT_STEP_REF(); \
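/* [Illustrative note, not part of the patch] The DBL_MANT_DIG >= 50 guard exists because
 * the sqrt step takes an integer square root of a 64-bit input; a double represents
 * integers exactly only up to 2^DBL_MANT_DIG, so a narrower mantissa could round to a
 * neighboring integer. A generic bitwise integer square root, for comparison with the
 * reference step (this is not the variant 2 formula itself): */
#if 0
#include <stdint.h>
static uint32_t isqrt64(uint64_t n)
{
  uint64_t rem = n, root = 0, bit = 1ULL << 62;
  while (bit > n) bit >>= 2;
  while (bit) {
    if (rem >= root + bit) { rem -= root + bit; root = (root >> 1) + bit; }
    else root >>= 1;
    bit >>= 2;
  }
  return (uint32_t)root; /* floor(sqrt(n)) fits in 32 bits for any 64-bit n */
}
#endif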
@@ -199,18 +241,80 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 #endif
 
 #define VARIANT2_2_PORTABLE() \
-    if (variant >= 2) { \
+    if (variant == 2 || variant == 3) { \
       xor_blocks(long_state + (j ^ 0x10), d); \
       xor_blocks(d, long_state + (j ^ 0x20)); \
     }
 
 #define VARIANT2_2() \
-  do if (variant >= 2) \
+  do if (variant == 2 || variant == 3) \
   { \
-    *U64(hp_state + (j ^ 0x10)) ^= hi; \
-    *(U64(hp_state + (j ^ 0x10)) + 1) ^= lo; \
-    hi ^= *U64(hp_state + (j ^ 0x20)); \
-    lo ^= *(U64(hp_state + (j ^ 0x20)) + 1); \
+    *U64(hp_state + (j ^ 0x10)) ^= SWAP64LE(hi); \
+    *(U64(hp_state + (j ^ 0x10)) + 1) ^= SWAP64LE(lo); \
+    hi ^= SWAP64LE(*U64(hp_state + (j ^ 0x20))); \
+    lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
   } while (0)
 
+#define V4_REG_LOAD(dst, src) \
+  do { \
+    memcpy((dst), (src), sizeof(v4_reg)); \
+    if (sizeof(v4_reg) == sizeof(uint32_t)) \
+      *(dst) = SWAP32LE(*(dst)); \
+    else \
+      *(dst) = SWAP64LE(*(dst)); \
+  } while (0)
+
+#define VARIANT4_RANDOM_MATH_INIT() \
+  v4_reg r[9]; \
+  struct V4_Instruction code[NUM_INSTRUCTIONS_MAX + 1]; \
+  int jit = use_v4_jit(); \
+  do if (variant >= 4) \
+  { \
+    for (int i = 0; i < 4; ++i) \
+      V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \
+    v4_random_math_init(code, height); \
+    if (jit) \
+    { \
+      int ret = v4_generate_JIT_code(code, hp_jitfunc, 4096); \
+      if (ret < 0) \
+        local_abort("Error generating CryptonightR code"); \
+    } \
+  } while (0)
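/* [Illustrative note, not part of the patch] VARIANT4_RANDOM_MATH_INIT seeds r[0..3]
 * from 16 bytes of Keccak state starting at state.hs.w[12] (byte offset 96), the same
 * words variant 2 uses for division_result/sqrt_result. With a 32-bit v4_reg:
 *
 *   r[0] = LE32(state bytes  96..99)    r[2] = LE32(state bytes 104..107)
 *   r[1] = LE32(state bytes 100..103)   r[3] = LE32(state bytes 108..111)
 *
 * r[4..8] are reloaded from a/_b/_b1 on every call to VARIANT4_RANDOM_MATH below. */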
+#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
+  do if (variant >= 4) \
+  { \
+    uint64_t t[2]; \
+    memcpy(t, b, sizeof(uint64_t)); \
+    \
+    if (sizeof(v4_reg) == sizeof(uint32_t)) \
+      t[0] ^= SWAP64LE((r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32)); \
+    else \
+      t[0] ^= SWAP64LE((r[0] + r[1]) ^ (r[2] + r[3])); \
+    \
+    memcpy(b, t, sizeof(uint64_t)); \
+    \
+    V4_REG_LOAD(r + 4, a); \
+    V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \
+    V4_REG_LOAD(r + 6, _b); \
+    V4_REG_LOAD(r + 7, _b1); \
+    V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \
+    \
+    if (jit) \
+      (*hp_jitfunc)(r); \
+    else \
+      v4_random_math(code, r); \
+    \
+    memcpy(t, a, sizeof(uint64_t) * 2); \
+    \
+    if (sizeof(v4_reg) == sizeof(uint32_t)) { \
+      t[0] ^= SWAP64LE(r[2] | ((uint64_t)(r[3]) << 32)); \
+      t[1] ^= SWAP64LE(r[0] | ((uint64_t)(r[1]) << 32)); \
+    } else { \
+      t[0] ^= SWAP64LE(r[2] ^ r[3]); \
+      t[1] ^= SWAP64LE(r[0] ^ r[1]); \
+    } \
+    memcpy(a, t, sizeof(uint64_t) * 2); \
+  } while (0)
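/* [Illustrative sketch, not part of the patch] With 32-bit v4_reg, the macro above packs
 * register pairs into 64-bit words before xoring them into a; a hypothetical helper for
 * the packing it performs: */
#if 0
#include <stdint.h>
static uint64_t v4_pack(uint32_t lo, uint32_t hi) /* hypothetical, for illustration */
{
  return (uint64_t)lo | ((uint64_t)hi << 32);
}
/* 32-bit case of the `a` update: t[0] ^= SWAP64LE(v4_pack(r[2], r[3]));
 *                                t[1] ^= SWAP64LE(v4_pack(r[0], r[1])); */
#endif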
@@ -297,6 +401,7 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
     p = U64(&hp_state[j]); \
     b[0] = p[0]; b[1] = p[1]; \
     VARIANT2_INTEGER_MATH_SSE2(b, c); \
+    VARIANT4_RANDOM_MATH(a, b, r, &_b, &_b1); \
     __mul(); \
     VARIANT2_2(); \
     VARIANT2_SHUFFLE_ADD_SSE2(hp_state, j); \
@@ -328,6 +433,9 @@ union cn_slow_hash_state
 THREADV uint8_t *hp_state = NULL;
 THREADV int hp_allocated = 0;
+THREADV v4_random_math_JIT_func hp_jitfunc = NULL;
+THREADV uint8_t *hp_jitfunc_memory = NULL;
+THREADV int hp_jitfunc_allocated = 0;
 
 #if defined(_MSC_VER)
 #define cpuid(info,x)    __cpuidex(info,x,0)
@@ -386,6 +494,31 @@ STATIC INLINE int force_software_aes(void)
   return use;
 }
 
+volatile int use_v4_jit_flag = -1;
+
+STATIC INLINE int use_v4_jit(void)
+{
+#if defined(__x86_64__)
+  if (use_v4_jit_flag != -1)
+    return use_v4_jit_flag;
+
+  const char *env = getenv("MONERO_USE_CNV4_JIT");
+  if (!env) {
+    use_v4_jit_flag = 0;
+  }
+  else if (!strcmp(env, "0") || !strcmp(env, "no")) {
+    use_v4_jit_flag = 0;
+  }
+  else {
+    use_v4_jit_flag = 1;
+  }
+  return use_v4_jit_flag;
+#else
+  return 0;
+#endif
+}
+
 STATIC INLINE int check_aes_hw(void)
 {
   int cpuid_results[4];
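/* [Illustrative usage, not part of the patch] The JIT is only compiled for x86-64 and is
 * controlled by an environment variable; unset, "0" and "no" select the interpreter,
 * anything else enables the JIT. For example (monerod shown purely as an example caller):
 *
 *   MONERO_USE_CNV4_JIT=1  ./monerod   # enable the CryptonightR JIT
 *   MONERO_USE_CNV4_JIT=no ./monerod   # force the interpreted v4_random_math path
 */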
@@ -637,6 +770,33 @@ void slow_hash_allocate_state(void)
         hp_allocated = 0;
         hp_state = (uint8_t *) malloc(MEMORY);
     }
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+    hp_jitfunc_memory = (uint8_t *) VirtualAlloc(hp_jitfunc_memory, 4096 + 4095,
+        MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
+  defined(__DragonFly__) || defined(__NetBSD__)
+    hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
+        MAP_PRIVATE | MAP_ANON, 0, 0);
+#else
+    hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
+        MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#endif
+    if (hp_jitfunc_memory == MAP_FAILED)
+        hp_jitfunc_memory = NULL;
+#endif
+
+    hp_jitfunc_allocated = 1;
+    if (hp_jitfunc_memory == NULL)
+    {
+        hp_jitfunc_allocated = 0;
+        hp_jitfunc_memory = malloc(4096 + 4095);
+    }
+    hp_jitfunc = (v4_random_math_JIT_func)((size_t)(hp_jitfunc_memory + 4095) & ~4095);
+#if !(defined(_MSC_VER) || defined(__MINGW32__))
+    mprotect(hp_jitfunc, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
 }
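/* [Illustrative sketch, not part of the patch] The allocation above reserves 4096 + 4095
 * bytes so that a page-aligned 4096-byte JIT window always fits, then rounds the base up
 * to the next 4 KiB boundary, exactly as the hp_jitfunc assignment does: */
#if 0
#include <assert.h>
#include <stdint.h>
static void *page_align_4k(void *base)
{
  void *p = (void *)(((uintptr_t)base + 4095) & ~(uintptr_t)4095);
  /* p is 4096-aligned, and p + 4096 stays within base .. base + 4096 + 4095 */
  assert((uintptr_t)p % 4096 == 0);
  return p;
}
#endif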
 /**
@@ -659,8 +819,22 @@ void slow_hash_free_state(void)
 #endif
     }
 
+    if (!hp_jitfunc_allocated)
+        free(hp_jitfunc_memory);
+    else
+    {
+#if defined(_MSC_VER) || defined(__MINGW32__)
+        VirtualFree(hp_jitfunc_memory, 0, MEM_RELEASE);
+#else
+        munmap(hp_jitfunc_memory, 4096 + 4095);
+#endif
+    }
+
     hp_state = NULL;
     hp_allocated = 0;
+    hp_jitfunc = NULL;
+    hp_jitfunc_memory = NULL;
+    hp_jitfunc_allocated = 0;
 }
 
 /**
@@ -693,7 +867,7 @@ void slow_hash_free_state(void)
  * @param length the length in bytes of the data
  * @param hash a pointer to a buffer in which the final 256 bit hash will be stored
  */
-void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed)
+void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed, uint64_t height)
 {
     RDATA_ALIGN16 uint8_t expandedKey[240];  /* These buffers are aligned to use later with SSE functions */
@@ -729,6 +903,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     VARIANT1_INIT64();
     VARIANT2_INIT64();
+    VARIANT4_RANDOM_MATH_INIT();
 
     /* CryptoNight Step 2: Iteratively encrypt the results from Keccak to fill
      * the 2MB large random access buffer.
@@ -900,6 +1075,7 @@ union cn_slow_hash_state
     p = U64(&hp_state[j]); \
     b[0] = p[0]; b[1] = p[1]; \
     VARIANT2_PORTABLE_INTEGER_MATH(b, c); \
+    VARIANT4_RANDOM_MATH(a, b, r, &_b, &_b1); \
     __mul(); \
     VARIANT2_2(); \
     VARIANT2_SHUFFLE_ADD_NEON(hp_state, j); \
@@ -1062,7 +1238,7 @@ STATIC INLINE void aligned_free(void *ptr)
 }
 #endif /* FORCE_USE_HEAP */
 
-void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed)
+void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed, uint64_t height)
 {
     RDATA_ALIGN16 uint8_t expandedKey[240];
@@ -1099,6 +1275,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     VARIANT1_INIT64();
     VARIANT2_INIT64();
+    VARIANT4_RANDOM_MATH_INIT();
 
     /* CryptoNight Step 2: Iteratively encrypt the results from Keccak to fill
      * the 2MB large random access buffer.
@@ -1277,10 +1454,11 @@ STATIC INLINE void xor_blocks(uint8_t* a, const uint8_t* b)
   U64(a)[1] ^= U64(b)[1];
 }
 
-void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed)
+void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed, uint64_t height)
 {
     uint8_t text[INIT_SIZE_BYTE];
     uint8_t a[AES_BLOCK_SIZE];
+    uint8_t a1[AES_BLOCK_SIZE];
     uint8_t b[AES_BLOCK_SIZE * 2];
     uint8_t c[AES_BLOCK_SIZE];
     uint8_t c1[AES_BLOCK_SIZE];
@@ -1316,6 +1494,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     VARIANT1_INIT64();
     VARIANT2_INIT64();
+    VARIANT4_RANDOM_MATH_INIT();
 
     // use aligned data
     memcpy(expandedKey, aes_ctx->key->exp_data, aes_ctx->key->exp_data_len);
@@ -1339,10 +1518,10 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
       // Iteration 1
       j = state_index(a);
       p = &long_state[j];
-      aesb_single_round(p, p, a);
-      copy_block(c1, p);
+      aesb_single_round(p, c1, a);
-      VARIANT2_PORTABLE_SHUFFLE_ADD(long_state, j);
+      VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
+      copy_block(p, c1);
       xor_blocks(p, b);
       VARIANT1_1(p);
@@ -1351,13 +1530,15 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
       p = &long_state[j];
       copy_block(c, p);
+      copy_block(a1, a);
       VARIANT2_PORTABLE_INTEGER_MATH(c, c1);
+      VARIANT4_RANDOM_MATH(a1, c, r, b, b + AES_BLOCK_SIZE);
       mul(c1, c, d);
       VARIANT2_2_PORTABLE();
-      VARIANT2_PORTABLE_SHUFFLE_ADD(long_state, j);
-      sum_half_blocks(a, d);
-      swap_blocks(a, c);
-      xor_blocks(a, c);
+      VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
+      sum_half_blocks(a1, d);
+      swap_blocks(a1, c);
+      xor_blocks(a1, c);
       VARIANT1_2(U64(c) + 1);
       copy_block(p, c);
@@ -1365,6 +1546,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
         copy_block(b + AES_BLOCK_SIZE, b);
       }
       copy_block(b, c1);
+      copy_block(a, a1);
     }
 
     memcpy(text, state.init, INIT_SIZE_BYTE);
@@ -1408,10 +1590,7 @@ static void (*const extra_hashes[4])(const void *, size_t, char *) = {
   hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
 };
 
-extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
-extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
-
-static size_t e2i(const uint8_t* a, size_t count) { return (*((uint64_t*)a) / AES_BLOCK_SIZE) & (count - 1); }
+static size_t e2i(const uint8_t* a, size_t count) { return (SWAP64LE(*((uint64_t*)a)) / AES_BLOCK_SIZE) & (count - 1); }
 
 static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
   uint64_t a0, b0;
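/* [Illustrative sketch, not part of the patch] e2i maps the first 8 bytes of a block
 * (now read little-endian via the added SWAP64LE) to a scratchpad block index: divide
 * by AES_BLOCK_SIZE and mask by count - 1. With MEMORY = 2 MiB and 16-byte blocks,
 * count = 131072, so: */
#if 0
size_t idx = e2i(a, MEMORY / AES_BLOCK_SIZE);  /* block index in 0 .. 131071 */
size_t off = idx * AES_BLOCK_SIZE;             /* byte offset into long_state, multiple of 16 */
#endif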
@@ -1478,7 +1657,7 @@ union cn_slow_hash_state {
 };
 #pragma pack(pop)
 
-void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed) {
+void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed, uint64_t height) {
 #ifndef FORCE_USE_HEAP
   uint8_t long_state[MEMORY];
 #else
@@ -1488,6 +1667,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
   union cn_slow_hash_state state;
   uint8_t text[INIT_SIZE_BYTE];
   uint8_t a[AES_BLOCK_SIZE];
+  uint8_t a1[AES_BLOCK_SIZE];
   uint8_t b[AES_BLOCK_SIZE * 2];
   uint8_t c1[AES_BLOCK_SIZE];
   uint8_t c2[AES_BLOCK_SIZE];
@@ -1507,6 +1687,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
   VARIANT1_PORTABLE_INIT();
   VARIANT2_PORTABLE_INIT();
+  VARIANT4_RANDOM_MATH_INIT();
 
   oaes_key_import_data(aes_ctx, aes_key, AES_KEY_SIZE);
   for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
@@ -1530,7 +1711,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     j = e2i(a, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
     copy_block(c1, &long_state[j]);
     aesb_single_round(c1, c1, a);
-    VARIANT2_PORTABLE_SHUFFLE_ADD(long_state, j);
+    VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
     copy_block(&long_state[j], c1);
     xor_blocks(&long_state[j], b);
     assert(j == e2i(a, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE);
@@ -1538,22 +1719,22 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     /* Iteration 2 */
     j = e2i(c1, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
     copy_block(c2, &long_state[j]);
+    copy_block(a1, a);
     VARIANT2_PORTABLE_INTEGER_MATH(c2, c1);
+    VARIANT4_RANDOM_MATH(a1, c2, r, b, b + AES_BLOCK_SIZE);
     mul(c1, c2, d);
     VARIANT2_2_PORTABLE();
-    VARIANT2_PORTABLE_SHUFFLE_ADD(long_state, j);
-    swap_blocks(a, c1);
-    sum_half_blocks(c1, d);
-    swap_blocks(c1, c2);
-    xor_blocks(c1, c2);
+    VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
+    sum_half_blocks(a1, d);
+    swap_blocks(a1, c2);
+    xor_blocks(a1, c2);
     VARIANT1_2(c2 + 8);
     copy_block(&long_state[j], c2);
     assert(j == e2i(a, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE);
     if (variant >= 2) {
       copy_block(b + AES_BLOCK_SIZE, b);
     }
-    copy_block(b, a);
-    copy_block(a, c1);
+    copy_block(b, c1);
+    copy_block(a, a1);
   }
 
   memcpy(text, state.init, INIT_SIZE_BYTE);