@ -37,9 +37,11 @@
# include <stdlib.h>
# include "hash-ops.h"
# include "oaes_lib.h"
# ifdef __SSE2__
# if defined(__x86_64__) || defined(__i386)
// Optimised code below; it uses x86-specific intrinsics (SSE2, AES-NI).
// The more portable fallback code is at the bottom of this file.
# include <emmintrin.h>
# endif
# if defined(_MSC_VER)
# include <intrin.h>
@ -621,3 +623,151 @@ void cn_slow_hash(const void *data, size_t length, char *hash)
hash_permutation ( & state . hs ) ;
extra_hashes [ state . hs . b [ 0 ] & 3 ] ( & state , 200 , hash ) ;
}
# else
// Portable implementation as a fallback
/*
 * Table of final-stage hash functions. cn_slow_hash() picks one entry using
 * the low two bits of the first byte of the permuted state, so the order of
 * the entries is significant.
 */
static void (*const extra_hashes[4])(const void *, size_t, char *) = {
  [0] = hash_extra_blake,
  [1] = hash_extra_groestl,
  [2] = hash_extra_jh,
  [3] = hash_extra_skein,
};
/* Size in bytes of the scratchpad (long_state) used by cn_slow_hash(). */
# define MEMORY (1 << 21) /* 2 MiB */
/* Iteration budget of the main loop: the loop body runs ITER / 2 times and
 * performs two half-steps ("Iteration 1" and "Iteration 2") per pass. */
# define ITER (1 << 20)
/* Size in bytes of one block handled by the *_block(s) helpers. */
# define AES_BLOCK_SIZE 16
/* Number of key bytes imported into the OAES context. */
# define AES_KEY_SIZE 32 /*16*/
/* Number of AES blocks in the `text` working buffer ... */
# define INIT_SIZE_BLK 8
/* ... and the size of that buffer in bytes. */
# define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
/*
 * AES round helpers defined in another translation unit.
 *
 * As used in this file, both are called in place (in == out) on one
 * AES_BLOCK_SIZE-byte block:
 *   - aesb_single_round() receives a 16-byte block as its key argument;
 *   - aesb_pseudo_round() receives the expanded key of the OAES context.
 * NOTE(review): the meaning of the int return value is not visible here;
 * every caller in this file ignores it.
 */
extern int aesb_single_round ( const uint8_t * in , uint8_t * out , const uint8_t * expandedKey ) ;
extern int aesb_pseudo_round ( const uint8_t * in , uint8_t * out , const uint8_t * expandedKey ) ;
/*
 * Derive a scratchpad block index from the first 8 bytes of `a`.
 *
 * The bytes are interpreted as a little-endian 64-bit integer, divided by
 * AES_BLOCK_SIZE and masked with (count - 1). The mask only yields a valid
 * index in [0, count) when `count` is a power of two.
 *
 * The word is assembled byte by byte rather than read through a casted
 * uint64_t pointer: `a` is a plain byte buffer with no alignment guarantee,
 * and a host-order read would select a different block on big-endian
 * machines than on little-endian ones.
 */
static size_t e2i(const uint8_t *a, size_t count) {
  uint64_t word = 0;
  unsigned int i;

  for (i = 0; i < sizeof word; i++) {
    word |= (uint64_t) a[i] << (8 * i);
  }
  return (size_t) ((word / AES_BLOCK_SIZE) & (count - 1));
}
/*
 * Multiply the first 64-bit words of `a` and `b` and store the 128-bit
 * product in `res` (16 bytes): the high half first, then the low half.
 *
 * Words are passed through SWAP64LE on load and on store.
 * NOTE(review): SWAP64LE is presumed to convert between host order and
 * little-endian, and mul128 is presumed to return the low 64 bits while
 * storing the high 64 bits through its third argument - confirm against
 * their definitions.
 *
 * Loads and stores go through memcpy because the arguments are byte buffers:
 * dereferencing them via a casted uint64_t pointer is undefined when the
 * buffer is not 8-byte aligned and violates the effective-type rules.
 */
static void mul(const uint8_t *a, const uint8_t *b, uint8_t *res) {
  uint64_t a0, b0;
  uint64_t hi, lo;
  uint64_t product[2];

  memcpy(&a0, a, sizeof a0);
  memcpy(&b0, b, sizeof b0);
  a0 = SWAP64LE(a0);
  b0 = SWAP64LE(b0);

  lo = mul128(a0, b0, &hi);

  product[0] = SWAP64LE(hi);
  product[1] = SWAP64LE(lo);
  memcpy(res, product, sizeof product);
}
/*
 * Add block `b` to block `a` in place, treating each 16-byte block as two
 * independent 64-bit words (no carry between the halves; each addition wraps
 * modulo 2^64).
 *
 * Words are passed through SWAP64LE on load and on store.
 * NOTE(review): SWAP64LE is presumed to convert between host order and
 * little-endian - confirm against its definition.
 *
 * Loads and stores go through memcpy because the arguments are byte buffers:
 * dereferencing them via a casted uint64_t pointer is undefined when the
 * buffer is not 8-byte aligned and violates the effective-type rules.
 */
static void sum_half_blocks(uint8_t *a, const uint8_t *b) {
  uint64_t a0, a1, b0, b1;
  uint64_t lhs[2];
  uint64_t rhs[2];

  memcpy(lhs, a, sizeof lhs);
  memcpy(rhs, b, sizeof rhs);

  a0 = SWAP64LE(lhs[0]);
  a1 = SWAP64LE(lhs[1]);
  b0 = SWAP64LE(rhs[0]);
  b1 = SWAP64LE(rhs[1]);
  a0 += b0;
  a1 += b1;

  lhs[0] = SWAP64LE(a0);
  lhs[1] = SWAP64LE(a1);
  memcpy(a, lhs, sizeof lhs);
}
/* View pointer `x` as a pointer to 64-bit words. The cast performs no
 * alignment check, so the pointed-to storage must be suitably aligned for
 * uint64_t access. */
# define U64(x) ((uint64_t *) (x))
/*
 * Copy one AES_BLOCK_SIZE-byte block from `src` to `dst`.
 * The two blocks must not overlap (memcpy semantics).
 */
static void copy_block(uint8_t *dst, const uint8_t *src) {
  (void) memcpy(dst, src, (size_t) AES_BLOCK_SIZE);
}
/*
 * Exchange the contents of two 16-byte blocks `a` and `b`.
 *
 * The blocks are moved with memcpy/memmove through a local temporary instead
 * of being dereferenced as uint64_t: both arguments are byte buffers, so a
 * casted 64-bit access is undefined when the buffer is not 8-byte aligned and
 * violates the effective-type rules. memmove is used for the a <- b step so
 * that swap_blocks(p, p) stays well defined (and leaves the block unchanged).
 */
static void swap_blocks(uint8_t *a, uint8_t *b) {
  uint64_t t[2]; /* 16 bytes: one block */

  memcpy(t, a, sizeof t);
  memmove(a, b, sizeof t);
  memcpy(b, t, sizeof t);
}
/*
 * XOR block `b` into block `a`, byte by byte, over AES_BLOCK_SIZE bytes.
 * `a` is modified in place; `b` is only read.
 */
static void xor_blocks(uint8_t *a, const uint8_t *b) {
  const uint8_t *const stop = b + AES_BLOCK_SIZE;

  while (b != stop) {
    *a++ ^= *b++;
  }
}
/*
 * Working state of cn_slow_hash().
 *
 * The union exposes the same storage in two ways:
 *   - hs:   the hash state handed to hash_process() / hash_permutation();
 *   - k:    the first 64 bytes of that storage, mixed pairwise to seed the
 *           `a` and `b` blocks of the main loop;
 *     init: the INIT_SIZE_BYTE bytes that follow k, used as the initial
 *           contents of the `text` buffer and overwritten with the final
 *           `text` before the last permutation.
 * The type is declared under pack(1), i.e. with byte packing requested.
 * NOTE(review): the size and layout of union hash_state are not visible
 * here; cn_slow_hash() passes 200 bytes of this state to the final hash.
 */
# pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs ;
struct {
uint8_t k [ 64 ] ;
uint8_t init [ INIT_SIZE_BYTE ] ;
} ;
} ;
# pragma pack(pop)
void cn_slow_hash ( const void * data , size_t length , char * hash ) {
uint8_t long_state [ MEMORY ] ;
union cn_slow_hash_state state ;
uint8_t text [ INIT_SIZE_BYTE ] ;
uint8_t a [ AES_BLOCK_SIZE ] ;
uint8_t b [ AES_BLOCK_SIZE ] ;
uint8_t c [ AES_BLOCK_SIZE ] ;
uint8_t d [ AES_BLOCK_SIZE ] ;
size_t i , j ;
uint8_t aes_key [ AES_KEY_SIZE ] ;
oaes_ctx * aes_ctx ;
hash_process ( & state . hs , data , length ) ;
memcpy ( text , state . init , INIT_SIZE_BYTE ) ;
memcpy ( aes_key , state . hs . b , AES_KEY_SIZE ) ;
aes_ctx = ( oaes_ctx * ) oaes_alloc ( ) ;
oaes_key_import_data ( aes_ctx , aes_key , AES_KEY_SIZE ) ;
for ( i = 0 ; i < MEMORY / INIT_SIZE_BYTE ; i + + ) {
for ( j = 0 ; j < INIT_SIZE_BLK ; j + + ) {
aesb_pseudo_round ( & text [ AES_BLOCK_SIZE * j ] , & text [ AES_BLOCK_SIZE * j ] , aes_ctx - > key - > exp_data ) ;
}
memcpy ( & long_state [ i * INIT_SIZE_BYTE ] , text , INIT_SIZE_BYTE ) ;
}
for ( i = 0 ; i < 16 ; i + + ) {
a [ i ] = state . k [ i ] ^ state . k [ 32 + i ] ;
b [ i ] = state . k [ 16 + i ] ^ state . k [ 48 + i ] ;
}
for ( i = 0 ; i < ITER / 2 ; i + + ) {
/* Dependency chain: address -> read value ------+
* written value < - + hard function ( AES or MUL ) < +
* next address < - +
*/
/* Iteration 1 */
j = e2i ( a , MEMORY / AES_BLOCK_SIZE ) ;
copy_block ( c , & long_state [ j * AES_BLOCK_SIZE ] ) ;
aesb_single_round ( c , c , a ) ;
xor_blocks ( b , c ) ;
swap_blocks ( b , c ) ;
copy_block ( & long_state [ j * AES_BLOCK_SIZE ] , c ) ;
assert ( j = = e2i ( a , MEMORY / AES_BLOCK_SIZE ) ) ;
swap_blocks ( a , b ) ;
/* Iteration 2 */
j = e2i ( a , MEMORY / AES_BLOCK_SIZE ) ;
copy_block ( c , & long_state [ j * AES_BLOCK_SIZE ] ) ;
mul ( a , c , d ) ;
sum_half_blocks ( b , d ) ;
swap_blocks ( b , c ) ;
xor_blocks ( b , c ) ;
copy_block ( & long_state [ j * AES_BLOCK_SIZE ] , c ) ;
assert ( j = = e2i ( a , MEMORY / AES_BLOCK_SIZE ) ) ;
swap_blocks ( a , b ) ;
}
memcpy ( text , state . init , INIT_SIZE_BYTE ) ;
oaes_key_import_data ( aes_ctx , & state . hs . b [ 32 ] , AES_KEY_SIZE ) ;
for ( i = 0 ; i < MEMORY / INIT_SIZE_BYTE ; i + + ) {
for ( j = 0 ; j < INIT_SIZE_BLK ; j + + ) {
xor_blocks ( & text [ j * AES_BLOCK_SIZE ] , & long_state [ i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE ] ) ;
aesb_pseudo_round ( & text [ AES_BLOCK_SIZE * j ] , & text [ AES_BLOCK_SIZE * j ] , aes_ctx - > key - > exp_data ) ;
}
}
memcpy ( state . init , text , INIT_SIZE_BYTE ) ;
hash_permutation ( & state . hs ) ;
/*memcpy(hash, &state, 32);*/
extra_hashes [ state . hs . b [ 0 ] & 3 ] ( & state , 200 , hash ) ;
oaes_free ( ( OAES_CTX * * ) & aes_ctx ) ;
}
# endif