From c93ef4369c44a24fd21b91215a67af4d99c55781 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Thu, 7 Feb 2019 10:32:06 +0100 Subject: [PATCH 1/5] Fixes for big-endian machines --- src/crypto/variant4_random_math.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h index fc16ef4d8..2024cf80e 100644 --- a/src/crypto/variant4_random_math.h +++ b/src/crypto/variant4_random_math.h @@ -195,7 +195,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ char data[32]; memset(data, 0, sizeof(data)); - *((uint64_t*)data) = height; + *((uint64_t*)data) = SWAP64LE(height); size_t data_index = sizeof(data); @@ -209,7 +209,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ // byte 1: instruction opcode // byte 2: current value of the source register // - // Registers R4-R7 are constant and are threatened as having the same value because when we do + // Registers R4-R7 are constant and are treated as having the same value because when we do // the same operation twice with two constant source registers, it can be optimized into a single operation int inst_data[8] = { 0, 1, 2, 3, -1, -1, -1, -1 }; @@ -355,7 +355,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ // ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C" check_data(&data_index, sizeof(uint32_t), data, sizeof(data)); - code[code_size].C = *((uint32_t*)&data[data_index]); + code[code_size].C = SWAP32LE(*((uint32_t*)&data[data_index])); data_index += sizeof(uint32_t); } From 38c49b7a356db0479f93dbe523404e811adc611a Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 8 Feb 2019 22:25:30 +0100 Subject: [PATCH 2/5] More fixes for big-endian machines and pointer aliasing --- src/crypto/slow-hash.c | 35 +++++++++++++++++++++---------- src/crypto/variant4_random_math.h | 10 +++++++-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c index 8da79dee0..5a8be4a61 100644 --- a/src/crypto/slow-hash.c +++ b/src/crypto/slow-hash.c @@ -215,30 +215,43 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \ } while (0) +#define V4_REG_LOAD(dst, src) \ + do { \ + memcpy((dst), (src), sizeof(v4_reg)); \ + if (sizeof(v4_reg) == sizeof(uint32_t)) \ + *(dst) = SWAP32LE(*(dst)); \ + else \ + *(dst) = SWAP64LE(*(dst)); \ + } while (0) + #define VARIANT4_RANDOM_MATH_INIT() \ v4_reg r[8]; \ struct V4_Instruction code[TOTAL_LATENCY * ALU_COUNT + 1]; \ do if (variant >= 4) \ { \ - v4_reg* data = (v4_reg*)(state.hs.w + 12); \ - r[0] = data[0]; \ - r[1] = data[1]; \ - r[2] = data[2]; \ - r[3] = data[3]; \ + for (int i = 0; i < 4; ++i) \ + V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \ v4_random_math_init(code, height); \ } while (0) #define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \ do if (variant >= 4) \ { \ + uint64_t t; \ + memcpy(&t, b, sizeof(uint64_t)); \ + \ if (sizeof(v4_reg) == sizeof(uint32_t)) \ - U64(b)[0] ^= (r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32); \ + t ^= SWAP64LE((r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32)); \ else \ - U64(b)[0] ^= (r[0] + r[1]) ^ (r[2] + r[3]); \ - r[4] = ((v4_reg*)(a))[0]; \ - r[5] = ((v4_reg*)(a))[sizeof(uint64_t) / sizeof(v4_reg)]; \ - r[6] = ((v4_reg*)(_b))[0]; \ - r[7] = ((v4_reg*)(_b1))[0]; \ + t ^= SWAP64LE((r[0] + r[1]) ^ (r[2] + r[3])); \ + \ + memcpy(b, &t, sizeof(uint64_t)); \ + \ + V4_REG_LOAD(r + 4, a); \ + V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \ + V4_REG_LOAD(r + 6, _b); \ + V4_REG_LOAD(r + 7, _b1); \ + \ v4_random_math(code, r); \ } while (0) diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h index 2024cf80e..00f0393c5 100644 --- a/src/crypto/variant4_random_math.h +++ b/src/crypto/variant4_random_math.h @@ -195,8 +195,12 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ char data[32]; memset(data, 0, sizeof(data)); - *((uint64_t*)data) = SWAP64LE(height); + uint64_t tmp = SWAP64LE(height); + memcpy(data, &tmp, sizeof(uint64_t)); + // Set data_index past the last byte in data + // to trigger full data update with blake hash + // before we start using it size_t data_index = sizeof(data); int code_size; @@ -355,7 +359,9 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ // ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C" check_data(&data_index, sizeof(uint32_t), data, sizeof(data)); - code[code_size].C = SWAP32LE(*((uint32_t*)&data[data_index])); + uint32_t t; + memcpy(&t, data + data_index, sizeof(uint32_t)); + code[code_size].C = SWAP32LE(t); data_index += sizeof(uint32_t); } From a23468af8cbd1d4df0540d0a4ba1e7e3e2aa448f Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 8 Feb 2019 22:58:29 +0100 Subject: [PATCH 3/5] char is unsigned on ARM, fixed it --- src/crypto/variant4_random_math.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h index 00f0393c5..2d3669d47 100644 --- a/src/crypto/variant4_random_math.h +++ b/src/crypto/variant4_random_math.h @@ -166,7 +166,7 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg } // If we don't have enough data available, generate more -static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, char* data, const size_t data_size) +static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size) { if (*data_index + bytes_needed > data_size) { @@ -193,7 +193,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_ // Available ALUs for each instruction const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT }; - char data[32]; + int8_t data[32]; memset(data, 0, sizeof(data)); uint64_t tmp = SWAP64LE(height); memcpy(data, &tmp, sizeof(uint64_t)); From 3b97d399b104600e93ce43ea70a7658b170e1890 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 8 Feb 2019 23:10:38 +0100 Subject: [PATCH 4/5] Fixed a warning --- src/crypto/variant4_random_math.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h index 2d3669d47..50c65f2ed 100644 --- a/src/crypto/variant4_random_math.h +++ b/src/crypto/variant4_random_math.h @@ -170,7 +170,7 @@ static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed { if (*data_index + bytes_needed > data_size) { - hash_extra_blake(data, data_size, data); + hash_extra_blake(data, data_size, (char*) data); *data_index = 0; } } From 4cec4db7f0c20e4db94ebfb043491bee7843f5b6 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 8 Feb 2019 23:36:25 +0100 Subject: [PATCH 5/5] Fixed undefined behavior in ROR/ROL --- src/crypto/variant4_random_math.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h index 50c65f2ed..8724c58c9 100644 --- a/src/crypto/variant4_random_math.h +++ b/src/crypto/variant4_random_math.h @@ -106,13 +106,13 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg case ROR: \ { \ const uint32_t shift = src % REG_BITS; \ - *dst = (*dst >> shift) | (*dst << (REG_BITS - shift)); \ + *dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \ } \ break; \ case ROL: \ { \ const uint32_t shift = src % REG_BITS; \ - *dst = (*dst << shift) | (*dst >> (REG_BITS - shift)); \ + *dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \ } \ break; \ case XOR: \