From 939bc2233281c47427c9987fc5310cfb77b085f9 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Sun, 14 Jan 2018 23:06:55 +0000 Subject: [PATCH] add Straus multiexp --- src/ringct/bulletproofs.cc | 190 ++++++------------------- src/ringct/multiexp.cc | 137 +++++++++++++++++- src/ringct/multiexp.h | 5 +- tests/performance_tests/CMakeLists.txt | 1 + tests/performance_tests/main.cpp | 15 ++ tests/performance_tests/multiexp.h | 81 +++++++++++ 6 files changed, 279 insertions(+), 150 deletions(-) create mode 100644 tests/performance_tests/multiexp.h diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 057f19029..1c29b1b99 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -67,6 +67,14 @@ static const rct::keyV twoN = vector_powers(TWO, maxN); static const rct::key ip12 = inner_product(oneN, twoN); static boost::mutex init_mutex; +static inline rct::key multiexp(const std::vector &data, bool HiGi) +{ + if (HiGi || data.size() < 1000) + return straus(data, HiGi); + else + return bos_coster_heap_conv_robust(data); +} + //addKeys3acc_p3 //aAbB += a*A + b*B where a, b are scalars, A, B are curve points //A and B must be input after applying "precomp" @@ -126,35 +134,15 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b) { CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); -#if 1 + std::vector multiexp_data; multiexp_data.reserve(a.size()*2); for (size_t i = 0; i < a.size(); ++i) { - if (!(a[i] == rct::zero())) - { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = a[i]; - multiexp_data.back().point = Gi_p3[i]; - } - if (!(b[i] == rct::zero())) - { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = b[i]; - multiexp_data.back().point = Hi_p3[i]; - } + multiexp_data.emplace_back(a[i], Gi_p3[i]); + multiexp_data.emplace_back(b[i], Hi_p3[i]); } - return bos_coster_heap_conv_robust(multiexp_data); -#else - ge_p3 res_p3 = ge_p3_identity; - for (size_t i = 0; i < a.size(); ++i) - { - rct::addKeys3acc_p3(&res_p3, a[i], Gprecomp[i], b[i], Hprecomp[i]); - } - rct::key res; - ge_p3_tobytes(res.bytes, &res_p3); - return res; -#endif + return multiexp(multiexp_data, true); } /* Compute a custom vector-scalar commitment */ @@ -164,63 +152,19 @@ static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, c CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A"); CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); -#if 1 + std::vector multiexp_data; multiexp_data.reserve(a.size()*2); for (size_t i = 0; i < a.size(); ++i) { - if (!(a[i] == rct::zero())) - { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = a[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed"); - } - if (!(b[i] == rct::zero())) - { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = b[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed"); - } - } - return bos_coster_heap_conv_robust(multiexp_data); -#else - ge_p3 res_p3 = ge_p3_identity; - for (size_t i = 0; i < a.size(); ++i) - { -#if 0 - rct::key term; - // we happen to know where A and B might fall, so don't bother checking the rest - ge_dsmp *Acache = NULL, *Bcache = NULL; - ge_dsmp Acache_custom[1], Bcache_custom[1]; - if (Gi[i] == A[i]) - Acache = Gprecomp + i; - else if (i<32 && Gi[i+32] == A[i]) - Acache = Gprecomp + i + 32; - else - { - rct::precomp(Acache_custom[0], A[i]); - Acache = Acache_custom; - } - if (i == 0 && B[i] == Hi[0]) - Bcache = Hprecomp; - else - { - rct::precomp(Bcache_custom[0], B[i]); - Bcache = Bcache_custom; - } - rct::addKeys3(term, a[i], *Acache, b[i], *Bcache); - rct::addKeys(res, res, term); -#else - ge_dsmp Acache, Bcache; - rct::precomp(Bcache, B[i]); - rct::precomp(Acache, A[i]); - addKeys3acc_p3(&res_p3, a[i], Acache, b[i], Bcache); -#endif + multiexp_data.resize(multiexp_data.size() + 1); + multiexp_data.back().scalar = a[i]; + CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed"); + multiexp_data.resize(multiexp_data.size() + 1); + multiexp_data.back().scalar = b[i]; + CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed"); } - rct::key res; - ge_p3_tobytes(res.bytes, &res_p3); - return res; -#endif + return multiexp(multiexp_data, false); } /* Given a scalar, construct a vector of powers */ @@ -986,26 +930,23 @@ bool bulletproof_VERIFY(const Bulletproof &proof) } PERF_TIMER_STOP(VERIFY_line_61); - // multiexp is slower for small numbers of calcs - if (M >= 16) + // bos coster is slower for small numbers of calcs, straus seems not + if (1) { PERF_TIMER_START_BP(VERIFY_line_61rl_new); sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes); std::vector multiexp_data; - multiexp_data.reserve(3+M); - multiexp_data.push_back({tmp, rct::H}); - for (size_t j = 0; j < M; j++) + multiexp_data.reserve(3+proof.V.size()); + multiexp_data.emplace_back(tmp, rct::H); + for (size_t j = 0; j < proof.V.size(); j++) { - if (!(zpow[j+2] == rct::zero())) - multiexp_data.push_back({zpow[j+2], j < proof.V.size() ? proof.V[j] : rct::identity()}); + multiexp_data.emplace_back(zpow[j+2], proof.V[j]); } - if (!(x == rct::zero())) - multiexp_data.push_back({x, proof.T1}); + multiexp_data.emplace_back(x, proof.T1); rct::key xsq; sc_mul(xsq.bytes, x.bytes, x.bytes); - if (!(xsq == rct::zero())) - multiexp_data.push_back({xsq, proof.T2}); - L61Right = bos_coster_heap_conv_robust(multiexp_data); + multiexp_data.emplace_back(xsq, proof.T2); + L61Right = multiexp(multiexp_data, false); PERF_TIMER_STOP(VERIFY_line_61rl_new); } else @@ -1114,10 +1055,8 @@ bool bulletproof_VERIFY(const Bulletproof &proof) sc_muladd(tmp.bytes, z.bytes, ypow.bytes, tmp.bytes); sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); - if (!(g_scalar == rct::zero())) - multiexp_data.push_back({g_scalar, Gi_p3[i]}); - if (!(h_scalar == rct::zero())) - multiexp_data.push_back({h_scalar, Hi_p3[i]}); + multiexp_data.emplace_back(g_scalar, Gi_p3[i]); + multiexp_data.emplace_back(h_scalar, Hi_p3[i]); if (i != MN-1) { @@ -1126,63 +1065,28 @@ bool bulletproof_VERIFY(const Bulletproof &proof) } } - rct::key inner_prod = bos_coster_heap_conv_robust(multiexp_data); + rct::key inner_prod = multiexp(multiexp_data, true); PERF_TIMER_STOP(VERIFY_line_24_25); + // PAPER LINE 26 rct::key pprime; - // multiexp does not seem to give any speedup here - if(0) - { - PERF_TIMER_START_BP(VERIFY_line_26_new); - // PAPER LINE 26 - std::vector multiexp_data; - multiexp_data.reserve(1+2*rounds); - - sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes); - rct::addKeys(pprime, P, rct::scalarmultBase(tmp)); - for (size_t i = 0; i < rounds; ++i) - { - sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); - sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes); - if (!(tmp == rct::zero())) - multiexp_data.push_back({tmp, proof.L[i]}); - if (!(tmp2 == rct::zero())) - multiexp_data.push_back({tmp2, proof.R[i]}); - } - sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes); - if (!(tmp == rct::zero())) - multiexp_data.push_back({tmp, rct::H}); - addKeys(pprime, pprime, bos_coster_heap_conv_robust(multiexp_data)); - PERF_TIMER_STOP(VERIFY_line_26_new); - } + PERF_TIMER_START_BP(VERIFY_line_26_new); + multiexp_data.clear(); + multiexp_data.reserve(1+2*rounds); + sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes); + rct::addKeys(pprime, P, rct::scalarmultBase(tmp)); + for (size_t i = 0; i < rounds; ++i) { - PERF_TIMER_START_BP(VERIFY_line_26_old); - // PAPER LINE 26 - sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes); - rct::addKeys(pprime, P, rct::scalarmultBase(tmp)); - ge_p3 pprime_p3; - CHECK_AND_ASSERT_MES(ge_frombytes_vartime(&pprime_p3, pprime.bytes) == 0, false, "ge_frombytes_vartime failed"); - - for (size_t i = 0; i < rounds; ++i) - { - sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); - sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes); -#if 1 - ge_dsmp cacheL, cacheR; - rct::precomp(cacheL, proof.L[i]); - rct::precomp(cacheR, proof.R[i]); - addKeys3acc_p3(&pprime_p3, tmp, cacheL, tmp2, cacheR); -#else - rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.L[i], tmp)); - rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.R[i], tmp2)); -#endif - } - sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes); - addKeys_acc_p3(&pprime_p3, tmp, rct::H); - ge_p3_tobytes(pprime.bytes, &pprime_p3); - PERF_TIMER_STOP(VERIFY_line_26_old); + sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); + sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes); + multiexp_data.emplace_back(tmp, proof.L[i]); + multiexp_data.emplace_back(tmp2, proof.R[i]); } + sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes); + multiexp_data.emplace_back(tmp, rct::H); + addKeys(pprime, pprime, multiexp(multiexp_data, false)); + PERF_TIMER_STOP(VERIFY_line_26_new); PERF_TIMER_START_BP(VERIFY_step2_check); sc_mul(tmp.bytes, proof.a.bytes, proof.b.bytes); diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index 2545325ae..b70d92d46 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -38,7 +38,7 @@ extern "C" #include "multiexp.h" #undef MONERO_DEFAULT_LOG_CATEGORY -#define MONERO_DEFAULT_LOG_CATEGORY "multiexp.boscoster" +#define MONERO_DEFAULT_LOG_CATEGORY "multiexp" //#define MULTIEXP_PERF(x) x #define MULTIEXP_PERF(x) @@ -71,7 +71,15 @@ static inline rct::key div2(const rct::key &k) return res; } -rct::key bos_coster_heap_conv(std::vector &data) +static inline rct::key pow2(size_t n) +{ + CHECK_AND_ASSERT_THROW_MES(n < 256, "Invalid pow2 argument"); + rct::key res = rct::zero(); + res[n >> 3] |= 1<<(n&7); + return res; +} + +rct::key bos_coster_heap_conv(std::vector data) { MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000)); MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); @@ -142,15 +150,20 @@ rct::key bos_coster_heap_conv(std::vector &data) return res; } -rct::key bos_coster_heap_conv_robust(std::vector &data) +rct::key bos_coster_heap_conv_robust(std::vector data) { MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000)); MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); size_t points = data.size(); CHECK_AND_ASSERT_THROW_MES(points > 1, "Not enough points"); - std::vector heap(points); + std::vector heap; + heap.reserve(points); for (size_t n = 0; n < points; ++n) - heap[n] = n; + { + if (!(data[n].scalar == rct::zero()) && memcmp(&data[n].point, &ge_p3_identity, sizeof(ge_p3))) + heap.push_back(n); + } + points = heap.size(); auto Comp = [&](size_t e0, size_t e1) { return data[e0].scalar < data[e1].scalar; }; std::make_heap(heap.begin(), heap.end(), Comp); @@ -236,4 +249,118 @@ rct::key bos_coster_heap_conv_robust(std::vector &data) return res; } +rct::key straus(const std::vector &data, bool HiGi) +{ + MULTIEXP_PERF(PERF_TIMER_UNIT(straus, 1000000)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); + static constexpr unsigned int c = 4; + static constexpr unsigned int mask = (1<> HiGi_multiples; + std::vector> local_multiples, &multiples = HiGi ? HiGi_multiples : local_multiples; + ge_cached cached; + ge_p1p1 p1; + ge_p3 p3; + + std::vector skip(data.size()); + for (size_t i = 0; i < data.size(); ++i) + skip[i] = data[i].scalar == rct::zero() || !memcmp(&data[i].point, &ge_p3_identity, sizeof(ge_p3)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(multiples, 1000000)); + multiples.resize(1<> digits; + digits.resize(data.size()); + for (size_t j = 0; j < data.size(); ++j) + { + digits[j].resize(256); + unsigned char bytes33[33]; + memcpy(bytes33, data[j].scalar.bytes, 32); + bytes33[32] = 0; +#if 1 + static_assert(c == 4, "optimized version needs c == 4"); + const unsigned char *bytes = bytes33; + unsigned int i; + for (i = 0; i < 256; i += 8, bytes++) + { + digits[j][i] = bytes[0] & 0xf; + digits[j][i+1] = (bytes[0] >> 1) & 0xf; + digits[j][i+2] = (bytes[0] >> 2) & 0xf; + digits[j][i+3] = (bytes[0] >> 3) & 0xf; + digits[j][i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf; + digits[j][i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf; + digits[j][i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf; + digits[j][i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf; + } +#elif 1 + for (size_t i = 0; i < 256; ++i) + digits[j][i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask; +#else + rct::key shifted = data[j].scalar; + for (size_t i = 0; i < 256; ++i) + { + digits[j][i] = shifted.bytes[0] & 0xf; + shifted = div2(shifted, (256-i)>>3); + } +#endif + } + MULTIEXP_PERF(PERF_TIMER_STOP(digits)); + + rct::key maxscalar = rct::zero(); + for (size_t i = 0; i < data.size(); ++i) + if (maxscalar < data[i].scalar) + maxscalar = data[i].scalar; + size_t i = 0; + while (i < 256 && !(maxscalar < pow2(i))) + i += c; + MULTIEXP_PERF(PERF_TIMER_STOP(setup)); + + ge_p3 res_p3 = ge_p3_identity; + if (!(i < c)) + goto skipfirst; + while (!(i < c)) + { + for (size_t j = 0; j < c; ++j) + { + ge_p3_to_cached(&cached, &res_p3); + ge_add(&p1, &res_p3, &cached); + ge_p1p1_to_p3(&res_p3, &p1); + } +skipfirst: + i -= c; + for (size_t j = 0; j < data.size(); ++j) + { + if (skip[j]) + continue; + int digit = digits[j][i]; + if (digit) + { + ge_add(&p1, &res_p3, &multiples[digit][j]); + ge_p1p1_to_p3(&res_p3, &p1); + } + } + } + + rct::key res; + ge_p3_tobytes(res.bytes, &res_p3); + return res; +} + } diff --git a/src/ringct/multiexp.h b/src/ringct/multiexp.h index 108db7c39..cc53e633e 100644 --- a/src/ringct/multiexp.h +++ b/src/ringct/multiexp.h @@ -52,8 +52,9 @@ struct MultiexpData { } }; -rct::key bos_coster_heap_conv(std::vector &data); -rct::key bos_coster_heap_conv_robust(std::vector &data); +rct::key bos_coster_heap_conv(std::vector data); +rct::key bos_coster_heap_conv_robust(std::vector data); +rct::key straus(const std::vector &data, bool HiGi = false); } diff --git a/tests/performance_tests/CMakeLists.txt b/tests/performance_tests/CMakeLists.txt index 47f441dda..3ffd84aa6 100644 --- a/tests/performance_tests/CMakeLists.txt +++ b/tests/performance_tests/CMakeLists.txt @@ -45,6 +45,7 @@ set(performance_tests_headers range_proof.h bulletproof.h crypto_ops.h + multiexp.h multi_tx_test_base.h performance_tests.h performance_utils.h diff --git a/tests/performance_tests/main.cpp b/tests/performance_tests/main.cpp index 5b7a30f96..739c6cc87 100644 --- a/tests/performance_tests/main.cpp +++ b/tests/performance_tests/main.cpp @@ -56,6 +56,7 @@ #include "rct_mlsag.h" #include "bulletproof.h" #include "crypto_ops.h" +#include "multiexp.h" namespace po = boost::program_options; @@ -196,6 +197,20 @@ int main(int argc, char** argv) TEST_PERFORMANCE1(filter, test_crypto_ops, op_addKeys3); TEST_PERFORMANCE1(filter, test_crypto_ops, op_addKeys3_2); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 2); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 8); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 16); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 256); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 1024); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_bos_coster, 4096); + + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 2); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 8); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 16); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 256); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 1024); + TEST_PERFORMANCE2(filter, verbose, test_multiexp, multiexp_straus, 4096); + std::cout << "Tests finished. Elapsed time: " << timer.elapsed_ms() / 1000 << " sec" << std::endl; return 0; diff --git a/tests/performance_tests/multiexp.h b/tests/performance_tests/multiexp.h new file mode 100644 index 000000000..ac5f60fdf --- /dev/null +++ b/tests/performance_tests/multiexp.h @@ -0,0 +1,81 @@ +// Copyright (c) 2018, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Parts of this file are originally copyright (c) 2012-2013 The Cryptonote developers + +#pragma once + +#include +#include "ringct/rctOps.h" +#include "ringct/multiexp.h" + +enum test_multiexp_algorithm +{ + multiexp_bos_coster, + multiexp_straus, +}; + +template +class test_multiexp +{ +public: + static const size_t loop_count = npoints >= 1024 ? 10 : npoints < 256 ? 1000 : 100; + + bool init() + { + data.resize(npoints); + res = rct::identity(); + for (size_t n = 0; n < npoints; ++n) + { + data[n].scalar = rct::skGen(); + rct::key point = rct::scalarmultBase(rct::skGen()); + if (ge_frombytes_vartime(&data[n].point, point.bytes)) + return false; + rct::key kn = rct::scalarmultKey(point, data[n].scalar); + res = rct::addKeys(res, kn); + } + return true; + } + + bool test() + { + switch (algorithm) + { + case multiexp_bos_coster: + return res == bos_coster_heap_conv_robust(data); + case multiexp_straus: + return res == straus(data, false); + default: + return false; + } + } + +private: + std::vector data; + rct::key res; +};