diff --git a/src/mnemonics/electrum-words.cpp b/src/mnemonics/electrum-words.cpp
index 6d75b8950..48c9ab1ba 100644
--- a/src/mnemonics/electrum-words.cpp
+++ b/src/mnemonics/electrum-words.cpp
@@ -76,8 +76,8 @@ namespace crypto
 namespace
 {
   uint32_t create_checksum_index(const std::vector<epee::wipeable_string> &word_list,
-    uint32_t unique_prefix_length);
-  bool checksum_test(std::vector<epee::wipeable_string> seed, uint32_t unique_prefix_length);
+    const Language::Base *language);
+  bool checksum_test(std::vector<epee::wipeable_string> seed, const Language::Base *language);

   /*!
    * \brief Finds the word list that contains the seed words and puts the indices
@@ -116,8 +116,8 @@ namespace
     for (std::vector<Language::Base*>::iterator it1 = language_instances.begin();
       it1 != language_instances.end(); it1++)
     {
-      const std::unordered_map<epee::wipeable_string, uint32_t> &word_map = (*it1)->get_word_map();
-      const std::unordered_map<epee::wipeable_string, uint32_t> &trimmed_word_map = (*it1)->get_trimmed_word_map();
+      const std::unordered_map<epee::wipeable_string, uint32_t, Language::WordHash, Language::WordEqual> &word_map = (*it1)->get_word_map();
+      const std::unordered_map<epee::wipeable_string, uint32_t, Language::WordHash, Language::WordEqual> &trimmed_word_map = (*it1)->get_trimmed_word_map();
       // To iterate through seed words
       bool full_match = true;

@@ -151,7 +151,7 @@ namespace
       // if we were using prefix only, and we have a checksum, check it now
       // to avoid false positives due to prefix set being too common
       if (has_checksum)
-        if (!checksum_test(seed, (*it1)->get_unique_prefix_length()))
+        if (!checksum_test(seed, *it1))
         {
           fallback = *it1;
           full_match = false;
@@ -190,20 +190,19 @@ namespace
    * \return Checksum index
    */
   uint32_t create_checksum_index(const std::vector<epee::wipeable_string> &word_list,
-    uint32_t unique_prefix_length)
+    const Language::Base *language)
   {
-    epee::wipeable_string trimmed_words = "";
+    epee::wipeable_string trimmed_words = "", word;
+    const auto &trimmed_word_map = language->get_trimmed_word_map();
+    const uint32_t unique_prefix_length = language->get_unique_prefix_length();

     for (std::vector<epee::wipeable_string>::const_iterator it = word_list.begin(); it != word_list.end(); it++)
     {
-      if (it->length() > unique_prefix_length)
-      {
-        trimmed_words += Language::utf8prefix(*it, unique_prefix_length);
-      }
-      else
-      {
-        trimmed_words += *it;
-      }
+      word = Language::utf8prefix(*it, unique_prefix_length);
+      auto it2 = trimmed_word_map.find(word);
+      if (it2 == trimmed_word_map.end())
+        throw std::runtime_error("Word \"" + std::string(word.data(), word.size()) + "\" not found in trimmed word map in " + language->get_english_language_name());
+      trimmed_words += it2->first;
     }
     boost::crc_32_type result;
     result.process_bytes(trimmed_words.data(), trimmed_words.length());
@@ -216,7 +215,7 @@ namespace
-   * \param unique_prefix_length the prefix length of each word to use for checksum
+   * \param language the language whose trimmed word map and unique prefix length are used for the checksum
    * \return True if the test passed false if not.
    */
-  bool checksum_test(std::vector<epee::wipeable_string> seed, uint32_t unique_prefix_length)
+  bool checksum_test(std::vector<epee::wipeable_string> seed, const Language::Base *language)
   {
     if (seed.empty())
       return false;
@@ -224,13 +223,16 @@ namespace
     epee::wipeable_string last_word = seed.back();
     seed.pop_back();

-    epee::wipeable_string checksum = seed[create_checksum_index(seed, unique_prefix_length)];
+    const uint32_t unique_prefix_length = language->get_unique_prefix_length();
+
+    auto idx = create_checksum_index(seed, language);
+    epee::wipeable_string checksum = seed[idx];

     epee::wipeable_string trimmed_checksum = checksum.length() > unique_prefix_length ? Language::utf8prefix(checksum, unique_prefix_length) :
       checksum;
     epee::wipeable_string trimmed_last_word = last_word.length() > unique_prefix_length ? Language::utf8prefix(last_word, unique_prefix_length) :
       last_word;
-    bool ret = trimmed_checksum == trimmed_last_word;
+    bool ret = Language::WordEqual()(trimmed_checksum, trimmed_last_word);
     MINFO("Checksum is " << (ret ? "valid" : "invalid"));
     return ret;
   }
@@ -301,7 +303,7 @@ namespace crypto

     if (has_checksum)
     {
-      if (!checksum_test(seed, language->get_unique_prefix_length()))
+      if (!checksum_test(seed, language))
       {
         // Checksum fail
         MERROR("Invalid seed: invalid checksum");
@@ -424,7 +426,7 @@ namespace crypto
       memwipe(w, sizeof(w));
     }

-    words += words_store[create_checksum_index(words_store, language->get_unique_prefix_length())];
+    words += words_store[create_checksum_index(words_store, language)];
     return true;
   }

diff --git a/src/mnemonics/language_base.h b/src/mnemonics/language_base.h
index 89a4d2e7b..653314b04 100644
--- a/src/mnemonics/language_base.h
+++ b/src/mnemonics/language_base.h
@@ -38,7 +38,9 @@
 #include <vector>
 #include <unordered_map>
 #include <string>
+#include <boost/algorithm/string.hpp>
 #include "misc_log_ex.h"
+#include "fnv1.h"

 /*!
  * \namespace Language
@@ -71,6 +73,112 @@ namespace Language
     return prefix;
   }

+  /*!
+   * \brief Returns a canonical form of a UTF-8 string in which every code
+   *        point has been mapped through std::towlower (locale-dependent).
+   * \param s The UTF-8 encoded string to canonicalize.
+   * \return The canonical string; each code point is re-encoded using the
+   *         same number of bytes as in the input.
+   * \throw std::runtime_error on a truncated sequence or an invalid lead byte.
+   *
+   * NOTE(review): continuation bytes are not validated, and a lowercased code
+   * point whose UTF-8 length differs from its source is still written with
+   * the source length.
+   */
+  template<typename T>
+  inline T utf8canonical(const T &s)
+  {
+    T sc = "";
+    size_t avail = s.size();
+    const char *ptr = s.data();
+    wint_t cp = 0;
+    int bytes = 1;
+    char wbuf[8], *wptr;
+    while (avail--)
+    {
+      if ((*ptr & 0x80) == 0)
+      {
+        cp = *ptr++;
+        bytes = 1;
+      }
+      else if ((*ptr & 0xe0) == 0xc0)
+      {
+        if (avail < 1)
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x1f) << 6;
+        cp |= *ptr++ & 0x3f;
+        --avail;
+        bytes = 2;
+      }
+      else if ((*ptr & 0xf0) == 0xe0)
+      {
+        if (avail < 2)
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0xf) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 2;
+        bytes = 3;
+      }
+      else if ((*ptr & 0xf8) == 0xf0)
+      {
+        if (avail < 3)
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x7) << 18;
+        cp |= (*ptr++ & 0x3f) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 3;
+        bytes = 4;
+      }
+      else
+        throw std::runtime_error("Invalid UTF-8");
+
+      cp = std::towlower(cp);
+      wptr = wbuf;
+      switch (bytes)
+      {
+        case 1: *wptr++ = cp; break;
+        case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        default: throw std::runtime_error("Invalid UTF-8");
+      }
+      *wptr = 0;
+      sc += T(wbuf, bytes);
+      cp = 0;
+      bytes = 1;
+    }
+    return sc;
+  }
+
+  /*!
+   * \brief Hash functor for the word maps: hashes the canonical form of a
+   *        word, so words that compare equal under WordEqual hash identically.
+   */
+  struct WordHash
+  {
+    std::size_t operator()(const epee::wipeable_string &s) const
+    {
+      const epee::wipeable_string sc = utf8canonical(s);
+      return epee::fnv::FNV1a(sc.data(), sc.size());
+    }
+  };
+
+  /*!
+   * \brief Equality functor for the word maps: two words are equal when
+   *        their canonical forms are identical.
+   */
+  struct WordEqual
+  {
+    bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
+    {
+      const epee::wipeable_string s0c = utf8canonical(s0);
+      const epee::wipeable_string s1c = utf8canonical(s1);
+      return s0c == s1c;
+    }
+  };
+
   /*!
    * \class Base
    * \brief A base language class which all languages have to inherit from for
@@ -87,8 +195,8 @@ namespace Language
       NWORDS = 1626
     };
     std::vector<std::string> word_list; /*!< A pointer to the array of words */
-    std::unordered_map<epee::wipeable_string, uint32_t> word_map; /*!< hash table to find word's index */
-    std::unordered_map<epee::wipeable_string, uint32_t> trimmed_word_map; /*!< hash table to find word's trimmed index */
+    std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> word_map; /*!< hash table to find word's index */
+    std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> trimmed_word_map; /*!< hash table to find word's trimmed index */
     std::string language_name; /*!< Name of language */
     std::string english_language_name; /*!< Name of language */
     uint32_t unique_prefix_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
@@ -159,7 +267,7 @@ namespace Language
      * \brief Returns a pointer to the word map.
      * \return A pointer to the word map.
      */
-    const std::unordered_map<epee::wipeable_string, uint32_t>& get_word_map() const
+    const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_word_map() const
     {
       return word_map;
     }
@@ -167,7 +275,7 @@ namespace Language
      * \brief Returns a pointer to the trimmed word map.
      * \return A pointer to the trimmed word map.
      */
-    const std::unordered_map<epee::wipeable_string, uint32_t>& get_trimmed_word_map() const
+    const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_trimmed_word_map() const
     {
       return trimmed_word_map;
     }