diff --git a/src/blockchain_db/blockchain_db.h b/src/blockchain_db/blockchain_db.h index 7870a2efd..f13aa0cae 100644 --- a/src/blockchain_db/blockchain_db.h +++ b/src/blockchain_db/blockchain_db.h @@ -1404,6 +1404,13 @@ public: */ virtual cryptonote::blobdata get_txpool_tx_blob(const crypto::hash& txid) const = 0; + /** + * @brief prune output data for the given amount + * + * @param amount the amount for which to prune data + */ + virtual void prune_outputs(uint64_t amount) = 0; + /** * @brief runs a function over all txpool transactions * diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 5af5d1903..4bfc80863 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -1075,6 +1075,60 @@ void BlockchainLMDB::remove_output(const uint64_t amount, const uint64_t& out_in throw0(DB_ERROR(lmdb_error(std::string("Error deleting amount for output index ").append(boost::lexical_cast(out_index).append(": ")).c_str(), result).c_str())); } +void BlockchainLMDB::prune_outputs(uint64_t amount) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + mdb_txn_cursors *m_cursors = &m_wcursors; + CURSOR(output_amounts); + CURSOR(output_txs); + + MINFO("Pruning outputs for amount " << amount); + + MDB_val v; + MDB_val_set(k, amount); + int result = mdb_cursor_get(m_cur_output_amounts, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + return; + if (result) + throw0(DB_ERROR(lmdb_error("Error looking up outputs: ", result).c_str())); + + // gather output ids + mdb_size_t num_elems; + mdb_cursor_count(m_cur_output_amounts, &num_elems); + MINFO(num_elems << " outputs found"); + std::vector output_ids; + output_ids.reserve(num_elems); + while (1) + { + const pre_rct_outkey *okp = (const pre_rct_outkey *)v.mv_data; + output_ids.push_back(okp->output_id); + MDEBUG("output id " << okp->output_id); + result = mdb_cursor_get(m_cur_output_amounts, &k, &v, MDB_NEXT_DUP); + if (result == MDB_NOTFOUND) + break; + if (result) + throw0(DB_ERROR(lmdb_error("Error counting outputs: ", result).c_str())); + } + if (output_ids.size() != num_elems) + throw0(DB_ERROR("Unexpected number of outputs")); + + result = mdb_cursor_del(m_cur_output_amounts, MDB_NODUPDATA); + if (result) + throw0(DB_ERROR(lmdb_error("Error deleting outputs: ", result).c_str())); + + for (uint64_t output_id: output_ids) + { + MDB_val_set(v, output_id); + result = mdb_cursor_get(m_cur_output_txs, (MDB_val *)&zerokval, &v, MDB_GET_BOTH); + if (result) + throw0(DB_ERROR(lmdb_error("Error looking up output: ", result).c_str())); + result = mdb_cursor_del(m_cur_output_txs, 0); + if (result) + throw0(DB_ERROR(lmdb_error("Error deleting output: ", result).c_str())); + } +} + void BlockchainLMDB::add_spent_key(const crypto::key_image& k_image) { LOG_PRINT_L3("BlockchainLMDB::" << __func__); @@ -2229,11 +2283,19 @@ uint64_t BlockchainLMDB::num_outputs() const TXN_PREFIX_RDONLY(); int result; - // get current height - MDB_stat db_stats; - if ((result = mdb_stat(m_txn, m_output_txs, &db_stats))) + RCURSOR(output_txs) + + uint64_t num = 0; + MDB_val k, v; + result = mdb_cursor_get(m_cur_output_txs, &k, &v, MDB_LAST); + if (result == MDB_NOTFOUND) + num = 0; + else if (result == 0) + num = 1 + ((const outtx*)v.mv_data)->output_id; + else throw0(DB_ERROR(lmdb_error("Failed to query m_output_txs: ", result).c_str())); - return db_stats.ms_entries; + + return num; } bool BlockchainLMDB::tx_exists(const crypto::hash& h) const diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index 26159ab4d..6db241240 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -345,6 +345,8 @@ private: void remove_output(const uint64_t amount, const uint64_t& out_index); + virtual void prune_outputs(uint64_t amount); + virtual void add_spent_key(const crypto::key_image& k_image); virtual void remove_spent_key(const crypto::key_image& k_image); diff --git a/src/blockchain_utilities/CMakeLists.txt b/src/blockchain_utilities/CMakeLists.txt index c9ad1cebe..ddf575c29 100644 --- a/src/blockchain_utilities/CMakeLists.txt +++ b/src/blockchain_utilities/CMakeLists.txt @@ -81,6 +81,17 @@ monero_private_headers(blockchain_usage +set(blockchain_prune_known_spent_data_sources + blockchain_prune_known_spent_data.cpp + ) + +set(blockchain_prune_known_spent_data_private_headers) + +monero_private_headers(blockchain_prune_known_spent_data + ${blockchain_prune_known_spent_data_private_headers}) + + + set(blockchain_ancestry_sources blockchain_ancestry.cpp ) @@ -265,3 +276,25 @@ set_property(TARGET blockchain_stats PROPERTY OUTPUT_NAME "monero-blockchain-stats") install(TARGETS blockchain_stats DESTINATION bin) + +monero_add_executable(blockchain_prune_known_spent_data + ${blockchain_prune_known_spent_data_sources} + ${blockchain_prune_known_spent_data_private_headers}) + +target_link_libraries(blockchain_prune_known_spent_data + PRIVATE + cryptonote_core + blockchain_db + p2p + version + epee + ${Boost_FILESYSTEM_LIBRARY} + ${Boost_SYSTEM_LIBRARY} + ${Boost_THREAD_LIBRARY} + ${CMAKE_THREAD_LIBS_INIT} + ${EXTRA_LIBRARIES}) + +set_property(TARGET blockchain_prune_known_spent_data + PROPERTY + OUTPUT_NAME "monero-blockchain-prune-known-spent-data") +install(TARGETS blockchain_prune_known_spent_data DESTINATION bin) diff --git a/src/blockchain_utilities/blockchain_prune_known_spent_data.cpp b/src/blockchain_utilities/blockchain_prune_known_spent_data.cpp new file mode 100644 index 000000000..f6136c1ba --- /dev/null +++ b/src/blockchain_utilities/blockchain_prune_known_spent_data.cpp @@ -0,0 +1,305 @@ +// Copyright (c) 2014-2018, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include "common/command_line.h" +#include "serialization/crypto.h" +#include "cryptonote_core/tx_pool.h" +#include "cryptonote_core/cryptonote_core.h" +#include "cryptonote_core/blockchain.h" +#include "blockchain_db/blockchain_db.h" +#include "blockchain_db/db_types.h" +#include "version.h" + +#undef MONERO_DEFAULT_LOG_CATEGORY +#define MONERO_DEFAULT_LOG_CATEGORY "bcutil" + +namespace po = boost::program_options; +using namespace epee; +using namespace cryptonote; + +static std::map load_outputs(const std::string &filename) +{ + std::map outputs; + uint64_t amount = std::numeric_limits::max(); + FILE *f; + + f = fopen(filename.c_str(), "r"); + if (!f) + { + MERROR("Failed to load outputs from " << filename << ": " << strerror(errno)); + return {}; + } + while (1) + { + char s[256]; + if (!fgets(s, sizeof(s), f)) + break; + if (feof(f)) + break; + const size_t len = strlen(s); + if (len > 0 && s[len - 1] == '\n') + s[len - 1] = 0; + if (!s[0]) + continue; + std::pair output; + uint64_t offset, num_offsets; + if (sscanf(s, "@%" PRIu64, &amount) == 1) + { + continue; + } + if (amount == std::numeric_limits::max()) + { + MERROR("Bad format in " << filename); + continue; + } + if (sscanf(s, "%" PRIu64 "*%" PRIu64, &offset, &num_offsets) == 2 && num_offsets < std::numeric_limits::max() - offset) + { + outputs[amount] += num_offsets; + } + else if (sscanf(s, "%" PRIu64, &offset) == 1) + { + outputs[amount] += 1; + } + else + { + MERROR("Bad format in " << filename); + continue; + } + } + fclose(f); + return outputs; +} + +int main(int argc, char* argv[]) +{ + TRY_ENTRY(); + + epee::string_tools::set_module_name_and_folder(argv[0]); + + std::string default_db_type = "lmdb"; + + std::string available_dbs = cryptonote::blockchain_db_types(", "); + available_dbs = "available: " + available_dbs; + + uint32_t log_level = 0; + + tools::on_startup(); + + po::options_description desc_cmd_only("Command line options"); + po::options_description desc_cmd_sett("Command line options and settings options"); + const command_line::arg_descriptor arg_log_level = {"log-level", "0-4 or categories", ""}; + const command_line::arg_descriptor arg_database = { + "database", available_dbs.c_str(), default_db_type + }; + const command_line::arg_descriptor arg_verbose = {"verbose", "Verbose output", false}; + const command_line::arg_descriptor arg_dry_run = {"dry-run", "Do not actually prune", false}; + const command_line::arg_descriptor arg_input = {"input", "Path to the known spent outputs file"}; + + command_line::add_arg(desc_cmd_sett, cryptonote::arg_data_dir); + command_line::add_arg(desc_cmd_sett, cryptonote::arg_testnet_on); + command_line::add_arg(desc_cmd_sett, cryptonote::arg_stagenet_on); + command_line::add_arg(desc_cmd_sett, arg_log_level); + command_line::add_arg(desc_cmd_sett, arg_database); + command_line::add_arg(desc_cmd_sett, arg_verbose); + command_line::add_arg(desc_cmd_sett, arg_dry_run); + command_line::add_arg(desc_cmd_sett, arg_input); + command_line::add_arg(desc_cmd_only, command_line::arg_help); + + po::options_description desc_options("Allowed options"); + desc_options.add(desc_cmd_only).add(desc_cmd_sett); + + po::variables_map vm; + bool r = command_line::handle_error_helper(desc_options, [&]() + { + auto parser = po::command_line_parser(argc, argv).options(desc_options); + po::store(parser.run(), vm); + po::notify(vm); + return true; + }); + if (! r) + return 1; + + if (command_line::get_arg(vm, command_line::arg_help)) + { + std::cout << "Monero '" << MONERO_RELEASE_NAME << "' (v" << MONERO_VERSION_FULL << ")" << ENDL << ENDL; + std::cout << desc_options << std::endl; + return 1; + } + + mlog_configure(mlog_get_default_log_path("monero-blockchain-prune-known-spent-data.log"), true); + if (!command_line::is_arg_defaulted(vm, arg_log_level)) + mlog_set_log(command_line::get_arg(vm, arg_log_level).c_str()); + else + mlog_set_log(std::string(std::to_string(log_level) + ",bcutil:INFO").c_str()); + + LOG_PRINT_L0("Starting..."); + + std::string opt_data_dir = command_line::get_arg(vm, cryptonote::arg_data_dir); + bool opt_testnet = command_line::get_arg(vm, cryptonote::arg_testnet_on); + bool opt_stagenet = command_line::get_arg(vm, cryptonote::arg_stagenet_on); + network_type net_type = opt_testnet ? TESTNET : opt_stagenet ? STAGENET : MAINNET; + bool opt_verbose = command_line::get_arg(vm, arg_verbose); + bool opt_dry_run = command_line::get_arg(vm, arg_dry_run); + + std::string db_type = command_line::get_arg(vm, arg_database); + if (!cryptonote::blockchain_valid_db_type(db_type)) + { + std::cerr << "Invalid database type: " << db_type << std::endl; + return 1; + } + + const std::string input = command_line::get_arg(vm, arg_input); + + LOG_PRINT_L0("Initializing source blockchain (BlockchainDB)"); + std::unique_ptr core_storage; + tx_memory_pool m_mempool(*core_storage); + core_storage.reset(new Blockchain(m_mempool)); + BlockchainDB *db = new_db(db_type); + if (db == NULL) + { + LOG_ERROR("Attempted to use non-existent database type: " << db_type); + throw std::runtime_error("Attempting to use non-existent database type"); + } + LOG_PRINT_L0("database: " << db_type); + + const std::string filename = (boost::filesystem::path(opt_data_dir) / db->get_db_name()).string(); + LOG_PRINT_L0("Loading blockchain from folder " << filename << " ..."); + + try + { + db->open(filename, 0); + } + catch (const std::exception& e) + { + LOG_PRINT_L0("Error opening database: " << e.what()); + return 1; + } + r = core_storage->init(db, net_type); + + CHECK_AND_ASSERT_MES(r, 1, "Failed to initialize source blockchain storage"); + LOG_PRINT_L0("Source blockchain storage initialized OK"); + + std::map known_spent_outputs; + if (input.empty()) + { + std::map> outputs; + + LOG_PRINT_L0("Scanning for known spent data..."); + db->for_all_transactions([&](const crypto::hash &txid, const cryptonote::transaction &tx){ + const bool miner_tx = tx.vin.size() == 1 && tx.vin[0].type() == typeid(txin_gen); + for (const auto &in: tx.vin) + { + if (in.type() != typeid(txin_to_key)) + continue; + const auto &txin = boost::get(in); + if (txin.amount == 0) + continue; + + outputs[txin.amount].second++; + } + + for (const auto &out: tx.vout) + { + uint64_t amount = out.amount; + if (miner_tx && tx.version >= 2) + amount = 0; + if (amount == 0) + continue; + if (out.target.type() != typeid(txout_to_key)) + continue; + + outputs[amount].first++; + } + return true; + }, true); + + for (const auto &i: outputs) + { + known_spent_outputs[i.first] = i.second.second; + } + } + else + { + LOG_PRINT_L0("Loading known spent data..."); + known_spent_outputs = load_outputs(input); + } + + LOG_PRINT_L0("Pruning known spent data..."); + + bool stop_requested = false; + tools::signal_handler::install([&stop_requested](int type) { + stop_requested = true; + }); + + db->batch_start(); + + size_t num_total_outputs = 0, num_prunable_outputs = 0, num_known_spent_outputs = 0, num_eligible_outputs = 0, num_eligible_known_spent_outputs = 0; + for (auto i = known_spent_outputs.begin(); i != known_spent_outputs.end(); ++i) + { + uint64_t num_outputs = db->get_num_outputs(i->first); + num_total_outputs += num_outputs; + num_known_spent_outputs += i->second; + if (i->first == 0 || is_valid_decomposed_amount(i->first)) + { + if (opt_verbose) + MINFO("Ignoring output value " << i->first << ", with " << num_outputs << " outputs"); + continue; + } + num_eligible_outputs += num_outputs; + num_eligible_known_spent_outputs += i->second; + if (opt_verbose) + MINFO(i->first << ": " << i->second << "/" << num_outputs); + if (num_outputs > i->second) + continue; + if (num_outputs && num_outputs < i->second) + { + MERROR("More outputs are spent than known for amount " << i->first << ", not touching"); + continue; + } + if (opt_verbose) + MINFO("Pruning data for " << num_outputs << " outputs"); + if (!opt_dry_run) + db->prune_outputs(i->first); + num_prunable_outputs += i->second; + } + + db->batch_stop(); + + MINFO("Total outputs: " << num_total_outputs); + MINFO("Known spent outputs: " << num_known_spent_outputs); + MINFO("Eligible outputs: " << num_eligible_outputs); + MINFO("Eligible known spent outputs: " << num_eligible_known_spent_outputs); + MINFO("Prunable outputs: " << num_prunable_outputs); + + LOG_PRINT_L0("Blockchain known spent data pruned OK"); + core_storage->deinit(); + return 0; + + CATCH_ENTRY("Error", 1); +} diff --git a/tests/unit_tests/testdb.h b/tests/unit_tests/testdb.h index a9c772920..5d9ba5833 100644 --- a/tests/unit_tests/testdb.h +++ b/tests/unit_tests/testdb.h @@ -142,5 +142,6 @@ public: virtual bool prune_blockchain(uint32_t pruning_seed = 0) { return true; } virtual bool update_pruning() { return true; } virtual bool check_pruning() { return true; } + virtual void prune_outputs(uint64_t amount) {} };