From bd1f6029a367e411b66d4ff49f2537947e9d300f Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 12 Dec 2017 13:44:11 +0000 Subject: [PATCH] http_client: rewrite header parsing manually for speed boost::regex is stupendously atrocious at parsing malformed data --- contrib/epee/include/net/http_client.h | 169 ++++++++++++++----------- 1 file changed, 95 insertions(+), 74 deletions(-) diff --git a/contrib/epee/include/net/http_client.h b/contrib/epee/include/net/http_client.h index 5c448fcb3..1edf65928 100644 --- a/contrib/epee/include/net/http_client.h +++ b/contrib/epee/include/net/http_client.h @@ -27,6 +27,7 @@ #pragma once +#include #include #include #include @@ -752,87 +753,107 @@ namespace net_utils return true; } //--------------------------------------------------------------------------- - inline - bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process) - { + inline bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process) + { MTRACE("http_stream_filter::parse_cached_header(*)"); - - STATIC_REGEXP_EXPR_1(rexp_mach_field, - "\n?((Connection)|(Referer)|(Content-Length)|(Content-Type)|(Transfer-Encoding)|(Content-Encoding)|(Host)|(Cookie)|(User-Agent)|(Origin)" - // 12 3 4 5 6 7 8 9 10 11 - "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]", - //12 13 14 15 - boost::regex::icase | boost::regex::normal); - - boost::smatch result; - std::string::const_iterator it_current_bound = m_cache_to_process.begin(); - std::string::const_iterator it_end_bound = m_cache_to_process.end(); - - - //lookup all fields and fill well-known fields - while( boost::regex_search( it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched) + const char *ptr = m_cache_to_process.c_str(); + while (ptr[0] != '\r' || ptr[1] != '\n') { - const size_t field_val = 14; - //const size_t field_etc_name = 11; - - int i = 2; //start position = 2 - if(result[i++].matched)//"Connection" - body_info.m_connection = result[field_val]; - else if(result[i++].matched)//"Referrer" - body_info.m_referer = result[field_val]; - else if(result[i++].matched)//"Content-Length" - body_info.m_content_length = result[field_val]; - else if(result[i++].matched)//"Content-Type" - body_info.m_content_type = result[field_val]; - else if(result[i++].matched)//"Transfer-Encoding" - body_info.m_transfer_encoding = result[field_val]; - else if(result[i++].matched)//"Content-Encoding" - body_info.m_content_encoding = result[field_val]; - else if(result[i++].matched)//"Host" - { body_info.m_host = result[field_val]; - string_tools::trim(body_info.m_host); + // optional \n + if (*ptr == '\n') + ++ptr; + // an identifier composed of letters or - + const char *key_pos = ptr; + while (isalnum(*ptr) || *ptr == '_' || *ptr == '-') + ++ptr; + const char *key_end = ptr; + // optional space (not in RFC, but in previous code) + if (*ptr == ' ') + ++ptr; + CHECK_AND_ASSERT_MES(*ptr == ':', true, "http_stream_filter::parse_cached_header() invalid header in: " << m_cache_to_process); + ++ptr; + // optional whitespace, but not newlines - line folding is obsolete, let's ignore it + while (isblank(*ptr)) + ++ptr; + const char *value_pos = ptr; + while (*ptr != '\r' && *ptr != '\n') + ++ptr; + const char *value_end = ptr; + // optional trailing whitespace + while (value_end > value_pos && isblank(*(value_end-1))) + --value_end; + if (*ptr == '\r') + ++ptr; + CHECK_AND_ASSERT_MES(*ptr == '\n', true, "http_stream_filter::parse_cached_header() invalid header in: " << m_cache_to_process); + ++ptr; + + const std::string key = std::string(key_pos, key_end - key_pos); + const std::string value = std::string(value_pos, value_end - value_pos); + if (!key.empty()) + { + if (!string_tools::compare_no_case(key, "Connection")) + body_info.m_connection = value; + else if(!string_tools::compare_no_case(key, "Referrer")) + body_info.m_referer = value; + else if(!string_tools::compare_no_case(key, "Content-Length")) + body_info.m_content_length = value; + else if(!string_tools::compare_no_case(key, "Content-Type")) + body_info.m_content_type = value; + else if(!string_tools::compare_no_case(key, "Transfer-Encoding")) + body_info.m_transfer_encoding = value; + else if(!string_tools::compare_no_case(key, "Content-Encoding")) + body_info.m_content_encoding = value; + else if(!string_tools::compare_no_case(key, "Host")) + body_info.m_host = value; + else if(!string_tools::compare_no_case(key, "Cookie")) + body_info.m_cookie = value; + else if(!string_tools::compare_no_case(key, "User-Agent")) + body_info.m_user_agent = value; + else if(!string_tools::compare_no_case(key, "Origin")) + body_info.m_origin = value; + else + body_info.m_etc_fields.emplace_back(key, value); } - else if(result[i++].matched)//"Cookie" - body_info.m_cookie = result[field_val]; - else if(result[i++].matched)//"User-Agent" - body_info.m_user_agent = result[field_val]; - else if(result[i++].matched)//"Origin" - body_info.m_origin = result[field_val]; - else if(result[i++].matched)//e.t.c (HAVE TO BE MATCHED!) - body_info.m_etc_fields.emplace_back(result[12], result[field_val]); - else - {CHECK_AND_ASSERT_MES(false, false, "http_stream_filter::parse_cached_header() not matched last entry in:"<(result[1]); - m_response_info.m_http_ver_lo = boost::lexical_cast(result[2]); - m_response_info.m_response_code = boost::lexical_cast(result[3]); - - m_header_cache.erase(to_nonsonst_iterator(m_header_cache, result[0].first), to_nonsonst_iterator(m_header_cache, result[0].second)); - return true; - }else - { - LOG_ERROR("http_stream_filter::handle_invoke_reply_line(): Failed to match first response line:" << m_header_cache); - return false; - } - + //First line response, look like this: "HTTP/1.1 200 OK" + const char *ptr = m_header_cache.c_str(); + CHECK_AND_ASSERT_MES(!memcmp(ptr, "HTTP/", 5), false, "Invalid first response line: " + m_header_cache); + ptr += 5; + CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache); + unsigned long ul; + char *end; + ul = strtoul(ptr, &end, 10); + CHECK_AND_ASSERT_MES(ul <= INT_MAX && *end =='.', false, "Invalid first response line: " + m_header_cache); + m_response_info.m_http_ver_hi = ul; + ptr = end + 1; + CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache + ", ptr: " << ptr); + ul = strtoul(ptr, &end, 10); + CHECK_AND_ASSERT_MES(ul <= INT_MAX && isblank(*end), false, "Invalid first response line: " + m_header_cache + ", ptr: " << ptr); + m_response_info.m_http_ver_lo = ul; + ptr = end + 1; + while (isblank(*ptr)) + ++ptr; + CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache); + ul = strtoul(ptr, &end, 10); + CHECK_AND_ASSERT_MES(ul >= 100 && ul <= 999 && isspace(*end), false, "Invalid first response line: " + m_header_cache); + m_response_info.m_response_code = ul; + ptr = end; + // ignore the optional text, till the end + while (*ptr != '\r' && *ptr != '\n') + ++ptr; + if (*ptr == '\r') + ++ptr; + CHECK_AND_ASSERT_MES(*ptr == '\n', false, "Invalid first response line: " << m_header_cache); + ++ptr; + + m_header_cache.erase(0, ptr - m_header_cache.c_str()); + return true; } inline bool set_reply_content_encoder()