Commit c64d2573 authored by Tatsuhiro Tsujikawa

Replace http-parser with llhttp

llhttp does not include a URL parser.  We extracted the URL parser code
from http-parser and put it under third-party/url-parser.

llhttp bd3d224eb8cdc92c6fc8f508d7bbe0ba266e8e92
parent f028cc43
......@@ -9,6 +9,7 @@ if(ENABLE_EXAMPLES)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
"${CMAKE_CURRENT_SOURCE_DIR}/../third-party"
"${CMAKE_CURRENT_SOURCE_DIR}/../third-party/llhttp/include"
${LIBEVENT_INCLUDE_DIRS}
${OPENSSL_INCLUDE_DIRS}
......@@ -21,14 +22,24 @@ if(ENABLE_EXAMPLES)
${APP_LIBRARIES}
)
add_executable(client client.c $<TARGET_OBJECTS:http-parser>)
add_executable(libevent-client libevent-client.c $<TARGET_OBJECTS:http-parser>)
add_executable(libevent-server libevent-server.c $<TARGET_OBJECTS:http-parser>)
add_executable(deflate deflate.c $<TARGET_OBJECTS:http-parser>)
add_executable(client client.c $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
add_executable(libevent-client libevent-client.c $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
add_executable(libevent-server libevent-server.c $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
add_executable(deflate deflate.c $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
if(ENABLE_ASIO_LIB)
foreach(name asio-sv asio-sv2 asio-cl asio-cl2)
add_executable(${name} ${name}.cc $<TARGET_OBJECTS:http-parser>)
add_executable(${name} ${name}.cc $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
target_include_directories(${name} PRIVATE
${OPENSSL_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
......
......@@ -36,7 +36,7 @@ AM_CPPFLAGS = \
@OPENSSL_CFLAGS@ \
@DEFS@
LDADD = $(top_builddir)/lib/libnghttp2.la \
$(top_builddir)/third-party/libhttp-parser.la \
$(top_builddir)/third-party/liburl-parser.la \
@LIBEVENT_OPENSSL_LIBS@ \
@OPENSSL_LIBS@ \
@APPLDFLAGS@
......@@ -61,7 +61,7 @@ noinst_PROGRAMS += asio-sv asio-sv2 asio-cl asio-cl2
ASIOCPPFLAGS = ${AM_CPPFLAGS} ${BOOST_CPPFLAGS}
ASIOLDADD = $(top_builddir)/lib/libnghttp2.la \
$(top_builddir)/src/libnghttp2_asio.la @JEMALLOC_LIBS@ \
$(top_builddir)/third-party/libhttp-parser.la \
$(top_builddir)/third-party/liburl-parser.la \
@OPENSSL_LIBS@ \
${BOOST_LDFLAGS} \
${BOOST_ASIO_LIB} \
......
......@@ -65,7 +65,7 @@ char *strndup(const char *s, size_t size);
#include <nghttp2/nghttp2.h>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#define ARRLEN(x) (sizeof(x) / sizeof(x[0]))
......
......@@ -4,42 +4,42 @@ from io import StringIO
from gentokenlookup import gentokenlookup
# copied from http-parser/http_parser.h, and stripped trailing spaces
# and backslashes.
# copied from llhttp.h, and stripped trailing spaces and backslashes.
SRC = '''
XX(0, DELETE, DELETE)
XX(1, GET, GET)
XX(2, HEAD, HEAD)
XX(3, POST, POST)
XX(4, PUT, PUT)
/* pathological */
XX(5, CONNECT, CONNECT)
XX(6, OPTIONS, OPTIONS)
XX(7, TRACE, TRACE)
/* webdav */
XX(8, COPY, COPY)
XX(9, LOCK, LOCK)
XX(10, MKCOL, MKCOL)
XX(11, MOVE, MOVE)
XX(12, PROPFIND, PROPFIND)
XX(13, PROPPATCH, PROPPATCH)
XX(14, SEARCH, SEARCH)
XX(15, UNLOCK, UNLOCK)
/* subversion */
XX(16, REPORT, REPORT)
XX(17, MKACTIVITY, MKACTIVITY)
XX(18, CHECKOUT, CHECKOUT)
XX(19, MERGE, MERGE)
/* upnp */
XX(20, MSEARCH, M-SEARCH)
XX(21, NOTIFY, NOTIFY)
XX(22, SUBSCRIBE, SUBSCRIBE)
XX(23, UNSUBSCRIBE, UNSUBSCRIBE)
/* RFC-5789 */
XX(24, PATCH, PATCH)
XX(25, PURGE, PURGE)
/* CalDAV */
XX(26, MKCALENDAR, MKCALENDAR)
XX(0, DELETE, DELETE)
XX(1, GET, GET)
XX(2, HEAD, HEAD)
XX(3, POST, POST)
XX(4, PUT, PUT)
XX(5, CONNECT, CONNECT)
XX(6, OPTIONS, OPTIONS)
XX(7, TRACE, TRACE)
XX(8, COPY, COPY)
XX(9, LOCK, LOCK)
XX(10, MKCOL, MKCOL)
XX(11, MOVE, MOVE)
XX(12, PROPFIND, PROPFIND)
XX(13, PROPPATCH, PROPPATCH)
XX(14, SEARCH, SEARCH)
XX(15, UNLOCK, UNLOCK)
XX(16, BIND, BIND)
XX(17, REBIND, REBIND)
XX(18, UNBIND, UNBIND)
XX(19, ACL, ACL)
XX(20, REPORT, REPORT)
XX(21, MKACTIVITY, MKACTIVITY)
XX(22, CHECKOUT, CHECKOUT)
XX(23, MERGE, MERGE)
XX(24, MSEARCH, M-SEARCH)
XX(25, NOTIFY, NOTIFY)
XX(26, SUBSCRIBE, SUBSCRIBE)
XX(27, UNSUBSCRIBE, UNSUBSCRIBE)
XX(28, PATCH, PATCH)
XX(29, PURGE, PURGE)
XX(30, MKCALENDAR, MKCALENDAR)
XX(31, LINK, LINK)
XX(32, UNLINK, UNLINK)
XX(33, SOURCE, SOURCE)
'''
if __name__ == '__main__':
......
......@@ -10,6 +10,7 @@ set_source_files_properties(${cxx_sources} PROPERTIES
include_directories(
"${CMAKE_CURRENT_SOURCE_DIR}/includes"
"${CMAKE_CURRENT_SOURCE_DIR}/../third-party"
"${CMAKE_CURRENT_SOURCE_DIR}/../third-party/llhttp/include"
${JEMALLOC_INCLUDE_DIRS}
${SPDYLAY_INCLUDE_DIRS}
......@@ -166,7 +167,8 @@ if(ENABLE_APP)
)
add_executable(nghttpx-unittest EXCLUDE_FROM_ALL
${NGHTTPX_UNITTEST_SOURCES}
$<TARGET_OBJECTS:http-parser>
$<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
target_include_directories(nghttpx-unittest PRIVATE ${CUNIT_INCLUDE_DIRS})
target_compile_definitions(nghttpx-unittest
......@@ -184,12 +186,20 @@ if(ENABLE_APP)
add_dependencies(check nghttpx-unittest)
endif()
add_executable(nghttp ${NGHTTP_SOURCES} $<TARGET_OBJECTS:http-parser>)
add_executable(nghttpd ${NGHTTPD_SOURCES} $<TARGET_OBJECTS:http-parser>)
add_executable(nghttpx ${NGHTTPX-bin_SOURCES} $<TARGET_OBJECTS:http-parser>)
add_executable(nghttp ${NGHTTP_SOURCES} $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
add_executable(nghttpd ${NGHTTPD_SOURCES} $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
add_executable(nghttpx ${NGHTTPX-bin_SOURCES} $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
target_compile_definitions(nghttpx PRIVATE "-DPKGDATADIR=\"${PKGDATADIR}\"")
target_link_libraries(nghttpx nghttpx_static)
add_executable(h2load ${H2LOAD_SOURCES} $<TARGET_OBJECTS:http-parser>)
add_executable(h2load ${H2LOAD_SOURCES} $<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
install(TARGETS nghttp nghttpd nghttpx h2load
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
......@@ -243,7 +253,8 @@ if(ENABLE_ASIO_LIB)
add_library(nghttp2_asio SHARED
${NGHTTP2_ASIO_SOURCES}
$<TARGET_OBJECTS:http-parser>
$<TARGET_OBJECTS:llhttp>
$<TARGET_OBJECTS:url-parser>
)
target_include_directories(nghttp2_asio PRIVATE
${OPENSSL_INCLUDE_DIRS}
......
......@@ -40,6 +40,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/lib \
-I$(top_srcdir)/src/includes \
-I$(top_srcdir)/third-party \
-I$(top_srcdir)/third-party/llhttp/include \
@LIBXML2_CFLAGS@ \
@LIBEV_CFLAGS@ \
@OPENSSL_CFLAGS@ \
......@@ -49,7 +50,8 @@ AM_CPPFLAGS = \
@DEFS@
LDADD = $(top_builddir)/lib/libnghttp2.la \
$(top_builddir)/third-party/libhttp-parser.la \
$(top_builddir)/third-party/liburl-parser.la \
$(top_builddir)/third-party/libllhttp.la \
@JEMALLOC_LIBS@ \
@LIBXML2_LIBS@ \
@LIBEV_LIBS@ \
......@@ -263,7 +265,8 @@ libnghttp2_asio_la_CPPFLAGS = ${AM_CPPFLAGS} ${BOOST_CPPFLAGS}
libnghttp2_asio_la_LDFLAGS = -no-undefined -version-info 1:0:0
libnghttp2_asio_la_LIBADD = \
$(top_builddir)/lib/libnghttp2.la \
$(top_builddir)/third-party/libhttp-parser.la \
$(top_builddir)/third-party/liburl-parser.la \
$(top_builddir)/third-party/libllhttp.la \
@OPENSSL_LIBS@ \
${BOOST_LDFLAGS} \
${BOOST_ASIO_LIB} \
......
......@@ -48,7 +48,7 @@
#include <openssl/err.h>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "h2load_http1_session.h"
#include "h2load_http2_session.h"
......
......@@ -34,27 +34,13 @@
#include <iostream>
#include <fstream>
#include "http-parser/http_parser.h"
using namespace nghttp2;
namespace h2load {
Http1Session::Http1Session(Client *client)
: stream_req_counter_(1),
stream_resp_counter_(1),
client_(client),
htp_(),
complete_(false) {
http_parser_init(&htp_, HTTP_RESPONSE);
htp_.data = this;
}
Http1Session::~Http1Session() {}
namespace {
// HTTP response message begin
int htp_msg_begincb(http_parser *htp) {
int htp_msg_begincb(llhttp_t *htp) {
auto session = static_cast<Http1Session *>(htp->data);
if (session->stream_resp_counter_ > session->stream_req_counter_) {
......@@ -67,7 +53,7 @@ int htp_msg_begincb(http_parser *htp) {
namespace {
// HTTP response status code
int htp_statuscb(http_parser *htp, const char *at, size_t length) {
int htp_statuscb(llhttp_t *htp, const char *at, size_t length) {
auto session = static_cast<Http1Session *>(htp->data);
auto client = session->get_client();
......@@ -83,7 +69,7 @@ int htp_statuscb(http_parser *htp, const char *at, size_t length) {
namespace {
// HTTP response message complete
int htp_msg_completecb(http_parser *htp) {
int htp_msg_completecb(llhttp_t *htp) {
auto session = static_cast<Http1Session *>(htp->data);
auto client = session->get_client();
......@@ -91,7 +77,7 @@ int htp_msg_completecb(http_parser *htp) {
return 0;
}
client->final = http_should_keep_alive(htp) == 0;
client->final = llhttp_should_keep_alive(htp) == 0;
auto req_stat = client->get_req_stat(session->stream_resp_counter_);
assert(req_stat);
......@@ -106,14 +92,13 @@ int htp_msg_completecb(http_parser *htp) {
if (client->final) {
session->stream_req_counter_ = session->stream_resp_counter_;
http_parser_pause(htp, 1);
// Connection is going down.  If we still have requests to do,
// create a new connection and keep on doing the job.
if (client->req_left) {
client->try_new_connection();
}
return 0;
return HPE_PAUSED;
}
return 0;
......@@ -121,7 +106,7 @@ int htp_msg_completecb(http_parser *htp) {
} // namespace
namespace {
int htp_hdr_keycb(http_parser *htp, const char *data, size_t len) {
int htp_hdr_keycb(llhttp_t *htp, const char *data, size_t len) {
auto session = static_cast<Http1Session *>(htp->data);
auto client = session->get_client();
......@@ -132,7 +117,7 @@ int htp_hdr_keycb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
int htp_hdr_valcb(http_parser *htp, const char *data, size_t len) {
int htp_hdr_valcb(llhttp_t *htp, const char *data, size_t len) {
auto session = static_cast<Http1Session *>(htp->data);
auto client = session->get_client();
......@@ -143,13 +128,13 @@ int htp_hdr_valcb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
int htp_hdrs_completecb(http_parser *htp) {
int htp_hdrs_completecb(llhttp_t *htp) {
return !http2::expect_response_body(htp->status_code);
}
} // namespace
namespace {
int htp_body_cb(http_parser *htp, const char *data, size_t len) {
int htp_body_cb(llhttp_t *htp, const char *data, size_t len) {
auto session = static_cast<Http1Session *>(htp->data);
auto client = session->get_client();
......@@ -161,18 +146,32 @@ int htp_body_cb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
constexpr http_parser_settings htp_hooks = {
htp_msg_begincb, // http_cb on_message_begin;
nullptr, // http_data_cb on_url;
htp_statuscb, // http_data_cb on_status;
htp_hdr_keycb, // http_data_cb on_header_field;
htp_hdr_valcb, // http_data_cb on_header_value;
htp_hdrs_completecb, // http_cb on_headers_complete;
htp_body_cb, // http_data_cb on_body;
htp_msg_completecb // http_cb on_message_complete;
constexpr llhttp_settings_t htp_hooks = {
htp_msg_begincb, // llhttp_cb on_message_begin;
nullptr, // llhttp_data_cb on_url;
htp_statuscb, // llhttp_data_cb on_status;
htp_hdr_keycb, // llhttp_data_cb on_header_field;
htp_hdr_valcb, // llhttp_data_cb on_header_value;
htp_hdrs_completecb, // llhttp_cb on_headers_complete;
htp_body_cb, // llhttp_data_cb on_body;
htp_msg_completecb, // llhttp_cb on_message_complete;
nullptr, // llhttp_cb on_chunk_header
nullptr, // llhttp_cb on_chunk_complete
};
} // namespace
// Creates an HTTP/1 session bound to |client|.  The embedded llhttp
// parser is initialized in response mode with the htp_hooks callback
// table.
Http1Session::Http1Session(Client *client)
: stream_req_counter_(1),
stream_resp_counter_(1),
client_(client),
htp_(),
complete_(false) {
llhttp_init(&htp_, HTTP_RESPONSE, &htp_hooks);
// The parser callbacks recover this session object from htp->data.
htp_.data = this;
}
Http1Session::~Http1Session() {}
void Http1Session::on_connect() { client_->signal_write(); }
int Http1Session::submit_request() {
......@@ -202,15 +201,15 @@ int Http1Session::submit_request() {
}
int Http1Session::on_read(const uint8_t *data, size_t len) {
auto nread = http_parser_execute(&htp_, &htp_hooks,
reinterpret_cast<const char *>(data), len);
auto htperr =
llhttp_execute(&htp_, reinterpret_cast<const char *>(data), len);
auto nread = static_cast<size_t>(
reinterpret_cast<const uint8_t *>(llhttp_get_error_pos(&htp_)) - data);
if (client_->worker->config->verbose) {
std::cout.write(reinterpret_cast<const char *>(data), nread);
}
auto htperr = HTTP_PARSER_ERRNO(&htp_);
if (htperr == HPE_PAUSED) {
// pause is done only when connection: close is requested
return -1;
......@@ -218,8 +217,8 @@ int Http1Session::on_read(const uint8_t *data, size_t len) {
if (htperr != HPE_OK) {
std::cerr << "[ERROR] HTTP parse error: "
<< "(" << http_errno_name(htperr) << ") "
<< http_errno_description(htperr) << std::endl;
<< "(" << llhttp_errno_name(htperr) << ") "
<< llhttp_get_error_reason(&htp_) << std::endl;
return -1;
}
......
......@@ -29,6 +29,8 @@
#include <nghttp2/nghttp2.h>
#include "llhttp.h"
namespace h2load {
struct Client;
......@@ -49,7 +51,7 @@ public:
private:
Client *client_;
http_parser htp_;
llhttp_t htp_;
bool complete_;
};
......
......@@ -24,6 +24,8 @@
*/
#include "http2.h"
#include "llhttp.h"
#include "util.h"
namespace nghttp2 {
......@@ -1386,6 +1388,11 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
switch (namelen) {
case 3:
switch (name[2]) {
case 'L':
if (util::streq_l("AC", name, 2)) {
return HTTP_ACL;
}
break;
case 'T':
if (util::streq_l("GE", name, 2)) {
return HTTP_GET;
......@@ -1399,6 +1406,9 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
case 4:
switch (name[3]) {
case 'D':
if (util::streq_l("BIN", name, 3)) {
return HTTP_BIND;
}
if (util::streq_l("HEA", name, 3)) {
return HTTP_HEAD;
}
......@@ -1409,6 +1419,9 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
}
break;
case 'K':
if (util::streq_l("LIN", name, 3)) {
return HTTP_LINK;
}
if (util::streq_l("LOC", name, 3)) {
return HTTP_LOCK;
}
......@@ -1452,10 +1465,21 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
break;
case 6:
switch (name[5]) {
case 'D':
if (util::streq_l("REBIN", name, 5)) {
return HTTP_REBIND;
}
if (util::streq_l("UNBIN", name, 5)) {
return HTTP_UNBIND;
}
break;
case 'E':
if (util::streq_l("DELET", name, 5)) {
return HTTP_DELETE;
}
if (util::streq_l("SOURC", name, 5)) {
return HTTP_SOURCE;
}
break;
case 'H':
if (util::streq_l("SEARC", name, 5)) {
......@@ -1463,6 +1487,9 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
}
break;
case 'K':
if (util::streq_l("UNLIN", name, 5)) {
return HTTP_UNLINK;
}
if (util::streq_l("UNLOC", name, 5)) {
return HTTP_UNLOCK;
}
......@@ -1554,8 +1581,9 @@ int lookup_method_token(const uint8_t *name, size_t namelen) {
}
StringRef to_method_string(int method_token) {
// we happened to use same value for method with http-parser.
return StringRef{http_method_str(static_cast<http_method>(method_token))};
// we happened to use same value for method with llhttp.
return StringRef{
llhttp_method_name(static_cast<llhttp_method>(method_token))};
}
StringRef get_pure_path_component(const StringRef &uri) {
......
......@@ -35,7 +35,7 @@
#include <nghttp2/nghttp2.h>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "util.h"
#include "memchunk.h"
......@@ -396,15 +396,15 @@ bool expect_response_body(int method_token, int status_code);
bool expect_response_body(int status_code);
// Looks up method token for method name |name| of length |namelen|.
// Only methods defined in http-parser/http-parser.h (http_method) are
// tokenized. If method name cannot be tokenized, returns -1.
// Only methods defined in llhttp.h (llhttp_method) are tokenized. If
// method name cannot be tokenized, returns -1.
int lookup_method_token(const uint8_t *name, size_t namelen);
int lookup_method_token(const StringRef &name);
// Returns string representation of |method_token|. This is wrapper
// function over http_method_str from http-parser. If |method_token|
// is not known to http-parser, "<unknown>" is returned. The returned
// StringRef is guaranteed to be NULL-terminated.
// Returns the string representation of |method_token|.  This is a
// wrapper around llhttp_method_name from llhttp.  If |method_token|
// is unknown, the program aborts.  The returned StringRef is
// guaranteed to be NULL-terminated.
StringRef to_method_string(int method_token);
StringRef normalize_path(BlockAllocator &balloc, const StringRef &path,
......
......@@ -30,7 +30,7 @@
#include <CUnit/CUnit.h>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "http2.h"
#include "util.h"
......
......@@ -401,7 +401,7 @@ void ContinueTimer::dispatch_continue() {
}
namespace {
int htp_msg_begincb(http_parser *htp) {
int htp_msg_begincb(llhttp_t *htp) {
if (config.verbose) {
print_timer();
std::cout << " HTTP Upgrade response" << std::endl;
......@@ -411,7 +411,7 @@ int htp_msg_begincb(http_parser *htp) {
} // namespace
namespace {
int htp_msg_completecb(http_parser *htp) {
int htp_msg_completecb(llhttp_t *htp) {
auto client = static_cast<HttpClient *>(htp->data);
client->upgrade_response_status_code = htp->status_code;
client->upgrade_response_complete = true;
......@@ -420,15 +420,17 @@ int htp_msg_completecb(http_parser *htp) {
} // namespace
namespace {
constexpr http_parser_settings htp_hooks = {
htp_msg_begincb, // http_cb on_message_begin;
nullptr, // http_data_cb on_url;
nullptr, // http_data_cb on_status;
nullptr, // http_data_cb on_header_field;
nullptr, // http_data_cb on_header_value;
nullptr, // http_cb on_headers_complete;
nullptr, // http_data_cb on_body;
htp_msg_completecb // http_cb on_message_complete;
constexpr llhttp_settings_t htp_hooks = {
htp_msg_begincb, // llhttp_cb on_message_begin;
nullptr, // llhttp_data_cb on_url;
nullptr, // llhttp_data_cb on_status;
nullptr, // llhttp_data_cb on_header_field;
nullptr, // llhttp_data_cb on_header_value;
nullptr, // llhttp_cb on_headers_complete;
nullptr, // llhttp_data_cb on_body;
htp_msg_completecb, // llhttp_cb on_message_complete;
nullptr, // llhttp_cb on_chunk_header
nullptr, // llhttp_cb on_chunk_complete
};
} // namespace
......@@ -885,8 +887,8 @@ int HttpClient::connected() {
writefn = &HttpClient::write_clear;
if (need_upgrade()) {
htp = std::make_unique<http_parser>();
http_parser_init(htp.get(), HTTP_RESPONSE);
htp = std::make_unique<llhttp_t>();
llhttp_init(htp.get(), HTTP_RESPONSE, &htp_hooks);
htp->data = this;
return do_write();
......@@ -1031,19 +1033,20 @@ int HttpClient::on_upgrade_connect() {
int HttpClient::on_upgrade_read(const uint8_t *data, size_t len) {
int rv;
auto nread = http_parser_execute(htp.get(), &htp_hooks,
reinterpret_cast<const char *>(data), len);
auto htperr =
llhttp_execute(htp.get(), reinterpret_cast<const char *>(data), len);
auto nread = static_cast<size_t>(
reinterpret_cast<const uint8_t *>(llhttp_get_error_pos(htp.get())) -
data);
if (config.verbose) {
std::cout.write(reinterpret_cast<const char *>(data), nread);
}
auto htperr = HTTP_PARSER_ERRNO(htp.get());
if (htperr != HPE_OK) {
if (htperr != HPE_OK && htperr != HPE_PAUSED_UPGRADE) {
std::cerr << "[ERROR] Failed to parse HTTP Upgrade response header: "
<< "(" << http_errno_name(htperr) << ") "
<< http_errno_description(htperr) << std::endl;
<< "(" << llhttp_errno_name(htperr) << ") "
<< llhttp_get_error_reason(htp.get()) << std::endl;
return -1;
}
......
......@@ -47,7 +47,7 @@
#include <nghttp2/nghttp2.h>
#include "http-parser/http_parser.h"
#include "llhttp.h"
#include "memchunk.h"
#include "http2.h"
......@@ -265,7 +265,7 @@ struct HttpClient {
std::string host;
std::string hostport;
// Used to parse the HTTP upgrade response from the server
std::unique_ptr<http_parser> htp;
std::unique_ptr<llhttp_t> htp;
SessionTiming timing;
ev_io wev;
ev_io rev;
......
......@@ -51,7 +51,7 @@
#include <nghttp2/nghttp2.h>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "shrpx_log.h"
#include "shrpx_tls.h"
......
......@@ -26,7 +26,7 @@
#include <cassert>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "shrpx_upstream.h"
#include "shrpx_client_handler.h"
......
......@@ -38,6 +38,8 @@
#include <nghttp2/nghttp2.h>
#include "llhttp.h"
#include "shrpx_io_control.h"
#include "shrpx_log_config.h"
#include "http2.h"
......
......@@ -28,7 +28,7 @@
# include <unistd.h>
#endif // HAVE_UNISTD_H
#include "http-parser/http_parser.h"
#include "llhttp.h"
#include "shrpx_client_handler.h"
#include "shrpx_upstream.h"
......
......@@ -336,6 +336,25 @@ int Http2Session::resolve_name() {
}
}
namespace {
int htp_hdrs_completecb(llhttp_t *htp);
} // namespace
namespace {
constexpr llhttp_settings_t htp_hooks = {
nullptr, // llhttp_cb on_message_begin;
nullptr, // llhttp_data_cb on_url;
nullptr, // llhttp_data_cb on_status;
nullptr, // llhttp_data_cb on_header_field;
nullptr, // llhttp_data_cb on_header_value;
htp_hdrs_completecb, // llhttp_cb on_headers_complete;
nullptr, // llhttp_data_cb on_body;
nullptr, // llhttp_cb on_message_complete;
nullptr, // llhttp_cb on_chunk_header
nullptr, // llhttp_cb on_chunk_complete
};
} // namespace
int Http2Session::initiate_connection() {
int rv = 0;
......@@ -402,8 +421,8 @@ int Http2Session::initiate_connection() {
on_read_ = &Http2Session::downstream_read_proxy;
on_write_ = &Http2Session::downstream_connect_proxy;
proxy_htp_ = std::make_unique<http_parser>();
http_parser_init(proxy_htp_.get(), HTTP_RESPONSE);
proxy_htp_ = std::make_unique<llhttp_t>();
llhttp_init(proxy_htp_.get(), HTTP_RESPONSE, &htp_hooks);
proxy_htp_->data = this;
state_ = Http2SessionState::PROXY_CONNECTING;
......@@ -604,19 +623,12 @@ int Http2Session::initiate_connection() {
}
namespace {
int htp_hdrs_completecb(http_parser *htp) {
int htp_hdrs_completecb(llhttp_t *htp) {
auto http2session = static_cast<Http2Session *>(htp->data);
// We only read HTTP header part. If tunneling succeeds, response
// body is a different protocol (HTTP/2 in this case), we don't read
// them here.
//
// Here is a caveat: http-parser returns 1 less bytes if we pause
// here. The reason why they do this is probably they want to eat
// last 1 byte in s_headers_done state, on the other hand, this
// callback is called its previous state s_headers_almost_done. We
// will do "+ 1" to the return value to workaround this.
http_parser_pause(htp, 1);
// We just check status code here
if (htp->status_code / 100 == 2) {
......@@ -625,37 +637,19 @@ int htp_hdrs_completecb(http_parser *htp) {
}
http2session->set_state(Http2SessionState::PROXY_CONNECTED);
return 0;
return HPE_PAUSED;
}
SSLOG(WARN, http2session) << "Tunneling failed: " << htp->status_code;
http2session->set_state(Http2SessionState::PROXY_FAILED);
return 0;
return HPE_PAUSED;
}
} // namespace
namespace {
constexpr http_parser_settings htp_hooks = {
nullptr, // http_cb on_message_begin;
nullptr, // http_data_cb on_url;
nullptr, // http_data_cb on_status;
nullptr, // http_data_cb on_header_field;
nullptr, // http_data_cb on_header_value;
htp_hdrs_completecb, // http_cb on_headers_complete;
nullptr, // http_data_cb on_body;
nullptr // http_cb on_message_complete;
};
} // namespace
int Http2Session::downstream_read_proxy(const uint8_t *data, size_t datalen) {
auto nread =
http_parser_execute(proxy_htp_.get(), &htp_hooks,
reinterpret_cast<const char *>(data), datalen);
(void)nread;
auto htperr = HTTP_PARSER_ERRNO(proxy_htp_.get());
auto htperr = llhttp_execute(proxy_htp_.get(),
reinterpret_cast<const char *>(data), datalen);
if (htperr == HPE_PAUSED) {
switch (state_) {
case Http2SessionState::PROXY_CONNECTED:
......
......@@ -36,7 +36,7 @@
#include <nghttp2/nghttp2.h>
#include "http-parser/http_parser.h"
#include "llhttp.h"
#include "shrpx_connection.h"
#include "buffer.h"
......@@ -265,7 +265,7 @@ private:
std::function<int(Http2Session &, const uint8_t *, size_t)> on_read_;
std::function<int(Http2Session &)> on_write_;
// Used to parse the response from HTTP proxy
std::unique_ptr<http_parser> proxy_htp_;
std::unique_ptr<llhttp_t> proxy_htp_;
Worker *worker_;
// NULL if no TLS is configured
SSL_CTX *ssl_ctx_;
......
......@@ -243,6 +243,30 @@ int HttpDownstreamConnection::attach_downstream(Downstream *downstream) {
return 0;
}
namespace {
int htp_msg_begincb(llhttp_t *htp);
int htp_hdr_keycb(llhttp_t *htp, const char *data, size_t len);
int htp_hdr_valcb(llhttp_t *htp, const char *data, size_t len);
int htp_hdrs_completecb(llhttp_t *htp);
int htp_bodycb(llhttp_t *htp, const char *data, size_t len);
int htp_msg_completecb(llhttp_t *htp);
} // namespace
namespace {
constexpr llhttp_settings_t htp_hooks = {
htp_msg_begincb, // llhttp_cb on_message_begin;
nullptr, // llhttp_data_cb on_url;
nullptr, // llhttp_data_cb on_status;
htp_hdr_keycb, // llhttp_data_cb on_header_field;
htp_hdr_valcb, // llhttp_data_cb on_header_value;
htp_hdrs_completecb, // llhttp_cb on_headers_complete;
htp_bodycb, // llhttp_data_cb on_body;
htp_msg_completecb, // llhttp_cb on_message_complete;
nullptr, // llhttp_cb on_chunk_header
nullptr, // llhttp_cb on_chunk_complete
};
} // namespace
int HttpDownstreamConnection::initiate_connection() {
int rv;
......@@ -416,7 +440,7 @@ int HttpDownstreamConnection::initiate_connection() {
request_header_written_ = false;
}
http_parser_init(&response_htp_, HTTP_RESPONSE);
llhttp_init(&response_htp_, HTTP_RESPONSE, &htp_hooks);
response_htp_.data = downstream_;
return 0;
......@@ -855,11 +879,12 @@ void HttpDownstreamConnection::force_resume_read() {
}
namespace {
int htp_msg_begincb(http_parser *htp) {
int htp_msg_begincb(llhttp_t *htp) {
auto downstream = static_cast<Downstream *>(htp->data);
if (downstream->get_response_state() != DownstreamState::INITIAL) {
return -1;
llhttp_set_error_reason(htp, "HTTP message started when it shouldn't");
return HPE_USER;
}
return 0;
......@@ -867,7 +892,7 @@ int htp_msg_begincb(http_parser *htp) {
} // namespace
namespace {
int htp_hdrs_completecb(http_parser *htp) {
int htp_hdrs_completecb(llhttp_t *htp) {
auto downstream = static_cast<Downstream *>(htp->data);
auto upstream = downstream->get_upstream();
auto handler = upstream->get_client_handler();
......@@ -948,7 +973,7 @@ int htp_hdrs_completecb(http_parser *htp) {
return 1;
}
resp.connection_close = !http_should_keep_alive(htp);
resp.connection_close = !llhttp_should_keep_alive(htp);
downstream->set_response_state(DownstreamState::HEADER_COMPLETE);
downstream->inspect_http1_response();
if (downstream->get_upgraded()) {
......@@ -994,7 +1019,7 @@ int htp_hdrs_completecb(http_parser *htp) {
// https://tools.ietf.org/html/rfc7230#section-3.3
// TODO It seems that the cases other than HEAD are handled by
// http-parser. Need test.
// llhttp. Need test.
return !http2::expect_response_body(req.method, resp.http_status);
}
} // namespace
......@@ -1034,7 +1059,7 @@ int ensure_max_header_fields(const Downstream *downstream,
} // namespace
namespace {
int htp_hdr_keycb(http_parser *htp, const char *data, size_t len) {
int htp_hdr_keycb(llhttp_t *htp, const char *data, size_t len) {
auto downstream = static_cast<Downstream *>(htp->data);
auto &resp = downstream->response();
auto &httpconf = get_config()->http;
......@@ -1071,7 +1096,7 @@ int htp_hdr_keycb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
int htp_hdr_valcb(http_parser *htp, const char *data, size_t len) {
int htp_hdr_valcb(llhttp_t *htp, const char *data, size_t len) {
auto downstream = static_cast<Downstream *>(htp->data);
auto &resp = downstream->response();
auto &httpconf = get_config()->http;
......@@ -1090,7 +1115,7 @@ int htp_hdr_valcb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
int htp_bodycb(http_parser *htp, const char *data, size_t len) {
int htp_bodycb(llhttp_t *htp, const char *data, size_t len) {
auto downstream = static_cast<Downstream *>(htp->data);
auto &resp = downstream->response();
......@@ -1102,14 +1127,13 @@ int htp_bodycb(http_parser *htp, const char *data, size_t len) {
} // namespace
namespace {
int htp_msg_completecb(http_parser *htp) {
int htp_msg_completecb(llhttp_t *htp) {
auto downstream = static_cast<Downstream *>(htp->data);
// http-parser does not treat "200 connection established" response
// llhttp does not treat "200 connection established" response
// against CONNECT request, and in that case, this function is not
// called. But if HTTP Upgrade is made (e.g., WebSocket), this
// function is called, and http_parser_execute() returns just after
// that.
// function is called, and llhttp_execute() returns just after that.
if (downstream->get_upgraded()) {
return 0;
}
......@@ -1129,19 +1153,6 @@ int htp_msg_completecb(http_parser *htp) {
}
} // namespace
namespace {
constexpr http_parser_settings htp_hooks = {
htp_msg_begincb, // http_cb on_message_begin;
nullptr, // http_data_cb on_url;
nullptr, // http_data_cb on_status;
htp_hdr_keycb, // http_data_cb on_header_field;
htp_hdr_valcb, // http_data_cb on_header_value;
htp_hdrs_completecb, // http_cb on_headers_complete;
htp_bodycb, // http_data_cb on_body;
htp_msg_completecb // http_cb on_message_complete;
};
} // namespace
int HttpDownstreamConnection::write_first() {
int rv;
......@@ -1389,13 +1400,14 @@ int HttpDownstreamConnection::process_input(const uint8_t *data,
return 0;
}
auto nproc =
http_parser_execute(&response_htp_, &htp_hooks,
reinterpret_cast<const char *>(data), datalen);
auto htperr = HTTP_PARSER_ERRNO(&response_htp_);
auto htperr = llhttp_execute(&response_htp_,
reinterpret_cast<const char *>(data), datalen);
auto nproc = static_cast<size_t>(
reinterpret_cast<const uint8_t *>(llhttp_get_error_pos(&response_htp_)) -
data);
if (htperr != HPE_OK) {
if (htperr != HPE_OK &&
(!downstream_->get_upgraded() || htperr != HPE_PAUSED_UPGRADE)) {
// Handling early return (in other words, response was hijacked by
// mruby scripting).
if (downstream_->get_response_state() == DownstreamState::MSG_COMPLETE) {
......@@ -1404,8 +1416,8 @@ int HttpDownstreamConnection::process_input(const uint8_t *data,
if (LOG_ENABLED(INFO)) {
DCLOG(INFO, this) << "HTTP parser failure: "
<< "(" << http_errno_name(htperr) << ") "
<< http_errno_description(htperr);
<< "(" << llhttp_errno_name(htperr) << ") "
<< llhttp_get_error_reason(&response_htp_);
}
return -1;
......
......@@ -27,7 +27,7 @@
#include "shrpx.h"
#include "http-parser/http_parser.h"
#include "llhttp.h"
#include "shrpx_downstream_connection.h"
#include "shrpx_io_control.h"
......@@ -110,7 +110,7 @@ private:
std::unique_ptr<Address> resolved_addr_;
std::unique_ptr<DNSQuery> dns_query_;
IOControl ioctrl_;
http_parser response_htp_;
llhttp_t response_htp_;
// true if first write succeeded.
bool first_write_done_;
// true if this object can be reused
......
This diff is collapsed.
......@@ -30,7 +30,7 @@
#include <cinttypes>
#include <memory>
#include "http-parser/http_parser.h"
#include "llhttp.h"
#include "shrpx_upstream.h"
#include "memchunk.h"
......@@ -100,7 +100,7 @@ public:
private:
ClientHandler *handler_;
http_parser htp_;
llhttp_t htp_;
size_t current_header_length_;
std::unique_ptr<Downstream> downstream_;
IOControl ioctrl_;
......
......@@ -594,7 +594,8 @@ void upstream_accesslog(const std::vector<LogFragment> &lfv,
auto &balloc = downstream->get_block_allocator();
auto downstream_addr = downstream->get_addr();
auto method = http2::to_method_string(req.method);
auto method = req.method == -1 ? StringRef::from_lit("<unknown>")
: http2::to_method_string(req.method);
auto path = req.method == HTTP_CONNECT
? req.authority
: config->http2_proxy
......
......@@ -47,7 +47,7 @@
#include <map>
#include <random>
#include "http-parser/http_parser.h"
#include "url-parser/url_parser.h"
#include "template.h"
#include "network.h"
......
if(ENABLE_THIRD_PARTY)
set(LIBHTTP_PARSER_SOURCES
http-parser/http_parser.c
set(LIBLLHTTP_SOURCES
llhttp/src/api.c
llhttp/src/http.c
llhttp/src/llhttp.c
)
add_library(http-parser OBJECT ${LIBHTTP_PARSER_SOURCES})
set_target_properties(http-parser PROPERTIES
add_library(llhttp OBJECT ${LIBLLHTTP_SOURCES})
target_include_directories(llhttp PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/llhttp/include"
)
set_target_properties(llhttp PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
set(LIBURL_PARSER_SOURCES
url-parser/url_parser.c
)
add_library(url-parser OBJECT ${LIBURL_PARSER_SOURCES})
set_target_properties(url-parser PROPERTIES
POSITION_INDEPENDENT_CODE ON)
if(HAVE_NEVERBLEED)
......
......@@ -27,10 +27,18 @@ EXTRA_DIST = CMakeLists.txt
if ENABLE_THIRD_PARTY
noinst_LTLIBRARIES = libhttp-parser.la
libhttp_parser_la_SOURCES = \
http-parser/http_parser.c \
http-parser/http_parser.h
noinst_LTLIBRARIES = liburl-parser.la
liburl_parser_la_SOURCES = \
url-parser/url_parser.c \
url-parser/url_parser.h
noinst_LTLIBRARIES += libllhttp.la
libllhttp_la_SOURCES = \
llhttp/src/api.c \
llhttp/src/http.c \
llhttp/src/llhttp.c \
llhttp/include/llhttp.h
libllhttp_la_CPPFLAGS = -I${srcdir}/llhttp/include
if HAVE_NEVERBLEED
noinst_LTLIBRARIES += libneverbleed.la
......
HTTP Parser
===========
[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
This is a parser for HTTP messages written in C. It parses both requests and
responses. The parser is designed to be used in performance HTTP
applications. It does not make any syscalls nor allocations, it does not
buffer data, it can be interrupted at any time. Depending on your
architecture, it only requires about 40 bytes of data per message
stream (in a web server that is per connection).
Features:
* No dependencies
* Handles persistent streams (keep-alive).
* Decodes chunked encoding.
* Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request URL
* Message body
Usage
-----
One `http_parser` object is used per TCP connection. Initialize the struct
using `http_parser_init()` and set the callbacks. That might look something
like this for a request parser:
```c
http_parser_settings settings;
settings.on_url = my_url_callback;
settings.on_header_field = my_header_field_callback;
/* ... */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST);
parser->data = my_socket;
```
When data is received on the socket execute the parser and check for errors.
```c
size_t len = 80*1024, nparsed;
char buf[len];
ssize_t recved;
recved = recv(fd, buf, len, 0);
if (recved < 0) {
/* Handle error. */
}
/* Start up / continue the parser.
* Note we pass recved==0 to signal that EOF has been received.
*/
nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
} else if (nparsed != recved) {
/* Handle error. Usually just close the connection. */
}
```
`http_parser` needs to know where the end of the stream is. For example, sometimes
servers send responses without Content-Length and expect the client to
consume input (for the body) until EOF. To tell `http_parser` about EOF, give
`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
can still be encountered during an EOF, so one must still be prepared
to receive them.
Scalar valued message information such as `status_code`, `method`, and the
HTTP version are stored in the parser structure. This data is only
temporarily stored in `http_parser` and gets reset on each new message. If
this information is needed later, copy it out of the structure during the
`headers_complete` callback.
The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
The Special Problem of Upgrade
------------------------------
`http_parser` supports upgrading the connection to a different protocol. An
increasingly common example of this is the WebSocket protocol which sends
a request like
GET /demo HTTP/1.1
Upgrade: WebSocket
Connection: Upgrade
Host: example.com
Origin: http://example.com
WebSocket-Protocol: sample
followed by non-HTTP data.
(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
WebSocket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body, issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() will stop parsing at the end of the headers and return.
The user is expected to check if `parser->upgrade` has been set to 1 after
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
offset by the return value of `http_parser_execute()`.
Callbacks
---------
During the `http_parser_execute()` call, the callbacks set in
`http_parser_settings` will be executed. The parser maintains state and
never looks behind, so buffering the data is not necessary. If you need to
save certain data for later usage, you can do that from the callbacks.
There are two types of callbacks:
* notification `typedef int (*http_cb) (http_parser*);`
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
Callbacks: (requests only) on_url,
(common) on_header_field, on_header_value, on_body;
Callbacks must return 0 on success. Returning a non-zero value indicates
error to the parser, making it exit immediately.
For cases where it is necessary to pass local information to/from a callback,
the `http_parser` object's `data` field can be used.
An example of such a case is when using threads to handle a socket connection,
parse a request, and then give a response over that socket. By instantiation
of a thread-local struct containing relevant data (e.g. accepted socket,
allocated memory for callbacks to write into, etc), a parser's callbacks are
able to communicate data between the scope of the thread and the scope of the
callback in a threadsafe manner. This allows `http_parser` to be used in
multi-threaded contexts.
Example:
```c
typedef struct {
socket_t sock;
void* buffer;
int buf_len;
} custom_data_t;
int my_url_callback(http_parser* parser, const char *at, size_t length) {
/* access to thread local custom_data_t struct.
Use this access to save parsed data for later use into thread local
buffer, or communicate over socket
*/
parser->data;
...
return 0;
}
...
void http_parser_thread(socket_t sock) {
int nparsed = 0;
/* allocate memory for user data */
custom_data_t *my_data = malloc(sizeof(custom_data_t));
/* some information for use by callbacks.
* achieves thread -> callback information flow */
my_data->sock = sock;
/* instantiate a thread-local parser */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
/* this custom data reference is accessible through the reference to the
parser supplied to callback functions */
parser->data = my_data;
http_parser_settings settings; /* set up callbacks */
settings.on_url = my_url_callback;
/* execute parser */
nparsed = http_parser_execute(parser, &settings, buf, recved);
...
/* parsed information copied from callback.
can now perform action on data copied into thread-local memory from callbacks.
achieves callback -> thread information flow */
my_data->buffer;
...
}
```
In case you parse HTTP message in chunks (i.e. `read()` request line
from socket, parse, read half headers, parse, etc) your data callbacks
may be called more than once. `http_parser` guarantees that data pointer is only
valid for the lifetime of callback. You can also `read()` into a heap allocated
buffer to avoid copying memory around if this fits your application.
Reading headers may be a tricky task if you read/parse headers partially.
Basically, you need to remember whether last header callback was field or value
and apply the following logic:
(on_header_field and on_header_value shortened to on_h_*)
------------------------ ------------ --------------------------------------------
| State (prev. callback) | Callback | Description/action |
------------------------ ------------ --------------------------------------------
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
| | | into it |
------------------------ ------------ --------------------------------------------
| value | on_h_field | New header started. |
| | | Copy current name,value buffers to headers |
| | | list and allocate new buffer for new name |
------------------------ ------------ --------------------------------------------
| field | on_h_field | Previous name continues. Reallocate name |
| | | buffer and append callback data to it |
------------------------ ------------ --------------------------------------------
| field | on_h_value | Value for current header started. Allocate |
| | | new buffer and copy callback data to it |
------------------------ ------------ --------------------------------------------
| value | on_h_value | Value continues. Reallocate value buffer |
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
Parsing URLs
------------
A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
Users of this library may wish to use it to parse URLs constructed from
consecutive `on_url` callbacks.
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C
* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C
* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript
/* Copyright Fedor Indutny. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "http_parser.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
/* Total amount of request data one benchmark run is sized for: 8 GiB
 * (8 * 2^30 bytes). main() divides this by data_len to get the iteration
 * count. */
static const int64_t kBytes = 8LL << 30;
/* The request that bench() feeds to the parser on every iteration: a POST
 * with a set of browser-style headers and a chunked body
 * ("Transfer-Encoding: chunked", one 0xb-byte chunk followed by the zero
 * chunk). */
static const char data[] =
"POST /joyent/http-parser HTTP/1.1\r\n"
"Host: github.com\r\n"
"DNT: 1\r\n"
"Accept-Encoding: gzip, deflate, sdch\r\n"
"Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n"
"User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/39.0.2171.65 Safari/537.36\r\n"
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/webp,*/*;q=0.8\r\n"
"Referer: https://github.com/joyent/http-parser\r\n"
"Connection: keep-alive\r\n"
"Transfer-Encoding: chunked\r\n"
"Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n";
/* Number of request bytes, excluding the string literal's terminating NUL. */
static const size_t data_len = sizeof(data) - 1;
/* Notification callback for the benchmark: does nothing and returns 0 so
 * the parser keeps going. */
static int on_info(http_parser* p) {
  (void) p;  /* parser state is not inspected by the benchmark */
  return 0;
}
/* Data callback for the benchmark: discards the reported span and returns 0
 * so the parser keeps going. */
static int on_data(http_parser* p, const char *at, size_t length) {
  (void) p;
  (void) at;
  (void) length;
  return 0;
}
/* Callback table handed to http_parser_execute() by bench(). Every hook is a
 * no-op; members not named here are zero-initialized. */
static http_parser_settings settings = {
  /* data callbacks */
  .on_url = on_data,
  .on_status = on_data,
  .on_header_field = on_data,
  .on_header_value = on_data,
  .on_body = on_data,
  /* notification callbacks */
  .on_message_begin = on_info,
  .on_headers_complete = on_info,
  .on_message_complete = on_info
};
/* Parse the canned request `iter_count` times and, unless `silent` is
 * non-zero, print throughput statistics to stdout.
 *
 * iter_count  number of times the request in `data` is parsed
 * silent      non-zero suppresses timing and the result line
 *
 * Always returns 0. Timing and parse failures are caught by assert(), so
 * they are only detected in builds without NDEBUG.
 */
int bench(int iter_count, int silent) {
  struct http_parser parser;
  struct timeval start;
  struct timeval end;
  int i;
  int err;

  if (!silent) {
    err = gettimeofday(&start, NULL);
    assert(err == 0);
    (void) err;  /* keep NDEBUG builds free of set-but-unused warnings */
  }

  fprintf(stderr, "req_len=%d\n", (int) data_len);
  for (i = 0; i < iter_count; i++) {
    size_t parsed;

    /* A fresh parser per iteration: each pass parses one complete request. */
    http_parser_init(&parser, HTTP_REQUEST);

    parsed = http_parser_execute(&parser, &settings, data, data_len);
    assert(parsed == data_len);
    (void) parsed;  /* see note on err above */
  }

  if (!silent) {
    double elapsed;
    double bw;
    double total;

    err = gettimeofday(&end, NULL);
    assert(err == 0);
    (void) err;

    fprintf(stdout, "Benchmark result:\n");

    /* Do the microsecond arithmetic in double precision; the previous
     * `1e-6f` literal forced this term through single-precision float. */
    elapsed = (double) (end.tv_sec - start.tv_sec) +
              (double) (end.tv_usec - start.tv_usec) * 1e-6;

    total = (double) iter_count * data_len;
    bw = (double) total / elapsed;

    fprintf(stdout, "%.2f mb | %.2f mb/s | %.2f req/sec | %.2f s\n",
            (double) total / (1024 * 1024),
            bw / (1024 * 1024),
            (double) iter_count / elapsed,
            elapsed);

    fflush(stdout);
  }

  return 0;
}
/* Benchmark entry point.
 *
 * With the single argument "infinite" the benchmark is repeated forever in
 * silent mode; otherwise it runs once and prints the result. The iteration
 * count is chosen so that one run parses about kBytes of request data.
 */
int main(int argc, char** argv) {
  const int64_t iterations = kBytes / (int64_t) data_len;
  const int run_forever = (argc == 2 && strcmp(argv[1], "infinite") == 0);

  if (!run_forever) {
    return bench(iterations, 0);
  }

  for (;;) {
    bench(iterations, 1);
  }
  return 0;  /* not reached */
}
/* Copyright Joyent, Inc. and other Node contributors.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/* Dump what the parser finds to stdout as it happens */
#include "http_parser.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Notification callback: prints a banner when a new message starts. */
int on_message_begin(http_parser* _) {
  (void)_;
  fputs("\n***MESSAGE BEGIN***\n\n", stdout);
  return 0;
}
/* Notification callback: prints a banner once all headers were parsed. */
int on_headers_complete(http_parser* _) {
  (void)_;
  fputs("\n***HEADERS COMPLETE***\n\n", stdout);
  return 0;
}
/* Notification callback: prints a banner when the message is complete. */
int on_message_complete(http_parser* _) {
  (void)_;
  fputs("\n***MESSAGE COMPLETE***\n\n", stdout);
  return 0;
}
/* Data callback: prints the URL span reported by the parser. `at` is not
 * NUL-terminated, so the length is passed as the printf precision. */
int on_url(http_parser* _, const char* at, size_t length) {
  const int shown = (int)length;  /* "%.*s" takes its precision as int */

  (void)_;
  printf("Url: %.*s\n", shown, at);
  return 0;
}
/* Data callback: prints the header-name span reported by the parser. */
int on_header_field(http_parser* _, const char* at, size_t length) {
  const int shown = (int)length;  /* "%.*s" takes its precision as int */

  (void)_;
  printf("Header field: %.*s\n", shown, at);
  return 0;
}
/* Data callback: prints the header-value span reported by the parser. */
int on_header_value(http_parser* _, const char* at, size_t length) {
  const int shown = (int)length;  /* "%.*s" takes its precision as int */

  (void)_;
  printf("Header value: %.*s\n", shown, at);
  return 0;
}
/* Data callback: prints the body span reported by the parser. */
int on_body(http_parser* _, const char* at, size_t length) {
  const int shown = (int)length;  /* "%.*s" takes its precision as int */

  (void)_;
  printf("Body: %.*s\n", shown, at);
  return 0;
}
/* Print the command-line synopsis to stderr and terminate the process with
 * EXIT_FAILURE. Does not return.
 *
 * name  program name shown in the synopsis (argv[0])
 */
void usage(const char* name) {
  fprintf(stderr, "Usage: %s $type $filename\n", name);
  fputs(" type: -x, where x is one of {r,b,q}\n"
        " parses file as a Response, reQuest, or Both\n",
        stderr);
  exit(EXIT_FAILURE);
}
/* Parse the file named on the command line as HTTP and trace every parser
 * callback to stdout.
 *
 * argv[1]  "-r", "-q" or "-b": parse as Response, reQuest or Both
 * argv[2]  path of the file holding the raw HTTP message(s)
 *
 * Returns EXIT_SUCCESS when the whole file was consumed by the parser and
 * EXIT_FAILURE on any I/O, allocation or parse error.
 */
int main(int argc, char* argv[]) {
  enum http_parser_type file_type;
  http_parser_settings settings;
  http_parser parser;
  FILE* file = NULL;
  char* data = NULL;
  char* type;
  long file_length;
  size_t nparsed;
  int status = EXIT_FAILURE;

  if (argc != 3) {
    usage(argv[0]);
  }

  type = argv[1];
  if (type[0] != '-') {
    usage(argv[0]);
  }

  switch (type[1]) {
    /* in the case of "-", type[1] will be NUL */
    case 'r':
      file_type = HTTP_RESPONSE;
      break;
    case 'q':
      file_type = HTTP_REQUEST;
      break;
    case 'b':
      file_type = HTTP_BOTH;
      break;
    default:
      usage(argv[0]);
      /* usage() exits; this return only tells the compiler that file_type
       * is never read uninitialized. */
      return EXIT_FAILURE;
  }

  /* Binary mode: the payload contains CRLF sequences that must reach the
   * parser untouched, and the ftell() size must match what fread() returns. */
  file = fopen(argv[2], "rb");
  if (file == NULL) {
    perror("fopen");
    goto done;
  }

  if (fseek(file, 0, SEEK_END) != 0) {
    perror("fseek");
    goto done;
  }
  file_length = ftell(file);
  if (file_length == -1) {
    perror("ftell");
    goto done;
  }
  if (fseek(file, 0, SEEK_SET) != 0) {
    perror("fseek");
    goto done;
  }

  /* malloc(0) may return NULL without being an error, so always request at
   * least one byte; that keeps the NULL check below unambiguous. */
  data = malloc(file_length > 0 ? (size_t)file_length : 1);
  if (data == NULL) {
    perror("malloc");
    goto done;
  }
  if (fread(data, 1, file_length, file) != (size_t)file_length) {
    fprintf(stderr, "couldn't read entire file\n");
    goto done;
  }

  memset(&settings, 0, sizeof(settings));
  settings.on_message_begin = on_message_begin;
  settings.on_url = on_url;
  settings.on_header_field = on_header_field;
  settings.on_header_value = on_header_value;
  settings.on_headers_complete = on_headers_complete;
  settings.on_body = on_body;
  settings.on_message_complete = on_message_complete;

  http_parser_init(&parser, file_type);
  nparsed = http_parser_execute(&parser, &settings, data, file_length);

  if (nparsed != (size_t)file_length) {
    fprintf(stderr,
            "Error: %s (%s)\n",
            http_errno_description(HTTP_PARSER_ERRNO(&parser)),
            http_errno_name(HTTP_PARSER_ERRNO(&parser)));
    goto done;
  }

  status = EXIT_SUCCESS;

done:
  /* Single exit path: free(NULL) is a no-op, but fclose(NULL) is undefined
   * behaviour, so the stream is only closed when it was actually opened. */
  free(data);
  if (file != NULL) {
    fclose(file);
  }
  return status;
}
#include "http_parser.h"
#include <stdio.h>
#include <string.h>
/* Print the parse result `u` for `url` to stdout: the field bitmask and
 * port first, then one line per URL field giving either its offset, length
 * and text, or "unset" when the field's bit is clear in field_set.
 *
 * url  the string that was parsed; field offsets index into it
 * u    the result to dump
 */
void
dump_url (const char *url, const struct http_parser_url *u)
{
  unsigned int field;

  printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);

  for (field = 0; field < UF_MAX; field++) {
    if (u->field_set & (1 << field)) {
      printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
             field,
             u->field_data[field].off,
             u->field_data[field].len,
             u->field_data[field].len,
             url + u->field_data[field].off);
    } else {
      printf("\tfield_data[%u]: unset\n", field);
    }
  }
}
/* Parse one URL given on the command line and dump the result.
 *
 * argv[1]  "connect" to parse in CONNECT mode; any other word parses
 *          without it
 * argv[2]  the URL to parse
 *
 * Returns 0 on success, 1 on wrong usage, or the non-zero result of
 * http_parser_parse_url() when parsing fails.
 */
int main(int argc, char ** argv) {
  struct http_parser_url u;
  const char *mode;
  const char *url;
  int len, connect, result;

  if (argc != 3) {
    printf("Syntax : %s connect|get url\n", argv[0]);
    return 1;
  }

  mode = argv[1];
  url = argv[2];
  len = strlen(url);
  connect = (strcmp("connect", mode) == 0);
  printf("Parsing %s, connect %d\n", url, connect);

  http_parser_url_init(&u);
  result = http_parser_parse_url(url, len, connect, &u);
  if (result != 0) {
    printf("Parse error : %d\n", result);
    return result;
  }

  printf("Parse ok, result : \n");
  dump_url(url, &u);
  return 0;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This software is licensed under the MIT License.
Copyright Fedor Indutny, 2018.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the
following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
# llhttp
[![Build Status](https://secure.travis-ci.org/indutny/llhttp.svg)](http://travis-ci.org/nodejs/llhttp)
Port of [http_parser][0] to [llparse][1].
## Why?
Let's face it, [http_parser][0] is practically unmaintainable. Even
introduction of a single new method results in a significant code churn.
This project aims to:
* Make it maintainable
* Verifiable
* Improving benchmarks where possible
## How?
Over time, different approaches for improving [http_parser][0]'s code base
were tried. However, all of them failed due to resulting significant performance
degradation.
This project is a port of [http_parser][0] to TypeScript. [llparse][1] is used
to generate the output C and/or bitcode artifacts, which could be compiled and
linked with the embedder's program (like [Node.js][7]).
## Performance
So far llhttp outperforms http_parser:
| | input size | bandwidth | reqs/sec | time |
|:----------------|-----------:|-------------:|-----------:|--------:|
| **llhttp** _(C)_ | 8192.00 mb | 1497.88 mb/s | 3020458.87 ops/sec | 5.47 s |
| **llhttp** _(bitcode)_ | 8192.00 mb | 1131.75 mb/s | 2282171.24 ops/sec | 7.24 s |
| **http_parser** | 8192.00 mb | 694.66 mb/s | 1406180.33 req/sec | 11.79 s |
llhttp is faster by approximately **116%**.
## Maintenance
llhttp project has about 1400 lines of TypeScript code describing the parser
itself and around 450 lines of C code and headers providing the helper methods.
The whole [http_parser][0] is implemented in approximately 2500 lines of C, and
436 lines of headers.
All optimizations and multi-character matching in llhttp are generated
automatically, and thus don't add any extra maintenance cost. On the contrary,
most of http_parser's code is hand-optimized and unrolled. Instead of describing
"how" it should parse the HTTP requests/responses, a maintainer should
implement the new features in [http_parser][0] cautiously, considering
possible performance degradation and manually optimizing the new code.
## Verification
The state machine graph is encoded explicitly in llhttp. The [llparse][1]
automatically checks the graph for absence of loops and correct reporting of the
input ranges (spans) like header names and values. In the future, additional
checks could be performed to get even stricter verification of the llhttp.
## Usage
```C
#include "llhttp.h"
llhttp_t parser;
llhttp_settings_t settings;
/* Initialize user callbacks and settings */
llhttp_settings_init(&settings);
/* Set user callback */
settings.on_message_complete = handle_on_message_complete;
/* Initialize the parser in HTTP_BOTH mode, meaning that it will select between
* HTTP_REQUEST and HTTP_RESPONSE parsing automatically while reading the first
* input.
*/
llhttp_init(&parser, HTTP_BOTH, &settings);
/* Use `llhttp_set_type(&parser, HTTP_REQUEST);` to override the mode */
/* Parse request! */
const char* request = "GET / HTTP/1.1\r\n\r\n";
int request_len = strlen(request);
enum llhttp_errno err = llhttp_execute(&parser, request, request_len);
if (err == HPE_OK) {
/* Successfully parsed! */
} else {
fprintf(stderr, "Parse error: %s %s\n", llhttp_errno_name(err),
parser.reason);
}
```
---
#### LICENSE
This software is licensed under the MIT License.
Copyright Fedor Indutny, 2018.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the
following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
[0]: https://github.com/nodejs/http-parser
[1]: https://github.com/nodejs/llparse
[2]: https://en.wikipedia.org/wiki/Register_allocation#Spilling
[3]: https://en.wikipedia.org/wiki/Tail_call
[4]: https://llvm.org/docs/LangRef.html
[5]: https://llvm.org/docs/LangRef.html#call-instruction
[6]: https://clang.llvm.org/
[7]: https://github.com/nodejs/node
This diff is collapsed.
# GYP description of the llhttp static library target.
{
'targets': [
{
'target_name': 'llhttp',
'type': 'static_library',
# Header search paths used while compiling the sources listed below.
'include_dirs': [ '.', 'include' ],
# NOTE(review): in GYP these settings are presumably exported to targets
# that depend directly on llhttp, so they find the header in 'include' —
# confirm against the GYP documentation.
'direct_dependent_settings': {
'include_dirs': [ 'include' ],
},
'sources': [ 'src/llhttp.c', 'src/api.c', 'src/http.c' ],
},
]
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment