commit 49d5a8ae1e1ab97f176d25618cb768e1e54a6b3a
parent cb449cfa2b92fbdd397e35cc39656b33510d19b1
Author: Mikołaj Lenczewski <mblenczewski@gmail.com>
Date: Sat, 29 Nov 2025 19:42:58 +0000
Added picohttpparser and my own http parser implementations to bench.c
Diffstat:
| M | .gitignore | | | 6 | ++++-- |
| M | bench.c | | | 128 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------- |
| A | build.sh | | | 5 | +++++ |
| A | clean.sh | | | 5 | +++++ |
| A | mblhttp.h | | | 159 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | picohttpparser.c | | | 707 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | picohttpparser.h | | | 90 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
7 files changed, 1076 insertions(+), 24 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
*.out
-bench
-\ No newline at end of file
+bench
+
+**/.*.swp
+tags
diff --git a/bench.c b/bench.c
@@ -1,5 +1,7 @@
// A benchmark adapted from https://github.com/h2o/picohttpparser
+#define _GNU_SOURCE 1
+
#include <assert.h>
#include <stdio.h>
#include <time.h>
@@ -8,6 +10,11 @@
#define HTTPP_IMPLEMENTATION
#include "httpp.h"
+#include "picohttpparser.h"
+#include "picohttpparser.c"
+
+#include "mblhttp.h"
+
#define REQ \
"GET /cookies HTTP/1.1\r\n" \
"Host: 127.0.0.1:8090\r\n" \
@@ -25,34 +32,111 @@
+/* Reads CLOCK_MONOTONIC and returns it as seconds in a double (tv_sec plus
+ * tv_nsec / 1e9). The clock_gettime() return value is not checked. */
double now_seconds()
{
- struct timespec ts;
- clock_gettime(CLOCK_MONOTONIC, &ts);
- return ts.tv_sec + ts.tv_nsec / 1e9;
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec + ts.tv_nsec / 1e9;
+}
+
+/*
+ * Times ITERATIONS parse/free round-trips through httpp_parse_request().
+ *
+ * req: request text handed to the parser (only the pointer is passed, so the
+ *      parser presumably relies on NUL termination -- confirm against httpp.h).
+ * len: length of req; not needed by httpp, kept so that every bench_*
+ *      function shares the same signature.
+ *
+ * Returns the elapsed time in seconds as measured by now_seconds().
+ */
+static double
+bench_httpp(char *req, size_t len)
+{
+    (void)len; /* avoid the -Wextra unused-parameter warning */
+
+    double t0 = now_seconds();
+
+    for (int i = 0; i < ITERATIONS; i++) {
+        httpp_req_t *result = httpp_parse_request(req);
+        assert(result);
+
+        httpp_req_free(result);
+    }
+
+    double t1 = now_seconds();
+
+    return t1 - t0;
+}
+
+/*
+ * Times ITERATIONS calls to picohttpparser's phr_parse_request().
+ *
+ * req: request bytes to parse.
+ * len: number of bytes in req.
+ *
+ * Returns the elapsed time in seconds as measured by now_seconds().
+ */
+static double
+bench_phr(char *req, size_t len)
+{
+    enum { MAX_HDRS = 64 };
+
+    double t0 = now_seconds();
+
+    struct phr_header hdrs[MAX_HDRS];
+
+    char const *method, *path;
+    size_t method_len, path_len;
+    int minor_version;
+
+    for (int i = 0; i < ITERATIONS; i++) {
+        /* num_hdrs is in/out: capacity on entry, parsed count on return.
+         * Reset it every iteration so the capacity does not shrink to the
+         * header count of the previous parse. */
+        size_t num_hdrs = MAX_HDRS;
+
+        int res = phr_parse_request(req, len,
+                                    &method, &method_len,
+                                    &path, &path_len,
+                                    &minor_version,
+                                    hdrs, &num_hdrs,
+                                    0);
+        assert(res > 0);
+    }
+
+    double t1 = now_seconds();
+
+    return t1 - t0;
}
-int main()
+/*
+ * Times ITERATIONS calls to mbl_http_msg_parse() on the same request and
+ * returns the elapsed time in seconds as measured by now_seconds().
+ *
+ * The `#if 1` section runs after the second timestamp is taken, so its
+ * output is not part of the measurement; it dumps the result of the last
+ * parse as a sanity check.
+ */
+static double
+bench_mbl(char *req, size_t len)
{
- char* raw = REQ;
- size_t raw_len = strlen(raw);
- int i;
+ double t0 = now_seconds();
+
+ /* caller-owned header storage handed to the parser (a VLA of 64 entries) */
+ size_t num_hdrs = 64;
+ struct mbl_http_header hdrs[num_hdrs];
+
+ struct mbl_http_msg msg;
+
+ for (int i = 0; i < ITERATIONS; i++) {
+ int res = mbl_http_msg_parse(&msg, req, len, hdrs, num_hdrs);
+ assert(res >= 0);
+ }
+
+ double t1 = now_seconds();
+
+#if 1
+ /* mixed-case key exercises the case-insensitive header lookup */
+ struct mbl_http_header *hdr = mbl_http_msg_find_header(&msg, "hOST", 4);
+ assert(hdr);
- double t0 = now_seconds();
- for (i = 0; i < ITERATIONS; ++i) {
- httpp_req_t* req = httpp_parse_request(raw);
- if (!req) {
- fprintf(stderr, "parse failed at iter %d\n", i);
- return 1;
- }
+ /* the parsed strings are (pointer, int length) pairs, hence "%.*s" */
+ printf("parsed http request line: %.*s\n",
+ msg.request_line.len, msg.request_line.ptr);
- httpp_req_free(req);
- }
- double t1 = now_seconds();
+ printf("parsed http request headers: %zu/%zu\n",
+ msg.hdrs_len, msg.hdrs_cap);
+ for (size_t i = 0; i < msg.hdrs_len; i++) {
+ printf("\t%zu = { key: \"%.*s\", val: \"%.*s\" }\n",
+ i, msg.hdrs[i].key.len, msg.hdrs[i].key.ptr,
+ msg.hdrs[i].val.len, msg.hdrs[i].val.ptr);
+ }
- double elapsed = t1 - t0;
- double reqs_per_second = (double) ITERATIONS / elapsed;
+ printf("parsed http request body: %d bytes\n", msg.body.len);
+ printf("%.*s\n", msg.body.len, (char *) msg.body.ptr);
+#endif
+
+ return t1 - t0;
+}
+
+/*
+ * Prints a three-line report for one benchmark run: the parser name, the
+ * elapsed time in seconds, and the throughput (ITERATIONS / elapsed).
+ */
+static void
+stats(char const *name, double elapsed)
+{
+    double const per_second = ITERATIONS / elapsed;
+
+    printf("Benchmarking: %s\n", name);
+    printf("Elapsed %f seconds.\n", elapsed);
+    printf("Requests per second ≈ %.2f \n", per_second);
+}
+
+/* Runs the three parser benchmarks in turn on the same REQ text and prints
+ * one stats() report per parser. */
+int
+main()
+{
+ char* req = REQ;
+ size_t len = strlen(REQ);
- printf("Elapsed %f seconds.\n", elapsed);
- printf("Requests per second ≈ %.2f \n", reqs_per_second);
+ stats("httpp", bench_httpp(req, len));
+ stats("phr", bench_phr(req, len));
+ stats("mbl", bench_mbl(req, len));
- return 0;
+ return 0;
}
diff --git a/build.sh b/build.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Builds the `bench` binary from bench.c; -e aborts on the first failing
+# command and -x echoes each command as it runs.
+set -ex
+
+# NOTE(review): -O0 -g3 is a debug configuration; throughput numbers from an
+# unoptimised build may not be representative -- confirm this is intended.
+cc -o bench bench.c -Wall -Wextra -std=c11 -O0 -g3
diff --git a/clean.sh b/clean.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Removes the `bench` binary produced by build.sh.
+set -ex
+
+# -f: a missing binary is not an error, so cleaning twice (or before the
+# first build) does not abort under `set -e`.
+rm -f bench
diff --git a/mblhttp.h b/mblhttp.h
@@ -0,0 +1,159 @@
+#ifndef MBLHTTP_H
+#define MBLHTTP_H
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+/* A (pointer, length) text slice. The parser in this header only ever points
+ * it into the caller's input buffer, so it is not NUL-terminated. */
+struct mbl_http_str {
+ char *ptr; /* first byte of the slice */
+ int len; /* number of bytes in the slice */
+};
+
+/* A (pointer, length) byte range; used for the message body. */
+struct mbl_http_buf {
+ void *ptr; /* first byte of the range */
+ int len; /* number of bytes in the range */
+};
+
+/* One header line split at its first ':' into key and value slices; leading
+ * whitespace is trimmed from both by parse_request_header(). */
+struct mbl_http_header {
+ struct mbl_http_str key, val;
+};
+
+/* Result of mbl_http_msg_parse(): slices into the input buffer plus the
+ * caller-provided header array. */
+struct mbl_http_msg {
+ struct mbl_http_str request_line; /* first line, without its CRLF */
+
+ struct mbl_http_header *hdrs; /* caller-provided storage */
+ size_t hdrs_len, hdrs_cap; /* entries filled in / entries available */
+
+ struct mbl_http_buf body; /* everything after the blank line */
+};
+
+/* please ignore the stray `static inline` functions; these really should
+ * probably be split out into their own .c source file, but this is faster
+ * and much less faff :)
+ */
+
+/*
+ * Finds the first CRLF in [*ptr, end) and reports the bytes before it as the
+ * request line.
+ *
+ * ptr: in: start of the message; out: first byte after the CRLF on success,
+ *      `end` on failure.
+ * end: one past the last readable byte.
+ * out: receives the request line (CRLF excluded) on success; untouched on
+ *      failure.
+ *
+ * Returns 0 on success, -1 if the buffer contains no CRLF.
+ */
+static inline int
+parse_request_line(char **ptr, char *end, struct mbl_http_str *out)
+{
+    char *line = *ptr;
+
+    /* Require two readable bytes before looking at p[1], so a '\r' in the
+     * last position never causes a read past `end`. Advancing one byte at a
+     * time also means "\r\r\n" is recognised (the CRLF starts at the second
+     * '\r'). */
+    for (char *p = line; end - p >= 2; p++) {
+        if (p[0] == '\r' && p[1] == '\n') {
+            out->ptr = line;
+            out->len = (int)(p - line);
+
+            *ptr = p + 2;
+
+            return 0;
+        }
+    }
+
+    *ptr = end;
+
+    return -1;
+}
+
+/*
+ * Splits one header line [ptr, end) at its first ':' into key and value.
+ *
+ * ptr: first byte of the header line.
+ * end: one past the last byte of the line (the line's CRLF is not included).
+ * out: receives the key and value slices, each with leading whitespace
+ *      removed; trailing whitespace is kept. Untouched on failure.
+ *
+ * Returns 0 on success, -1 if the line contains no ':'.
+ */
+static inline int
+parse_request_header(char *ptr, char *end, struct mbl_http_header *out)
+{
+    char *delim = memchr(ptr, ':', (size_t)(end - ptr));
+    if (!delim) /* malformed http header */
+        return -1;
+
+    out->key.ptr = ptr;
+    out->key.len = (int)(delim - ptr);
+
+    out->val.ptr = ++delim;
+    out->val.len = (int)(end - delim);
+
+    /* Both trims test the remaining length before reading the byte, so an
+     * empty slice is never dereferenced. The cast to unsigned char is needed
+     * because isspace() is undefined for negative values other than EOF and
+     * plain char may be signed. */
+
+    /* ltrim key */
+    while (out->key.len && isspace((unsigned char)*out->key.ptr)) {
+        out->key.ptr++;
+        out->key.len--;
+    }
+
+    /* ltrim val */
+    while (out->val.len && isspace((unsigned char)*out->val.ptr)) {
+        out->val.ptr++;
+        out->val.len--;
+    }
+
+    return 0;
+}
+
+/*
+ * Parses CRLF-terminated header lines starting at *ptr until the empty line
+ * that ends the header section (or until `end` is reached).
+ *
+ * ptr:  in: first byte after the request line; out (success only): first
+ *       byte after the terminating empty line, i.e. the start of the body.
+ * end:  one past the last readable byte.
+ * hdrs: caller-provided array that receives the parsed headers.
+ * cap:  number of entries available in hdrs.
+ * len:  receives the number of entries filled in; only successfully parsed
+ *       headers are counted.
+ *
+ * Header lines beyond `cap` are skipped without being parsed or validated.
+ * Reaching `end` without seeing the empty line is still reported as success.
+ *
+ * Returns 0 on success, -1 if a line has no terminating CRLF, -2 if a header
+ * line has no ':'.
+ */
+static inline int
+parse_request_headers(char **ptr, char *end,
+        struct mbl_http_header *hdrs, size_t cap, size_t *len)
+{
+    char *start = *ptr;
+
+    char term[2] = { '\r', '\n' };
+
+    *len = 0;
+
+    while (start < end) {
+        char *hdr_begin = start;
+        char *hdr_end = memmem(start, end - start, term, sizeof term);
+
+        if (!hdr_end) /* no \r\n found, malformed http request */
+            return -1;
+
+        if (hdr_begin == hdr_end) { /* final \r\n */
+            start = hdr_end + sizeof term;
+            break;
+        }
+
+        /* have header, parse it; count it only once it parsed cleanly so
+         * that *len never covers an entry that was left unwritten */
+        if (*len < cap) {
+            if (parse_request_header(hdr_begin, hdr_end, hdrs + *len) < 0)
+                return -2;
+
+            (*len)++;
+        }
+
+        start = hdr_end + sizeof term;
+    }
+
+    *ptr = start;
+
+    return 0;
+}
+
+/*
+ * Parses an HTTP message held in buf[0 .. len) into *msg without copying:
+ * every pointer stored in *msg and in hdrs[] points into buf.
+ *
+ * msg:      receives the request line, headers and body. Every field is
+ *           reset first, so after a failed parse *msg is still safe to
+ *           inspect (NULL/0 for whatever was not reached).
+ * buf, len: the raw message bytes.
+ * hdrs:     caller-provided storage for parsed headers.
+ * hdrs_cap: number of entries available in hdrs.
+ *
+ * Returns 0 on success, -1 if the request line is not terminated by CRLF,
+ * -2 if the header section is malformed.
+ */
+static inline int
+mbl_http_msg_parse(struct mbl_http_msg *msg, char *buf, size_t len,
+           struct mbl_http_header hdrs[], size_t hdrs_cap)
+{
+    msg->request_line.ptr = NULL;
+    msg->request_line.len = 0;
+
+    msg->hdrs = hdrs;
+    msg->hdrs_len = 0;
+    msg->hdrs_cap = hdrs_cap;
+
+    msg->body.ptr = NULL;
+    msg->body.len = 0;
+
+    char *ptr = buf, *end = buf + len;
+    if (parse_request_line(&ptr, end, &msg->request_line) < 0)
+        return -1;
+
+    if (parse_request_headers(&ptr, end, msg->hdrs, msg->hdrs_cap, &msg->hdrs_len) < 0)
+        return -2;
+
+    msg->body.ptr = ptr;
+    msg->body.len = (int)(end - ptr);
+
+    return 0;
+}
+
+/*
+ * Looks up a header by name, ignoring ASCII case.
+ *
+ * msg: a parsed message.
+ * key: header name to look for (need not be NUL-terminated).
+ * len: number of bytes in key.
+ *
+ * Returns the first header whose key has exactly `len` bytes and matches
+ * `key` case-insensitively, or NULL if there is none.
+ */
+static inline struct mbl_http_header *
+mbl_http_msg_find_header(struct mbl_http_msg *msg, char const *key, int len)
+{
+    struct mbl_http_header *cur = msg->hdrs;
+
+    for (size_t left = msg->hdrs_len; left > 0; left--, cur++) {
+        /* cheap length check first; only compare bytes on equal lengths */
+        if (cur->key.len == len && strncasecmp(cur->key.ptr, key, len) == 0)
+            return cur;
+    }
+
+    return NULL;
+}
+
+#endif /* MBLHTTP_H */
diff --git a/picohttpparser.c b/picohttpparser.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ * Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef __SSE4_2__
+#ifdef _MSC_VER
+#include <nmmintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+#include "picohttpparser.h"
+
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+
+#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+
+#define CHECK_EOF() \
+ if (buf == buf_end) { \
+ *ret = -2; \
+ return NULL; \
+ }
+
+#define EXPECT_CHAR_NO_CHECK(ch) \
+ if (*buf++ != ch) { \
+ *ret = -1; \
+ return NULL; \
+ }
+
+#define EXPECT_CHAR(ch) \
+ CHECK_EOF(); \
+ EXPECT_CHAR_NO_CHECK(ch);
+
+#define ADVANCE_TOKEN(tok, toklen) \
+ do { \
+ const char *tok_start = buf; \
+ static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
+ int found2; \
+ buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
+ if (!found2) { \
+ CHECK_EOF(); \
+ } \
+ while (1) { \
+ if (*buf == ' ') { \
+ break; \
+ } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
+ if ((unsigned char)*buf < '\040' || *buf == '\177') { \
+ *ret = -1; \
+ return NULL; \
+ } \
+ } \
+ ++buf; \
+ CHECK_EOF(); \
+ } \
+ tok = tok_start; \
+ toklen = buf - tok_start; \
+ } while (0)
+
+static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+ "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+ "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
+{
+ *found = 0;
+#if __SSE4_2__
+ if (likely(buf_end - buf >= 16)) {
+ __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
+
+ size_t left = (buf_end - buf) & ~15;
+ do {
+ __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
+ int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+ if (unlikely(r != 16)) {
+ buf += r;
+ *found = 1;
+ break;
+ }
+ buf += 16;
+ left -= 16;
+ } while (likely(left != 0));
+ }
+#else
+ /* suppress unused parameter warning */
+ (void)buf_end;
+ (void)ranges;
+ (void)ranges_size;
+#endif
+ return buf;
+}
+
+static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
+{
+ const char *token_start = buf;
+
+#ifdef __SSE4_2__
+ static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */
+ "\012\037" /* allow SP and up to but not including DEL */
+ "\177\177"; /* allow chars w. MSB set */
+ int found;
+ buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
+ if (found)
+ goto FOUND_CTL;
+#else
+ /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
+ while (likely(buf_end - buf >= 8)) {
+#define DOIT() \
+ do { \
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
+ goto NonPrintable; \
+ ++buf; \
+ } while (0)
+ DOIT();
+ DOIT();
+ DOIT();
+ DOIT();
+ DOIT();
+ DOIT();
+ DOIT();
+ DOIT();
+#undef DOIT
+ continue;
+ NonPrintable:
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+ goto FOUND_CTL;
+ }
+ ++buf;
+ }
+#endif
+ for (;; ++buf) {
+ CHECK_EOF();
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+ goto FOUND_CTL;
+ }
+ }
+ }
+FOUND_CTL:
+ if (likely(*buf == '\015')) {
+ ++buf;
+ EXPECT_CHAR('\012');
+ *token_len = buf - 2 - token_start;
+ } else if (*buf == '\012') {
+ *token_len = buf - token_start;
+ ++buf;
+ } else {
+ *ret = -1;
+ return NULL;
+ }
+ *token = token_start;
+
+ return buf;
+}
+
+/* Scans for two consecutive line terminators (each either CRLF or a bare LF).
+ * When last_len >= 3 the scan starts 3 bytes before offset last_len, otherwise
+ * at buf. Returns a pointer just past the second terminator. Returns NULL
+ * with *ret = -2 if buf_end is reached first, or NULL with *ret = -1 if a CR
+ * is not followed by LF. */
+static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
+{
+ int ret_cnt = 0; /* consecutive line terminators seen so far */
+ buf = last_len < 3 ? buf : buf + last_len - 3;
+
+ while (1) {
+ CHECK_EOF();
+ if (*buf == '\015') {
+ ++buf;
+ CHECK_EOF();
+ EXPECT_CHAR('\012');
+ ++ret_cnt;
+ } else if (*buf == '\012') {
+ ++buf;
+ ++ret_cnt;
+ } else {
+ ++buf;
+ ret_cnt = 0;
+ }
+ if (ret_cnt == 2) {
+ return buf;
+ }
+ }
+
+ /* not reached: the loop above only leaves via return */
+ *ret = -2;
+ return NULL;
+}
+
+#define PARSE_INT(valp_, mul_) \
+ if (*buf < '0' || '9' < *buf) { \
+ buf++; \
+ *ret = -1; \
+ return NULL; \
+ } \
+ *(valp_) = (mul_) * (*buf++ - '0');
+
+#define PARSE_INT_3(valp_) \
+ do { \
+ int res_ = 0; \
+ PARSE_INT(&res_, 100) \
+ *valp_ = res_; \
+ PARSE_INT(&res_, 10) \
+ *valp_ += res_; \
+ PARSE_INT(&res_, 1) \
+ *valp_ += res_; \
+ } while (0)
+
+/* returned pointer is always within [buf, buf_end), or null */
+static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
+ int *ret)
+{
+ /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
+ * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
+ static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */
+ "\"\"" /* 0x22 */
+ "()" /* 0x28,0x29 */
+ ",," /* 0x2c */
+ "//" /* 0x2f */
+ ":@" /* 0x3a-0x40 */
+ "[]" /* 0x5b-0x5d */
+ "{\xff"; /* 0x7b-0xff */
+ const char *buf_start = buf;
+ int found;
+ buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
+ if (!found) {
+ CHECK_EOF();
+ }
+ while (1) {
+ if (*buf == next_char) {
+ break;
+ } else if (!token_char_map[(unsigned char)*buf]) {
+ *ret = -1;
+ return NULL;
+ }
+ ++buf;
+ CHECK_EOF();
+ }
+ *token = buf_start;
+ *token_len = buf - buf_start;
+ return buf;
+}
+
+/* Matches the literal "HTTP/1." followed by one decimal digit and stores that
+ * digit in *minor_version. Needs at least 9 bytes available, otherwise sets
+ * *ret = -2; any mismatch sets *ret = -1. Both failures return NULL.
+ * returned pointer is always within [buf, buf_end), or null */
+static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
+{
+ /* we want at least [HTTP/1.<two chars>] to try to parse */
+ if (buf_end - buf < 9) {
+ *ret = -2;
+ return NULL;
+ }
+ EXPECT_CHAR_NO_CHECK('H');
+ EXPECT_CHAR_NO_CHECK('T');
+ EXPECT_CHAR_NO_CHECK('T');
+ EXPECT_CHAR_NO_CHECK('P');
+ EXPECT_CHAR_NO_CHECK('/');
+ EXPECT_CHAR_NO_CHECK('1');
+ EXPECT_CHAR_NO_CHECK('.');
+ PARSE_INT(minor_version, 1);
+ return buf;
+}
+
+static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
+ size_t max_headers, int *ret)
+{
+ for (;; ++*num_headers) {
+ CHECK_EOF();
+ if (*buf == '\015') {
+ ++buf;
+ EXPECT_CHAR('\012');
+ break;
+ } else if (*buf == '\012') {
+ ++buf;
+ break;
+ }
+ if (*num_headers == max_headers) {
+ *ret = -1;
+ return NULL;
+ }
+ if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
+ /* parsing name, but do not discard SP before colon, see
+ * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
+ if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
+ return NULL;
+ }
+ if (headers[*num_headers].name_len == 0) {
+ *ret = -1;
+ return NULL;
+ }
+ ++buf;
+ for (;; ++buf) {
+ CHECK_EOF();
+ if (!(*buf == ' ' || *buf == '\t')) {
+ break;
+ }
+ }
+ } else {
+ headers[*num_headers].name = NULL;
+ headers[*num_headers].name_len = 0;
+ }
+ const char *value;
+ size_t value_len;
+ if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
+ return NULL;
+ }
+ /* remove trailing SPs and HTABs */
+ const char *value_end = value + value_len;
+ for (; value_end != value; --value_end) {
+ const char c = *(value_end - 1);
+ if (!(c == ' ' || c == '\t')) {
+ break;
+ }
+ }
+ headers[*num_headers].value = value;
+ headers[*num_headers].value_len = value_end - value;
+ }
+ return buf;
+}
+
+static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
+ size_t max_headers, int *ret)
+{
+ /* skip first empty line (some clients add CRLF after POST content) */
+ CHECK_EOF();
+ if (*buf == '\015') {
+ ++buf;
+ EXPECT_CHAR('\012');
+ } else if (*buf == '\012') {
+ ++buf;
+ }
+
+ /* parse request line */
+ if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
+ return NULL;
+ }
+ do {
+ ++buf;
+ CHECK_EOF();
+ } while (*buf == ' ');
+ ADVANCE_TOKEN(*path, *path_len);
+ do {
+ ++buf;
+ CHECK_EOF();
+ } while (*buf == ' ');
+ if (*method_len == 0 || *path_len == 0) {
+ *ret = -1;
+ return NULL;
+ }
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+ return NULL;
+ }
+ if (*buf == '\015') {
+ ++buf;
+ EXPECT_CHAR('\012');
+ } else if (*buf == '\012') {
+ ++buf;
+ } else {
+ *ret = -1;
+ return NULL;
+ }
+
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+
+/* Parses an HTTP request held in buf_start[0 .. len).
+ * *num_headers is in/out: on entry the capacity of headers[], on return the
+ * number of headers parsed. All outputs are reset before parsing starts.
+ * Returns the number of bytes consumed on success, or the negative code set
+ * by the failing parse step (-1 or -2). */
+int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+ const char *buf = buf_start, *buf_end = buf_start + len;
+ size_t max_headers = *num_headers;
+ int r;
+
+ *method = NULL;
+ *method_len = 0;
+ *path = NULL;
+ *path_len = 0;
+ *minor_version = -1;
+ *num_headers = 0;
+
+ /* if last_len != 0, check if the request is complete (a fast countermeasure
+ against slowloris) */
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+ return r;
+ }
+
+ if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
+ &r)) == NULL) {
+ return r;
+ }
+
+ return (int)(buf - buf_start);
+}
+
+static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
+ size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
+{
+ /* parse "HTTP/1.x" */
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+ return NULL;
+ }
+ /* skip space */
+ if (*buf != ' ') {
+ *ret = -1;
+ return NULL;
+ }
+ do {
+ ++buf;
+ CHECK_EOF();
+ } while (*buf == ' ');
+ /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
+ if (buf_end - buf < 4) {
+ *ret = -2;
+ return NULL;
+ }
+ PARSE_INT_3(status);
+
+ /* get message including preceding space */
+ if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
+ return NULL;
+ }
+ if (*msg_len == 0) {
+ /* ok */
+ } else if (**msg == ' ') {
+ /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
+ * before running past the end of the given buffer. */
+ do {
+ ++*msg;
+ --*msg_len;
+ } while (**msg == ' ');
+ } else {
+ /* garbage found after status code */
+ *ret = -1;
+ return NULL;
+ }
+
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+
+/* Parses an HTTP response held in buf_start[0 .. len).
+ * *num_headers is in/out: on entry the capacity of headers[], on return the
+ * number of headers parsed. All outputs are reset before parsing starts.
+ * Returns the number of bytes consumed on success, or the negative code set
+ * by the failing parse step (-1 or -2). */
+int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+ struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+ const char *buf = buf_start, *buf_end = buf + len;
+ size_t max_headers = *num_headers;
+ int r;
+
+ *minor_version = -1;
+ *status = 0;
+ *msg = NULL;
+ *msg_len = 0;
+ *num_headers = 0;
+
+ /* if last_len != 0, check if the response is complete (a fast countermeasure
+ against slowloris) */
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+ return r;
+ }
+
+ if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
+ return r;
+ }
+
+ return (int)(buf - buf_start);
+}
+
+/* Parses a bare header block held in buf_start[0 .. len).
+ * *num_headers is in/out: on entry the capacity of headers[], on return the
+ * number of headers parsed.
+ * Returns the number of bytes consumed on success, or the negative code set
+ * by the failing parse step (-1 or -2). */
+int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+ const char *buf = buf_start, *buf_end = buf + len;
+ size_t max_headers = *num_headers;
+ int r;
+
+ *num_headers = 0;
+
+ /* if last_len != 0, check if the header block is complete (a fast
+ countermeasure against slowloris) */
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+ return r;
+ }
+
+ if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
+ return r;
+ }
+
+ return (int)(buf - buf_start);
+}
+
+enum {
+ CHUNKED_IN_CHUNK_SIZE,
+ CHUNKED_IN_CHUNK_EXT,
+ CHUNKED_IN_CHUNK_HEADER_EXPECT_LF,
+ CHUNKED_IN_CHUNK_DATA,
+ CHUNKED_IN_CHUNK_DATA_EXPECT_CR,
+ CHUNKED_IN_CHUNK_DATA_EXPECT_LF,
+ CHUNKED_IN_TRAILERS_LINE_HEAD,
+ CHUNKED_IN_TRAILERS_LINE_MIDDLE
+};
+
+/* Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
+ * value 0..15; returns -1 for any other character. */
+static int decode_hex(int ch)
+{
+ if ('0' <= ch && ch <= '9') {
+ return ch - '0';
+ } else if ('A' <= ch && ch <= 'F') {
+ return ch - 'A' + 0xa;
+ } else if ('a' <= ch && ch <= 'f') {
+ return ch - 'a' + 0xa;
+ } else {
+ return -1;
+ }
+}
+
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
+{
+ size_t dst = 0, src = 0, bufsz = *_bufsz;
+ ssize_t ret = -2; /* incomplete */
+
+ decoder->_total_read += bufsz;
+
+ while (1) {
+ switch (decoder->_state) {
+ case CHUNKED_IN_CHUNK_SIZE:
+ for (;; ++src) {
+ int v;
+ if (src == bufsz)
+ goto Exit;
+ if ((v = decode_hex(buf[src])) == -1) {
+ if (decoder->_hex_count == 0) {
+ ret = -1;
+ goto Exit;
+ }
+ /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
+ switch (buf[src]) {
+ case ' ':
+ case '\011':
+ case ';':
+ case '\012':
+ case '\015':
+ break;
+ default:
+ ret = -1;
+ goto Exit;
+ }
+ break;
+ }
+ if (decoder->_hex_count == sizeof(size_t) * 2) {
+ ret = -1;
+ goto Exit;
+ }
+ decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
+ ++decoder->_hex_count;
+ }
+ decoder->_hex_count = 0;
+ decoder->_state = CHUNKED_IN_CHUNK_EXT;
+ /* fallthru */
+ case CHUNKED_IN_CHUNK_EXT:
+ /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
+ for (;; ++src) {
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] == '\015') {
+ break;
+ } else if (buf[src] == '\012') {
+ ret = -1;
+ goto Exit;
+ }
+ }
+ ++src;
+ decoder->_state = CHUNKED_IN_CHUNK_HEADER_EXPECT_LF;
+ /* fallthru */
+ case CHUNKED_IN_CHUNK_HEADER_EXPECT_LF:
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] != '\012') {
+ ret = -1;
+ goto Exit;
+ }
+ ++src;
+ if (decoder->bytes_left_in_chunk == 0) {
+ if (decoder->consume_trailer) {
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+ break;
+ } else {
+ goto Complete;
+ }
+ }
+ decoder->_state = CHUNKED_IN_CHUNK_DATA;
+ /* fallthru */
+ case CHUNKED_IN_CHUNK_DATA: {
+ size_t avail = bufsz - src;
+ if (avail < decoder->bytes_left_in_chunk) {
+ if (dst != src)
+ memmove(buf + dst, buf + src, avail);
+ src += avail;
+ dst += avail;
+ decoder->bytes_left_in_chunk -= avail;
+ goto Exit;
+ }
+ if (dst != src)
+ memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
+ src += decoder->bytes_left_in_chunk;
+ dst += decoder->bytes_left_in_chunk;
+ decoder->bytes_left_in_chunk = 0;
+ decoder->_state = CHUNKED_IN_CHUNK_DATA_EXPECT_CR;
+ }
+ /* fallthru */
+ case CHUNKED_IN_CHUNK_DATA_EXPECT_CR:
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] != '\015') {
+ ret = -1;
+ goto Exit;
+ }
+ ++src;
+ decoder->_state = CHUNKED_IN_CHUNK_DATA_EXPECT_LF;
+ /* fallthru */
+ case CHUNKED_IN_CHUNK_DATA_EXPECT_LF:
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] != '\012') {
+ ret = -1;
+ goto Exit;
+ }
+ ++src;
+ decoder->_state = CHUNKED_IN_CHUNK_SIZE;
+ break;
+ case CHUNKED_IN_TRAILERS_LINE_HEAD:
+ for (;; ++src) {
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] != '\015')
+ break;
+ }
+ if (buf[src++] == '\012')
+ goto Complete;
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
+ /* fallthru */
+ case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
+ for (;; ++src) {
+ if (src == bufsz)
+ goto Exit;
+ if (buf[src] == '\012')
+ break;
+ }
+ ++src;
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+ break;
+ default:
+ assert(!"decoder is corrupt");
+ }
+ }
+
+Complete:
+ ret = bufsz - src;
+Exit:
+ if (dst != src)
+ memmove(buf + dst, buf + src, bufsz - src);
+ *_bufsz = dst;
+ /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
+ if (ret == -2) {
+ decoder->_total_overhead += bufsz - dst;
+ if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
+ ret = -1;
+ }
+ return ret;
+}
+
+/* Returns non-zero iff the decoder's state is CHUNKED_IN_CHUNK_DATA, i.e. it
+ * stopped while still expecting more bytes of the current chunk's data. */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
+{
+ return decoder->_state == CHUNKED_IN_CHUNK_DATA;
+}
+
+#undef CHECK_EOF
+#undef EXPECT_CHAR
+#undef ADVANCE_TOKEN
diff --git a/picohttpparser.h b/picohttpparser.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ * Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef picohttpparser_h
+#define picohttpparser_h
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#ifdef _MSC_VER
+#define ssize_t intptr_t
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* contains name and value of a header (name == NULL if it is a continuing
+ * line of a multiline header) */
+struct phr_header {
+ const char *name;
+ size_t name_len;
+ const char *value;
+ size_t value_len;
+};
+
+/* returns number of bytes consumed if successful, -2 if request is partial,
+ * -1 if failed */
+int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
+ int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+ struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* should be zero-filled before start */
+struct phr_chunked_decoder {
+ size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
+ char consume_trailer; /* if trailing headers should be consumed */
+ char _hex_count;
+ char _state;
+ uint64_t _total_read;
+ uint64_t _total_overhead;
+};
+
+/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
+ * encoding headers. When the function returns without an error, bufsz is
+ * updated to the length of the decoded data available. Applications should
+ * repeatedly call the function while it returns -2 (incomplete) every time
+ * supplying newly arrived data. If the end of the chunked-encoded data is
+ * found, the function returns a non-negative number indicating the number of
+ * octets left undecoded, that starts from the offset returned by `*bufsz`.
+ * Returns -1 on error.
+ */
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
+
+/* returns if the chunked decoder is in middle of chunked data */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif