fixes #682 Support for Chunked Transfer Coding

This is the client side only, although the work is structured to support server applications. The chunked API is for now private, although the intent to is to make it public for applications who really want to use it. Note that chunked transfer encoding puts data through extra copies. First it copies through the buffering area (because I have to be able to extract variable length strings from inside the data stream), and then again to reassemble the chunks into a single unified object. We do assume that the user wants the entire thing as a single object. This means that using this to pull unbounded data will just silently consume all memory. Use caution!
author: Garrett D'Amore <garrett@damore.org> 2018-11-01 23:41:53 -0700
committer: Garrett D'Amore <garrett@damore.org> 2018-11-02 20:57:08 -0700
commit: db92342b43d429b8b07244cc003a8589a1b1c542 (patch)
tree: 5624a3142b8309257ff523b0bf85343bee08033d /src/supplemental/http/http_chunk.c
parent: 156604bd07ee60faa323968c71627f1c701b473a (diff)
download: nng-db92342b43d429b8b07244cc003a8589a1b1c542.tar.gz
nng-db92342b43d429b8b07244cc003a8589a1b1c542.tar.bz2
nng-db92342b43d429b8b07244cc003a8589a1b1c542.zip
1 files changed, 341 insertions, 0 deletions
diff --git a/src/supplemental/http/http_chunk.c b/src/supplemental/http/http_chunk.c
new file mode 100644
index 00000000..9333548e
--- /dev/null
+++ b/src/supplemental/http/http_chunk.c
@@ -0,0 +1,341 @@
+//
+// Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
+// Copyright 2018 Capitar IT Group BV <info@capitar.com>
+//
+// This software is supplied under the terms of the MIT License, a
+// copy of which should be located in the distribution where this
+// file was obtained (LICENSE.txt).  A copy of the license may also be
+// found online at https://opensource.org/licenses/MIT.
+//
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "core/nng_impl.h"
+
+#include "http_api.h"
+
+// Chunked transfer encoding support.
+
+// Note that HTTP/1.1 chunked transfer encoding is horrible, and should
+// be avoided if at all possible.  It necessarily creates extra need for
+// data copies, creates a lot of extra back and forth complexity.  If you're
+// stuck in this code, we feel great sympathy for you.
+//
+// We feel strongly enough about this that we refuse to provide any
+// method to automatically generate chunked transfers.  If you think
+// you need to send chunked transfers (because you have no idea how
+// much data you will send, such as a streaming workload), consider a
+// different method such as WebSocket to send your data.  Unbounded
+// entity body data is just impolite.
+
+enum chunk_state {
+	CS_INIT,   // initial state
+	CS_LEN,    // length
+	CS_EXT,    // random extension text (we ignore)
+	CS_CR,     // carriage return after length (and extensions)
+	CS_DATA,   // actual data
+	CS_TRLR,   // trailer
+	CS_TRLRCR, // CRLF at end of trailer
+	CS_DONE,
+};
+
+struct nng_http_chunks {
+	nni_list         cl_chunks;
+	size_t           cl_maxsz;
+	size_t           cl_size; // parsed size (so far)
+	size_t           cl_line; // bytes since last newline
+	enum chunk_state cl_state;
+};
+
+struct nng_http_chunk {
+	nni_list_node c_node;
+	size_t        c_size;
+	size_t        c_alloc;
+	size_t        c_resid; // residual data to transfer
+	char *        c_data;
+};
+
+int
+nni_http_chunks_init(nni_http_chunks **clp, size_t maxsz)
+{
+	nni_http_chunks *cl;
+
+	if ((cl = NNI_ALLOC_STRUCT(cl)) == NULL) {
+		return (NNG_ENOMEM);
+	}
+	NNI_LIST_INIT(&cl->cl_chunks, nni_http_chunk, c_node);
+	cl->cl_maxsz = maxsz;
+	*clp         = cl;
+	return (0);
+}
+
+void
+nni_http_chunks_free(nni_http_chunks *cl)
+{
+	nni_http_chunk *ch;
+	if (cl == NULL) {
+		return;
+	}
+	while ((ch = nni_list_first(&cl->cl_chunks)) != NULL) {
+		nni_list_remove(&cl->cl_chunks, ch);
+		if (ch->c_data != NULL) {
+			nni_free(ch->c_data, ch->c_alloc);
+		}
+		NNI_FREE_STRUCT(ch);
+	}
+	NNI_FREE_STRUCT(cl);
+}
+
+nni_http_chunk *
+nni_http_chunks_iter(nni_http_chunks *cl, nni_http_chunk *last)
+{
+	if (last == NULL) {
+		return (nni_list_first(&cl->cl_chunks));
+	}
+	return (nni_list_next(&cl->cl_chunks, last));
+}
+
+size_t
+nni_http_chunks_size(nni_http_chunks *cl)
+{
+	size_t          tot = 0;
+	nni_http_chunk *ch;
+	NNI_LIST_FOREACH (&cl->cl_chunks, ch) {
+		tot += ch->c_size;
+	}
+	return (tot);
+}
+
+size_t
+nni_http_chunk_size(nni_http_chunk *ch)
+{
+	return (ch->c_size);
+}
+
+void *
+nni_http_chunk_data(nni_http_chunk *ch)
+{
+	return (ch->c_data);
+}
+
+static int
+chunk_ingest_len(nni_http_chunks *cl, char c)
+{
+	if (isdigit(c)) {
+		cl->cl_size *= 16;
+		cl->cl_size += (c - '0');
+	} else if ((c >= 'A') && (c <= 'F')) {
+		cl->cl_size *= 16;
+		cl->cl_size += (c - 'A') + 10;
+	} else if ((c >= 'a') && (c <= 'f')) {
+		cl->cl_size *= 16;
+		cl->cl_size += (c - 'a') + 10;
+	} else if (c == ';') {
+		cl->cl_state = CS_EXT;
+	} else if (c == '\r') {
+		cl->cl_state = CS_CR;
+	} else {
+		return (NNG_EPROTO);
+	}
+	return (0);
+}
+
+static int
+chunk_ingest_ext(nni_http_chunks *cl, char c)
+{
+	if (c == '\r') {
+		cl->cl_state = CS_CR;
+	} else if (!isprint(c)) {
+		return (NNG_EPROTO);
+	}
+	return (0);
+}
+
+static int
+chunk_ingest_newline(nni_http_chunks *cl, char c)
+{
+	nni_http_chunk *chunk;
+
+	if (c != '\n') {
+		return (NNG_EPROTO);
+	}
+	if (cl->cl_size == 0) {
+		cl->cl_line  = 0;
+		cl->cl_state = CS_TRLR;
+		return (0);
+	}
+	if ((cl->cl_maxsz > 0) &&
+	    ((nni_http_chunks_size(cl) + cl->cl_size) > cl->cl_maxsz)) {
+		return (NNG_EMSGSIZE);
+	}
+	if ((chunk = NNI_ALLOC_STRUCT(chunk)) == NULL) {
+		return (NNG_ENOMEM);
+	}
+	// two extra bytes to accommodate trailing CRLF
+	if ((chunk->c_data = nni_alloc(cl->cl_size + 2)) == NULL) {
+		NNI_FREE_STRUCT(chunk);
+		return (NNG_ENOMEM);
+	}
+
+	// Data, so allocate a new chunk, stick it on the end of the list,
+	// and note that we have residual data needs.  The residual is
+	// to allow for the trailing CRLF to be consumed.
+	cl->cl_state   = CS_DATA;
+	chunk->c_size  = cl->cl_size;
+	chunk->c_alloc = cl->cl_size + 2;
+	chunk->c_resid = chunk->c_alloc;
+	nni_list_append(&cl->cl_chunks, chunk);
+
+	return (0);
+}
+
+static int
+chunk_ingest_trailer(nni_http_chunks *cl, char c)
+{
+	if (c == '\r') {
+		cl->cl_state = CS_TRLRCR;
+		return (0);
+	}
+	if (!isprint(c)) {
+		return (NNG_EPROTO);
+	}
+	cl->cl_line++;
+	return (0);
+}
+
+static int
+chunk_ingest_trailercr(nni_http_chunks *cl, char c)
+{
+	if (c != '\n') {
+		return (NNG_EPROTO);
+	}
+	if (cl->cl_line == 0) {
+		cl->cl_state = CS_DONE;
+		return (0);
+	}
+	cl->cl_line  = 0;
+	cl->cl_state = CS_TRLR;
+	return (0);
+}
+
+static int
+chunk_ingest_char(nni_http_chunks *cl, char c)
+{
+	int rv;
+	switch (cl->cl_state) {
+	case CS_INIT:
+		if (!isalnum(c)) {
+			rv = NNG_EPROTO;
+			break;
+		}
+		cl->cl_state = CS_LEN;
+		// fallthrough
+	case CS_LEN:
+		rv = chunk_ingest_len(cl, c);
+		break;
+	case CS_EXT:
+		rv = chunk_ingest_ext(cl, c);
+		break;
+	case CS_CR:
+		rv = chunk_ingest_newline(cl, c);
+		break;
+	case CS_TRLR:
+		rv = chunk_ingest_trailer(cl, c);
+		break;
+	case CS_TRLRCR:
+		rv = chunk_ingest_trailercr(cl, c);
+		break;
+	default:
+		// NB: No support for CS_DATA here, as that is handled
+		// in the caller for reasons of efficiency.
+		rv = NNG_EPROTO;
+		break;
+	}
+
+	return (rv);
+}
+
+static int
+chunk_ingest_data(nni_http_chunks *cl, char *buf, size_t n, size_t *lenp)
+{
+	nni_http_chunk *chunk;
+	size_t          offset;
+	char *          dest;
+
+	chunk = nni_list_last(&cl->cl_chunks);
+
+	NNI_ASSERT(chunk != NULL);
+	NNI_ASSERT(cl->cl_state == CS_DATA);
+	NNI_ASSERT(chunk->c_resid <= chunk->c_alloc);
+	NNI_ASSERT(chunk->c_alloc > 2); // not be zero, plus newlines
+
+	dest   = chunk->c_data;
+	offset = chunk->c_alloc - chunk->c_resid;
+	dest += offset;
+
+	if (n >= chunk->c_resid) {
+		n = chunk->c_resid;
+		memcpy(dest, buf, n);
+
+		if ((chunk->c_data[chunk->c_size] != '\r') ||
+		    (chunk->c_data[chunk->c_size + 1] != '\n')) {
+			return (NNG_EPROTO);
+		}
+		chunk->c_resid = 0;
+		cl->cl_state   = CS_INIT;
+		cl->cl_size    = 0;
+		cl->cl_line    = 0;
+		*lenp          = n;
+		return (0);
+	}
+
+	memcpy(dest, buf, n);
+	chunk->c_resid -= n;
+	*lenp = n;
+	return (0);
+}
+
+int
+nni_http_chunks_parse(nni_http_chunks *cl, void *buf, size_t n, size_t *lenp)
+{
+	size_t i   = 0;
+	char * src = buf;
+
+	// Format of this data is <hexdigits> [ ; <ascii> CRLF ]
+	// The <ascii> are chunk extensions, and we don't support any.
+
+	while ((cl->cl_state != CS_DONE) && (i < n)) {
+		int    rv;
+		size_t cnt;
+		switch (cl->cl_state) {
+		case CS_DONE:
+			// Completed parse!
+			break;
+
+		case CS_DATA:
+			if ((rv = chunk_ingest_data(cl, src + i, n, &cnt)) !=
+			    0) {
+				return (rv);
+			}
+			i += cnt;
+			break;
+
+		default:
+			// All others character by character parse through
+			// the state machine grinder.
+			if ((rv = chunk_ingest_char(cl, src[i])) != 0) {
+				return (rv);
+			}
+			i++;
+			break;
+		}
+	}
+
+	*lenp = i;
+	if (cl->cl_state != CS_DONE) {
+		return (NNG_EAGAIN);
+	}
+	return (0);
+}
author	Garrett D'Amore <garrett@damore.org>	2018-11-01 23:41:53 -0700
committer	Garrett D'Amore <garrett@damore.org>	2018-11-02 20:57:08 -0700
commit	db92342b43d429b8b07244cc003a8589a1b1c542 (patch)
tree	5624a3142b8309257ff523b0bf85343bee08033d /src/supplemental/http/http_chunk.c
parent	156604bd07ee60faa323968c71627f1c701b473a (diff)
download	nng-db92342b43d429b8b07244cc003a8589a1b1c542.tar.gz nng-db92342b43d429b8b07244cc003a8589a1b1c542.tar.bz2 nng-db92342b43d429b8b07244cc003a8589a1b1c542.zip