Diffstat (limited to 'src/protocol/reqrep0')
 -rw-r--r--  src/protocol/reqrep0/CMakeLists.txt |   6
 -rw-r--r--  src/protocol/reqrep0/rep.c          | 621
 -rw-r--r--  src/protocol/reqrep0/req.c          | 861
 -rw-r--r--  src/protocol/reqrep0/xrep.c         | 434
 -rw-r--r--  src/protocol/reqrep0/xreq.c         | 324
 5 files changed, 1707 insertions, 539 deletions
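
The substance of this change: raw mode moves out of rep.c/req.c into the new xrep.c/xreq.c files, and cooked mode is rebuilt around per-"context" state (request ID, backtrace, pending aios) instead of a single outstanding request per socket. A minimal sketch of what that enables for applications, assuming the public context API (nng_ctx_open, nng_ctx_send, nng_ctx_recv) that the new ctx_ops back — several independent request/reply exchanges can be in flight on one REQ socket (error checks omitted for brevity):

    #include <nng/nng.h>

    static void
    one_exchange(nng_socket sock)
    {
        nng_ctx  ctx;
        nng_aio *aio;
        nng_msg *msg;

        nng_ctx_open(&ctx, sock);        // independent request state machine
        nng_aio_alloc(&aio, NULL, NULL); // no callback: wait synchronously

        nng_msg_alloc(&msg, 0);
        nng_msg_append(msg, "ping", 4);
        nng_aio_set_msg(aio, msg);
        nng_ctx_send(ctx, aio);          // queued on this context's sendq
        nng_aio_wait(aio);

        nng_ctx_recv(ctx, aio);          // reply matched by request ID
        nng_aio_wait(aio);
        if (nng_aio_result(aio) == 0) {
            nng_msg_free(nng_aio_get_msg(aio));
        }
        nng_aio_free(aio);
        nng_ctx_close(ctx);
    }
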
diff --git a/src/protocol/reqrep0/CMakeLists.txt b/src/protocol/reqrep0/CMakeLists.txt index 7b04aa2d..071c28f1 100644 --- a/src/protocol/reqrep0/CMakeLists.txt +++ b/src/protocol/reqrep0/CMakeLists.txt @@ -11,12 +11,14 @@ # Req/Rep protocol if (NNG_PROTO_REQ0) - set(REQ0_SOURCES protocol/reqrep0/req.c protocol/reqrep0/req.h) + set(REQ0_SOURCES protocol/reqrep0/req.c protocol/reqrep0/xreq.c + protocol/reqrep0/req.h) set(REQ0_HEADERS protocol/reqrep0/req.h) endif() if (NNG_PROTO_REP0) - set(REP0_SOURCES protocol/reqrep0/rep.c protocol/reqrep0/rep.h) + set(REP0_SOURCES protocol/reqrep0/rep.c protocol/reqrep0/xrep.c + protocol/reqrep0/rep.h) set(REP0_HEADERS protocol/reqrep0/rep.h) endif() diff --git a/src/protocol/reqrep0/rep.c b/src/protocol/reqrep0/rep.c index 78a1f2ee..e512c18b 100644 --- a/src/protocol/reqrep0/rep.c +++ b/src/protocol/reqrep0/rep.c @@ -28,48 +28,219 @@ typedef struct rep0_pipe rep0_pipe; typedef struct rep0_sock rep0_sock; +typedef struct rep0_ctx rep0_ctx; -static void rep0_sock_getq_cb(void *); -static void rep0_pipe_getq_cb(void *); -static void rep0_pipe_putq_cb(void *); static void rep0_pipe_send_cb(void *); static void rep0_pipe_recv_cb(void *); static void rep0_pipe_fini(void *); +struct rep0_ctx { + rep0_sock * sock; + bool closed; + char * btrace; + size_t btrace_len; + size_t btrace_size; + int ttl; + uint32_t pipe_id; + nni_aio * saio; // send aio + nni_aio * raio; // recv aio + nni_list_node sqnode; + nni_list_node rqnode; +}; + // rep0_sock is our per-socket protocol private structure. struct rep0_sock { - nni_msgq * uwq; - nni_msgq * urq; - nni_mtx lk; - int ttl; - nni_idhash *pipes; - char * btrace; - size_t btrace_len; - nni_aio * aio_getq; + nni_mtx lk; + int ttl; + nni_idhash * pipes; + nni_list recvpipes; // list of pipes with data to receive + nni_list recvq; + bool closed; + rep0_ctx * ctx; + nni_pollable *recvable; + nni_pollable *sendable; }; // rep0_pipe is our per-pipe protocol private structure. struct rep0_pipe { - nni_pipe * pipe; - rep0_sock *rep; - nni_msgq * sendq; - nni_aio * aio_getq; - nni_aio * aio_send; - nni_aio * aio_recv; - nni_aio * aio_putq; + nni_pipe * pipe; + rep0_sock * rep; + uint32_t id; + nni_aio * aio_send; + nni_aio * aio_recv; + nni_list_node rnode; // receivable list linkage + nni_list sendq; // contexts waiting to send + bool busy; }; static void +rep0_ctx_close(void *arg) +{ + rep0_ctx * ctx = arg; + rep0_sock *s = ctx->sock; + nni_aio * aio; + + nni_mtx_lock(&s->lk); + ctx->closed = true; + if ((aio = ctx->saio) != NULL) { + nni_msg *msg; + nni_list_node_remove(&ctx->sqnode); + msg = nni_aio_get_msg(aio); + nni_msg_free(msg); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + if ((aio = ctx->raio) != NULL) { + nni_list_remove(&s->recvq, ctx); + ctx->raio = NULL; + nni_aio_finish_error(aio, NNG_ECLOSED); + } + nni_mtx_unlock(&s->lk); +} + +static void +rep0_ctx_fini(void *arg) +{ + rep0_ctx *ctx = arg; + + rep0_ctx_close(ctx); + nni_free(ctx->btrace, ctx->btrace_size); + NNI_FREE_STRUCT(ctx); +} + +static int +rep0_ctx_init(void **ctxp, void *sarg) +{ + rep0_sock *s = sarg; + rep0_ctx * ctx; + + if ((ctx = NNI_ALLOC_STRUCT(ctx)) == NULL) { + return (NNG_ENOMEM); + } + + // this is 1kB, which covers the worst case. 
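+	// (A sizing sketch for the line below: the header holds at most
+	// 255 4-byte words of backtrace — hop pipe IDs plus the
+	// terminating request ID, bounded by the 1..255 NNG_OPT_MAXTTL
+	// clamp later in this file — so 256 * 4 = 1024 bytes covers it.)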
+ ctx->btrace_size = 256 * sizeof(uint32_t); + if ((ctx->btrace = nni_alloc(ctx->btrace_size)) == NULL) { + NNI_FREE_STRUCT(ctx); + return (NNG_ENOMEM); + } + NNI_LIST_NODE_INIT(&ctx->sqnode); + NNI_LIST_NODE_INIT(&ctx->rqnode); + ctx->btrace_len = 0; + ctx->sock = s; + ctx->pipe_id = 0; + *ctxp = ctx; + + return (0); +} + +static void +rep0_ctx_cancel_send(nni_aio *aio, int rv) +{ + rep0_ctx * ctx = nni_aio_get_prov_data(aio); + rep0_sock *s = ctx->sock; + + nni_mtx_lock(&s->lk); + if (ctx->saio != aio) { + nni_mtx_unlock(&s->lk); + return; + } + nni_list_node_remove(&ctx->sqnode); + ctx->saio = NULL; + nni_mtx_unlock(&s->lk); + + nni_msg_header_clear(nni_aio_get_msg(aio)); // reset the headers + nni_aio_finish_error(aio, rv); +} + +static void +rep0_ctx_send(void *arg, nni_aio *aio) +{ + rep0_ctx * ctx = arg; + rep0_sock *s = ctx->sock; + rep0_pipe *p; + nni_msg * msg; + int rv; + size_t len; + uint32_t p_id; // pipe id + + msg = nni_aio_get_msg(aio); + nni_msg_header_clear(msg); + + nni_mtx_lock(&s->lk); + len = ctx->btrace_len; + p_id = ctx->pipe_id; + + // Assert "completion" of the previous req request. This ensures + // exactly one send for one receive ordering. + ctx->btrace_len = 0; + ctx->pipe_id = 0; + + if (ctx == s->ctx) { + // No matter how this goes, we will no longer be able + // to send on the socket (root context). That's because + // we will have finished (successfully or otherwise) the + // reply for the single request we got. + nni_pollable_clear(s->sendable); + } + + if (nni_aio_start(aio, rep0_ctx_cancel_send, ctx) != 0) { + nni_mtx_unlock(&s->lk); + return; + } + if (ctx->closed) { + nni_mtx_unlock(&s->lk); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if (len == 0) { + nni_mtx_unlock(&s->lk); + nni_aio_finish_error(aio, NNG_ESTATE); + return; + } + if ((rv = nni_msg_header_append(msg, ctx->btrace, len)) != 0) { + nni_mtx_unlock(&s->lk); + nni_aio_finish_error(aio, rv); + return; + } + + if ((rv = nni_idhash_find(s->pipes, p_id, (void **) &p)) != 0) { + // Pipe is gone. Make this look like a good send to avoid + // disrupting the state machine. We don't care if the peer + // lost interest in our reply. 
+ nni_aio_set_msg(aio, NULL); + nni_mtx_unlock(&s->lk); + nni_aio_finish(aio, 0, nni_msg_len(msg)); + nni_msg_free(msg); + return; + } + if (p->busy) { + ctx->saio = aio; + nni_list_append(&p->sendq, ctx); + nni_mtx_unlock(&s->lk); + return; + } + + p->busy = true; + len = nni_msg_len(msg); + nni_aio_set_msg(aio, NULL); + nni_aio_set_msg(p->aio_send, msg); + nni_pipe_send(p->pipe, p->aio_send); + nni_mtx_unlock(&s->lk); + + nni_aio_finish(aio, 0, len); +} + +static void rep0_sock_fini(void *arg) { rep0_sock *s = arg; - nni_aio_stop(s->aio_getq); - nni_aio_fini(s->aio_getq); nni_idhash_fini(s->pipes); - if (s->btrace != NULL) { - nni_free(s->btrace, s->btrace_len); + if (s->ctx != NULL) { + rep0_ctx_fini(s->ctx); } + nni_pollable_free(s->sendable); + nni_pollable_free(s->recvable); nni_mtx_fini(&s->lk); NNI_FREE_STRUCT(s); } @@ -80,21 +251,34 @@ rep0_sock_init(void **sp, nni_sock *sock) rep0_sock *s; int rv; + NNI_ARG_UNUSED(sock); + if ((s = NNI_ALLOC_STRUCT(s)) == NULL) { return (NNG_ENOMEM); } nni_mtx_init(&s->lk); - if (((rv = nni_idhash_init(&s->pipes)) != 0) || - ((rv = nni_aio_init(&s->aio_getq, rep0_sock_getq_cb, s)) != 0)) { + if ((rv = nni_idhash_init(&s->pipes)) != 0) { + rep0_sock_fini(s); + return (rv); + } + + NNI_LIST_INIT(&s->recvq, rep0_ctx, rqnode); + NNI_LIST_INIT(&s->recvpipes, rep0_pipe, rnode); + + s->ttl = 8; + + if ((rv = rep0_ctx_init((void **) &s->ctx, s)) != 0) { rep0_sock_fini(s); return (rv); } - s->ttl = 8; // Per RFC - s->btrace = NULL; - s->btrace_len = 0; - s->uwq = nni_sock_sendq(sock); - s->urq = nni_sock_recvq(sock); + // We start off without being either readable or pollable. + // Readability comes when there is something on the socket. + if (((rv = nni_pollable_alloc(&s->sendable)) != 0) || + ((rv = nni_pollable_alloc(&s->recvable)) != 0)) { + rep0_sock_fini(s); + return (rv); + } *sp = s; @@ -104,9 +288,7 @@ rep0_sock_init(void **sp, nni_sock *sock) static void rep0_sock_open(void *arg) { - rep0_sock *s = arg; - - nni_msgq_aio_get(s->uwq, s->aio_getq); + NNI_ARG_UNUSED(arg); } static void @@ -114,7 +296,7 @@ rep0_sock_close(void *arg) { rep0_sock *s = arg; - nni_aio_abort(s->aio_getq, NNG_ECLOSED); + rep0_ctx_close(s->ctx); } static void @@ -122,11 +304,8 @@ rep0_pipe_fini(void *arg) { rep0_pipe *p = arg; - nni_aio_fini(p->aio_getq); nni_aio_fini(p->aio_send); nni_aio_fini(p->aio_recv); - nni_aio_fini(p->aio_putq); - nni_msgq_fini(p->sendq); NNI_FREE_STRUCT(p); } @@ -139,15 +318,15 @@ rep0_pipe_init(void **pp, nni_pipe *pipe, void *s) if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { return (NNG_ENOMEM); } - if (((rv = nni_msgq_init(&p->sendq, 2)) != 0) || - ((rv = nni_aio_init(&p->aio_getq, rep0_pipe_getq_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_send, rep0_pipe_send_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_recv, rep0_pipe_recv_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_putq, rep0_pipe_putq_cb, p)) != 0)) { + if (((rv = nni_aio_init(&p->aio_send, rep0_pipe_send_cb, p)) != 0) || + ((rv = nni_aio_init(&p->aio_recv, rep0_pipe_recv_cb, p)) != 0)) { rep0_pipe_fini(p); return (rv); } + NNI_LIST_INIT(&p->sendq, rep0_ctx, sqnode); + + p->id = nni_pipe_id(pipe); p->pipe = pipe; p->rep = s; *pp = p; @@ -164,8 +343,8 @@ rep0_pipe_start(void *arg) if ((rv = nni_idhash_insert(s->pipes, nni_pipe_id(p->pipe), p)) != 0) { return (rv); } - - nni_msgq_aio_get(p->sendq, p->aio_getq); + // By definition, we have not received a request yet on this pipe, + // so it cannot cause us to become sendable. 
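	// (How s->sendable surfaces to applications — a sketch assuming
	// the public nng_getopt_int accessor and the NNG_OPT_SENDFD option
	// added later in this file; the fd polls readable only while a
	// request is awaiting its reply on the root context:
	//
	//     int fd;
	//     nng_getopt_int(sock, NNG_OPT_SENDFD, &fd);
	//     // fd readable => a reply sent now will not block
	// )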
nni_pipe_recv(p->pipe, p->aio_recv); return (0); } @@ -175,94 +354,136 @@ rep0_pipe_stop(void *arg) { rep0_pipe *p = arg; rep0_sock *s = p->rep; + rep0_ctx * ctx; + + nni_mtx_lock(&s->lk); + while ((ctx = nni_list_first(&p->sendq)) != NULL) { + nni_aio *aio; + nni_msg *msg; + // Pipe was closed. To avoid pushing an error back to the + // entire socket, we pretend we completed this successfully. + nni_list_remove(&p->sendq, ctx); + aio = ctx->saio; + ctx->saio = NULL; + msg = nni_aio_get_msg(aio); + nni_aio_finish(aio, 0, nni_msg_len(msg)); + nni_msg_free(msg); + } + if (p->id == s->ctx->pipe_id) { + // We "can" send. (Well, not really, but we will happily + // accept a message and discard it.) + nni_pollable_raise(s->sendable); + } + nni_mtx_unlock(&s->lk); - nni_msgq_close(p->sendq); - nni_aio_stop(p->aio_getq); nni_aio_stop(p->aio_send); nni_aio_stop(p->aio_recv); - nni_aio_stop(p->aio_putq); nni_idhash_remove(s->pipes, nni_pipe_id(p->pipe)); } static void -rep0_sock_getq_cb(void *arg) +rep0_pipe_send_cb(void *arg) { - rep0_sock *s = arg; - nni_msgq * uwq = s->uwq; + rep0_pipe *p = arg; + rep0_sock *s = p->rep; + rep0_ctx * ctx; + nni_aio * aio; nni_msg * msg; - uint32_t id; - rep0_pipe *p; - int rv; - - // This watches for messages from the upper write queue, - // extracts the destination pipe, and forwards it to the appropriate - // destination pipe via a separate queue. This prevents a single bad - // or slow pipe from gumming up the works for the entire socket. + size_t len; - if (nni_aio_result(s->aio_getq) != 0) { - // Closed socket? + nni_mtx_lock(&s->lk); + p->busy = false; + if (nni_aio_result(p->aio_send) != 0) { + nni_msg_free(nni_aio_get_msg(p->aio_send)); + nni_aio_set_msg(p->aio_send, NULL); + nni_pipe_stop(p->pipe); + nni_mtx_unlock(&s->lk); return; } - - msg = nni_aio_get_msg(s->aio_getq); - nni_aio_set_msg(s->aio_getq, NULL); - - // We yank the outgoing pipe id from the header - if (nni_msg_header_len(msg) < 4) { - nni_msg_free(msg); - - // Look for another message on the upper write queue. - nni_msgq_aio_get(uwq, s->aio_getq); + if ((ctx = nni_list_first(&p->sendq)) == NULL) { + // Nothing else to send. + if (p->id == s->ctx->pipe_id) { + // Mark us ready for the other side to send! + nni_pollable_raise(s->sendable); + } + nni_mtx_unlock(&s->lk); return; } - id = nni_msg_header_trim_u32(msg); + nni_list_remove(&p->sendq, ctx); + aio = ctx->saio; + ctx->saio = NULL; + p->busy = true; + msg = nni_aio_get_msg(aio); + len = nni_msg_len(msg); + nni_aio_set_msg(aio, NULL); + nni_aio_set_msg(p->aio_send, msg); + nni_pipe_send(p->pipe, p->aio_send); - // Look for the pipe, and attempt to put the message there - // (nonblocking) if we can. If we can't for any reason, then we - // free the message. - // XXX: LOCKING?!?! - if ((rv = nni_idhash_find(s->pipes, id, (void **) &p)) == 0) { - rv = nni_msgq_tryput(p->sendq, msg); - } - if (rv != 0) { - nni_msg_free(msg); - } + nni_mtx_unlock(&s->lk); - // Now look for another message on the upper write queue. 
- nni_msgq_aio_get(uwq, s->aio_getq); + nni_aio_finish_synch(aio, 0, len); } static void -rep0_pipe_getq_cb(void *arg) +rep0_cancel_recv(nni_aio *aio, int rv) { - rep0_pipe *p = arg; + rep0_ctx * ctx = nni_aio_get_prov_data(aio); + rep0_sock *s = ctx->sock; - if (nni_aio_result(p->aio_getq) != 0) { - nni_pipe_stop(p->pipe); - return; + nni_mtx_lock(&s->lk); + if (ctx->raio == aio) { + nni_list_remove(&s->recvq, ctx); + ctx->raio = NULL; + nni_aio_finish_error(aio, rv); } - - nni_aio_set_msg(p->aio_send, nni_aio_get_msg(p->aio_getq)); - nni_aio_set_msg(p->aio_getq, NULL); - - nni_pipe_send(p->pipe, p->aio_send); + nni_mtx_unlock(&s->lk); } static void -rep0_pipe_send_cb(void *arg) +rep0_ctx_recv(void *arg, nni_aio *aio) { - rep0_pipe *p = arg; + rep0_ctx * ctx = arg; + rep0_sock *s = ctx->sock; + rep0_pipe *p; + size_t len; + nni_msg * msg; - if (nni_aio_result(p->aio_send) != 0) { - nni_msg_free(nni_aio_get_msg(p->aio_send)); - nni_aio_set_msg(p->aio_send, NULL); - nni_pipe_stop(p->pipe); + nni_mtx_lock(&s->lk); + if (nni_aio_start(aio, rep0_cancel_recv, ctx) != 0) { + nni_mtx_unlock(&s->lk); + return; + } + if (ctx->closed) { + nni_mtx_unlock(&s->lk); + nni_aio_finish_error(aio, NNG_ECLOSED); return; } + if ((p = nni_list_first(&s->recvpipes)) == NULL) { + nni_pollable_clear(s->recvable); + ctx->raio = aio; + nni_list_append(&s->recvq, ctx); + nni_mtx_unlock(&s->lk); + return; + } + msg = nni_aio_get_msg(p->aio_recv); + nni_aio_set_msg(p->aio_recv, NULL); + nni_list_remove(&s->recvpipes, p); + if (nni_list_empty(&s->recvpipes)) { + nni_pollable_clear(s->recvable); + } + nni_pipe_recv(p->pipe, p->aio_recv); + + len = nni_msg_header_len(msg); + memcpy(ctx->btrace, nni_msg_header(msg), len); + ctx->btrace_len = len; + ctx->pipe_id = nni_pipe_id(p->pipe); + nni_mtx_unlock(&s->lk); - nni_msgq_aio_get(p->sendq, p->aio_getq); + nni_msg_header_clear(msg); + nni_aio_set_msg(aio, msg); + nni_aio_finish(aio, 0, nni_msg_len(msg)); } static void @@ -270,9 +491,12 @@ rep0_pipe_recv_cb(void *arg) { rep0_pipe *p = arg; rep0_sock *s = p->rep; + rep0_ctx * ctx; nni_msg * msg; int rv; uint8_t * body; + nni_aio * aio; + size_t len; int hops; if (nni_aio_result(p->aio_recv) != 0) { @@ -281,28 +505,22 @@ rep0_pipe_recv_cb(void *arg) } msg = nni_aio_get_msg(p->aio_recv); - nni_aio_set_msg(p->aio_recv, NULL); - nni_msg_set_pipe(msg, nni_pipe_id(p->pipe)); - - // Store the pipe id in the header, first thing. - rv = nni_msg_header_append_u32(msg, nni_pipe_id(p->pipe)); - if (rv != 0) { - // Failure here causes us to drop the message. - goto drop; - } + nni_msg_set_pipe(msg, p->id); // Move backtrace from body to header hops = 1; for (;;) { int end = 0; - if (hops >= s->ttl) { + + if (hops > s->ttl) { // This isn't malformed, but it has gone through // too many hops. Do not disconnect, because we // can legitimately receive messages with too many // hops from devices, etc. goto drop; } + hops++; if (nni_msg_len(msg) < 4) { // Peer is speaking garbage. Kick it. nni_msg_free(msg); @@ -313,10 +531,7 @@ rep0_pipe_recv_cb(void *arg) end = (body[0] & 0x80) ? 1 : 0; rv = nni_msg_header_append(msg, body, 4); if (rv != 0) { - // Presumably this is due to out of memory. - // We could just discard and try again, but we - // just toss the connection for now. Given the - // out of memory situation, this is not unreasonable. + // Out of memory, so drop it. goto drop; } nni_msg_trim(msg, 4); @@ -325,28 +540,46 @@ rep0_pipe_recv_cb(void *arg) } } - // Go ahead and send it up. 
- nni_aio_set_msg(p->aio_putq, msg); - nni_msgq_aio_put(s->urq, p->aio_putq); - return; + len = nni_msg_header_len(msg); -drop: - nni_msg_free(msg); + nni_mtx_lock(&s->lk); + + if ((ctx = nni_list_first(&s->recvq)) == NULL) { + // No one waiting to receive yet, holding pattern. + nni_list_append(&s->recvpipes, p); + nni_pollable_raise(s->recvable); + nni_mtx_unlock(&s->lk); + return; + } + + nni_list_remove(&s->recvq, ctx); + aio = ctx->raio; + ctx->raio = NULL; + nni_aio_set_msg(aio, msg); + nni_aio_set_msg(p->aio_recv, NULL); + + // schedule another receive nni_pipe_recv(p->pipe, p->aio_recv); -} -static void -rep0_pipe_putq_cb(void *arg) -{ - rep0_pipe *p = arg; + ctx->btrace_len = len; + memcpy(ctx->btrace, nni_msg_header(msg), len); + nni_msg_header_clear(msg); + ctx->pipe_id = p->id; - if (nni_aio_result(p->aio_putq) != 0) { - nni_msg_free(nni_aio_get_msg(p->aio_putq)); - nni_aio_set_msg(p->aio_putq, NULL); - nni_pipe_stop(p->pipe); - return; + // If we got a request on a pipe that wasn't busy, we should mark + // it sendable. (The sendable flag is not set when there is no + // request needing a reply.) + if ((ctx == s->ctx) && (!p->busy)) { + nni_pollable_raise(s->sendable); } + nni_mtx_unlock(&s->lk); + + nni_aio_finish_synch(aio, 0, nni_msg_len(msg)); + return; + +drop: + nni_msg_free(msg); nni_pipe_recv(p->pipe, p->aio_recv); } @@ -354,6 +587,7 @@ static int rep0_sock_setopt_maxttl(void *arg, const void *buf, size_t sz, int typ) { rep0_sock *s = arg; + return (nni_copyin_int(&s->ttl, buf, sz, 1, 255, typ)); } @@ -361,75 +595,43 @@ static int rep0_sock_getopt_maxttl(void *arg, void *buf, size_t *szp, int typ) { rep0_sock *s = arg; + return (nni_copyout_int(s->ttl, buf, szp, typ)); } -static nni_msg * -rep0_sock_filter(void *arg, nni_msg *msg) +static int +rep0_sock_getopt_sendfd(void *arg, void *buf, size_t *szp, int typ) { rep0_sock *s = arg; - char * header; - size_t len; - - nni_mtx_lock(&s->lk); + int rv; + int fd; - len = nni_msg_header_len(msg); - header = nni_msg_header(msg); - if (s->btrace != NULL) { - nni_free(s->btrace, s->btrace_len); - s->btrace = NULL; - s->btrace_len = 0; - } - if ((s->btrace = nni_alloc(len)) == NULL) { - nni_msg_free(msg); - return (NULL); + if ((rv = nni_pollable_getfd(s->sendable, &fd)) != 0) { + return (rv); } - s->btrace_len = len; - memcpy(s->btrace, header, len); - nni_msg_header_clear(msg); - nni_mtx_unlock(&s->lk); - return (msg); + return (nni_copyout_int(fd, buf, szp, typ)); } -static void -rep0_sock_send_raw(void *arg, nni_aio *aio) +static int +rep0_sock_getopt_recvfd(void *arg, void *buf, size_t *szp, int typ) { rep0_sock *s = arg; - nni_msgq_aio_put(s->uwq, aio); + int rv; + int fd; + + if ((rv = nni_pollable_getfd(s->recvable, &fd)) != 0) { + return (rv); + } + + return (nni_copyout_int(fd, buf, szp, typ)); } static void rep0_sock_send(void *arg, nni_aio *aio) { rep0_sock *s = arg; - int rv; - nni_msg * msg; - - nni_mtx_lock(&s->lk); - if (s->btrace == NULL) { - nni_mtx_unlock(&s->lk); - nni_aio_finish_error(aio, NNG_ESTATE); - return; - } - - msg = nni_aio_get_msg(aio); - - // drop anything else in the header... (it should already be - // empty, but there can be stale backtrace info there.) 
- nni_msg_header_clear(msg); - - if ((rv = nni_msg_header_append(msg, s->btrace, s->btrace_len)) != 0) { - nni_mtx_unlock(&s->lk); - nni_aio_finish_error(aio, rv); - return; - } - - nni_free(s->btrace, s->btrace_len); - s->btrace = NULL; - s->btrace_len = 0; - nni_mtx_unlock(&s->lk); - nni_msgq_aio_put(s->uwq, aio); + rep0_ctx_send(s->ctx, aio); } static void @@ -437,7 +639,7 @@ rep0_sock_recv(void *arg, nni_aio *aio) { rep0_sock *s = arg; - nni_msgq_aio_get(s->urq, aio); + rep0_ctx_recv(s->ctx, aio); } // This is the global protocol structure -- our linkage to the core. @@ -449,6 +651,21 @@ static nni_proto_pipe_ops rep0_pipe_ops = { .pipe_stop = rep0_pipe_stop, }; +static nni_proto_ctx_option rep0_ctx_options[] = { + // terminate list + { + .co_name = NULL, + }, +}; + +static nni_proto_ctx_ops rep0_ctx_ops = { + .ctx_init = rep0_ctx_init, + .ctx_fini = rep0_ctx_fini, + .ctx_send = rep0_ctx_send, + .ctx_recv = rep0_ctx_recv, + .ctx_options = rep0_ctx_options, +}; + static nni_proto_sock_option rep0_sock_options[] = { { .pso_name = NNG_OPT_MAXTTL, @@ -456,6 +673,18 @@ static nni_proto_sock_option rep0_sock_options[] = { .pso_getopt = rep0_sock_getopt_maxttl, .pso_setopt = rep0_sock_setopt_maxttl, }, + { + .pso_name = NNG_OPT_RECVFD, + .pso_type = NNI_TYPE_INT32, + .pso_getopt = rep0_sock_getopt_recvfd, + .pso_setopt = NULL, + }, + { + .pso_name = NNG_OPT_SENDFD, + .pso_type = NNI_TYPE_INT32, + .pso_getopt = rep0_sock_getopt_sendfd, + .pso_setopt = NULL, + }, // terminate list { .pso_name = NULL, @@ -468,38 +697,18 @@ static nni_proto_sock_ops rep0_sock_ops = { .sock_open = rep0_sock_open, .sock_close = rep0_sock_close, .sock_options = rep0_sock_options, - .sock_filter = rep0_sock_filter, .sock_send = rep0_sock_send, .sock_recv = rep0_sock_recv, }; -static nni_proto_sock_ops rep0_sock_ops_raw = { - .sock_init = rep0_sock_init, - .sock_fini = rep0_sock_fini, - .sock_open = rep0_sock_open, - .sock_close = rep0_sock_close, - .sock_options = rep0_sock_options, - .sock_filter = NULL, // No filtering for raw mode - .sock_send = rep0_sock_send_raw, - .sock_recv = rep0_sock_recv, -}; - static nni_proto rep0_proto = { .proto_version = NNI_PROTOCOL_VERSION, .proto_self = { NNI_PROTO_REP_V0, "rep" }, .proto_peer = { NNI_PROTO_REQ_V0, "req" }, - .proto_flags = NNI_PROTO_FLAG_SNDRCV, + .proto_flags = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_NOMSGQ, .proto_sock_ops = &rep0_sock_ops, .proto_pipe_ops = &rep0_pipe_ops, -}; - -static nni_proto rep0_proto_raw = { - .proto_version = NNI_PROTOCOL_VERSION, - .proto_self = { NNI_PROTO_REP_V0, "rep" }, - .proto_peer = { NNI_PROTO_REQ_V0, "req" }, - .proto_flags = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_RAW, - .proto_sock_ops = &rep0_sock_ops_raw, - .proto_pipe_ops = &rep0_pipe_ops, + .proto_ctx_ops = &rep0_ctx_ops, }; int @@ -507,9 +716,3 @@ nng_rep0_open(nng_socket *sidp) { return (nni_proto_open(sidp, &rep0_proto)); } - -int -nng_rep0_open_raw(nng_socket *sidp) -{ - return (nni_proto_open(sidp, &rep0_proto_raw)); -} diff --git a/src/protocol/reqrep0/req.c b/src/protocol/reqrep0/req.c index 4d35ca1f..8149ce08 100644 --- a/src/protocol/reqrep0/req.c +++ b/src/protocol/reqrep0/req.c @@ -28,34 +28,53 @@ typedef struct req0_pipe req0_pipe; typedef struct req0_sock req0_sock; +typedef struct req0_ctx req0_ctx; -static void req0_resend(req0_sock *); -static void req0_timeout(void *); +static void req0_run_sendq(req0_sock *, nni_list *); +static void req0_ctx_reset(req0_ctx *); +static void req0_ctx_timeout(void *); static void req0_pipe_fini(void *); +static void 
req0_ctx_fini(void *); +static int req0_ctx_init(void **, void *); + +// A req0_ctx is a "context" for the request. It uses most of the +// socket, but keeps track of its own outstanding replays, the request ID, +// and so forth. +struct req0_ctx { + nni_list_node snode; + nni_list_node sqnode; // node on the sendq + nni_list_node pnode; // node on the pipe list + uint32_t reqid; + req0_sock * sock; + nni_aio * raio; // user aio waiting to receive - only one! + nni_aio * saio; + nng_msg * reqmsg; // request message + size_t reqlen; + nng_msg * repmsg; // reply message + nni_timer_node timer; + nni_duration retry; +}; // A req0_sock is our per-socket protocol private structure. struct req0_sock { - nni_msgq * uwq; - nni_msgq * urq; + nni_sock * nsock; nni_duration retry; - nni_time resend; - bool raw; - bool wantw; bool closed; int ttl; - nni_msg * reqmsg; - req0_pipe *pendpipe; + req0_ctx *ctx; // base socket ctx nni_list readypipes; nni_list busypipes; + nni_list ctxs; - nni_timer_node timer; + nni_list sendq; // contexts waiting to send. + nni_idhash * reqids; // contexts by request ID + nni_pollable *recvable; + nni_pollable *sendable; - uint32_t nextid; // next id - uint8_t reqid[4]; // outstanding request ID (big endian) - nni_mtx mtx; - nni_cv cv; + nni_mtx mtx; + nni_cv cv; }; // A req0_pipe is our per-pipe protocol private structure. @@ -63,60 +82,61 @@ struct req0_pipe { nni_pipe * pipe; req0_sock * req; nni_list_node node; - nni_aio * aio_getq; // raw mode only - nni_aio * aio_sendraw; // raw mode only - nni_aio * aio_sendcooked; // cooked mode only + nni_list ctxs; // ctxs with pending traffic + nni_aio * aio_send; nni_aio * aio_recv; - nni_aio * aio_putq; - nni_mtx mtx; }; -static void req0_getq_cb(void *); -static void req0_sendraw_cb(void *); -static void req0_sendcooked_cb(void *); +static void req0_sock_fini(void *); +static void req0_send_cb(void *); static void req0_recv_cb(void *); -static void req0_putq_cb(void *); static int -req0_sock_init_impl(void **sp, nni_sock *sock, bool raw) +req0_sock_init(void **sp, nni_sock *sock) { req0_sock *s; + int rv; if ((s = NNI_ALLOC_STRUCT(s)) == NULL) { return (NNG_ENOMEM); } + if ((rv = nni_idhash_init(&s->reqids)) != 0) { + NNI_FREE_STRUCT(s); + return (rv); + } + + // Request IDs are 32 bits, with the high order bit set. + // We start at a random point, to minimize likelihood of + // accidental collision across restarts. + nni_idhash_set_limits( + s->reqids, 0x80000000u, 0xffffffffu, nni_random() | 0x80000000u); + nni_mtx_init(&s->mtx); nni_cv_init(&s->cv, &s->mtx); NNI_LIST_INIT(&s->readypipes, req0_pipe, node); NNI_LIST_INIT(&s->busypipes, req0_pipe, node); - nni_timer_init(&s->timer, req0_timeout, s); + NNI_LIST_INIT(&s->sendq, req0_ctx, sqnode); + NNI_LIST_INIT(&s->ctxs, req0_ctx, snode); // this is "semi random" start for request IDs. 
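	// (Wire-format sketch: a request header is a chain of 4-byte hop
	// pipe IDs ending in the 4-byte request ID. Pipe IDs keep the high
	// bit clear, so the first word with bit 31 set marks the end of
	// the backtrace — hence the 0x80000000 floor in the idhash limits
	// above:
	//
	//     [pipe id][pipe id]...[request id | 0x80000000]
	// )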
- s->nextid = nni_random(); - s->retry = NNI_SECOND * 60; - s->reqmsg = NULL; - s->raw = raw; - s->wantw = false; - s->resend = NNI_TIME_ZERO; - s->ttl = 8; - s->uwq = nni_sock_sendq(sock); - s->urq = nni_sock_recvq(sock); - *sp = s; + s->nsock = sock; + s->retry = NNI_SECOND * 60; - return (0); -} + if ((rv = req0_ctx_init((void **) &s->ctx, s)) != 0) { + req0_sock_fini(s); + return (rv); + } + if (((rv = nni_pollable_alloc(&s->sendable)) != 0) || + ((rv = nni_pollable_alloc(&s->recvable)) != 0)) { + req0_sock_fini(s); + return (rv); + } -static int -req0_sock_init(void **sp, nni_sock *sock) -{ - return (req0_sock_init_impl(sp, sock, false)); -} + s->ttl = 8; + *sp = s; -static int -req0_sock_init_raw(void **sp, nni_sock *sock) -{ - return (req0_sock_init_impl(sp, sock, true)); + return (0); } static void @@ -129,12 +149,18 @@ static void req0_sock_close(void *arg) { req0_sock *s = arg; + req0_ctx * ctx; nni_mtx_lock(&s->mtx); s->closed = true; + NNI_LIST_FOREACH (&s->ctxs, ctx) { + if (ctx->raio != NULL) { + nni_aio_finish_error(ctx->raio, NNG_ECLOSED); + ctx->raio = NULL; + req0_ctx_reset(ctx); + } + } nni_mtx_unlock(&s->mtx); - - nni_timer_cancel(&s->timer); } static void @@ -147,10 +173,13 @@ req0_sock_fini(void *arg) (!nni_list_empty(&s->busypipes))) { nni_cv_wait(&s->cv); } - if (s->reqmsg != NULL) { - nni_msg_free(s->reqmsg); - } nni_mtx_unlock(&s->mtx); + if (s->ctx) { + req0_ctx_fini(s->ctx); + } + nni_pollable_free(s->recvable); + nni_pollable_free(s->sendable); + nni_idhash_fini(s->reqids); nni_cv_fini(&s->cv); nni_mtx_fini(&s->mtx); NNI_FREE_STRUCT(s); @@ -161,12 +190,8 @@ req0_pipe_fini(void *arg) { req0_pipe *p = arg; - nni_aio_fini(p->aio_getq); - nni_aio_fini(p->aio_putq); nni_aio_fini(p->aio_recv); - nni_aio_fini(p->aio_sendcooked); - nni_aio_fini(p->aio_sendraw); - nni_mtx_fini(&p->mtx); + nni_aio_fini(p->aio_send); NNI_FREE_STRUCT(p); } @@ -179,18 +204,14 @@ req0_pipe_init(void **pp, nni_pipe *pipe, void *s) if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { return (NNG_ENOMEM); } - nni_mtx_init(&p->mtx); - if (((rv = nni_aio_init(&p->aio_getq, req0_getq_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_putq, req0_putq_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_recv, req0_recv_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_sendraw, req0_sendraw_cb, p)) != 0) || - ((rv = nni_aio_init(&p->aio_sendcooked, req0_sendcooked_cb, p)) != - 0)) { + if (((rv = nni_aio_init(&p->aio_recv, req0_recv_cb, p)) != 0) || + ((rv = nni_aio_init(&p->aio_send, req0_send_cb, p)) != 0)) { req0_pipe_fini(p); return (rv); } NNI_LIST_NODE_INIT(&p->node); + NNI_LIST_INIT(&p->ctxs, req0_ctx, pnode); p->pipe = pipe; p->req = s; *pp = p; @@ -213,14 +234,10 @@ req0_pipe_start(void *arg) return (NNG_ECLOSED); } nni_list_append(&s->readypipes, p); - // If sock was waiting for somewhere to send data, go ahead and - // send it to this pipe. - if (s->wantw) { - req0_resend(s); - } + nni_pollable_raise(s->sendable); + req0_run_sendq(s, NULL); nni_mtx_unlock(&s->mtx); - nni_msgq_aio_get(s->uwq, p->aio_getq); nni_pipe_recv(p->pipe, p->aio_recv); return (0); } @@ -230,12 +247,10 @@ req0_pipe_stop(void *arg) { req0_pipe *p = arg; req0_sock *s = p->req; + req0_ctx * ctx; - nni_aio_stop(p->aio_getq); - nni_aio_stop(p->aio_putq); nni_aio_stop(p->aio_recv); - nni_aio_stop(p->aio_sendcooked); - nni_aio_stop(p->aio_sendraw); + nni_aio_stop(p->aio_send); // At this point there should not be any further AIOs running. // Further, any completion tasks have completed. 
@@ -249,126 +264,54 @@ req0_pipe_stop(void *arg) nni_cv_wake(&s->cv); } } - - if ((p == s->pendpipe) && (s->reqmsg != NULL)) { - // removing the pipe we sent the last request on... - // schedule immediate resend. - s->pendpipe = NULL; - s->resend = NNI_TIME_ZERO; - s->wantw = true; - req0_resend(s); + if (nni_list_empty(&s->readypipes)) { + nni_pollable_clear(s->sendable); } - nni_mtx_unlock(&s->mtx); -} -static int -req0_sock_setopt_maxttl(void *arg, const void *buf, size_t sz, int typ) -{ - req0_sock *s = arg; - return (nni_copyin_int(&s->ttl, buf, sz, 1, 255, typ)); -} - -static int -req0_sock_getopt_maxttl(void *arg, void *buf, size_t *szp, int typ) -{ - req0_sock *s = arg; - return (nni_copyout_int(s->ttl, buf, szp, typ)); -} - -static int -req0_sock_setopt_resendtime(void *arg, const void *buf, size_t sz, int typ) -{ - req0_sock *s = arg; - return (nni_copyin_ms(&s->retry, buf, sz, typ)); -} - -static int -req0_sock_getopt_resendtime(void *arg, void *buf, size_t *szp, int typ) -{ - req0_sock *s = arg; - return (nni_copyout_ms(s->retry, buf, szp, typ)); -} - -// Raw and cooked mode differ in the way they send messages out. -// -// For cooked mdes, we have a getq callback on the upper write queue, which -// when it finds a message, cancels any current processing, and saves a copy -// of the message, and then tries to "resend" the message, looking for a -// suitable available outgoing pipe. If no suitable pipe is available, -// a flag is set, so that as soon as such a pipe is available we trigger -// a resend attempt. We also trigger the attempt on either timeout, or if -// the underlying pipe we chose disconnects. -// -// For raw mode we can just let the pipes "contend" via getq to get a -// message from the upper write queue. The msgqueue implementation -// actually provides ordering, so load will be spread automatically. -// (NB: We may have to revise this in the future if we want to provide some -// kind of priority.) - -static void -req0_getq_cb(void *arg) -{ - req0_pipe *p = arg; - - // We should be in RAW mode. Cooked mode traffic bypasses - // the upper write queue entirely, and should never end up here. - // If the mode changes, we may briefly deliver a message, but - // that's ok (there's an inherent race anyway). (One minor - // exception: we wind up here in error state when the uwq is closed.) - - if (nni_aio_result(p->aio_getq) != 0) { - nni_pipe_stop(p->pipe); - return; + while ((ctx = nni_list_first(&p->ctxs)) != NULL) { + nni_list_remove(&p->ctxs, ctx); + // Reset the timer on this so it expires immediately. + // This is actually easier than canceling the timer and + // running the sendq separately. (In particular, it avoids + // a potential deadlock on cancelling the timer.) + nni_timer_schedule(&ctx->timer, NNI_TIME_ZERO); } - - nni_aio_set_msg(p->aio_sendraw, nni_aio_get_msg(p->aio_getq)); - nni_aio_set_msg(p->aio_getq, NULL); - - // Send the message, but use the raw mode aio. - nni_pipe_send(p->pipe, p->aio_sendraw); + nni_mtx_unlock(&s->mtx); } -static void -req0_sendraw_cb(void *arg) -{ - req0_pipe *p = arg; - - if (nni_aio_result(p->aio_sendraw) != 0) { - nni_msg_free(nni_aio_get_msg(p->aio_sendraw)); - nni_aio_set_msg(p->aio_sendraw, NULL); - nni_pipe_stop(p->pipe); - return; - } - - // Sent a message so we just need to look for another one. - nni_msgq_aio_get(p->req->uwq, p->aio_getq); -} +// For cooked mode, we use a context, and send out that way. This +// completely bypasses the upper write queue. 
Each context keeps one +// message pending; these are "scheduled" via the sendq. The sendq +// is ordered, so FIFO ordering between contexts is provided for. static void -req0_sendcooked_cb(void *arg) +req0_send_cb(void *arg) { req0_pipe *p = arg; req0_sock *s = p->req; + nni_aio * aio; + nni_list aios; - if (nni_aio_result(p->aio_sendcooked) != 0) { + nni_aio_list_init(&aios); + if (nni_aio_result(p->aio_send) != 0) { // We failed to send... clean up and deal with it. - // We leave ourselves on the busy list for now, which - // means no new asynchronous traffic can occur here. - nni_msg_free(nni_aio_get_msg(p->aio_sendcooked)); - nni_aio_set_msg(p->aio_sendcooked, NULL); + nni_msg_free(nni_aio_get_msg(p->aio_send)); + nni_aio_set_msg(p->aio_send, NULL); nni_pipe_stop(p->pipe); return; } - // Cooked mode. We completed a cooked send, so we need to - // reinsert ourselves in the ready list, and possibly schedule - // a resend. + // We completed a cooked send, so we need to reinsert ourselves + // in the ready list, and re-run the sendq. nni_mtx_lock(&s->mtx); if (nni_list_active(&s->busypipes, p)) { nni_list_remove(&s->busypipes, p); nni_list_append(&s->readypipes, p); - req0_resend(s); + if (nni_list_empty(&s->sendq)) { + nni_pollable_raise(s->sendable); + } + req0_run_sendq(s, &aios); } else { // We wind up here if stop was called from the reader // side while we were waiting to be scheduled to run for the @@ -377,29 +320,22 @@ req0_sendcooked_cb(void *arg) nni_pipe_stop(p->pipe); } nni_mtx_unlock(&s->mtx); -} -static void -req0_putq_cb(void *arg) -{ - req0_pipe *p = arg; - - if (nni_aio_result(p->aio_putq) != 0) { - nni_msg_free(nni_aio_get_msg(p->aio_putq)); - nni_aio_set_msg(p->aio_putq, NULL); - nni_pipe_stop(p->pipe); - return; + while ((aio = nni_list_first(&aios)) != NULL) { + nni_list_remove(&aios, aio); + nni_aio_finish_synch(aio, 0, 0); } - nni_aio_set_msg(p->aio_putq, NULL); - - nni_pipe_recv(p->pipe, p->aio_recv); } static void req0_recv_cb(void *arg) { req0_pipe *p = arg; + req0_sock *s = p->req; + req0_ctx * ctx; nni_msg * msg; + nni_aio * aio; + uint32_t id; if (nni_aio_result(p->aio_recv) != 0) { nni_pipe_stop(p->pipe); @@ -410,22 +346,58 @@ req0_recv_cb(void *arg) nni_aio_set_msg(p->aio_recv, NULL); nni_msg_set_pipe(msg, nni_pipe_id(p->pipe)); - // We yank 4 bytes of body, and move them to the header. + // We yank 4 bytes from front of body, and move them to the header. if (nni_msg_len(msg) < 4) { // Malformed message. goto malformed; } - if (nni_msg_header_append(msg, nni_msg_body(msg), 4) != 0) { + id = nni_msg_trim_u32(msg); + if (nni_msg_header_append_u32(msg, id) != 0) { // Arguably we could just discard and carry on. But // dropping the connection is probably more helpful since // it lets the other side see that a problem occurred. // Plus it gives us a chance to reclaim some memory. goto malformed; } - (void) nni_msg_trim(msg, 4); // Cannot fail - nni_aio_set_msg(p->aio_putq, msg); - nni_msgq_aio_put(p->req->urq, p->aio_putq); + // Schedule another receive while we are processing this. + nni_mtx_lock(&s->mtx); + nni_pipe_recv(p->pipe, p->aio_recv); + + // Look for a context to receive it. + if ((nni_idhash_find(s->reqids, id, (void **) &ctx) != 0) || + (ctx->saio != NULL) || (ctx->repmsg != NULL)) { + nni_mtx_unlock(&s->mtx); + // No waiting context, we have not sent the request out to + // the wire yet, or context already has a reply ready. + // Discard the message. + nni_msg_free(msg); + return; + } + + // We have our match, so we can remove this. 
+ nni_list_node_remove(&ctx->sqnode); + nni_idhash_remove(s->reqids, id); + ctx->reqid = 0; + if (ctx->reqmsg != NULL) { + nni_msg_free(ctx->reqmsg); + ctx->reqmsg = NULL; + } + + // Is there an aio waiting for us? + if ((aio = ctx->raio) != NULL) { + ctx->raio = NULL; + nni_mtx_unlock(&s->mtx); + nni_aio_set_msg(aio, msg); + nni_aio_finish_synch(aio, 0, nni_msg_len(msg)); + } else { + // No AIO, so stash msg. Receive will pick it up later. + ctx->repmsg = msg; + if (ctx == s->ctx) { + nni_pollable_raise(s->recvable); + } + nni_mtx_unlock(&s->mtx); + } return; malformed: @@ -434,191 +406,417 @@ malformed: } static void -req0_timeout(void *arg) +req0_ctx_timeout(void *arg) { - req0_sock *s = arg; + req0_ctx * ctx = arg; + req0_sock *s = ctx->sock; nni_mtx_lock(&s->mtx); - if (s->reqmsg != NULL) { - s->wantw = true; - req0_resend(s); + if ((ctx->reqmsg != NULL) && (!s->closed)) { + if (!nni_list_node_active(&ctx->sqnode)) { + nni_list_append(&s->sendq, ctx); + } + req0_run_sendq(s, NULL); } nni_mtx_unlock(&s->mtx); } -static void -req0_resend(req0_sock *s) +static int +req0_ctx_init(void **cpp, void *sarg) { - req0_pipe *p; - nni_msg * msg; + req0_sock *s = sarg; + req0_ctx * ctx; - // Note: This routine should be called with the socket lock held. - // Also, this should only be called while handling cooked mode - // requests. - if ((msg = s->reqmsg) == NULL) { - return; + if ((ctx = NNI_ALLOC_STRUCT(ctx)) == NULL) { + return (NNG_ENOMEM); } - if (s->closed) { - s->reqmsg = NULL; - nni_msg_free(msg); + nni_timer_init(&ctx->timer, req0_ctx_timeout, ctx); + + nni_mtx_lock(&s->mtx); + ctx->sock = s; + ctx->raio = NULL; + ctx->retry = s->retry; + nni_list_append(&s->ctxs, ctx); + nni_mtx_unlock(&s->mtx); + + *cpp = ctx; + return (0); +} + +static void +req0_ctx_fini(void *arg) +{ + req0_ctx * ctx = arg; + req0_sock *s = ctx->sock; + nni_aio * aio; + + nni_mtx_lock(&s->mtx); + if ((aio = ctx->raio) != NULL) { + ctx->raio = NULL; + nni_aio_finish_error(aio, NNG_ECLOSED); } + if ((aio = ctx->saio) != NULL) { + ctx->saio = NULL; + nni_aio_set_msg(aio, ctx->reqmsg); + ctx->reqmsg = NULL; + nni_aio_finish_error(aio, NNG_ECLOSED); + } + req0_ctx_reset(ctx); + nni_list_remove(&s->ctxs, ctx); + nni_mtx_unlock(&s->mtx); - if (s->wantw) { - s->wantw = false; + nni_timer_cancel(&ctx->timer); + nni_timer_fini(&ctx->timer); - if (nni_msg_dup(&msg, s->reqmsg) != 0) { - // Failed to alloc message, reschedule it. Also, - // mark that we have a message we want to resend, - // in case something comes available. - s->wantw = true; - nni_timer_schedule(&s->timer, nni_clock() + s->retry); - return; - } + NNI_FREE_STRUCT(ctx); +} + +static int +req0_ctx_setopt_resendtime(void *arg, const void *buf, size_t sz, int typ) +{ + req0_ctx *ctx = arg; + return (nni_copyin_ms(&ctx->retry, buf, sz, typ)); +} + +static int +req0_ctx_getopt_resendtime(void *arg, void *buf, size_t *szp, int typ) +{ + req0_ctx *ctx = arg; + return (nni_copyout_ms(ctx->retry, buf, szp, typ)); +} + +static void +req0_run_sendq(req0_sock *s, nni_list *aiolist) +{ + req0_ctx *ctx; + nni_aio * aio; + + // Note: This routine should be called with the socket lock held. + while ((ctx = nni_list_first(&s->sendq)) != NULL) { + nni_msg * msg; + req0_pipe *p; - // Now we iterate across all possible outpipes, until - // one accepts it. if ((p = nni_list_first(&s->readypipes)) == NULL) { - // No pipes ready to process us. Note that we have - // something to send, and schedule it. 
- nni_msg_free(msg); - s->wantw = true; return; } + // We have a place to send it, so do the send. + // If a sending error occurs that causes the message to + // be dropped, we rely on the resend timer to pick it up. + // We also notify the completion callback if this is the + // first send attempt. + nni_list_remove(&s->sendq, ctx); + + // Schedule a resubmit timer. We only do this if we got + // a pipe to send to. Otherwise, we should get handled + // the next time that the sendq is run. + nni_timer_schedule(&ctx->timer, nni_clock() + ctx->retry); + + if (nni_msg_dup(&msg, ctx->reqmsg) != 0) { + // Oops. Well, keep trying each context; maybe + // one of them will get lucky. + continue; + } + + // Put us on the pipe list of active contexts. + // This gives the pipe a chance to kick a resubmit + // if the pipe is removed. + nni_list_node_remove(&ctx->pnode); + nni_list_append(&p->ctxs, ctx); + nni_list_remove(&s->readypipes, p); nni_list_append(&s->busypipes, p); - s->pendpipe = p; - s->resend = nni_clock() + s->retry; - nni_aio_set_msg(p->aio_sendcooked, msg); + if ((aio = ctx->saio) != NULL) { + ctx->saio = NULL; + nni_aio_bump_count(aio, ctx->reqlen); + // If the list was passed in, we want to do a + // synchronous completion later. + if (aiolist != NULL) { + nni_list_append(aiolist, aio); + } else { + nni_aio_finish(aio, 0, 0); + } + if (ctx == s->ctx) { + if (nni_list_empty(&s->readypipes)) { + nni_pollable_clear(s->sendable); + } else { + nni_pollable_raise(s->sendable); + } + } + } + + nni_aio_set_msg(p->aio_send, msg); + nni_pipe_send(p->pipe, p->aio_send); + } +} - // Note that because we were ready rather than busy, we - // should not have any I/O oustanding and hence the aio - // object will be available for our use. - nni_pipe_send(p->pipe, p->aio_sendcooked); - nni_timer_schedule(&s->timer, s->resend); +void +req0_ctx_reset(req0_ctx *ctx) +{ + req0_sock *s = ctx->sock; + // Call with sock lock held! + + // We cannot safely "wait" using nni_timer_cancel, but this removes + // any scheduled timer activation. If the timeout is already running + // concurrently, it will still run. It should do nothing, because + // we toss the reqmsg. There is still a very narrow race if the + // timeout fires, but doesn't actually start running before we + // both finish this function, *and* manage to reschedule another + // request. The consequence of that occurring is that the request + // will be emitted on the wire twice. This is not actually tragic. + nni_timer_schedule(&ctx->timer, NNI_TIME_NEVER); + + nni_list_node_remove(&ctx->pnode); + nni_list_node_remove(&ctx->sqnode); + if (ctx->reqid != 0) { + nni_idhash_remove(s->reqids, ctx->reqid); + ctx->reqid = 0; + } + if (ctx->reqmsg != NULL) { + nni_msg_free(ctx->reqmsg); + ctx->reqmsg = NULL; + } + if (ctx->repmsg != NULL) { + nni_msg_free(ctx->repmsg); + ctx->repmsg = NULL; } } static void -req0_sock_send(void *arg, nni_aio *aio) +req0_ctx_cancel_recv(nni_aio *aio, int rv) { - req0_sock *s = arg; - uint32_t id; - size_t len; - nni_msg * msg; - int rv; + req0_ctx * ctx = nni_aio_get_prov_data(aio); + req0_sock *s = ctx->sock; nni_mtx_lock(&s->mtx); + if (ctx->raio != aio) { + // already completed, ignore this. + nni_mtx_unlock(&s->mtx); + return; + } + ctx->raio = NULL; - msg = nni_aio_get_msg(aio); - len = nni_msg_len(msg); + // Cancellation of a pending receive is treated as aborting the + // entire state machine. 
This allows us to preserve the semantic of + // exactly one receive operation per send operation, and should + // be the least surprising for users. The main consequence is that + // if a receive operation is completed (in error or otherwise), the + // user must submit a new send operation to restart the state machine. + req0_ctx_reset(ctx); - // In cooked mode, because we need to manage our own resend logic, - // we bypass the upper writeq entirely. + nni_aio_finish_error(aio, rv); + nni_mtx_unlock(&s->mtx); +} - // Generate a new request ID. We always set the high - // order bit so that the peer can locate the end of the - // backtrace. (Pipe IDs have the high order bit clear.) - id = (s->nextid++) | 0x80000000u; - // Request ID is in big endian format. - NNI_PUT32(s->reqid, id); +static void +req0_ctx_recv(void *arg, nni_aio *aio) +{ + req0_ctx * ctx = arg; + req0_sock *s = ctx->sock; + nni_msg * msg; - if ((rv = nni_msg_header_append(msg, s->reqid, 4)) != 0) { + nni_mtx_lock(&s->mtx); + if (nni_aio_start(aio, req0_ctx_cancel_recv, ctx) != 0) { nni_mtx_unlock(&s->mtx); - nni_aio_finish_error(aio, rv); return; } - - // If another message is there, this cancels it. - if (s->reqmsg != NULL) { - nni_msg_free(s->reqmsg); - s->reqmsg = NULL; + if (s->closed) { + nni_mtx_unlock(&s->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if ((ctx->raio != NULL) || + ((ctx->reqmsg == NULL) && (ctx->repmsg == NULL))) { + // We have already got a pending receive or have not + // tried to send a request yet. + // Either of these violate our basic state assumptions. + nni_mtx_unlock(&s->mtx); + nni_aio_finish_error(aio, NNG_ESTATE); + return; } - nni_aio_set_msg(aio, NULL); - - // Make a duplicate message... for retries. - s->reqmsg = msg; - // Schedule for immediate send - s->resend = NNI_TIME_ZERO; - s->wantw = true; + if ((msg = ctx->repmsg) == NULL) { + ctx->raio = aio; + nni_mtx_unlock(&s->mtx); + return; + } - req0_resend(s); + ctx->repmsg = NULL; + // We have got a message to pass up, yay! + nni_aio_set_msg(aio, msg); + if (ctx == s->ctx) { + nni_pollable_clear(s->recvable); + } nni_mtx_unlock(&s->mtx); - - nni_aio_finish(aio, 0, len); + nni_aio_finish(aio, 0, nni_msg_len(msg)); } static void -req0_sock_send_raw(void *arg, nni_aio *aio) +req0_ctx_cancel_send(nni_aio *aio, int rv) { - req0_sock *s = arg; + req0_ctx * ctx = nni_aio_get_prov_data(aio); + req0_sock *s = ctx->sock; + + nni_mtx_lock(&s->mtx); + if (ctx->saio != aio) { + // already completed, ignore this. + nni_mtx_unlock(&s->mtx); + return; + } - nni_msgq_aio_put(s->uwq, aio); + // There should not be a pending reply, because we canceled + // it while we were waiting. + NNI_ASSERT(ctx->raio == NULL); + ctx->saio = NULL; + // Restore the message back to the aio. + nni_aio_set_msg(aio, ctx->reqmsg); + nni_msg_header_clear(ctx->reqmsg); + ctx->reqmsg = NULL; + + // Cancellation of a pending receive is treated as aborting the + // entire state machine. This allows us to preserve the semantic of + // exactly one receive operation per send operation, and should + // be the least surprising for users. The main consequence is that + // if a receive operation is completed (in error or otherwise), the + // user must submit a new send operation to restart the state machine. 
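	// (The visible consequence, sketched with the public nng_ctx
	// calls: a canceled or timed-out operation resets the context, so
	// the caller restarts with a fresh send rather than retrying the
	// receive:
	//
	//     nng_ctx_recv(ctx, aio);
	//     nng_aio_wait(aio);
	//     if (nng_aio_result(aio) == NNG_ETIMEDOUT) {
	//         // context was reset; send a new request to continue
	//     }
	// )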
+ req0_ctx_reset(ctx); + + nni_aio_finish_error(aio, rv); + nni_mtx_unlock(&s->mtx); } -static nni_msg * -req0_sock_filter(void *arg, nni_msg *msg) +static void +req0_ctx_send(void *arg, nni_aio *aio) { - req0_sock *s = arg; - nni_msg * rmsg; + req0_ctx * ctx = arg; + req0_sock *s = ctx->sock; + nng_msg * msg = nni_aio_get_msg(aio); + uint64_t id; + int rv; nni_mtx_lock(&s->mtx); - - if (nni_msg_header_len(msg) < 4) { + // Even though we always complete synchronously, this guards against + // restarting a request that was stopped. + if (nni_aio_start(aio, req0_ctx_cancel_send, ctx) != 0) { nni_mtx_unlock(&s->mtx); - nni_msg_free(msg); - return (NULL); + return; + } + // Sending a new requst cancels the old one, including any + // outstanding reply. + if (ctx->raio != NULL) { + nni_aio_finish_error(ctx->raio, NNG_ECANCELED); + ctx->raio = NULL; + } + if (ctx->saio != NULL) { + nni_aio_set_msg(ctx->saio, ctx->reqmsg); + nni_msg_header_clear(ctx->reqmsg); + ctx->reqmsg = NULL; + nni_aio_finish_error(ctx->saio, NNG_ECANCELED); + ctx->saio = NULL; + nni_list_remove(&s->sendq, ctx); } - if ((rmsg = s->reqmsg) == NULL) { - // We had no outstanding request. (Perhaps canceled, - // or duplicate response.) + // This resets the entire state machine. + req0_ctx_reset(ctx); + + // Insert us on the per ID hash list, so that receives can find us. + if ((rv = nni_idhash_alloc(s->reqids, &id, ctx)) != 0) { nni_mtx_unlock(&s->mtx); - nni_msg_free(msg); - return (NULL); + nni_aio_finish_error(aio, rv); + return; } - - if (memcmp(nni_msg_header(msg), s->reqid, 4) != 0) { - // Wrong request id. + ctx->reqid = (uint32_t) id; + if ((rv = nni_msg_header_append_u32(msg, ctx->reqid)) != 0) { + nni_idhash_remove(s->reqids, id); nni_mtx_unlock(&s->mtx); - nni_msg_free(msg); - return (NULL); + nni_aio_finish_error(aio, rv); + return; } + ctx->reqlen = nni_msg_len(msg); + ctx->reqmsg = msg; + ctx->saio = aio; + nni_aio_set_msg(aio, NULL); - s->reqmsg = NULL; - s->pendpipe = NULL; - nni_mtx_unlock(&s->mtx); + // Stick us on the sendq list. 
+ nni_list_append(&s->sendq, ctx); - nni_msg_free(rmsg); + req0_run_sendq(s, NULL); + nni_mtx_unlock(&s->mtx); +} - return (msg); +static void +req0_sock_send(void *arg, nni_aio *aio) +{ + req0_sock *s = arg; + req0_ctx_send(s->ctx, aio); } static void req0_sock_recv(void *arg, nni_aio *aio) { req0_sock *s = arg; + req0_ctx_recv(s->ctx, aio); +} - nni_mtx_lock(&s->mtx); - if (s->reqmsg == NULL) { - nni_mtx_unlock(&s->mtx); - nni_aio_finish_error(aio, NNG_ESTATE); - return; +static int +req0_sock_setopt_maxttl(void *arg, const void *buf, size_t sz, int typ) +{ + req0_sock *s = arg; + return (nni_copyin_int(&s->ttl, buf, sz, 1, 255, typ)); +} + +static int +req0_sock_getopt_maxttl(void *arg, void *buf, size_t *szp, int typ) +{ + req0_sock *s = arg; + return (nni_copyout_int(s->ttl, buf, szp, typ)); +} + +static int +req0_sock_setopt_resendtime(void *arg, const void *buf, size_t sz, int typ) +{ + req0_sock *s = arg; + int rv; + rv = req0_ctx_setopt_resendtime(s->ctx, buf, sz, typ); + s->retry = s->ctx->retry; + return (rv); +} + +static int +req0_sock_getopt_resendtime(void *arg, void *buf, size_t *szp, int typ) +{ + req0_sock *s = arg; + return (req0_ctx_getopt_resendtime(s->ctx, buf, szp, typ)); +} + +static int +req0_sock_getopt_sendfd(void *arg, void *buf, size_t *szp, int typ) +{ + req0_sock *s = arg; + int rv; + int fd; + + if ((rv = nni_pollable_getfd(s->sendable, &fd)) != 0) { + return (rv); } - nni_mtx_unlock(&s->mtx); - nni_msgq_aio_get(s->urq, aio); + return (nni_copyout_int(fd, buf, szp, typ)); } -static void -req0_sock_recv_raw(void *arg, nni_aio *aio) +static int +req0_sock_getopt_recvfd(void *arg, void *buf, size_t *szp, int typ) { req0_sock *s = arg; + int rv; + int fd; - nni_msgq_aio_get(s->urq, aio); + if ((rv = nni_pollable_getfd(s->recvable, &fd)) != 0) { + return (rv); + } + + return (nni_copyout_int(fd, buf, szp, typ)); } static nni_proto_pipe_ops req0_pipe_ops = { @@ -628,6 +826,26 @@ static nni_proto_pipe_ops req0_pipe_ops = { .pipe_stop = req0_pipe_stop, }; +static nni_proto_ctx_option req0_ctx_options[] = { + { + .co_name = NNG_OPT_REQ_RESENDTIME, + .co_type = NNI_TYPE_DURATION, + .co_getopt = req0_ctx_getopt_resendtime, + .co_setopt = req0_ctx_setopt_resendtime, + }, + { + .co_name = NULL, + }, +}; + +static nni_proto_ctx_ops req0_ctx_ops = { + .ctx_init = req0_ctx_init, + .ctx_fini = req0_ctx_fini, + .ctx_recv = req0_ctx_recv, + .ctx_send = req0_ctx_send, + .ctx_options = req0_ctx_options, +}; + static nni_proto_sock_option req0_sock_options[] = { { .pso_name = NNG_OPT_MAXTTL, @@ -641,6 +859,18 @@ static nni_proto_sock_option req0_sock_options[] = { .pso_getopt = req0_sock_getopt_resendtime, .pso_setopt = req0_sock_setopt_resendtime, }, + { + .pso_name = NNG_OPT_RECVFD, + .pso_type = NNI_TYPE_INT32, + .pso_getopt = req0_sock_getopt_recvfd, + .pso_setopt = NULL, + }, + { + .pso_name = NNG_OPT_SENDFD, + .pso_type = NNI_TYPE_INT32, + .pso_getopt = req0_sock_getopt_sendfd, + .pso_setopt = NULL, + }, // terminate list { .pso_name = NULL, @@ -653,7 +883,6 @@ static nni_proto_sock_ops req0_sock_ops = { .sock_open = req0_sock_open, .sock_close = req0_sock_close, .sock_options = req0_sock_options, - .sock_filter = req0_sock_filter, .sock_send = req0_sock_send, .sock_recv = req0_sock_recv, }; @@ -662,9 +891,10 @@ static nni_proto req0_proto = { .proto_version = NNI_PROTOCOL_VERSION, .proto_self = { NNI_PROTO_REQ_V0, "req" }, .proto_peer = { NNI_PROTO_REP_V0, "rep" }, - .proto_flags = NNI_PROTO_FLAG_SNDRCV, + .proto_flags = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_NOMSGQ, 
.proto_sock_ops = &req0_sock_ops, .proto_pipe_ops = &req0_pipe_ops, + .proto_ctx_ops = &req0_ctx_ops, }; int @@ -672,28 +902,3 @@ nng_req0_open(nng_socket *sidp) { return (nni_proto_open(sidp, &req0_proto)); } - -static nni_proto_sock_ops req0_sock_ops_raw = { - .sock_init = req0_sock_init_raw, - .sock_fini = req0_sock_fini, - .sock_open = req0_sock_open, - .sock_close = req0_sock_close, - .sock_options = req0_sock_options, - .sock_send = req0_sock_send_raw, - .sock_recv = req0_sock_recv_raw, -}; - -static nni_proto req0_proto_raw = { - .proto_version = NNI_PROTOCOL_VERSION, - .proto_self = { NNI_PROTO_REQ_V0, "req" }, - .proto_peer = { NNI_PROTO_REP_V0, "rep" }, - .proto_flags = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_RAW, - .proto_sock_ops = &req0_sock_ops_raw, - .proto_pipe_ops = &req0_pipe_ops, -}; - -int -nng_req0_open_raw(nng_socket *sidp) -{ - return (nni_proto_open(sidp, &req0_proto_raw)); -}
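
Both sockets now expose NNG_OPT_SENDFD/NNG_OPT_RECVFD, backed by the pollables raised and cleared above. A usage sketch for integrating with an external event loop, assuming the public nng_getopt_int and nng_recvmsg calls:

    #include <poll.h>
    #include <nng/nng.h>

    static void
    wait_and_recv(nng_socket sock)
    {
        int           fd;
        nng_msg *     msg;
        struct pollfd pfd;

        nng_getopt_int(sock, NNG_OPT_RECVFD, &fd);
        pfd.fd     = fd;
        pfd.events = POLLIN;
        poll(&pfd, 1, -1); // readable: a reply is waiting
        if (nng_recvmsg(sock, &msg, NNG_FLAG_NONBLOCK) == 0) {
            nng_msg_free(msg);
        }
    }
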
\ No newline at end of file diff --git a/src/protocol/reqrep0/xrep.c b/src/protocol/reqrep0/xrep.c new file mode 100644 index 00000000..f7189453 --- /dev/null +++ b/src/protocol/reqrep0/xrep.c @@ -0,0 +1,434 @@ +// +// Copyright 2018 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <stdlib.h> +#include <string.h> + +#include "core/nng_impl.h" +#include "protocol/reqrep0/rep.h" + +// Response protocol in raw mode. The REP protocol is the "reply" side of a +// request-reply pair. This is useful for building RPC servers, for +// example. + +#ifndef NNI_PROTO_REQ_V0 +#define NNI_PROTO_REQ_V0 NNI_PROTO(3, 0) +#endif + +#ifndef NNI_PROTO_REP_V0 +#define NNI_PROTO_REP_V0 NNI_PROTO(3, 1) +#endif + +typedef struct xrep0_pipe xrep0_pipe; +typedef struct xrep0_sock xrep0_sock; + +static void xrep0_sock_getq_cb(void *); +static void xrep0_pipe_getq_cb(void *); +static void xrep0_pipe_putq_cb(void *); +static void xrep0_pipe_send_cb(void *); +static void xrep0_pipe_recv_cb(void *); +static void xrep0_pipe_fini(void *); + +// xrep0_sock is our per-socket protocol private structure. +struct xrep0_sock { + nni_msgq * uwq; + nni_msgq * urq; + nni_mtx lk; + int ttl; + nni_idhash *pipes; + nni_aio * aio_getq; +}; + +// xrep0_pipe is our per-pipe protocol private structure. +struct xrep0_pipe { + nni_pipe * pipe; + xrep0_sock *rep; + nni_msgq * sendq; + nni_aio * aio_getq; + nni_aio * aio_send; + nni_aio * aio_recv; + nni_aio * aio_putq; +}; + +static void +xrep0_sock_fini(void *arg) +{ + xrep0_sock *s = arg; + + nni_aio_stop(s->aio_getq); + nni_aio_fini(s->aio_getq); + nni_idhash_fini(s->pipes); + nni_mtx_fini(&s->lk); + NNI_FREE_STRUCT(s); +} + +static int +xrep0_sock_init(void **sp, nni_sock *sock) +{ + xrep0_sock *s; + int rv; + + if ((s = NNI_ALLOC_STRUCT(s)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&s->lk); + if (((rv = nni_idhash_init(&s->pipes)) != 0) || + ((rv = nni_aio_init(&s->aio_getq, xrep0_sock_getq_cb, s)) != 0)) { + xrep0_sock_fini(s); + return (rv); + } + + s->ttl = 8; // Per RFC + s->uwq = nni_sock_sendq(sock); + s->urq = nni_sock_recvq(sock); + + *sp = s; + + return (0); +} + +static void +xrep0_sock_open(void *arg) +{ + xrep0_sock *s = arg; + + // This starts us retrieving message from the upper write q. + nni_msgq_aio_get(s->uwq, s->aio_getq); +} + +static void +xrep0_sock_close(void *arg) +{ + xrep0_sock *s = arg; + + nni_aio_abort(s->aio_getq, NNG_ECLOSED); +} + +static void +xrep0_pipe_fini(void *arg) +{ + xrep0_pipe *p = arg; + + nni_aio_fini(p->aio_getq); + nni_aio_fini(p->aio_send); + nni_aio_fini(p->aio_recv); + nni_aio_fini(p->aio_putq); + nni_msgq_fini(p->sendq); + NNI_FREE_STRUCT(p); +} + +static int +xrep0_pipe_init(void **pp, nni_pipe *pipe, void *s) +{ + xrep0_pipe *p; + int rv; + + if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { + return (NNG_ENOMEM); + } + + // We want a pretty deep sendq on pipes. The rationale here is + // that the send rate will be mitigated by the receive rate. + // If a slow pipe (req pipe not reading its own responses!?) + // comes up, then we will start discarding its replies eventually, + // but it takes some time. 
It would be poor form for a peer to + // smash us with requests, but be unable to handle replies faster + // than we can forward them. If they do that, their replies get + // dropped. (From a DDoS perspective, it might be nice in the + // future if we had a way to exert backpressure to the send side -- + // essentially don't let peers send requests faster than they are + // willing to receive replies. Something to think about for the + // future.) + if (((rv = nni_msgq_init(&p->sendq, 64)) != 0) || + ((rv = nni_aio_init(&p->aio_getq, xrep0_pipe_getq_cb, p)) != 0) || + ((rv = nni_aio_init(&p->aio_send, xrep0_pipe_send_cb, p)) != 0) || + ((rv = nni_aio_init(&p->aio_recv, xrep0_pipe_recv_cb, p)) != 0) || + ((rv = nni_aio_init(&p->aio_putq, xrep0_pipe_putq_cb, p)) != 0)) { + xrep0_pipe_fini(p); + return (rv); + } + + p->pipe = pipe; + p->rep = s; + *pp = p; + return (0); +} + +static int +xrep0_pipe_start(void *arg) +{ + xrep0_pipe *p = arg; + xrep0_sock *s = p->rep; + int rv; + + if ((rv = nni_idhash_insert(s->pipes, nni_pipe_id(p->pipe), p)) != 0) { + return (rv); + } + + nni_msgq_aio_get(p->sendq, p->aio_getq); + nni_pipe_recv(p->pipe, p->aio_recv); + return (0); +} + +static void +xrep0_pipe_stop(void *arg) +{ + xrep0_pipe *p = arg; + xrep0_sock *s = p->rep; + + nni_msgq_close(p->sendq); + nni_aio_stop(p->aio_getq); + nni_aio_stop(p->aio_send); + nni_aio_stop(p->aio_recv); + nni_aio_stop(p->aio_putq); + + nni_idhash_remove(s->pipes, nni_pipe_id(p->pipe)); +} + +static void +xrep0_sock_getq_cb(void *arg) +{ + xrep0_sock *s = arg; + nni_msgq * uwq = s->uwq; + nni_msg * msg; + uint32_t id; + xrep0_pipe *p; + int rv; + + // This watches for messages from the upper write queue, + // extracts the destination pipe, and forwards it to the appropriate + // destination pipe via a separate queue. This prevents a single bad + // or slow pipe from gumming up the works for the entire socket. + + if (nni_aio_result(s->aio_getq) != 0) { + // Closed socket? + return; + } + + msg = nni_aio_get_msg(s->aio_getq); + nni_aio_set_msg(s->aio_getq, NULL); + + // We yank the outgoing pipe id from the header + if (nni_msg_header_len(msg) < 4) { + nni_msg_free(msg); + + // Look for another message on the upper write queue. + nni_msgq_aio_get(uwq, s->aio_getq); + return; + } + + id = nni_msg_header_trim_u32(msg); + + // Look for the pipe, and attempt to put the message there + // (nonblocking) if we can. If we can't for any reason, then we + // free the message. + if ((rv = nni_idhash_find(s->pipes, id, (void **) &p)) == 0) { + rv = nni_msgq_tryput(p->sendq, msg); + } + if (rv != 0) { + nni_msg_free(msg); + } + + // Now look for another message on the upper write queue. 
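+	// (Raw-mode usage sketch, assuming the public nng_rep0_open_raw
+	// and message APIs: the application must echo the received header
+	// — the pipe ID prepended in the recv path plus the backtrace —
+	// unchanged on its reply, so the header_trim_u32 above can route
+	// it:
+	//
+	//     nng_msg *m;
+	//     nng_recvmsg(sock, &m, 0); // header: [pipe id][backtrace...]
+	//     nng_msg_clear(m);         // rewrite the body only
+	//     nng_msg_append(m, "pong", 4);
+	//     nng_sendmsg(sock, m, 0);  // header picks the outgoing pipe
+	// )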
+	if (nni_aio_result(s->aio_getq) != 0) {
+		// Closed socket?
+		return;
+	}
+
+	msg = nni_aio_get_msg(s->aio_getq);
+	nni_aio_set_msg(s->aio_getq, NULL);
+
+	// We yank the outgoing pipe id from the header.
+	if (nni_msg_header_len(msg) < 4) {
+		nni_msg_free(msg);
+
+		// Look for another message on the upper write queue.
+		nni_msgq_aio_get(uwq, s->aio_getq);
+		return;
+	}
+
+	id = nni_msg_header_trim_u32(msg);
+
+	// Look for the pipe, and attempt to put the message there
+	// (nonblocking) if we can. If we can't for any reason, then we
+	// free the message.
+	if ((rv = nni_idhash_find(s->pipes, id, (void **) &p)) == 0) {
+		rv = nni_msgq_tryput(p->sendq, msg);
+	}
+	if (rv != 0) {
+		nni_msg_free(msg);
+	}
+
+	// Now look for another message on the upper write queue.
+	nni_msgq_aio_get(uwq, s->aio_getq);
+}
+
+static void
+xrep0_pipe_getq_cb(void *arg)
+{
+	xrep0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_getq) != 0) {
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	nni_aio_set_msg(p->aio_send, nni_aio_get_msg(p->aio_getq));
+	nni_aio_set_msg(p->aio_getq, NULL);
+
+	nni_pipe_send(p->pipe, p->aio_send);
+}
+
+static void
+xrep0_pipe_send_cb(void *arg)
+{
+	xrep0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_send) != 0) {
+		nni_msg_free(nni_aio_get_msg(p->aio_send));
+		nni_aio_set_msg(p->aio_send, NULL);
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	nni_msgq_aio_get(p->sendq, p->aio_getq);
+}
+
+static void
+xrep0_pipe_recv_cb(void *arg)
+{
+	xrep0_pipe *p = arg;
+	xrep0_sock *s = p->rep;
+	nni_msg *   msg;
+	int         rv;
+	uint8_t *   body;
+	int         hops;
+
+	if (nni_aio_result(p->aio_recv) != 0) {
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	msg = nni_aio_get_msg(p->aio_recv);
+	nni_aio_set_msg(p->aio_recv, NULL);
+
+	nni_msg_set_pipe(msg, nni_pipe_id(p->pipe));
+
+	// Store the pipe id in the header, first thing.
+	rv = nni_msg_header_append_u32(msg, nni_pipe_id(p->pipe));
+	if (rv != 0) {
+		// Failure here causes us to drop the message.
+		goto drop;
+	}
+
+	// Move backtrace from body to header
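+	//
+	// The backtrace is a sequence of 4-byte chunks; the chunk with the
+	// high-order bit set (the request id) marks the end. For example
+	// (hypothetical values), a request that traversed one device could
+	// arrive with a body of
+	//
+	//     00 00 00 02 | 80 00 00 01 | <payload>
+	//
+	// after which the header holds our pipe id plus both chunks, and
+	// the body is just the payload handed up to the application.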
+	hops = 1;
+	for (;;) {
+		int end = 0;
+		if (hops > s->ttl) {
+			// This isn't malformed, but it has gone through
+			// too many hops. Do not disconnect, because we
+			// can legitimately receive messages with too many
+			// hops from devices, etc.
+			goto drop;
+		}
+		hops++;
+		if (nni_msg_len(msg) < 4) {
+			// Peer is speaking garbage. Kick it.
+			nni_msg_free(msg);
+			nni_pipe_stop(p->pipe);
+			return;
+		}
+		body = nni_msg_body(msg);
+		end  = (body[0] & 0x80) ? 1 : 0;
+		rv   = nni_msg_header_append(msg, body, 4);
+		if (rv != 0) {
+			// Most likely out of memory; drop the message but
+			// keep the pipe running to avoid breaking things.
+			goto drop;
+		}
+		nni_msg_trim(msg, 4);
+		if (end) {
+			break;
+		}
+	}
+
+	// Go ahead and send it up.
+	nni_aio_set_msg(p->aio_putq, msg);
+	nni_msgq_aio_put(s->urq, p->aio_putq);
+	return;
+
+drop:
+	nni_msg_free(msg);
+	nni_pipe_recv(p->pipe, p->aio_recv);
+}
+
+static void
+xrep0_pipe_putq_cb(void *arg)
+{
+	xrep0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_putq) != 0) {
+		nni_msg_free(nni_aio_get_msg(p->aio_putq));
+		nni_aio_set_msg(p->aio_putq, NULL);
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	nni_pipe_recv(p->pipe, p->aio_recv);
+}
+
+static int
+xrep0_sock_setopt_maxttl(void *arg, const void *buf, size_t sz, int typ)
+{
+	xrep0_sock *s = arg;
+	return (nni_copyin_int(&s->ttl, buf, sz, 1, 255, typ));
+}
+
+static int
+xrep0_sock_getopt_maxttl(void *arg, void *buf, size_t *szp, int typ)
+{
+	xrep0_sock *s = arg;
+	return (nni_copyout_int(s->ttl, buf, szp, typ));
+}
+
+static void
+xrep0_sock_send(void *arg, nni_aio *aio)
+{
+	xrep0_sock *s = arg;
+
+	nni_msgq_aio_put(s->uwq, aio);
+}
+
+static void
+xrep0_sock_recv(void *arg, nni_aio *aio)
+{
+	xrep0_sock *s = arg;
+
+	nni_msgq_aio_get(s->urq, aio);
+}
+
+// These structures are our linkage to the core. The only global
+// non-static symbol in this file should be nng_rep0_open_raw, below.
+static nni_proto_pipe_ops xrep0_pipe_ops = {
+	.pipe_init  = xrep0_pipe_init,
+	.pipe_fini  = xrep0_pipe_fini,
+	.pipe_start = xrep0_pipe_start,
+	.pipe_stop  = xrep0_pipe_stop,
+};
+
+static nni_proto_sock_option xrep0_sock_options[] = {
+	{
+	    .pso_name   = NNG_OPT_MAXTTL,
+	    .pso_type   = NNI_TYPE_INT32,
+	    .pso_getopt = xrep0_sock_getopt_maxttl,
+	    .pso_setopt = xrep0_sock_setopt_maxttl,
+	},
+	// terminate list
+	{
+	    .pso_name = NULL,
+	},
+};
+
+static nni_proto_sock_ops xrep0_sock_ops = {
+	.sock_init    = xrep0_sock_init,
+	.sock_fini    = xrep0_sock_fini,
+	.sock_open    = xrep0_sock_open,
+	.sock_close   = xrep0_sock_close,
+	.sock_options = xrep0_sock_options,
+	.sock_filter  = NULL, // No filtering for raw mode
+	.sock_send    = xrep0_sock_send,
+	.sock_recv    = xrep0_sock_recv,
+};
+
+static nni_proto xrep0_proto = {
+	.proto_version  = NNI_PROTOCOL_VERSION,
+	.proto_self     = { NNI_PROTO_REP_V0, "rep" },
+	.proto_peer     = { NNI_PROTO_REQ_V0, "req" },
+	.proto_flags    = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_RAW,
+	.proto_sock_ops = &xrep0_sock_ops,
+	.proto_pipe_ops = &xrep0_pipe_ops,
+};
+
+int
+nng_rep0_open_raw(nng_socket *sidp)
+{
+	return (nni_proto_open(sidp, &xrep0_proto));
+}
diff --git a/src/protocol/reqrep0/xreq.c b/src/protocol/reqrep0/xreq.c
new file mode 100644
index 00000000..5c1841b2
--- /dev/null
+++ b/src/protocol/reqrep0/xreq.c
@@ -0,0 +1,324 @@
+//
+// Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
+// Copyright 2018 Capitar IT Group BV <info@capitar.com>
+//
+// This software is supplied under the terms of the MIT License, a
+// copy of which should be located in the distribution where this
+// file was obtained (LICENSE.txt). A copy of the license may also be
+// found online at https://opensource.org/licenses/MIT.
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/nng_impl.h"
+#include "protocol/reqrep0/req.h"
+
+// Request protocol in raw mode. The REQ protocol is the "request" side of
+// a request-reply pair. This is useful for building RPC clients, for
+// example.
+
+#ifndef NNI_PROTO_REQ_V0
+#define NNI_PROTO_REQ_V0 NNI_PROTO(3, 0)
+#endif
+
+#ifndef NNI_PROTO_REP_V0
+#define NNI_PROTO_REP_V0 NNI_PROTO(3, 1)
+#endif
+
+typedef struct xreq0_pipe xreq0_pipe;
+typedef struct xreq0_sock xreq0_sock;
+
+// An xreq0_sock is our per-socket protocol private structure.
+struct xreq0_sock {
+	nni_msgq *uwq;
+	nni_msgq *urq;
+	int       ttl;
+};
+
+// An xreq0_pipe is our per-pipe protocol private structure.
+struct xreq0_pipe {
+	nni_pipe *  pipe;
+	xreq0_sock *req;
+	nni_aio *   aio_getq;
+	nni_aio *   aio_send;
+	nni_aio *   aio_recv;
+	nni_aio *   aio_putq;
+};
+
+static void xreq0_sock_fini(void *);
+static void xreq0_getq_cb(void *);
+static void xreq0_send_cb(void *);
+static void xreq0_recv_cb(void *);
+static void xreq0_putq_cb(void *);
+
+static int
+xreq0_sock_init(void **sp, nni_sock *sock)
+{
+	xreq0_sock *s;
+
+	if ((s = NNI_ALLOC_STRUCT(s)) == NULL) {
+		return (NNG_ENOMEM);
+	}
+
+	s->ttl = 8;
+	s->uwq = nni_sock_sendq(sock);
+	s->urq = nni_sock_recvq(sock);
+	*sp    = s;
+
+	return (0);
+}
+
+static void
+xreq0_sock_open(void *arg)
+{
+	NNI_ARG_UNUSED(arg);
+}
+
+static void
+xreq0_sock_close(void *arg)
+{
+	NNI_ARG_UNUSED(arg);
+}
+
+static void
+xreq0_sock_fini(void *arg)
+{
+	xreq0_sock *s = arg;
+
+	NNI_FREE_STRUCT(s);
+}
+
+static void
+xreq0_pipe_fini(void *arg)
+{
+	xreq0_pipe *p = arg;
+
+	nni_aio_fini(p->aio_getq);
+	nni_aio_fini(p->aio_putq);
+	nni_aio_fini(p->aio_recv);
+	nni_aio_fini(p->aio_send);
+	NNI_FREE_STRUCT(p);
+}
+
+static int
+xreq0_pipe_init(void **pp, nni_pipe *pipe, void *s)
+{
+	xreq0_pipe *p;
+	int         rv;
+
+	if ((p = NNI_ALLOC_STRUCT(p)) == NULL) {
+		return (NNG_ENOMEM);
+	}
+	if (((rv = nni_aio_init(&p->aio_getq, xreq0_getq_cb, p)) != 0) ||
+	    ((rv = nni_aio_init(&p->aio_putq, xreq0_putq_cb, p)) != 0) ||
+	    ((rv = nni_aio_init(&p->aio_recv, xreq0_recv_cb, p)) != 0) ||
+	    ((rv = nni_aio_init(&p->aio_send, xreq0_send_cb, p)) != 0)) {
+		xreq0_pipe_fini(p);
+		return (rv);
+	}
+
+	p->pipe = pipe;
+	p->req  = s;
+	*pp     = p;
+	return (0);
+}
+
+static int
+xreq0_pipe_start(void *arg)
+{
+	xreq0_pipe *p = arg;
+	xreq0_sock *s = p->req;
+
+	if (nni_pipe_peer(p->pipe) != NNI_PROTO_REP_V0) {
+		return (NNG_EPROTO);
+	}
+
+	nni_msgq_aio_get(s->uwq, p->aio_getq);
+	nni_pipe_recv(p->pipe, p->aio_recv);
+	return (0);
+}
+
+static void
+xreq0_pipe_stop(void *arg)
+{
+	xreq0_pipe *p = arg;
+
+	nni_aio_stop(p->aio_getq);
+	nni_aio_stop(p->aio_putq);
+	nni_aio_stop(p->aio_recv);
+	nni_aio_stop(p->aio_send);
+
+	// At this point there should not be any further AIOs running.
+	// Further, any completion tasks have completed.
+}
+
+// For raw mode we can just let the pipes "contend" via getq to get a
+// message from the upper write queue. The msgqueue implementation
+// actually provides ordering, so load will be spread automatically.
+// (NB: We may have to revise this in the future if we want to provide some
+// kind of priority.)
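+//
+// For example, with three idle peers connected, three back-to-back sends
+// will typically be handed to three different pipes, since each pipe
+// re-posts its getq only after its previous send completes.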
+static void
+xreq0_getq_cb(void *arg)
+{
+	xreq0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_getq) != 0) {
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	nni_aio_set_msg(p->aio_send, nni_aio_get_msg(p->aio_getq));
+	nni_aio_set_msg(p->aio_getq, NULL);
+
+	nni_pipe_send(p->pipe, p->aio_send);
+}
+
+static void
+xreq0_send_cb(void *arg)
+{
+	xreq0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_send) != 0) {
+		nni_msg_free(nni_aio_get_msg(p->aio_send));
+		nni_aio_set_msg(p->aio_send, NULL);
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	// We sent a message, so just look for another one.
+	nni_msgq_aio_get(p->req->uwq, p->aio_getq);
+}
+
+static void
+xreq0_putq_cb(void *arg)
+{
+	xreq0_pipe *p = arg;
+
+	if (nni_aio_result(p->aio_putq) != 0) {
+		nni_msg_free(nni_aio_get_msg(p->aio_putq));
+		nni_aio_set_msg(p->aio_putq, NULL);
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+	nni_aio_set_msg(p->aio_putq, NULL);
+
+	nni_pipe_recv(p->pipe, p->aio_recv);
+}
+
+static void
+xreq0_recv_cb(void *arg)
+{
+	xreq0_pipe *p    = arg;
+	xreq0_sock *sock = p->req;
+	nni_msg *   msg;
+	uint32_t    id;
+
+	if (nni_aio_result(p->aio_recv) != 0) {
+		nni_pipe_stop(p->pipe);
+		return;
+	}
+
+	msg = nni_aio_get_msg(p->aio_recv);
+	nni_aio_set_msg(p->aio_recv, NULL);
+	nni_msg_set_pipe(msg, nni_pipe_id(p->pipe));
+
+	// We yank 4 bytes from the front of the body, and move them to
+	// the header.
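+	//
+	// In raw mode this is normally the request id that the peer echoed
+	// back; e.g. (hypothetical value) a reply with a body beginning
+	// 80 00 00 01 is delivered with 80 00 00 01 in its header, letting
+	// the caller match it against an outstanding request.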
+	if (nni_msg_len(msg) < 4) {
+		// Malformed message.
+		goto malformed;
+	}
+	id = nni_msg_trim_u32(msg);
+	if (nni_msg_header_append_u32(msg, id) != 0) {
+		// Arguably we could just discard and carry on. But
+		// dropping the connection is probably more helpful since
+		// it lets the other side see that a problem occurred.
+		// Plus it gives us a chance to reclaim some memory.
+		goto malformed;
+	}
+
+	nni_aio_set_msg(p->aio_putq, msg);
+	nni_msgq_aio_put(sock->urq, p->aio_putq);
+	return;
+
+malformed:
+	nni_msg_free(msg);
+	nni_pipe_stop(p->pipe);
+}
+
+static void
+xreq0_sock_send(void *arg, nni_aio *aio)
+{
+	xreq0_sock *s = arg;
+
+	nni_msgq_aio_put(s->uwq, aio);
+}
+
+static void
+xreq0_sock_recv(void *arg, nni_aio *aio)
+{
+	xreq0_sock *s = arg;
+
+	nni_msgq_aio_get(s->urq, aio);
+}
+
+static int
+xreq0_sock_setopt_maxttl(void *arg, const void *buf, size_t sz, int typ)
+{
+	xreq0_sock *s = arg;
+	return (nni_copyin_int(&s->ttl, buf, sz, 1, 255, typ));
+}
+
+static int
+xreq0_sock_getopt_maxttl(void *arg, void *buf, size_t *szp, int typ)
+{
+	xreq0_sock *s = arg;
+	return (nni_copyout_int(s->ttl, buf, szp, typ));
+}
+
+static nni_proto_pipe_ops xreq0_pipe_ops = {
+	.pipe_init  = xreq0_pipe_init,
+	.pipe_fini  = xreq0_pipe_fini,
+	.pipe_start = xreq0_pipe_start,
+	.pipe_stop  = xreq0_pipe_stop,
+};
+
+static nni_proto_sock_option xreq0_sock_options[] = {
+	{
+	    .pso_name   = NNG_OPT_MAXTTL,
+	    .pso_type   = NNI_TYPE_INT32,
+	    .pso_getopt = xreq0_sock_getopt_maxttl,
+	    .pso_setopt = xreq0_sock_setopt_maxttl,
+	},
+	// terminate list
+	{
+	    .pso_name = NULL,
+	},
+};
+
+static nni_proto_sock_ops xreq0_sock_ops = {
+	.sock_init    = xreq0_sock_init,
+	.sock_fini    = xreq0_sock_fini,
+	.sock_open    = xreq0_sock_open,
+	.sock_close   = xreq0_sock_close,
+	.sock_options = xreq0_sock_options,
+	.sock_send    = xreq0_sock_send,
+	.sock_recv    = xreq0_sock_recv,
+};
+
+static nni_proto xreq0_proto = {
+	.proto_version  = NNI_PROTOCOL_VERSION,
+	.proto_self     = { NNI_PROTO_REQ_V0, "req" },
+	.proto_peer     = { NNI_PROTO_REP_V0, "rep" },
+	.proto_flags    = NNI_PROTO_FLAG_SNDRCV | NNI_PROTO_FLAG_RAW,
+	.proto_sock_ops = &xreq0_sock_ops,
+	.proto_pipe_ops = &xreq0_pipe_ops,
+	.proto_ctx_ops  = NULL, // raw mode does not support contexts
+};
+
+int
+nng_req0_open_raw(nng_socket *sidp)
+{
+	return (nni_proto_open(sidp, &xreq0_proto));
+}
