diff options
Diffstat (limited to 'src/sp/transport')
| -rw-r--r-- | src/sp/transport/CMakeLists.txt | 19 | ||||
| -rw-r--r-- | src/sp/transport/inproc/CMakeLists.txt | 16 | ||||
| -rw-r--r-- | src/sp/transport/inproc/inproc.c | 692 | ||||
| -rw-r--r-- | src/sp/transport/ipc/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | src/sp/transport/ipc/ipc.c | 1171 | ||||
| -rw-r--r-- | src/sp/transport/ipc/ipc_test.c | 395 | ||||
| -rw-r--r-- | src/sp/transport/tcp/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | src/sp/transport/tcp/tcp.c | 1263 | ||||
| -rw-r--r-- | src/sp/transport/tcp/tcp_test.c | 297 | ||||
| -rw-r--r-- | src/sp/transport/tls/CMakeLists.txt | 16 | ||||
| -rw-r--r-- | src/sp/transport/tls/tls.c | 1292 | ||||
| -rw-r--r-- | src/sp/transport/ws/CMakeLists.txt | 24 | ||||
| -rw-r--r-- | src/sp/transport/ws/README.adoc | 38 | ||||
| -rw-r--r-- | src/sp/transport/ws/websocket.c | 740 | ||||
| -rw-r--r-- | src/sp/transport/ws/ws_test.c | 181 | ||||
| -rw-r--r-- | src/sp/transport/zerotier/CMakeLists.txt | 37 | ||||
| -rw-r--r-- | src/sp/transport/zerotier/zerotier.c | 3241 | ||||
| -rw-r--r-- | src/sp/transport/zerotier/zthash.c | 302 | ||||
| -rw-r--r-- | src/sp/transport/zerotier/zthash.h | 43 |
19 files changed, 9801 insertions, 0 deletions
diff --git a/src/sp/transport/CMakeLists.txt b/src/sp/transport/CMakeLists.txt new file mode 100644 index 00000000..add8a9c9 --- /dev/null +++ b/src/sp/transport/CMakeLists.txt @@ -0,0 +1,19 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staystail.tech> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. +# + +# Transports. +nng_directory(transport) + +add_subdirectory(inproc) +add_subdirectory(ipc) +add_subdirectory(tcp) +add_subdirectory(tls) +add_subdirectory(ws) +add_subdirectory(zerotier) + diff --git a/src/sp/transport/inproc/CMakeLists.txt b/src/sp/transport/inproc/CMakeLists.txt new file mode 100644 index 00000000..317686bb --- /dev/null +++ b/src/sp/transport/inproc/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +# Copyright 2018 Capitar IT Group BV <info@capitar.com> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. +# + +# inproc protocol +nng_directory(inproc) + +nng_sources_if(NNG_TRANSPORT_INPROC inproc.c) +nng_headers_if(NNG_TRANSPORT_INPROC nng/transport/inproc/inproc.h) +nng_defines_if(NNG_TRANSPORT_INPROC NNG_TRANSPORT_INPROC)
\ No newline at end of file diff --git a/src/sp/transport/inproc/inproc.c b/src/sp/transport/inproc/inproc.c new file mode 100644 index 00000000..84e2c625 --- /dev/null +++ b/src/sp/transport/inproc/inproc.c @@ -0,0 +1,692 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// Copyright 2018 Devolutions <info@devolutions.net> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <string.h> + +#include "core/nng_impl.h" + +// Inproc transport. This just transports messages from one +// peer to another. The inproc transport is only valid within the same +// process. + +typedef struct inproc_pair inproc_pair; +typedef struct inproc_pipe inproc_pipe; +typedef struct inproc_ep inproc_ep; +typedef struct inproc_queue inproc_queue; + +typedef struct { + nni_mtx mx; + nni_list servers; +} inproc_global; + +// inproc_pipe represents one half of a connection. +struct inproc_pipe { + const char * addr; + inproc_pair * pair; + inproc_queue *recv_queue; + inproc_queue *send_queue; + uint16_t peer; + uint16_t proto; +}; + +struct inproc_queue { + nni_list readers; + nni_list writers; + nni_mtx lock; + bool closed; +}; + +// inproc_pair represents a pair of pipes. Because we control both +// sides of the pipes, we can allocate and free this in one structure. +struct inproc_pair { + nni_atomic_int ref; + inproc_queue queues[2]; +}; + +struct inproc_ep { + const char * addr; + bool listener; + nni_list_node node; + uint16_t proto; + nni_cv cv; + nni_list clients; + nni_list aios; + size_t rcvmax; + nni_mtx mtx; +}; + +// nni_inproc is our global state - this contains the list of active endpoints +// which we use for coordinating rendezvous. 
+static inproc_global nni_inproc; + +static int +inproc_init(void) +{ + NNI_LIST_INIT(&nni_inproc.servers, inproc_ep, node); + + nni_mtx_init(&nni_inproc.mx); + return (0); +} + +static void +inproc_fini(void) +{ + nni_mtx_fini(&nni_inproc.mx); +} + +// inproc_pair destroy is called when both pipe-ends of the pipe +// have been destroyed. +static void +inproc_pair_destroy(inproc_pair *pair) +{ + for (int i = 0; i < 2; i++) { + nni_mtx_fini(&pair->queues[i].lock); + } + NNI_FREE_STRUCT(pair); +} + +static int +inproc_pipe_alloc(inproc_pipe **pipep, inproc_ep *ep) +{ + inproc_pipe *pipe; + + if ((pipe = NNI_ALLOC_STRUCT(pipe)) == NULL) { + return (NNG_ENOMEM); + } + + pipe->proto = ep->proto; + pipe->addr = ep->addr; + *pipep = pipe; + return (0); +} + +static int +inproc_pipe_init(void *arg, nni_pipe *p) +{ + NNI_ARG_UNUSED(arg); + NNI_ARG_UNUSED(p); + return (0); +} + +static void +inproc_pipe_fini(void *arg) +{ + inproc_pipe *pipe = arg; + inproc_pair *pair; + + if ((pair = pipe->pair) != NULL) { + // If we are the last peer, then toss the pair structure. + if (nni_atomic_dec_nv(&pair->ref) == 0) { + inproc_pair_destroy(pair); + } + } + + NNI_FREE_STRUCT(pipe); +} + +static void +inproc_queue_run_closed(inproc_queue *queue) +{ + nni_aio *aio; + while (((aio = nni_list_first(&queue->readers)) != NULL) || + ((aio = nni_list_first(&queue->writers)) != NULL)) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } +} + +static void +inproc_queue_run(inproc_queue *queue) +{ + if (queue->closed) { + inproc_queue_run_closed(queue); + } + for (;;) { + nni_aio *rd; + nni_aio *wr; + nni_msg *msg; + nni_msg *pu; + + if (((rd = nni_list_first(&queue->readers)) == NULL) || + ((wr = nni_list_first(&queue->writers)) == NULL)) { + return; + } + + msg = nni_aio_get_msg(wr); + NNI_ASSERT(msg != NULL); + + // At this point, we pass success back to the caller. If + // we drop the message for any reason, its accounted on the + // receiver side. 
+ nni_aio_list_remove(wr); + nni_aio_set_msg(wr, NULL); + nni_aio_finish( + wr, 0, nni_msg_len(msg) + nni_msg_header_len(msg)); + + // TODO: We could check the max receive size here. + + // Now the receive side. We need to ensure that we have + // an exclusive copy of the message, and pull the header + // up into the body to match protocol expectations. + if ((pu = nni_msg_pull_up(msg)) == NULL) { + nni_msg_free(msg); + continue; + } + msg = pu; + + nni_aio_list_remove(rd); + nni_aio_set_msg(rd, msg); + nni_aio_finish(rd, 0, nni_msg_len(msg)); + } +} + +static void +inproc_queue_cancel(nni_aio *aio, void *arg, int rv) +{ + inproc_queue *queue = arg; + + nni_mtx_lock(&queue->lock); + if (nni_aio_list_active(aio)) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&queue->lock); +} + +static void +inproc_pipe_send(void *arg, nni_aio *aio) +{ + inproc_pipe * pipe = arg; + inproc_queue *queue = pipe->send_queue; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&queue->lock); + if ((rv = nni_aio_schedule(aio, inproc_queue_cancel, queue)) != 0) { + nni_mtx_unlock(&queue->lock); + nni_aio_finish_error(aio, rv); + return; + } + nni_aio_list_append(&queue->writers, aio); + inproc_queue_run(queue); + nni_mtx_unlock(&queue->lock); +} + +static void +inproc_pipe_recv(void *arg, nni_aio *aio) +{ + inproc_pipe * pipe = arg; + inproc_queue *queue = pipe->recv_queue; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&queue->lock); + if ((rv = nni_aio_schedule(aio, inproc_queue_cancel, queue)) != 0) { + nni_mtx_unlock(&queue->lock); + nni_aio_finish_error(aio, rv); + return; + } + nni_aio_list_append(&queue->readers, aio); + inproc_queue_run(queue); + nni_mtx_unlock(&queue->lock); +} + +static void +inproc_pipe_close(void *arg) +{ + inproc_pipe *pipe = arg; + inproc_pair *pair = pipe->pair; + + for (int i = 0; i < 2; i++) { + inproc_queue *queue = &pair->queues[i]; + nni_mtx_lock(&queue->lock); + 
queue->closed = true; + inproc_queue_run_closed(queue); + nni_mtx_unlock(&queue->lock); + } +} + +static uint16_t +inproc_pipe_peer(void *arg) +{ + inproc_pipe *pipe = arg; + + return (pipe->peer); +} + +static int +inproc_pipe_get_addr(void *arg, void *buf, size_t *szp, nni_opt_type t) +{ + inproc_pipe *p = arg; + nni_sockaddr sa; + + memset(&sa, 0, sizeof(sa)); + sa.s_inproc.sa_family = NNG_AF_INPROC; + nni_strlcpy(sa.s_inproc.sa_name, p->addr, sizeof(sa.s_inproc.sa_name)); + return (nni_copyout_sockaddr(&sa, buf, szp, t)); +} + +static int +inproc_dialer_init(void **epp, nni_url *url, nni_dialer *ndialer) +{ + inproc_ep *ep; + nni_sock * sock = nni_dialer_sock(ndialer); + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&ep->mtx); + + ep->listener = false; + ep->proto = nni_sock_proto_id(sock); + ep->rcvmax = 0; + NNI_LIST_INIT(&ep->clients, inproc_ep, node); + nni_aio_list_init(&ep->aios); + + ep->addr = url->u_rawurl; // we match on the full URL. + + *epp = ep; + return (0); +} + +static int +inproc_listener_init(void **epp, nni_url *url, nni_listener *nlistener) +{ + inproc_ep *ep; + nni_sock * sock = nni_listener_sock(nlistener); + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&ep->mtx); + + ep->listener = true; + ep->proto = nni_sock_proto_id(sock); + ep->rcvmax = 0; + NNI_LIST_INIT(&ep->clients, inproc_ep, node); + nni_aio_list_init(&ep->aios); + + ep->addr = url->u_rawurl; // we match on the full URL. 
+ + *epp = ep; + return (0); +} + +static void +inproc_ep_fini(void *arg) +{ + inproc_ep *ep = arg; + nni_mtx_fini(&ep->mtx); + NNI_FREE_STRUCT(ep); +} + +static void +inproc_conn_finish(nni_aio *aio, int rv, inproc_ep *ep, inproc_pipe *pipe) +{ + nni_aio_list_remove(aio); + + if ((!ep->listener) && nni_list_empty(&ep->aios)) { + nni_list_node_remove(&ep->node); + } + + if (rv == 0) { + nni_aio_set_output(aio, 0, pipe); + nni_aio_finish(aio, 0, 0); + } else { + NNI_ASSERT(pipe == NULL); + nni_aio_finish_error(aio, rv); + } +} + +static void +inproc_ep_close(void *arg) +{ + inproc_ep *ep = arg; + inproc_ep *client; + nni_aio * aio; + + nni_mtx_lock(&nni_inproc.mx); + if (nni_list_active(&nni_inproc.servers, ep)) { + nni_list_remove(&nni_inproc.servers, ep); + } + // Notify any waiting clients that we are closed. + while ((client = nni_list_first(&ep->clients)) != NULL) { + while ((aio = nni_list_first(&client->aios)) != NULL) { + inproc_conn_finish(aio, NNG_ECONNREFUSED, ep, NULL); + } + nni_list_remove(&ep->clients, client); + } + while ((aio = nni_list_first(&ep->aios)) != NULL) { + inproc_conn_finish(aio, NNG_ECLOSED, ep, NULL); + } + nni_mtx_unlock(&nni_inproc.mx); +} + +static void +inproc_accept_clients(inproc_ep *srv) +{ + inproc_ep *cli, *nclient; + + nclient = nni_list_first(&srv->clients); + while ((cli = nclient) != NULL) { + nni_aio *caio; + nclient = nni_list_next(&srv->clients, nclient); + NNI_LIST_FOREACH (&cli->aios, caio) { + + inproc_pipe *cpipe; + inproc_pipe *spipe; + inproc_pair *pair; + nni_aio * saio; + int rv; + + if ((saio = nni_list_first(&srv->aios)) == NULL) { + // No outstanding accept() calls. 
+ break; + } + + if ((pair = NNI_ALLOC_STRUCT(pair)) == NULL) { + inproc_conn_finish( + caio, NNG_ENOMEM, cli, NULL); + inproc_conn_finish( + saio, NNG_ENOMEM, srv, NULL); + continue; + } + for (int i = 0; i < 2; i++) { + nni_aio_list_init(&pair->queues[i].readers); + nni_aio_list_init(&pair->queues[i].writers); + nni_mtx_init(&pair->queues[i].lock); + } + nni_atomic_init(&pair->ref); + nni_atomic_set(&pair->ref, 2); + + spipe = cpipe = NULL; + if (((rv = inproc_pipe_alloc(&cpipe, cli)) != 0) || + ((rv = inproc_pipe_alloc(&spipe, srv)) != 0)) { + + if (cpipe != NULL) { + inproc_pipe_fini(cpipe); + } + if (spipe != NULL) { + inproc_pipe_fini(spipe); + } + inproc_conn_finish(caio, rv, cli, NULL); + inproc_conn_finish(saio, rv, srv, NULL); + inproc_pair_destroy(pair); + continue; + } + + cpipe->peer = spipe->proto; + spipe->peer = cpipe->proto; + cpipe->pair = pair; + spipe->pair = pair; + cpipe->send_queue = &pair->queues[0]; + cpipe->recv_queue = &pair->queues[1]; + spipe->send_queue = &pair->queues[1]; + spipe->recv_queue = &pair->queues[0]; + + inproc_conn_finish(caio, 0, cli, cpipe); + inproc_conn_finish(saio, 0, srv, spipe); + } + + if (nni_list_first(&cli->aios) == NULL) { + // No more outstanding client connects. + // Normally there should only be one. + if (nni_list_active(&srv->clients, cli)) { + nni_list_remove(&srv->clients, cli); + } + } + } +} + +static void +inproc_ep_cancel(nni_aio *aio, void *arg, int rv) +{ + inproc_ep *ep = arg; + + nni_mtx_lock(&nni_inproc.mx); + if (nni_aio_list_active(aio)) { + nni_aio_list_remove(aio); + nni_list_node_remove(&ep->node); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&nni_inproc.mx); +} + +static void +inproc_ep_connect(void *arg, nni_aio *aio) +{ + inproc_ep *ep = arg; + inproc_ep *server; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&nni_inproc.mx); + + // Find a server. 
+ NNI_LIST_FOREACH (&nni_inproc.servers, server) { + if (strcmp(server->addr, ep->addr) == 0) { + break; + } + } + if (server == NULL) { + nni_mtx_unlock(&nni_inproc.mx); + nni_aio_finish_error(aio, NNG_ECONNREFUSED); + return; + } + + // We don't have to worry about the case where a zero timeout + // on connect was specified, as there is no option to specify + // that in the upper API. + if ((rv = nni_aio_schedule(aio, inproc_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&nni_inproc.mx); + nni_aio_finish_error(aio, rv); + return; + } + + nni_list_append(&server->clients, ep); + nni_aio_list_append(&ep->aios, aio); + + inproc_accept_clients(server); + nni_mtx_unlock(&nni_inproc.mx); +} + +static int +inproc_ep_bind(void *arg) +{ + inproc_ep *ep = arg; + inproc_ep *srch; + nni_list * list = &nni_inproc.servers; + + nni_mtx_lock(&nni_inproc.mx); + NNI_LIST_FOREACH (list, srch) { + if (strcmp(srch->addr, ep->addr) == 0) { + nni_mtx_unlock(&nni_inproc.mx); + return (NNG_EADDRINUSE); + } + } + nni_list_append(list, ep); + nni_mtx_unlock(&nni_inproc.mx); + return (0); +} + +static void +inproc_ep_accept(void *arg, nni_aio *aio) +{ + inproc_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&nni_inproc.mx); + + // We need not worry about the case where a non-blocking + // accept was tried -- there is no API to do such a thing. + if ((rv = nni_aio_schedule(aio, inproc_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&nni_inproc.mx); + nni_aio_finish_error(aio, rv); + return; + } + + // We are already on the master list of servers, thanks to bind. + // Insert us into pending server aios, and then run accept list. 
+ nni_aio_list_append(&ep->aios, aio); + inproc_accept_clients(ep); + nni_mtx_unlock(&nni_inproc.mx); +} + +static int +inproc_ep_get_recvmaxsz(void *arg, void *v, size_t *szp, nni_opt_type t) +{ + inproc_ep *ep = arg; + int rv; + nni_mtx_lock(&ep->mtx); + rv = nni_copyout_size(ep->rcvmax, v, szp, t); + nni_mtx_unlock(&ep->mtx); + return (rv); +} + +static int +inproc_ep_set_recvmaxsz(void *arg, const void *v, size_t sz, nni_opt_type t) +{ + inproc_ep *ep = arg; + size_t val; + int rv; + if ((rv = nni_copyin_size(&val, v, sz, 0, NNI_MAXSZ, t)) == 0) { + nni_mtx_lock(&ep->mtx); + ep->rcvmax = val; + nni_mtx_unlock(&ep->mtx); + } + return (rv); +} + +static int +inproc_ep_get_addr(void *arg, void *v, size_t *szp, nni_opt_type t) +{ + inproc_ep * ep = arg; + nng_sockaddr sa; + sa.s_inproc.sa_family = NNG_AF_INPROC; + nni_strlcpy( + sa.s_inproc.sa_name, ep->addr, sizeof(sa.s_inproc.sa_name)); + return (nni_copyout_sockaddr(&sa, v, szp, t)); +} + +static const nni_option inproc_pipe_options[] = { + { + .o_name = NNG_OPT_LOCADDR, + .o_get = inproc_pipe_get_addr, + }, + { + .o_name = NNG_OPT_REMADDR, + .o_get = inproc_pipe_get_addr, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static int +inproc_pipe_getopt( + void *arg, const char *name, void *v, size_t *szp, nni_type t) +{ + return (nni_getopt(inproc_pipe_options, name, arg, v, szp, t)); +} + +static nni_tran_pipe_ops inproc_pipe_ops = { + .p_init = inproc_pipe_init, + .p_fini = inproc_pipe_fini, + .p_send = inproc_pipe_send, + .p_recv = inproc_pipe_recv, + .p_close = inproc_pipe_close, + .p_peer = inproc_pipe_peer, + .p_getopt = inproc_pipe_getopt, +}; + +static const nni_option inproc_ep_options[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = inproc_ep_get_recvmaxsz, + .o_set = inproc_ep_set_recvmaxsz, + }, + { + .o_name = NNG_OPT_LOCADDR, + .o_get = inproc_ep_get_addr, + }, + { + .o_name = NNG_OPT_REMADDR, + .o_get = inproc_ep_get_addr, + }, + // terminate list + { + .o_name = NULL, + }, +}; + 
+static int +inproc_ep_getopt(void *arg, const char *name, void *v, size_t *szp, nni_type t) +{ + return (nni_getopt(inproc_ep_options, name, arg, v, szp, t)); +} + +static int +inproc_ep_setopt( + void *arg, const char *name, const void *v, size_t sz, nni_type t) +{ + return (nni_setopt(inproc_ep_options, name, arg, v, sz, t)); +} + +static nni_tran_dialer_ops inproc_dialer_ops = { + .d_init = inproc_dialer_init, + .d_fini = inproc_ep_fini, + .d_connect = inproc_ep_connect, + .d_close = inproc_ep_close, + .d_getopt = inproc_ep_getopt, + .d_setopt = inproc_ep_setopt, +}; + +static nni_tran_listener_ops inproc_listener_ops = { + .l_init = inproc_listener_init, + .l_fini = inproc_ep_fini, + .l_bind = inproc_ep_bind, + .l_accept = inproc_ep_accept, + .l_close = inproc_ep_close, + .l_getopt = inproc_ep_getopt, + .l_setopt = inproc_ep_setopt, +}; + +// This is the inproc transport linkage, and should be the only global +// symbol in this entire file. +struct nni_tran nni_inproc_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "inproc", + .tran_dialer = &inproc_dialer_ops, + .tran_listener = &inproc_listener_ops, + .tran_pipe = &inproc_pipe_ops, + .tran_init = inproc_init, + .tran_fini = inproc_fini, +}; + +int +nng_inproc_register(void) +{ + return (nni_tran_register(&nni_inproc_tran)); +} diff --git a/src/sp/transport/ipc/CMakeLists.txt b/src/sp/transport/ipc/CMakeLists.txt new file mode 100644 index 00000000..c9927f75 --- /dev/null +++ b/src/sp/transport/ipc/CMakeLists.txt @@ -0,0 +1,17 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +# Copyright 2018 Capitar IT Group BV <info@capitar.com> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. 
+# + +# ipc protocol +nng_directory(ipc) + +nng_sources_if(NNG_TRANSPORT_IPC ipc.c) +nng_headers_if(NNG_TRANSPORT_IPC nng/transport/ipc/ipc.h) +nng_defines_if(NNG_TRANSPORT_IPC NNG_TRANSPORT_IPC) +nng_test_if(NNG_TRANSPORT_IPC ipc_test)
\ No newline at end of file diff --git a/src/sp/transport/ipc/ipc.c b/src/sp/transport/ipc/ipc.c new file mode 100644 index 00000000..efaa823c --- /dev/null +++ b/src/sp/transport/ipc/ipc.c @@ -0,0 +1,1171 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// Copyright 2019 Devolutions <info@devolutions.net> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <stdio.h> +#include <stdlib.h> + +#include "core/nng_impl.h" + +#include <nng/transport/ipc/ipc.h> + +// IPC transport. Platform specific IPC operations must be +// supplied as well. Normally the IPC is UNIX domain sockets or +// Windows named pipes. Other platforms could use other mechanisms, +// but all implementations on the platform must use the same mechanism. + +typedef struct ipc_pipe ipc_pipe; +typedef struct ipc_ep ipc_ep; + +// ipc_pipe is one end of an IPC connection. 
+struct ipc_pipe { + nng_stream * conn; + uint16_t peer; + uint16_t proto; + size_t rcv_max; + bool closed; + nni_sockaddr sa; + ipc_ep * ep; + nni_pipe * pipe; + nni_list_node node; + nni_atomic_flag reaped; + nni_reap_node reap; + uint8_t tx_head[1 + sizeof(uint64_t)]; + uint8_t rx_head[1 + sizeof(uint64_t)]; + size_t got_tx_head; + size_t got_rx_head; + size_t want_tx_head; + size_t want_rx_head; + nni_list recv_q; + nni_list send_q; + nni_aio tx_aio; + nni_aio rx_aio; + nni_aio neg_aio; + nni_msg * rx_msg; + nni_mtx mtx; +}; + +struct ipc_ep { + nni_mtx mtx; + nni_sockaddr sa; + size_t rcv_max; + uint16_t proto; + bool started; + bool closed; + bool fini; + int ref_cnt; + nng_stream_dialer * dialer; + nng_stream_listener *listener; + nni_aio * user_aio; + nni_aio * conn_aio; + nni_aio * time_aio; + nni_list busy_pipes; // busy pipes -- ones passed to socket + nni_list wait_pipes; // pipes waiting to match to socket + nni_list neg_pipes; // pipes busy negotiating + nni_reap_node reap; +#ifdef NNG_ENABLE_STATS + nni_stat_item st_rcv_max; +#endif +}; + +static void ipc_pipe_send_start(ipc_pipe *p); +static void ipc_pipe_recv_start(ipc_pipe *p); +static void ipc_pipe_send_cb(void *); +static void ipc_pipe_recv_cb(void *); +static void ipc_pipe_neg_cb(void *); +static void ipc_pipe_fini(void *); +static void ipc_ep_fini(void *); + +static nni_reap_list ipc_ep_reap_list = { + .rl_offset = offsetof(ipc_ep, reap), + .rl_func = ipc_ep_fini, +}; + +static nni_reap_list ipc_pipe_reap_list = { + .rl_offset = offsetof(ipc_pipe, reap), + .rl_func = ipc_pipe_fini, +}; + +static int +ipc_tran_init(void) +{ + return (0); +} + +static void +ipc_tran_fini(void) +{ +} + +static void +ipc_pipe_close(void *arg) +{ + ipc_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + p->closed = true; + nni_mtx_unlock(&p->mtx); + + nni_aio_close(&p->rx_aio); + nni_aio_close(&p->tx_aio); + nni_aio_close(&p->neg_aio); + + nng_stream_close(p->conn); +} + +static void +ipc_pipe_stop(void *arg) +{ + ipc_pipe 
*p = arg; + + nni_aio_stop(&p->rx_aio); + nni_aio_stop(&p->tx_aio); + nni_aio_stop(&p->neg_aio); +} + +static int +ipc_pipe_init(void *arg, nni_pipe *pipe) +{ + ipc_pipe *p = arg; + p->pipe = pipe; + return (0); +} + +static void +ipc_pipe_fini(void *arg) +{ + ipc_pipe *p = arg; + ipc_ep * ep; + + ipc_pipe_stop(p); + if ((ep = p->ep) != NULL) { + nni_mtx_lock(&ep->mtx); + nni_list_node_remove(&p->node); + ep->ref_cnt--; + if (ep->fini && (ep->ref_cnt == 0)) { + nni_reap(&ipc_ep_reap_list, ep); + } + nni_mtx_unlock(&ep->mtx); + } + nni_aio_fini(&p->rx_aio); + nni_aio_fini(&p->tx_aio); + nni_aio_fini(&p->neg_aio); + nng_stream_free(p->conn); + if (p->rx_msg) { + nni_msg_free(p->rx_msg); + } + nni_mtx_fini(&p->mtx); + NNI_FREE_STRUCT(p); +} + +static void +ipc_pipe_reap(ipc_pipe *p) +{ + if (!nni_atomic_flag_test_and_set(&p->reaped)) { + if (p->conn != NULL) { + nng_stream_close(p->conn); + } + nni_reap(&ipc_pipe_reap_list, p); + } +} + +static int +ipc_pipe_alloc(ipc_pipe **pipe_p) +{ + ipc_pipe *p; + + if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&p->mtx); + nni_aio_init(&p->tx_aio, ipc_pipe_send_cb, p); + nni_aio_init(&p->rx_aio, ipc_pipe_recv_cb, p); + nni_aio_init(&p->neg_aio, ipc_pipe_neg_cb, p); + nni_aio_list_init(&p->send_q); + nni_aio_list_init(&p->recv_q); + nni_atomic_flag_reset(&p->reaped); + *pipe_p = p; + return (0); +} + +static void +ipc_ep_match(ipc_ep *ep) +{ + nni_aio * aio; + ipc_pipe *p; + + if (((aio = ep->user_aio) == NULL) || + ((p = nni_list_first(&ep->wait_pipes)) == NULL)) { + return; + } + nni_list_remove(&ep->wait_pipes, p); + nni_list_append(&ep->busy_pipes, p); + ep->user_aio = NULL; + p->rcv_max = ep->rcv_max; + nni_aio_set_output(aio, 0, p); + nni_aio_finish(aio, 0, 0); +} + +static void +ipc_pipe_neg_cb(void *arg) +{ + ipc_pipe *p = arg; + ipc_ep * ep = p->ep; + nni_aio * aio = &p->neg_aio; + nni_aio * user_aio; + int rv; + + nni_mtx_lock(&ep->mtx); + if ((rv = nni_aio_result(aio)) != 0) { + goto 
error; + } + + // We start transmitting before we receive. + if (p->got_tx_head < p->want_tx_head) { + p->got_tx_head += nni_aio_count(aio); + } else if (p->got_rx_head < p->want_rx_head) { + p->got_rx_head += nni_aio_count(aio); + } + if (p->got_tx_head < p->want_tx_head) { + nni_iov iov; + iov.iov_len = p->want_tx_head - p->got_tx_head; + iov.iov_buf = &p->tx_head[p->got_tx_head]; + nni_aio_set_iov(aio, 1, &iov); + // send it down... + nng_stream_send(p->conn, aio); + nni_mtx_unlock(&p->ep->mtx); + return; + } + if (p->got_rx_head < p->want_rx_head) { + nni_iov iov; + iov.iov_len = p->want_rx_head - p->got_rx_head; + iov.iov_buf = &p->rx_head[p->got_rx_head]; + nni_aio_set_iov(aio, 1, &iov); + nng_stream_recv(p->conn, aio); + nni_mtx_unlock(&p->ep->mtx); + return; + } + // We have both sent and received the headers. Lets check the + // receive side header. + if ((p->rx_head[0] != 0) || (p->rx_head[1] != 'S') || + (p->rx_head[2] != 'P') || (p->rx_head[3] != 0) || + (p->rx_head[6] != 0) || (p->rx_head[7] != 0)) { + rv = NNG_EPROTO; + goto error; + } + + NNI_GET16(&p->rx_head[4], p->peer); + + // We are all ready now. We put this in the wait list, and + // then try to run the matcher. + nni_list_remove(&ep->neg_pipes, p); + nni_list_append(&ep->wait_pipes, p); + + ipc_ep_match(ep); + nni_mtx_unlock(&ep->mtx); + return; + +error: + + nng_stream_close(p->conn); + // If we are waiting to negotiate on a client side, then a failure + // here has to be passed to the user app. + if ((user_aio = ep->user_aio) != NULL) { + ep->user_aio = NULL; + nni_aio_finish_error(user_aio, rv); + } + nni_mtx_unlock(&ep->mtx); + ipc_pipe_reap(p); +} + +static void +ipc_pipe_send_cb(void *arg) +{ + ipc_pipe *p = arg; + int rv; + nni_aio * aio; + size_t n; + nni_msg * msg; + nni_aio * tx_aio = &p->tx_aio; + + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_result(tx_aio)) != 0) { + nni_pipe_bump_error(p->pipe, rv); + // Intentionally we do not queue up another transfer. 
+ // There's an excellent chance that the pipe is no longer + // usable, with a partial transfer. + // The protocol should see this error, and close the + // pipe itself, we hope. + + while ((aio = nni_list_first(&p->send_q)) != NULL) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&p->mtx); + return; + } + + n = nni_aio_count(tx_aio); + nni_aio_iov_advance(tx_aio, n); + if (nni_aio_iov_count(tx_aio) != 0) { + nng_stream_send(p->conn, tx_aio); + nni_mtx_unlock(&p->mtx); + return; + } + + aio = nni_list_first(&p->send_q); + nni_aio_list_remove(aio); + ipc_pipe_send_start(p); + + msg = nni_aio_get_msg(aio); + n = nni_msg_len(msg); + nni_pipe_bump_tx(p->pipe, n); + nni_mtx_unlock(&p->mtx); + + nni_aio_set_msg(aio, NULL); + nni_msg_free(msg); + nni_aio_finish_sync(aio, 0, n); +} + +static void +ipc_pipe_recv_cb(void *arg) +{ + ipc_pipe *p = arg; + nni_aio * aio; + int rv; + size_t n; + nni_msg * msg; + nni_aio * rx_aio = &p->rx_aio; + + nni_mtx_lock(&p->mtx); + + if ((rv = nni_aio_result(rx_aio)) != 0) { + // Error on receive. This has to cause an error back + // to the user. Also, if we had allocated an rx_msg, lets + // toss it. + goto error; + } + + n = nni_aio_count(rx_aio); + nni_aio_iov_advance(rx_aio, n); + if (nni_aio_iov_count(rx_aio) != 0) { + // Was this a partial read? If so then resubmit for the rest. + nng_stream_recv(p->conn, rx_aio); + nni_mtx_unlock(&p->mtx); + return; + } + + // If we don't have a message yet, we were reading the message + // header, which is just the length. This tells us the size of the + // message to allocate and how much more to expect. + if (p->rx_msg == NULL) { + uint64_t len; + + // Check to make sure we got msg type 1. + if (p->rx_head[0] != 1) { + rv = NNG_EPROTO; + goto error; + } + + // We should have gotten a message header. + NNI_GET64(p->rx_head + 1, len); + + // Make sure the message payload is not too big. If it is + // the caller will shut down the pipe. 
+ if ((len > p->rcv_max) && (p->rcv_max > 0)) { + rv = NNG_EMSGSIZE; + goto error; + } + + // Note that all IO on this pipe is blocked behind this + // allocation. We could possibly look at using a separate + // lock for the read side in the future, so that we allow + // transmits to proceed normally. In practice this is + // unlikely to be much of an issue though. + if ((rv = nni_msg_alloc(&p->rx_msg, (size_t) len)) != 0) { + goto error; + } + + if (len != 0) { + nni_iov iov; + // Submit the rest of the data for a read -- we want to + // read the entire message now. + iov.iov_buf = nni_msg_body(p->rx_msg); + iov.iov_len = (size_t) len; + + nni_aio_set_iov(rx_aio, 1, &iov); + nng_stream_recv(p->conn, rx_aio); + nni_mtx_unlock(&p->mtx); + return; + } + } + + // Otherwise we got a message read completely. Let the user know the + // good news. + + aio = nni_list_first(&p->recv_q); + nni_aio_list_remove(aio); + msg = p->rx_msg; + p->rx_msg = NULL; + n = nni_msg_len(msg); + nni_pipe_bump_rx(p->pipe, n); + ipc_pipe_recv_start(p); + nni_mtx_unlock(&p->mtx); + + nni_aio_set_msg(aio, msg); + nni_aio_finish_sync(aio, 0, n); + return; + +error: + while ((aio = nni_list_first(&p->recv_q)) != NULL) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + msg = p->rx_msg; + p->rx_msg = NULL; + nni_pipe_bump_error(p->pipe, rv); + // Intentionally, we do not queue up another receive. + // The protocol should notice this error and close the pipe. + nni_mtx_unlock(&p->mtx); + + nni_msg_free(msg); +} + +static void +ipc_pipe_send_cancel(nni_aio *aio, void *arg, int rv) +{ + ipc_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + if (!nni_aio_list_active(aio)) { + nni_mtx_unlock(&p->mtx); + return; + } + // If this is being sent, then cancel the pending transfer. + // The callback on the tx_aio will cause the user aio to + // be canceled too. 
+ if (nni_list_first(&p->send_q) == aio) { + nni_aio_abort(&p->tx_aio, rv); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + + nni_aio_finish_error(aio, rv); +} + +static void +ipc_pipe_send_start(ipc_pipe *p) +{ + nni_aio *aio; + nni_msg *msg; + int nio; + nni_iov iov[3]; + uint64_t len; + + if (p->closed) { + while ((aio = nni_list_first(&p->send_q)) != NULL) { + nni_list_remove(&p->send_q, aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + return; + } + if ((aio = nni_list_first(&p->send_q)) == NULL) { + return; + } + + // This runs to send the message. + msg = nni_aio_get_msg(aio); + len = nni_msg_len(msg) + nni_msg_header_len(msg); + + p->tx_head[0] = 1; // message type, 1. + NNI_PUT64(p->tx_head + 1, len); + + nio = 0; + iov[0].iov_buf = p->tx_head; + iov[0].iov_len = sizeof(p->tx_head); + nio++; + if (nni_msg_header_len(msg) > 0) { + iov[nio].iov_buf = nni_msg_header(msg); + iov[nio].iov_len = nni_msg_header_len(msg); + nio++; + } + if (nni_msg_len(msg) > 0) { + iov[nio].iov_buf = nni_msg_body(msg); + iov[nio].iov_len = nni_msg_len(msg); + nio++; + } + nni_aio_set_iov(&p->tx_aio, nio, iov); + nng_stream_send(p->conn, &p->tx_aio); +} + +static void +ipc_pipe_send(void *arg, nni_aio *aio) +{ + ipc_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, ipc_pipe_send_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + nni_list_append(&p->send_q, aio); + if (nni_list_first(&p->send_q) == aio) { + ipc_pipe_send_start(p); + } + nni_mtx_unlock(&p->mtx); +} + +static void +ipc_pipe_recv_cancel(nni_aio *aio, void *arg, int rv) +{ + ipc_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + if (!nni_aio_list_active(aio)) { + nni_mtx_unlock(&p->mtx); + return; + } + // If receive in progress, then cancel the pending transfer. + // The callback on the rx_aio will cause the user aio to + // be canceled too. 
+ if (nni_list_first(&p->recv_q) == aio) { + nni_aio_abort(&p->rx_aio, rv); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); +} + +static void +ipc_pipe_recv_start(ipc_pipe *p) +{ + nni_iov iov; + NNI_ASSERT(p->rx_msg == NULL); + + if (p->closed) { + nni_aio *aio; + while ((aio = nni_list_first(&p->recv_q)) != NULL) { + nni_list_remove(&p->recv_q, aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + return; + } + if (nni_list_empty(&p->recv_q)) { + return; + } + + // Schedule a read of the IPC header. + iov.iov_buf = p->rx_head; + iov.iov_len = sizeof(p->rx_head); + nni_aio_set_iov(&p->rx_aio, 1, &iov); + + nng_stream_recv(p->conn, &p->rx_aio); +} + +static void +ipc_pipe_recv(void *arg, nni_aio *aio) +{ + ipc_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if (p->closed) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if ((rv = nni_aio_schedule(aio, ipc_pipe_recv_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + + nni_list_append(&p->recv_q, aio); + if (nni_list_first(&p->recv_q) == aio) { + ipc_pipe_recv_start(p); + } + nni_mtx_unlock(&p->mtx); +} + +static uint16_t +ipc_pipe_peer(void *arg) +{ + ipc_pipe *p = arg; + + return (p->peer); +} + +static void +ipc_pipe_start(ipc_pipe *p, nng_stream *conn, ipc_ep *ep) +{ + nni_iov iov; + + ep->ref_cnt++; + + p->conn = conn; + p->ep = ep; + p->proto = ep->proto; + + p->tx_head[0] = 0; + p->tx_head[1] = 'S'; + p->tx_head[2] = 'P'; + p->tx_head[3] = 0; + NNI_PUT16(&p->tx_head[4], p->proto); + NNI_PUT16(&p->tx_head[6], 0); + + p->got_rx_head = 0; + p->got_tx_head = 0; + p->want_rx_head = 8; + p->want_tx_head = 8; + iov.iov_len = 8; + iov.iov_buf = &p->tx_head[0]; + nni_aio_set_iov(&p->neg_aio, 1, &iov); + nni_list_append(&ep->neg_pipes, p); + + nni_aio_set_timeout(&p->neg_aio, 10000); // 10 sec timeout to 
negotiate + nng_stream_send(p->conn, &p->neg_aio); +} + +static void +ipc_ep_close(void *arg) +{ + ipc_ep * ep = arg; + ipc_pipe *p; + + nni_mtx_lock(&ep->mtx); + ep->closed = true; + nni_aio_close(ep->time_aio); + if (ep->dialer != NULL) { + nng_stream_dialer_close(ep->dialer); + } + if (ep->listener != NULL) { + nng_stream_listener_close(ep->listener); + } + NNI_LIST_FOREACH (&ep->neg_pipes, p) { + ipc_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->wait_pipes, p) { + ipc_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->busy_pipes, p) { + ipc_pipe_close(p); + } + if (ep->user_aio != NULL) { + nni_aio_finish_error(ep->user_aio, NNG_ECLOSED); + ep->user_aio = NULL; + } + nni_mtx_unlock(&ep->mtx); +} + +static void +ipc_ep_fini(void *arg) +{ + ipc_ep *ep = arg; + + nni_mtx_lock(&ep->mtx); + ep->fini = true; + if (ep->ref_cnt != 0) { + nni_mtx_unlock(&ep->mtx); + return; + } + nni_mtx_unlock(&ep->mtx); + nni_aio_stop(ep->time_aio); + nni_aio_stop(ep->conn_aio); + nng_stream_dialer_free(ep->dialer); + nng_stream_listener_free(ep->listener); + nni_aio_free(ep->time_aio); + nni_aio_free(ep->conn_aio); + nni_mtx_fini(&ep->mtx); + NNI_FREE_STRUCT(ep); +} + +static void +ipc_ep_timer_cb(void *arg) +{ + ipc_ep *ep = arg; + nni_mtx_lock(&ep->mtx); + if (nni_aio_result(ep->time_aio) == 0) { + nng_stream_listener_accept(ep->listener, ep->conn_aio); + } + nni_mtx_unlock(&ep->mtx); +} + +static void +ipc_ep_accept_cb(void *arg) +{ + ipc_ep * ep = arg; + nni_aio * aio = ep->conn_aio; + ipc_pipe * p; + int rv; + nng_stream *conn; + + nni_mtx_lock(&ep->mtx); + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + conn = nni_aio_get_output(aio, 0); + if ((rv = ipc_pipe_alloc(&p)) != 0) { + nng_stream_free(conn); + goto error; + } + if (ep->closed) { + ipc_pipe_fini(p); + nng_stream_free(conn); + rv = NNG_ECLOSED; + goto error; + } + ipc_pipe_start(p, conn, ep); + nng_stream_listener_accept(ep->listener, ep->conn_aio); + nni_mtx_unlock(&ep->mtx); + return; + +error: + // When an error 
here occurs, let's send a notice up to the consumer. + // That way it can be reported properly. + if ((aio = ep->user_aio) != NULL) { + ep->user_aio = NULL; + nni_aio_finish_error(aio, rv); + } + + switch (rv) { + + case NNG_ENOMEM: + case NNG_ENOFILES: + nng_sleep_aio(10, ep->time_aio); + break; + + default: + if (!ep->closed) { + nng_stream_listener_accept(ep->listener, ep->conn_aio); + } + break; + } + nni_mtx_unlock(&ep->mtx); +} + +static void +ipc_ep_dial_cb(void *arg) +{ + ipc_ep * ep = arg; + nni_aio * aio = ep->conn_aio; + ipc_pipe * p; + int rv; + nng_stream *conn; + + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + conn = nni_aio_get_output(aio, 0); + if ((rv = ipc_pipe_alloc(&p)) != 0) { + nng_stream_free(conn); + goto error; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + ipc_pipe_fini(p); + nng_stream_free(conn); + rv = NNG_ECLOSED; + nni_mtx_unlock(&ep->mtx); + goto error; + } else { + ipc_pipe_start(p, conn, ep); + } + nni_mtx_unlock(&ep->mtx); + return; + +error: + // Error connecting. We need to pass this straight back + // to the user. 
+ nni_mtx_lock(&ep->mtx); + if ((aio = ep->user_aio) != NULL) { + ep->user_aio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static int +ipc_ep_init(ipc_ep **epp, nni_sock *sock) +{ + ipc_ep *ep; + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&ep->mtx); + NNI_LIST_INIT(&ep->busy_pipes, ipc_pipe, node); + NNI_LIST_INIT(&ep->wait_pipes, ipc_pipe, node); + NNI_LIST_INIT(&ep->neg_pipes, ipc_pipe, node); + + ep->proto = nni_sock_proto_id(sock); + +#ifdef NNG_ENABLE_STATS + static const nni_stat_info rcv_max_info = { + .si_name = "rcv_max", + .si_desc = "maximum receive size", + .si_type = NNG_STAT_LEVEL, + .si_unit = NNG_UNIT_BYTES, + .si_atomic = true, + }; + nni_stat_init(&ep->st_rcv_max, &rcv_max_info); +#endif + + *epp = ep; + return (0); +} + +static int +ipc_ep_init_dialer(void **dp, nni_url *url, nni_dialer *dialer) +{ + ipc_ep * ep; + int rv; + nni_sock *sock = nni_dialer_sock(dialer); + + if ((rv = ipc_ep_init(&ep, sock)) != 0) { + return (rv); + } + + if (((rv = nni_aio_alloc(&ep->conn_aio, ipc_ep_dial_cb, ep)) != 0) || + ((rv = nng_stream_dialer_alloc_url(&ep->dialer, url)) != 0)) { + ipc_ep_fini(ep); + return (rv); + } +#ifdef NNG_ENABLE_STATS + nni_dialer_add_stat(dialer, &ep->st_rcv_max); +#endif + *dp = ep; + return (0); +} + +static int +ipc_ep_init_listener(void **dp, nni_url *url, nni_listener *listener) +{ + ipc_ep * ep; + int rv; + nni_sock *sock = nni_listener_sock(listener); + + if ((rv = ipc_ep_init(&ep, sock)) != 0) { + return (rv); + } + + if (((rv = nni_aio_alloc(&ep->conn_aio, ipc_ep_accept_cb, ep)) != 0) || + ((rv = nni_aio_alloc(&ep->time_aio, ipc_ep_timer_cb, ep)) != 0) || + ((rv = nng_stream_listener_alloc_url(&ep->listener, url)) != 0)) { + ipc_ep_fini(ep); + return (rv); + } + +#ifdef NNG_ENABLE_STATS + nni_listener_add_stat(listener, &ep->st_rcv_max); +#endif + *dp = ep; + return (0); +} + +static void +ipc_ep_cancel(nni_aio *aio, void *arg, int rv) +{ + ipc_ep 
*ep = arg; + nni_mtx_lock(&ep->mtx); + if (aio == ep->user_aio) { + ep->user_aio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static void +ipc_ep_connect(void *arg, nni_aio *aio) +{ + ipc_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if (ep->user_aio != NULL) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_EBUSY); + return; + } + + if ((rv = nni_aio_schedule(aio, ipc_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->user_aio = aio; + nng_stream_dialer_dial(ep->dialer, ep->conn_aio); + nni_mtx_unlock(&ep->mtx); +} + +static int +ipc_ep_get_recv_max_sz(void *arg, void *v, size_t *szp, nni_type t) +{ + ipc_ep *ep = arg; + int rv; + nni_mtx_lock(&ep->mtx); + rv = nni_copyout_size(ep->rcv_max, v, szp, t); + nni_mtx_unlock(&ep->mtx); + return (rv); +} + +static int +ipc_ep_set_recv_max_sz(void *arg, const void *v, size_t sz, nni_type t) +{ + ipc_ep *ep = arg; + size_t val; + int rv; + if ((rv = nni_copyin_size(&val, v, sz, 0, NNI_MAXSZ, t)) == 0) { + + ipc_pipe *p; + nni_mtx_lock(&ep->mtx); + ep->rcv_max = val; + NNI_LIST_FOREACH (&ep->wait_pipes, p) { + p->rcv_max = val; + } + NNI_LIST_FOREACH (&ep->neg_pipes, p) { + p->rcv_max = val; + } + NNI_LIST_FOREACH (&ep->busy_pipes, p) { + p->rcv_max = val; + } + nni_mtx_unlock(&ep->mtx); +#ifdef NNG_ENABLE_STATS + nni_stat_set_value(&ep->st_rcv_max, val); +#endif + } + return (rv); +} + +static int +ipc_ep_bind(void *arg) +{ + ipc_ep *ep = arg; + int rv; + + nni_mtx_lock(&ep->mtx); + rv = nng_stream_listener_listen(ep->listener); + nni_mtx_unlock(&ep->mtx); + return (rv); +} + +static void +ipc_ep_accept(void *arg, nni_aio *aio) +{ + ipc_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + 
nni_aio_finish_error(aio, NNG_ECLOSED); + nni_mtx_unlock(&ep->mtx); + return; + } + if (ep->user_aio != NULL) { + nni_aio_finish_error(aio, NNG_EBUSY); + nni_mtx_unlock(&ep->mtx); + return; + } + if ((rv = nni_aio_schedule(aio, ipc_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->user_aio = aio; + if (!ep->started) { + ep->started = true; + nng_stream_listener_accept(ep->listener, ep->conn_aio); + } else { + ipc_ep_match(ep); + } + + nni_mtx_unlock(&ep->mtx); +} + +static int +ipc_pipe_get(void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ipc_pipe *p = arg; + + return (nni_stream_get(p->conn, name, buf, szp, t)); +} + +static nni_tran_pipe_ops ipc_tran_pipe_ops = { + .p_init = ipc_pipe_init, + .p_fini = ipc_pipe_fini, + .p_stop = ipc_pipe_stop, + .p_send = ipc_pipe_send, + .p_recv = ipc_pipe_recv, + .p_close = ipc_pipe_close, + .p_peer = ipc_pipe_peer, + .p_getopt = ipc_pipe_get, +}; + +static const nni_option ipc_ep_options[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = ipc_ep_get_recv_max_sz, + .o_set = ipc_ep_set_recv_max_sz, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static int +ipc_dialer_get(void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ipc_ep *ep = arg; + int rv; + + rv = nni_getopt(ipc_ep_options, name, ep, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_stream_dialer_get(ep->dialer, name, buf, szp, t); + } + return (rv); +} + +static int +ipc_dialer_set( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + ipc_ep *ep = arg; + int rv; + + rv = nni_setopt(ipc_ep_options, name, ep, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_stream_dialer_set(ep->dialer, name, buf, sz, t); + } + return (rv); +} + +static int +ipc_listener_get( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ipc_ep *ep = arg; + int rv; + + rv = nni_getopt(ipc_ep_options, name, ep, buf, szp, t); + if (rv == 
NNG_ENOTSUP) { + rv = nni_stream_listener_get(ep->listener, name, buf, szp, t); + } + return (rv); +} + +static int +ipc_listener_set( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + ipc_ep *ep = arg; + int rv; + + rv = nni_setopt(ipc_ep_options, name, ep, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_stream_listener_set(ep->listener, name, buf, sz, t); + } + return (rv); +} + +static nni_tran_dialer_ops ipc_dialer_ops = { + .d_init = ipc_ep_init_dialer, + .d_fini = ipc_ep_fini, + .d_connect = ipc_ep_connect, + .d_close = ipc_ep_close, + .d_getopt = ipc_dialer_get, + .d_setopt = ipc_dialer_set, +}; + +static nni_tran_listener_ops ipc_listener_ops = { + .l_init = ipc_ep_init_listener, + .l_fini = ipc_ep_fini, + .l_bind = ipc_ep_bind, + .l_accept = ipc_ep_accept, + .l_close = ipc_ep_close, + .l_getopt = ipc_listener_get, + .l_setopt = ipc_listener_set, +}; + +static nni_tran ipc_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "ipc", + .tran_dialer = &ipc_dialer_ops, + .tran_listener = &ipc_listener_ops, + .tran_pipe = &ipc_tran_pipe_ops, + .tran_init = ipc_tran_init, + .tran_fini = ipc_tran_fini, +}; + +#ifdef NNG_PLATFORM_POSIX +static nni_tran ipc_tran_unix = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "unix", + .tran_dialer = &ipc_dialer_ops, + .tran_listener = &ipc_listener_ops, + .tran_pipe = &ipc_tran_pipe_ops, + .tran_init = ipc_tran_init, + .tran_fini = ipc_tran_fini, +}; +#endif + +#ifdef NNG_HAVE_ABSTRACT_SOCKETS +static nni_tran ipc_tran_abstract = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "abstract", + .tran_dialer = &ipc_dialer_ops, + .tran_listener = &ipc_listener_ops, + .tran_pipe = &ipc_tran_pipe_ops, + .tran_init = ipc_tran_init, + .tran_fini = ipc_tran_fini, +}; +#endif + +int +nng_ipc_register(void) +{ + int rv; + if (((rv = nni_tran_register(&ipc_tran)) != 0) +#ifdef NNG_PLATFORM_POSIX + || ((rv = nni_tran_register(&ipc_tran_unix)) != 0) +#endif +#ifdef 
NNG_HAVE_ABSTRACT_SOCKETS + || ((rv = nni_tran_register(&ipc_tran_abstract)) != 0) +#endif + ) { + return (rv); + } + + return (0); +} diff --git a/src/sp/transport/ipc/ipc_test.c b/src/sp/transport/ipc/ipc_test.c new file mode 100644 index 00000000..2fb4afa3 --- /dev/null +++ b/src/sp/transport/ipc/ipc_test.c @@ -0,0 +1,395 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Cody Piersall <cody.piersall@gmail.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <nuts.h> + +#ifdef NNG_PLATFORM_POSIX +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#endif + +void +test_path_too_long(void) +{ + nng_socket s1; + char addr[256]; + + // All our names have to be less than 128 bytes. + memset(addr, 'a', 255); + addr[255] = 0; + memcpy(addr, "ipc://", strlen("ipc://")); + + NUTS_ASSERT(strlen(addr) == 255); + NUTS_OPEN(s1); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_RECVTIMEO, 1000)); + NUTS_FAIL(nng_listen(s1, addr, NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL(nng_dial(s1, addr, NULL, NNG_FLAG_NONBLOCK), NNG_EADDRINVAL); + + NUTS_CLOSE(s1); +} + +void +test_ipc_dialer_perms(void) +{ + nng_socket s; + nng_dialer d; + char * addr; + + NUTS_ADDR(addr, "ipc"); + NUTS_OPEN(s); + NUTS_PASS(nng_dialer_create(&d, s, addr)); + NUTS_FAIL( + nng_dialer_set_int(d, NNG_OPT_IPC_PERMISSIONS, 0444), NNG_ENOTSUP); + NUTS_CLOSE(s); +} + +void +test_ipc_dialer_properties(void) +{ + nng_socket s; + nng_dialer d; + nng_sockaddr sa; + size_t z; + char *addr; + + NUTS_ADDR(addr, "ipc"); + NUTS_OPEN(s); + NUTS_PASS(nng_dial(s, addr, &d, NNG_FLAG_NONBLOCK)); + // Dialers don't have local addresses. 
+ NUTS_FAIL(nng_dialer_get_addr(d, NNG_OPT_LOCADDR, &sa), NNG_ENOTSUP); + + NUTS_FAIL( + nng_dialer_set(d, NNG_OPT_LOCADDR, &sa, sizeof(sa)), NNG_ENOTSUP); + + z = 8192; + NUTS_PASS(nng_dialer_set_size(d, NNG_OPT_RECVMAXSZ, z)); + z = 0; + NUTS_PASS(nng_dialer_get_size(d, NNG_OPT_RECVMAXSZ, &z)); + NUTS_TRUE(z == 8192); + NUTS_FAIL(nng_dialer_set_bool(d, NNG_OPT_RAW, true), NNG_ENOTSUP); + NUTS_CLOSE(s); +} + +void +test_ipc_listener_perms(void) +{ + nng_socket s; + nng_listener l; + char *addr; + +#ifndef _WIN32 + char * path; + struct stat st; +#endif + + NUTS_ADDR(addr, "ipc"); + NUTS_OPEN(s); + NUTS_PASS(nng_listener_create(&l, s, addr)); + +#ifdef _WIN32 + NUTS_FAIL(nng_listener_set_int(l, NNG_OPT_IPC_PERMISSIONS, 0444), + NNG_ENOTSUP); +#else + path = &addr[strlen("ipc://")]; + + // Attempt to set invalid permissions fails. + NUTS_FAIL(nng_listener_set_int(l, NNG_OPT_IPC_PERMISSIONS, S_IFREG), + NNG_EINVAL); + + NUTS_PASS(nng_listener_set_int(l, NNG_OPT_IPC_PERMISSIONS, 0444)); + NUTS_PASS(nng_listener_start(l, 0)); + NUTS_TRUE(stat(path, &st) == 0); + NUTS_TRUE((st.st_mode & 0777) == 0444); + + // Now that it's running, we cannot set it. 
+ NUTS_FAIL( + nng_listener_set_int(l, NNG_OPT_IPC_PERMISSIONS, 0644), NNG_EBUSY); +#endif + + NUTS_CLOSE(s); +} + +void +test_ipc_listener_properties(void) +{ + nng_socket s; + nng_listener l; + nng_sockaddr sa; + size_t z; + char *addr; + + NUTS_ADDR(addr, "ipc"); + NUTS_OPEN(s); + NUTS_PASS(nng_listen(s, addr, &l, 0)); + NUTS_PASS(nng_listener_get_addr(l, NNG_OPT_LOCADDR, &sa)); + NUTS_TRUE(sa.s_ipc.sa_family == NNG_AF_IPC); + NUTS_MATCH(sa.s_ipc.sa_path, addr + strlen("ipc://")); + + NUTS_FAIL(nng_listener_set(l, NNG_OPT_LOCADDR, &sa, sizeof(sa)), + NNG_EREADONLY); + z = 8192; + NUTS_PASS(nng_listener_set_size(l, NNG_OPT_RECVMAXSZ, z)); + z = 0; + NUTS_PASS(nng_listener_get_size(l, NNG_OPT_RECVMAXSZ, &z)); + NUTS_TRUE(z == 8192); + NUTS_FAIL(nng_listener_set_bool(l, NNG_OPT_RAW, true), NNG_ENOTSUP); + NUTS_CLOSE(s); +} + +void +test_ipc_recv_max(void) +{ + char msg[256]; + char rcvbuf[256]; + nng_socket s0; + nng_socket s1; + nng_listener l; + size_t sz; + char *addr; + + NUTS_ADDR(addr, "ipc"); + NUTS_OPEN(s0); + NUTS_PASS(nng_socket_set_ms(s0, NNG_OPT_RECVTIMEO, 100)); + NUTS_PASS(nng_socket_set_size(s0, NNG_OPT_RECVMAXSZ, 200)); + NUTS_PASS(nng_listener_create(&l, s0, addr)); + NUTS_PASS(nng_socket_get_size(s0, NNG_OPT_RECVMAXSZ, &sz)); + NUTS_TRUE(sz == 200); + NUTS_PASS(nng_listener_set_size(l, NNG_OPT_RECVMAXSZ, 100)); + NUTS_PASS(nng_listener_start(l, 0)); + + NUTS_OPEN(s1); + NUTS_PASS(nng_dial(s1, addr, NULL, 0)); + NUTS_PASS(nng_send(s1, msg, 95, 0)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 100)); + NUTS_PASS(nng_recv(s0, rcvbuf, &sz, 0)); + NUTS_TRUE(sz == 95); + NUTS_PASS(nng_send(s1, msg, 150, 0)); + NUTS_FAIL(nng_recv(s0, rcvbuf, &sz, 0), NNG_ETIMEDOUT); + NUTS_CLOSE(s0); + NUTS_CLOSE(s1); +} + +void +test_abstract_sockets(void) +{ +#ifdef NNG_HAVE_ABSTRACT_SOCKETS + nng_socket s1; + nng_socket s2; + char *addr; + nng_pipe p1; + nng_pipe p2; + nng_sockaddr sa1; + nng_sockaddr sa2; + char * prefix = "abstract://"; + + NUTS_ADDR(addr, 
"abstract"); + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_MARRY_EX(s1, s2, addr, &p1, &p2); + NUTS_PASS(nng_pipe_get_addr(p1, NNG_OPT_REMADDR, &sa1)); + NUTS_PASS(nng_pipe_get_addr(p2, NNG_OPT_LOCADDR, &sa2)); + NUTS_TRUE(sa1.s_family == sa2.s_family); + NUTS_TRUE(sa1.s_family == NNG_AF_ABSTRACT); + NUTS_TRUE(sa1.s_abstract.sa_len == strlen(addr) - strlen(prefix)); + NUTS_TRUE(sa2.s_abstract.sa_len == strlen(addr) - strlen(prefix)); + NUTS_SEND(s1, "ping"); + NUTS_RECV(s2, "ping"); + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +#endif +} + +void +test_abstract_auto_bind(void) +{ +#ifdef NNG_HAVE_ABSTRACT_SOCKETS + nng_socket s1; + nng_socket s2; + char addr[40]; + char name[12]; + nng_sockaddr sa; + nng_listener l; + size_t len; + + snprintf(addr, sizeof(addr), "abstract://"); + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_listen(s1, addr, &l, 0)); + + NUTS_PASS(nng_listener_get_addr(l, NNG_OPT_LOCADDR, &sa)); + // Under linux there are either 8 or 5 hex characters. + NUTS_TRUE(sa.s_family == NNG_AF_ABSTRACT); + NUTS_TRUE(sa.s_abstract.sa_len < 10); + + len = sa.s_abstract.sa_len; + memcpy(name, sa.s_abstract.sa_name, len); + name[len] = '\0'; + NUTS_TRUE(strlen(name) == len); + + (void) snprintf(addr, sizeof(addr), "abstract://%s", name); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + + // first send the ping + NUTS_SEND(s1, "ping"); + NUTS_RECV(s2, "ping"); + + NUTS_SEND(s2, "pong"); + NUTS_RECV(s1, "pong"); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +#endif +} + +void +test_abstract_too_long(void) +{ +#ifdef NNG_HAVE_ABSTRACT_SOCKETS + nng_socket s1; + char addr[256]; + + // All our names have to be less than 128 bytes. 
+ memset(addr, 'a', 255); + addr[255] = 0; + memcpy(addr, "abstract://", strlen("abstract://")); + + NUTS_ASSERT(strlen(addr) == 255); + NUTS_OPEN(s1); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_RECVTIMEO, 1000)); + NUTS_FAIL(nng_listen(s1, addr, NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL(nng_dial(s1, addr, NULL, NNG_FLAG_NONBLOCK), NNG_EADDRINVAL); + + NUTS_CLOSE(s1); +#endif +} + +void +test_abstract_null(void) +{ +#ifdef NNG_HAVE_ABSTRACT_SOCKETS + nng_socket s1; + nng_socket s2; + char addr[64]; + char name[40]; + char rng[20]; + + nng_sockaddr sa; + nng_listener l; + size_t len; + + snprintf(rng, sizeof(rng), "%08x%08x", nng_random(), nng_random()); + snprintf(name, sizeof(name), "a%%00b_%s", rng); + snprintf(addr, sizeof(addr), "abstract://%s", name); + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_listen(s1, addr, &l, 0)); + + NUTS_PASS(nng_listener_get_addr(l, NNG_OPT_LOCADDR, &sa)); + // Under linux there are either 8 or 5 hex characters. 
+ NUTS_TRUE(sa.s_family == NNG_AF_ABSTRACT); + NUTS_TRUE(sa.s_abstract.sa_len < 32); + len = sa.s_abstract.sa_len; + NUTS_TRUE(len == 20); + NUTS_TRUE(sa.s_abstract.sa_name[0] == 'a'); + NUTS_TRUE(sa.s_abstract.sa_name[1] == '\0'); + NUTS_TRUE(sa.s_abstract.sa_name[2] == 'b'); + NUTS_TRUE(sa.s_abstract.sa_name[3] == '_'); + NUTS_TRUE(memcmp(&sa.s_abstract.sa_name[4], rng, 16) == 0); + + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + + // first send the ping + NUTS_SEND(s1, "1234"); + NUTS_RECV(s2, "1234"); + + NUTS_SEND(s2, "5678"); + NUTS_RECV(s1, "5678"); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +#endif +} + +void +test_unix_alias(void) +{ +#ifdef NNG_PLATFORM_POSIX + nng_socket s1; + nng_socket s2; + char addr1[32]; + char addr2[32]; + char rng[20]; + nng_sockaddr sa1; + nng_sockaddr sa2; + nng_msg * msg; + nng_pipe p; + + // Presumes /tmp. + + (void) snprintf( + rng, sizeof(rng), "%08x%08x", nng_random(), nng_random()); + snprintf(addr1, sizeof(addr1), "ipc:///tmp/%s", rng); + snprintf(addr2, sizeof(addr2), "unix:///tmp/%s", rng); + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_SENDTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_socket_set_ms(s2, NNG_OPT_RECVTIMEO, 1000)); + NUTS_PASS(nng_listen(s1, addr1, NULL, 0)); + NUTS_PASS(nng_dial(s2, addr2, NULL, 0)); + + // first send the ping + NUTS_SEND(s1, "ping"); + NUTS_PASS(nng_recvmsg(s2, &msg, 0)); + NUTS_ASSERT(msg != NULL); + NUTS_TRUE(nng_msg_len(msg) == 5); + NUTS_MATCH(nng_msg_body(msg), "ping"); + p = nng_msg_get_pipe(msg); + NUTS_PASS(nng_pipe_get_addr(p, NNG_OPT_REMADDR, &sa1)); + NUTS_PASS(nng_pipe_get_addr(p, NNG_OPT_REMADDR, &sa2)); + NUTS_TRUE(sa1.s_family == sa2.s_family); + NUTS_TRUE(sa1.s_family == NNG_AF_IPC); + NUTS_MATCH(sa1.s_ipc.sa_path, sa2.s_ipc.sa_path); + nng_msg_free(msg); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +#endif +} + +TEST_LIST = { + { "ipc path too long", 
test_path_too_long }, + { "ipc dialer perms", test_ipc_dialer_perms }, + { "ipc dialer props", test_ipc_dialer_properties }, + { "ipc listener perms", test_ipc_listener_perms }, + { "ipc listener props", test_ipc_listener_properties }, + { "ipc recv max", test_ipc_recv_max }, + { "ipc abstract sockets", test_abstract_sockets }, + { "ipc abstract auto bind", test_abstract_auto_bind }, + { "ipc abstract name too long", test_abstract_too_long }, + { "ipc abstract embedded null", test_abstract_null }, + { "ipc unix alias", test_unix_alias }, + { NULL, NULL }, +};
\ No newline at end of file
diff --git a/src/sp/transport/tcp/CMakeLists.txt b/src/sp/transport/tcp/CMakeLists.txt
new file mode 100644
index 00000000..d6022329
--- /dev/null
+++ b/src/sp/transport/tcp/CMakeLists.txt
@@ -0,0 +1,17 @@
+#
+# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech>
+# Copyright 2018 Capitar IT Group BV <info@capitar.com>
+#
+# This software is supplied under the terms of the MIT License, a
+# copy of which should be located in the distribution where this
+# file was obtained (LICENSE.txt). A copy of the license may also be
+# found online at https://opensource.org/licenses/MIT.
+#
+
+# TCP transport
+nng_directory(tcp)
+
+nng_sources_if(NNG_TRANSPORT_TCP tcp.c)
+nng_headers_if(NNG_TRANSPORT_TCP nng/transport/tcp/tcp.h)
+nng_defines_if(NNG_TRANSPORT_TCP NNG_TRANSPORT_TCP)
+nng_test(tcp_test)
\ No newline at end of file diff --git a/src/sp/transport/tcp/tcp.c b/src/sp/transport/tcp/tcp.c new file mode 100644 index 00000000..524c6988 --- /dev/null +++ b/src/sp/transport/tcp/tcp.c @@ -0,0 +1,1263 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// Copyright 2019 Devolutions <info@devolutions.net> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/nng_impl.h" + +// TCP transport. Platform specific TCP operations must be +// supplied as well. + +typedef struct tcptran_pipe tcptran_pipe; +typedef struct tcptran_ep tcptran_ep; + +// tcp_pipe is one end of a TCP connection. +struct tcptran_pipe { + nng_stream * conn; + nni_pipe * npipe; + uint16_t peer; + uint16_t proto; + size_t rcvmax; + bool closed; + nni_list_node node; + tcptran_ep * ep; + nni_atomic_flag reaped; + nni_reap_node reap; + uint8_t txlen[sizeof(uint64_t)]; + uint8_t rxlen[sizeof(uint64_t)]; + size_t gottxhead; + size_t gotrxhead; + size_t wanttxhead; + size_t wantrxhead; + nni_list recvq; + nni_list sendq; + nni_aio * txaio; + nni_aio * rxaio; + nni_aio * negoaio; + nni_msg * rxmsg; + nni_mtx mtx; +}; + +struct tcptran_ep { + nni_mtx mtx; + uint16_t proto; + size_t rcvmax; + bool fini; + bool started; + bool closed; + nng_url * url; + const char * host; // for dialers + nng_sockaddr src; + int refcnt; // active pipes + nni_aio * useraio; + nni_aio * connaio; + nni_aio * timeaio; + nni_list busypipes; // busy pipes -- ones passed to socket + nni_list waitpipes; // pipes waiting to match to socket + nni_list negopipes; // pipes busy negotiating + nni_reap_node reap; + nng_stream_dialer * dialer; + nng_stream_listener 
*listener; + +#ifdef NNG_ENABLE_STATS + nni_stat_item st_rcv_max; +#endif +}; + +static void tcptran_pipe_send_start(tcptran_pipe *); +static void tcptran_pipe_recv_start(tcptran_pipe *); +static void tcptran_pipe_send_cb(void *); +static void tcptran_pipe_recv_cb(void *); +static void tcptran_pipe_nego_cb(void *); +static void tcptran_ep_fini(void *); +static void tcptran_pipe_fini(void *); + +static nni_reap_list tcptran_ep_reap_list = { + .rl_offset = offsetof(tcptran_ep, reap), + .rl_func = tcptran_ep_fini, +}; + +static nni_reap_list tcptran_pipe_reap_list = { + .rl_offset = offsetof (tcptran_pipe, reap), + .rl_func = tcptran_pipe_fini, +}; + +static int +tcptran_init(void) +{ + return (0); +} + +static void +tcptran_fini(void) +{ +} + +static void +tcptran_pipe_close(void *arg) +{ + tcptran_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + p->closed = true; + nni_mtx_unlock(&p->mtx); + + nni_aio_close(p->rxaio); + nni_aio_close(p->txaio); + nni_aio_close(p->negoaio); + + nng_stream_close(p->conn); +} + +static void +tcptran_pipe_stop(void *arg) +{ + tcptran_pipe *p = arg; + + nni_aio_stop(p->rxaio); + nni_aio_stop(p->txaio); + nni_aio_stop(p->negoaio); +} + +static int +tcptran_pipe_init(void *arg, nni_pipe *npipe) +{ + tcptran_pipe *p = arg; + p->npipe = npipe; + + return (0); +} + +static void +tcptran_pipe_fini(void *arg) +{ + tcptran_pipe *p = arg; + tcptran_ep * ep; + + tcptran_pipe_stop(p); + if ((ep = p->ep) != NULL) { + nni_mtx_lock(&ep->mtx); + nni_list_node_remove(&p->node); + ep->refcnt--; + if (ep->fini && (ep->refcnt == 0)) { + nni_reap(&tcptran_ep_reap_list, ep); + } + nni_mtx_unlock(&ep->mtx); + } + + nni_aio_free(p->rxaio); + nni_aio_free(p->txaio); + nni_aio_free(p->negoaio); + nng_stream_free(p->conn); + nni_msg_free(p->rxmsg); + nni_mtx_fini(&p->mtx); + NNI_FREE_STRUCT(p); +} + +static void +tcptran_pipe_reap(tcptran_pipe *p) +{ + if (!nni_atomic_flag_test_and_set(&p->reaped)) { + if (p->conn != NULL) { + nng_stream_close(p->conn); + } + 
nni_reap(&tcptran_pipe_reap_list, p); + } +} + +static int +tcptran_pipe_alloc(tcptran_pipe **pipep) +{ + tcptran_pipe *p; + int rv; + + if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&p->mtx); + if (((rv = nni_aio_alloc(&p->txaio, tcptran_pipe_send_cb, p)) != 0) || + ((rv = nni_aio_alloc(&p->rxaio, tcptran_pipe_recv_cb, p)) != 0) || + ((rv = nni_aio_alloc(&p->negoaio, tcptran_pipe_nego_cb, p)) != + 0)) { + tcptran_pipe_fini(p); + return (rv); + } + nni_aio_list_init(&p->recvq); + nni_aio_list_init(&p->sendq); + nni_atomic_flag_reset(&p->reaped); + + *pipep = p; + + return (0); +} + +static void +tcptran_ep_match(tcptran_ep *ep) +{ + nni_aio * aio; + tcptran_pipe *p; + + if (((aio = ep->useraio) == NULL) || + ((p = nni_list_first(&ep->waitpipes)) == NULL)) { + return; + } + nni_list_remove(&ep->waitpipes, p); + nni_list_append(&ep->busypipes, p); + ep->useraio = NULL; + p->rcvmax = ep->rcvmax; + nni_aio_set_output(aio, 0, p); + nni_aio_finish(aio, 0, 0); +} + +static void +tcptran_pipe_nego_cb(void *arg) +{ + tcptran_pipe *p = arg; + tcptran_ep * ep = p->ep; + nni_aio * aio = p->negoaio; + nni_aio * uaio; + int rv; + + nni_mtx_lock(&ep->mtx); + + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + // We start transmitting before we receive. + if (p->gottxhead < p->wanttxhead) { + p->gottxhead += nni_aio_count(aio); + } else if (p->gotrxhead < p->wantrxhead) { + p->gotrxhead += nni_aio_count(aio); + } + + if (p->gottxhead < p->wanttxhead) { + nni_iov iov; + iov.iov_len = p->wanttxhead - p->gottxhead; + iov.iov_buf = &p->txlen[p->gottxhead]; + // send it down... 
+ nni_aio_set_iov(aio, 1, &iov); + nng_stream_send(p->conn, aio); + nni_mtx_unlock(&ep->mtx); + return; + } + if (p->gotrxhead < p->wantrxhead) { + nni_iov iov; + iov.iov_len = p->wantrxhead - p->gotrxhead; + iov.iov_buf = &p->rxlen[p->gotrxhead]; + nni_aio_set_iov(aio, 1, &iov); + nng_stream_recv(p->conn, aio); + nni_mtx_unlock(&ep->mtx); + return; + } + // We have both sent and received the headers. Lets check the + // receive side header. + if ((p->rxlen[0] != 0) || (p->rxlen[1] != 'S') || + (p->rxlen[2] != 'P') || (p->rxlen[3] != 0) || (p->rxlen[6] != 0) || + (p->rxlen[7] != 0)) { + rv = NNG_EPROTO; + goto error; + } + + NNI_GET16(&p->rxlen[4], p->peer); + + // We are all ready now. We put this in the wait list, and + // then try to run the matcher. + nni_list_remove(&ep->negopipes, p); + nni_list_append(&ep->waitpipes, p); + + tcptran_ep_match(ep); + nni_mtx_unlock(&ep->mtx); + + return; + +error: + nng_stream_close(p->conn); + + if ((uaio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(uaio, rv); + } + nni_mtx_unlock(&ep->mtx); + tcptran_pipe_reap(p); +} + +static void +tcptran_pipe_send_cb(void *arg) +{ + tcptran_pipe *p = arg; + int rv; + nni_aio * aio; + size_t n; + nni_msg * msg; + nni_aio * txaio = p->txaio; + + nni_mtx_lock(&p->mtx); + aio = nni_list_first(&p->sendq); + + if ((rv = nni_aio_result(txaio)) != 0) { + nni_pipe_bump_error(p->npipe, rv); + // Intentionally we do not queue up another transfer. + // There's an excellent chance that the pipe is no longer + // usable, with a partial transfer. + // The protocol should see this error, and close the + // pipe itself, we hope. 
+ nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + + n = nni_aio_count(txaio); + nni_aio_iov_advance(txaio, n); + if (nni_aio_iov_count(txaio) > 0) { + nng_stream_send(p->conn, txaio); + nni_mtx_unlock(&p->mtx); + return; + } + + nni_aio_list_remove(aio); + tcptran_pipe_send_start(p); + + msg = nni_aio_get_msg(aio); + n = nni_msg_len(msg); + nni_pipe_bump_tx(p->npipe, n); + nni_mtx_unlock(&p->mtx); + + nni_aio_set_msg(aio, NULL); + nni_msg_free(msg); + nni_aio_finish_sync(aio, 0, n); +} + +static void +tcptran_pipe_recv_cb(void *arg) +{ + tcptran_pipe *p = arg; + nni_aio * aio; + int rv; + size_t n; + nni_msg * msg; + nni_aio * rxaio = p->rxaio; + + nni_mtx_lock(&p->mtx); + aio = nni_list_first(&p->recvq); + + if ((rv = nni_aio_result(rxaio)) != 0) { + goto recv_error; + } + + n = nni_aio_count(rxaio); + nni_aio_iov_advance(rxaio, n); + if (nni_aio_iov_count(rxaio) > 0) { + nng_stream_recv(p->conn, rxaio); + nni_mtx_unlock(&p->mtx); + return; + } + + // If we don't have a message yet, we were reading the TCP message + // header, which is just the length. This tells us the size of the + // message to allocate and how much more to expect. + if (p->rxmsg == NULL) { + uint64_t len; + // We should have gotten a message header. + NNI_GET64(p->rxlen, len); + + // Make sure the message payload is not too big. If it is + // the caller will shut down the pipe. + if ((len > p->rcvmax) && (p->rcvmax > 0)) { + rv = NNG_EMSGSIZE; + goto recv_error; + } + + if ((rv = nni_msg_alloc(&p->rxmsg, (size_t) len)) != 0) { + goto recv_error; + } + + // Submit the rest of the data for a read -- we want to + // read the entire message now. + if (len != 0) { + nni_iov iov; + iov.iov_buf = nni_msg_body(p->rxmsg); + iov.iov_len = (size_t) len; + + nni_aio_set_iov(rxaio, 1, &iov); + nng_stream_recv(p->conn, rxaio); + nni_mtx_unlock(&p->mtx); + return; + } + } + + // We read a message completely. Let the user know the good news. 
+ nni_aio_list_remove(aio); + msg = p->rxmsg; + p->rxmsg = NULL; + n = nni_msg_len(msg); + + nni_pipe_bump_rx(p->npipe, n); + tcptran_pipe_recv_start(p); + nni_mtx_unlock(&p->mtx); + + nni_aio_set_msg(aio, msg); + nni_aio_finish_sync(aio, 0, n); + return; + +recv_error: + nni_aio_list_remove(aio); + msg = p->rxmsg; + p->rxmsg = NULL; + nni_pipe_bump_error(p->npipe, rv); + // Intentionally, we do not queue up another receive. + // The protocol should notice this error and close the pipe. + nni_mtx_unlock(&p->mtx); + + nni_msg_free(msg); + nni_aio_finish_error(aio, rv); +} + +static void +tcptran_pipe_send_cancel(nni_aio *aio, void *arg, int rv) +{ + tcptran_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + if (!nni_aio_list_active(aio)) { + nni_mtx_unlock(&p->mtx); + return; + } + // If this is being sent, then cancel the pending transfer. + // The callback on the txaio will cause the user aio to + // be canceled too. + if (nni_list_first(&p->sendq) == aio) { + nni_aio_abort(p->txaio, rv); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + + nni_aio_finish_error(aio, rv); +} + +static void +tcptran_pipe_send_start(tcptran_pipe *p) +{ + nni_aio *aio; + nni_aio *txaio; + nni_msg *msg; + int niov; + nni_iov iov[3]; + uint64_t len; + + if (p->closed) { + while ((aio = nni_list_first(&p->sendq)) != NULL) { + nni_list_remove(&p->sendq, aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + return; + } + + if ((aio = nni_list_first(&p->sendq)) == NULL) { + return; + } + + // This runs to send the message. 
+ msg = nni_aio_get_msg(aio); + len = nni_msg_len(msg) + nni_msg_header_len(msg); + + NNI_PUT64(p->txlen, len); + + txaio = p->txaio; + niov = 0; + iov[0].iov_buf = p->txlen; + iov[0].iov_len = sizeof(p->txlen); + niov++; + if (nni_msg_header_len(msg) > 0) { + iov[niov].iov_buf = nni_msg_header(msg); + iov[niov].iov_len = nni_msg_header_len(msg); + niov++; + } + if (nni_msg_len(msg) > 0) { + iov[niov].iov_buf = nni_msg_body(msg); + iov[niov].iov_len = nni_msg_len(msg); + niov++; + } + nni_aio_set_iov(txaio, niov, iov); + nng_stream_send(p->conn, txaio); +} + +static void +tcptran_pipe_send(void *arg, nni_aio *aio) +{ + tcptran_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, tcptran_pipe_send_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + nni_list_append(&p->sendq, aio); + if (nni_list_first(&p->sendq) == aio) { + tcptran_pipe_send_start(p); + } + nni_mtx_unlock(&p->mtx); +} + +static void +tcptran_pipe_recv_cancel(nni_aio *aio, void *arg, int rv) +{ + tcptran_pipe *p = arg; + + nni_mtx_lock(&p->mtx); + if (!nni_aio_list_active(aio)) { + nni_mtx_unlock(&p->mtx); + return; + } + // If receive in progress, then cancel the pending transfer. + // The callback on the rxaio will cause the user aio to + // be canceled too. + if (nni_list_first(&p->recvq) == aio) { + nni_aio_abort(p->rxaio, rv); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); +} + +static void +tcptran_pipe_recv_start(tcptran_pipe *p) +{ + nni_aio *rxaio; + nni_iov iov; + NNI_ASSERT(p->rxmsg == NULL); + + if (p->closed) { + nni_aio *aio; + while ((aio = nni_list_first(&p->recvq)) != NULL) { + nni_list_remove(&p->recvq, aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + return; + } + if (nni_list_empty(&p->recvq)) { + return; + } + + // Schedule a read of the header. 
+ rxaio = p->rxaio; + iov.iov_buf = p->rxlen; + iov.iov_len = sizeof(p->rxlen); + nni_aio_set_iov(rxaio, 1, &iov); + + nng_stream_recv(p->conn, rxaio); +} + +static void +tcptran_pipe_recv(void *arg, nni_aio *aio) +{ + tcptran_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, tcptran_pipe_recv_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + + nni_list_append(&p->recvq, aio); + if (nni_list_first(&p->recvq) == aio) { + tcptran_pipe_recv_start(p); + } + nni_mtx_unlock(&p->mtx); +} + +static uint16_t +tcptran_pipe_peer(void *arg) +{ + tcptran_pipe *p = arg; + + return (p->peer); +} + +static int +tcptran_pipe_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + tcptran_pipe *p = arg; + return (nni_stream_get(p->conn, name, buf, szp, t)); +} + +static void +tcptran_pipe_start(tcptran_pipe *p, nng_stream *conn, tcptran_ep *ep) +{ + nni_iov iov; + + ep->refcnt++; + + p->conn = conn; + p->ep = ep; + p->proto = ep->proto; + + p->txlen[0] = 0; + p->txlen[1] = 'S'; + p->txlen[2] = 'P'; + p->txlen[3] = 0; + NNI_PUT16(&p->txlen[4], p->proto); + NNI_PUT16(&p->txlen[6], 0); + + p->gotrxhead = 0; + p->gottxhead = 0; + p->wantrxhead = 8; + p->wanttxhead = 8; + iov.iov_len = 8; + iov.iov_buf = &p->txlen[0]; + nni_aio_set_iov(p->negoaio, 1, &iov); + nni_list_append(&ep->negopipes, p); + + nni_aio_set_timeout(p->negoaio, 10000); // 10 sec timeout to negotiate + nng_stream_send(p->conn, p->negoaio); +} + +static void +tcptran_ep_fini(void *arg) +{ + tcptran_ep *ep = arg; + + nni_mtx_lock(&ep->mtx); + ep->fini = true; + if (ep->refcnt != 0) { + nni_mtx_unlock(&ep->mtx); + return; + } + nni_mtx_unlock(&ep->mtx); + nni_aio_stop(ep->timeaio); + nni_aio_stop(ep->connaio); + nng_stream_dialer_free(ep->dialer); + nng_stream_listener_free(ep->listener); + nni_aio_free(ep->timeaio); + nni_aio_free(ep->connaio); + + nni_mtx_fini(&ep->mtx); 
+ NNI_FREE_STRUCT(ep); +} + +static void +tcptran_ep_close(void *arg) +{ + tcptran_ep * ep = arg; + tcptran_pipe *p; + + nni_mtx_lock(&ep->mtx); + + ep->closed = true; + nni_aio_close(ep->timeaio); + if (ep->dialer != NULL) { + nng_stream_dialer_close(ep->dialer); + } + if (ep->listener != NULL) { + nng_stream_listener_close(ep->listener); + } + NNI_LIST_FOREACH (&ep->negopipes, p) { + tcptran_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->waitpipes, p) { + tcptran_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->busypipes, p) { + tcptran_pipe_close(p); + } + if (ep->useraio != NULL) { + nni_aio_finish_error(ep->useraio, NNG_ECLOSED); + ep->useraio = NULL; + } + + nni_mtx_unlock(&ep->mtx); +} + +// This parses off the optional source address that this transport uses. +// The special handling of this URL format is quite honestly an historical +// mistake, which we would remove if we could. +static int +tcptran_url_parse_source(nng_url *url, nng_sockaddr *sa, const nng_url *surl) +{ + int af; + char * semi; + char * src; + size_t len; + int rv; + nni_aio *aio; + + // We modify the URL. This relies on the fact that the underlying + // transport does not free this, so we can just use references. 
+ + url->u_scheme = surl->u_scheme; + url->u_port = surl->u_port; + url->u_hostname = surl->u_hostname; + + if ((semi = strchr(url->u_hostname, ';')) == NULL) { + memset(sa, 0, sizeof(*sa)); + return (0); + } + + len = (size_t)(semi - url->u_hostname); + url->u_hostname = semi + 1; + + if (strcmp(surl->u_scheme, "tcp") == 0) { + af = NNG_AF_UNSPEC; + } else if (strcmp(surl->u_scheme, "tcp4") == 0) { + af = NNG_AF_INET; + } else if (strcmp(surl->u_scheme, "tcp6") == 0) { + af = NNG_AF_INET6; + } else { + return (NNG_EADDRINVAL); + } + + if ((src = nni_alloc(len + 1)) == NULL) { + return (NNG_ENOMEM); + } + memcpy(src, surl->u_hostname, len); + src[len] = '\0'; + + if ((rv = nni_aio_alloc(&aio, NULL, NULL)) != 0) { + nni_free(src, len + 1); + return (rv); + } + + nni_resolv_ip(src, "0", af, true, sa, aio); + nni_aio_wait(aio); + nni_aio_free(aio); + nni_free(src, len + 1); + return (rv); +} + +static void +tcptran_timer_cb(void *arg) +{ + tcptran_ep *ep = arg; + if (nni_aio_result(ep->timeaio) == 0) { + nng_stream_listener_accept(ep->listener, ep->connaio); + } +} + +static void +tcptran_accept_cb(void *arg) +{ + tcptran_ep * ep = arg; + nni_aio * aio = ep->connaio; + tcptran_pipe *p; + int rv; + nng_stream * conn; + + nni_mtx_lock(&ep->mtx); + + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + conn = nni_aio_get_output(aio, 0); + if ((rv = tcptran_pipe_alloc(&p)) != 0) { + nng_stream_free(conn); + goto error; + } + + if (ep->closed) { + tcptran_pipe_fini(p); + nng_stream_free(conn); + rv = NNG_ECLOSED; + goto error; + } + tcptran_pipe_start(p, conn, ep); + nng_stream_listener_accept(ep->listener, ep->connaio); + nni_mtx_unlock(&ep->mtx); + return; + +error: + // When an error here occurs, let's send a notice up to the consumer. + // That way it can be reported properly. 
+ if ((aio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + switch (rv) { + + case NNG_ENOMEM: + case NNG_ENOFILES: + nng_sleep_aio(10, ep->timeaio); + break; + + default: + if (!ep->closed) { + nng_stream_listener_accept(ep->listener, ep->connaio); + } + break; + } + nni_mtx_unlock(&ep->mtx); +} + +static void +tcptran_dial_cb(void *arg) +{ + tcptran_ep * ep = arg; + nni_aio * aio = ep->connaio; + tcptran_pipe *p; + int rv; + nng_stream * conn; + + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + conn = nni_aio_get_output(aio, 0); + if ((rv = tcptran_pipe_alloc(&p)) != 0) { + nng_stream_free(conn); + goto error; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + tcptran_pipe_fini(p); + nng_stream_free(conn); + rv = NNG_ECLOSED; + nni_mtx_unlock(&ep->mtx); + goto error; + } else { + tcptran_pipe_start(p, conn, ep); + } + nni_mtx_unlock(&ep->mtx); + return; + +error: + // Error connecting. We need to pass this straight back + // to the user. 
+ nni_mtx_lock(&ep->mtx); + if ((aio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static int +tcptran_ep_init(tcptran_ep **epp, nng_url *url, nni_sock *sock) +{ + tcptran_ep *ep; + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&ep->mtx); + NNI_LIST_INIT(&ep->busypipes, tcptran_pipe, node); + NNI_LIST_INIT(&ep->waitpipes, tcptran_pipe, node); + NNI_LIST_INIT(&ep->negopipes, tcptran_pipe, node); + + ep->proto = nni_sock_proto_id(sock); + ep->url = url; + +#ifdef NNG_ENABLE_STATS + static const nni_stat_info rcv_max_info = { + .si_name = "rcv_max", + .si_desc = "maximum receive size", + .si_type = NNG_STAT_LEVEL, + .si_unit = NNG_UNIT_BYTES, + .si_atomic = true, + }; + nni_stat_init(&ep->st_rcv_max, &rcv_max_info); +#endif + + *epp = ep; + return (0); +} + +static int +tcptran_dialer_init(void **dp, nng_url *url, nni_dialer *ndialer) +{ + tcptran_ep * ep; + int rv; + nng_sockaddr srcsa; + nni_sock * sock = nni_dialer_sock(ndialer); + nng_url myurl; + + // Check for invalid URL components. 
+ if ((strlen(url->u_path) != 0) && (strcmp(url->u_path, "/") != 0)) { + return (NNG_EADDRINVAL); + } + if ((url->u_fragment != NULL) || (url->u_userinfo != NULL) || + (url->u_query != NULL) || (strlen(url->u_hostname) == 0) || + (strlen(url->u_port) == 0)) { + return (NNG_EADDRINVAL); + } + + if ((rv = tcptran_url_parse_source(&myurl, &srcsa, url)) != 0) { + return (rv); + } + + if ((rv = tcptran_ep_init(&ep, url, sock)) != 0) { + return (rv); + } + + if ((rv != 0) || + ((rv = nni_aio_alloc(&ep->connaio, tcptran_dial_cb, ep)) != 0) || + ((rv = nng_stream_dialer_alloc_url(&ep->dialer, &myurl)) != 0)) { + tcptran_ep_fini(ep); + return (rv); + } + if ((srcsa.s_family != NNG_AF_UNSPEC) && + ((rv = nni_stream_dialer_set(ep->dialer, NNG_OPT_LOCADDR, &srcsa, + sizeof(srcsa), NNI_TYPE_SOCKADDR)) != 0)) { + tcptran_ep_fini(ep); + return (rv); + } + +#ifdef NNG_ENABLE_STATS + nni_dialer_add_stat(ndialer, &ep->st_rcv_max); +#endif + *dp = ep; + return (0); +} + +static int +tcptran_listener_init(void **lp, nng_url *url, nni_listener *nlistener) +{ + tcptran_ep *ep; + int rv; + nni_sock * sock = nni_listener_sock(nlistener); + + // Check for invalid URL components. 
+ if ((strlen(url->u_path) != 0) && (strcmp(url->u_path, "/") != 0)) { + return (NNG_EADDRINVAL); + } + if ((url->u_fragment != NULL) || (url->u_userinfo != NULL) || + (url->u_query != NULL)) { + return (NNG_EADDRINVAL); + } + + if ((rv = tcptran_ep_init(&ep, url, sock)) != 0) { + return (rv); + } + + if (((rv = nni_aio_alloc(&ep->connaio, tcptran_accept_cb, ep)) != 0) || + ((rv = nni_aio_alloc(&ep->timeaio, tcptran_timer_cb, ep)) != 0) || + ((rv = nng_stream_listener_alloc_url(&ep->listener, url)) != 0)) { + tcptran_ep_fini(ep); + return (rv); + } +#ifdef NNG_ENABLE_STATS + nni_listener_add_stat(nlistener, &ep->st_rcv_max); +#endif + + *lp = ep; + return (0); +} + +static void +tcptran_ep_cancel(nni_aio *aio, void *arg, int rv) +{ + tcptran_ep *ep = arg; + nni_mtx_lock(&ep->mtx); + if (ep->useraio == aio) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static void +tcptran_ep_connect(void *arg, nni_aio *aio) +{ + tcptran_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if (ep->useraio != NULL) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_EBUSY); + return; + } + if ((rv = nni_aio_schedule(aio, tcptran_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->useraio = aio; + + nng_stream_dialer_dial(ep->dialer, ep->connaio); + nni_mtx_unlock(&ep->mtx); +} + +static int +tcptran_ep_get_url(void *arg, void *v, size_t *szp, nni_opt_type t) +{ + tcptran_ep *ep = arg; + char * s; + int rv; + int port = 0; + + if (ep->listener != NULL) { + (void) nng_stream_listener_get_int( + ep->listener, NNG_OPT_TCP_BOUND_PORT, &port); + } + + if ((rv = nni_url_asprintf_port(&s, ep->url, port)) == 0) { + rv = nni_copyout_str(s, v, szp, t); + nni_strfree(s); + } + return (rv); +} + +static int +tcptran_ep_get_recvmaxsz(void 
*arg, void *v, size_t *szp, nni_opt_type t) +{ + tcptran_ep *ep = arg; + int rv; + + nni_mtx_lock(&ep->mtx); + rv = nni_copyout_size(ep->rcvmax, v, szp, t); + nni_mtx_unlock(&ep->mtx); + return (rv); +} + +static int +tcptran_ep_set_recvmaxsz(void *arg, const void *v, size_t sz, nni_opt_type t) +{ + tcptran_ep *ep = arg; + size_t val; + int rv; + if ((rv = nni_copyin_size(&val, v, sz, 0, NNI_MAXSZ, t)) == 0) { + tcptran_pipe *p; + nni_mtx_lock(&ep->mtx); + ep->rcvmax = val; + NNI_LIST_FOREACH (&ep->waitpipes, p) { + p->rcvmax = val; + } + NNI_LIST_FOREACH (&ep->negopipes, p) { + p->rcvmax = val; + } + NNI_LIST_FOREACH (&ep->busypipes, p) { + p->rcvmax = val; + } + nni_mtx_unlock(&ep->mtx); +#ifdef NNG_ENABLE_STATS + nni_stat_set_value(&ep->st_rcv_max, val); +#endif + } + return (rv); +} + +static int +tcptran_ep_bind(void *arg) +{ + tcptran_ep *ep = arg; + int rv; + + nni_mtx_lock(&ep->mtx); + rv = nng_stream_listener_listen(ep->listener); + nni_mtx_unlock(&ep->mtx); + + return (rv); +} + +static void +tcptran_ep_accept(void *arg, nni_aio *aio) +{ + tcptran_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if (ep->useraio != NULL) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_EBUSY); + return; + } + if ((rv = nni_aio_schedule(aio, tcptran_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->useraio = aio; + if (!ep->started) { + ep->started = true; + nng_stream_listener_accept(ep->listener, ep->connaio); + } else { + tcptran_ep_match(ep); + } + nni_mtx_unlock(&ep->mtx); +} + +static nni_tran_pipe_ops tcptran_pipe_ops = { + .p_init = tcptran_pipe_init, + .p_fini = tcptran_pipe_fini, + .p_stop = tcptran_pipe_stop, + .p_send = tcptran_pipe_send, + .p_recv = tcptran_pipe_recv, + .p_close = tcptran_pipe_close, + .p_peer = tcptran_pipe_peer, + 
.p_getopt = tcptran_pipe_getopt, +}; + +static const nni_option tcptran_ep_opts[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = tcptran_ep_get_recvmaxsz, + .o_set = tcptran_ep_set_recvmaxsz, + }, + { + .o_name = NNG_OPT_URL, + .o_get = tcptran_ep_get_url, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static int +tcptran_dialer_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + tcptran_ep *ep = arg; + int rv; + + rv = nni_stream_dialer_get(ep->dialer, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(tcptran_ep_opts, name, ep, buf, szp, t); + } + return (rv); +} + +static int +tcptran_dialer_setopt( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + tcptran_ep *ep = arg; + int rv; + + rv = nni_stream_dialer_set(ep->dialer, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(tcptran_ep_opts, name, ep, buf, sz, t); + } + return (rv); +} + +static int +tcptran_listener_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + tcptran_ep *ep = arg; + int rv; + + rv = nni_stream_listener_get(ep->listener, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(tcptran_ep_opts, name, ep, buf, szp, t); + } + return (rv); +} + +static int +tcptran_listener_setopt( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + tcptran_ep *ep = arg; + int rv; + + rv = nni_stream_listener_set(ep->listener, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(tcptran_ep_opts, name, ep, buf, sz, t); + } + return (rv); +} + +static nni_tran_dialer_ops tcptran_dialer_ops = { + .d_init = tcptran_dialer_init, + .d_fini = tcptran_ep_fini, + .d_connect = tcptran_ep_connect, + .d_close = tcptran_ep_close, + .d_getopt = tcptran_dialer_getopt, + .d_setopt = tcptran_dialer_setopt, +}; + +static nni_tran_listener_ops tcptran_listener_ops = { + .l_init = tcptran_listener_init, + .l_fini = tcptran_ep_fini, + .l_bind = tcptran_ep_bind, + 
.l_accept = tcptran_ep_accept, + .l_close = tcptran_ep_close, + .l_getopt = tcptran_listener_getopt, + .l_setopt = tcptran_listener_setopt, +}; + +static nni_tran tcp_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tcp", + .tran_dialer = &tcptran_dialer_ops, + .tran_listener = &tcptran_listener_ops, + .tran_pipe = &tcptran_pipe_ops, + .tran_init = tcptran_init, + .tran_fini = tcptran_fini, +}; + +static nni_tran tcp4_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tcp4", + .tran_dialer = &tcptran_dialer_ops, + .tran_listener = &tcptran_listener_ops, + .tran_pipe = &tcptran_pipe_ops, + .tran_init = tcptran_init, + .tran_fini = tcptran_fini, +}; + +static nni_tran tcp6_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tcp6", + .tran_dialer = &tcptran_dialer_ops, + .tran_listener = &tcptran_listener_ops, + .tran_pipe = &tcptran_pipe_ops, + .tran_init = tcptran_init, + .tran_fini = tcptran_fini, +}; + +int +nng_tcp_register(void) +{ + int rv; + if (((rv = nni_tran_register(&tcp_tran)) != 0) || + ((rv = nni_tran_register(&tcp4_tran)) != 0) || + ((rv = nni_tran_register(&tcp6_tran)) != 0)) { + return (rv); + } + return (0); +} diff --git a/src/sp/transport/tcp/tcp_test.c b/src/sp/transport/tcp/tcp_test.c new file mode 100644 index 00000000..d23227d7 --- /dev/null +++ b/src/sp/transport/tcp/tcp_test.c @@ -0,0 +1,297 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// Copyright 2018 Devolutions <info@devolutions.net> +// Copyright 2018 Cody Piersall <cody.piersall@gmail.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + + +#include <nuts.h> + +// TCP tests. 
+ +static void +test_tcp_wild_card_connect_fail(void) +{ + nng_socket s; + char addr[NNG_MAXADDRLEN]; + + NUTS_OPEN(s); + (void) snprintf(addr, sizeof(addr), "tcp://*:%u", nuts_next_port()); + NUTS_FAIL(nng_dial(s, addr, NULL, 0), NNG_EADDRINVAL); + NUTS_CLOSE(s); +} + +void +test_tcp_wild_card_bind(void) +{ + nng_socket s1; + nng_socket s2; + char addr[NNG_MAXADDRLEN]; + uint16_t port; + + port = nuts_next_port(); + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + (void) snprintf(addr, sizeof(addr), "tcp4://*:%u", port); + NUTS_PASS(nng_listen(s1, addr, NULL, 0)); + (void) snprintf(addr, sizeof(addr), "tcp://127.0.0.1:%u", port); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + NUTS_CLOSE(s2); + NUTS_CLOSE(s1); +} + +void +test_tcp_local_address_connect(void) +{ + + nng_socket s1; + nng_socket s2; + char addr[NNG_MAXADDRLEN]; + uint16_t port; + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + port = nuts_next_port(); + (void) snprintf(addr, sizeof(addr), "tcp://127.0.0.1:%u", port); + NUTS_PASS(nng_listen(s1, addr, NULL, 0)); + (void) snprintf( + addr, sizeof(addr), "tcp://127.0.0.1;127.0.0.1:%u", port); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + NUTS_CLOSE(s2); + NUTS_CLOSE(s1); +} + +void +test_tcp_port_zero_bind(void) +{ + nng_socket s1; + nng_socket s2; + nng_sockaddr sa; + nng_listener l; + char * addr; + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_PASS(nng_listen(s1, "tcp://127.0.0.1:0", &l, 0)); + NUTS_PASS(nng_listener_get_string(l, NNG_OPT_URL, &addr)); + NUTS_TRUE(memcmp(addr, "tcp://", 6) == 0); + NUTS_PASS(nng_listener_get_addr(l, NNG_OPT_LOCADDR, &sa)); + NUTS_TRUE(sa.s_in.sa_family == NNG_AF_INET); + NUTS_TRUE(sa.s_in.sa_port != 0); + NUTS_TRUE(sa.s_in.sa_addr = nuts_be32(0x7f000001)); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + nng_strfree(addr); + NUTS_CLOSE(s2); + NUTS_CLOSE(s1); +} + +void +test_tcp_bad_local_interface(void) +{ + nng_socket s1; + + NUTS_OPEN(s1); + NUTS_FAIL(nng_dial(s1, "tcp://bogus1;127.0.0.1:80", NULL, 0), + NNG_EADDRINVAL); + NUTS_CLOSE(s1); +} + +void 
+test_tcp_non_local_address(void) +{ + nng_socket s1; + + NUTS_OPEN(s1); + NUTS_FAIL(nng_dial(s1, "tcp://8.8.8.8;127.0.0.1:80", NULL, 0), + NNG_EADDRINVAL); + NUTS_CLOSE(s1); +} + +void +test_tcp_malformed_address(void) +{ + nng_socket s1; + + NUTS_OPEN(s1); + NUTS_FAIL( + nng_dial(s1, "tcp://127.0.0.1", NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL( + nng_dial(s1, "tcp://127.0.0.1.32", NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL( + nng_dial(s1, "tcp://127.0.x.1.32", NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL( + nng_listen(s1, "tcp://127.0.0.1.32", NULL, 0), NNG_EADDRINVAL); + NUTS_FAIL( + nng_listen(s1, "tcp://127.0.x.1.32", NULL, 0), NNG_EADDRINVAL); + NUTS_CLOSE(s1); +} + +void +test_tcp_no_delay_option(void) +{ + nng_socket s; + nng_dialer d; + nng_listener l; + bool v; + int x; + char *addr; + + NUTS_ADDR(addr, "tcp"); + + NUTS_OPEN(s); +#ifndef NNG_ELIDE_DEPRECATED + NUTS_PASS(nng_socket_get_bool(s, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v); +#endif + NUTS_PASS(nng_dialer_create(&d, s, addr)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v); + NUTS_PASS(nng_dialer_set_bool(d, NNG_OPT_TCP_NODELAY, false)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v == false); + NUTS_FAIL( + nng_dialer_get_int(d, NNG_OPT_TCP_NODELAY, &x), NNG_EBADTYPE); + x = 0; + NUTS_FAIL( + nng_dialer_set_int(d, NNG_OPT_TCP_NODELAY, x), NNG_EBADTYPE); + // This assumes sizeof (bool) != sizeof (int) + if (sizeof(bool) != sizeof(int)) { + NUTS_FAIL( + nng_dialer_set(d, NNG_OPT_TCP_NODELAY, &x, sizeof(x)), + NNG_EINVAL); + } + + NUTS_PASS(nng_listener_create(&l, s, addr)); + NUTS_PASS(nng_listener_get_bool(l, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v == true); + x = 0; + NUTS_FAIL( + nng_listener_set_int(l, NNG_OPT_TCP_NODELAY, x), NNG_EBADTYPE); + // This assumes sizeof (bool) != sizeof (int) + NUTS_FAIL(nng_listener_set(l, NNG_OPT_TCP_NODELAY, &x, sizeof(x)), + NNG_EINVAL); + + NUTS_PASS(nng_dialer_close(d)); + NUTS_PASS(nng_listener_close(l)); + + 
// Make sure socket wide defaults apply. +#ifndef NNG_ELIDE_DEPRECATED + NUTS_PASS(nng_socket_set_bool(s, NNG_OPT_TCP_NODELAY, true)); + v = false; + NUTS_PASS(nng_socket_get_bool(s, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v); + NUTS_PASS(nng_socket_set_bool(s, NNG_OPT_TCP_NODELAY, false)); + NUTS_PASS(nng_dialer_create(&d, s, addr)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_NODELAY, &v)); + NUTS_TRUE(v == false); +#endif + NUTS_CLOSE(s); +} + +void +test_tcp_keep_alive_option(void) +{ + nng_socket s; + nng_dialer d; + nng_listener l; + bool v; + int x; + char *addr; + + NUTS_ADDR(addr, "tcp"); + NUTS_OPEN(s); +#ifndef NNG_ELIDE_DEPRECATED + NUTS_PASS(nng_socket_get_bool(s, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v == false); +#endif + NUTS_PASS(nng_dialer_create(&d, s, addr)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v == false); + NUTS_PASS(nng_dialer_set_bool(d, NNG_OPT_TCP_KEEPALIVE, true)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v); + NUTS_FAIL( + nng_dialer_get_int(d, NNG_OPT_TCP_KEEPALIVE, &x), NNG_EBADTYPE); + x = 1; + NUTS_FAIL( + nng_dialer_set_int(d, NNG_OPT_TCP_KEEPALIVE, x), NNG_EBADTYPE); + + NUTS_PASS(nng_listener_create(&l, s, addr)); + NUTS_PASS(nng_listener_get_bool(l, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v == false); + x = 1; + NUTS_FAIL( + nng_listener_set_int(l, NNG_OPT_TCP_KEEPALIVE, x), NNG_EBADTYPE); + + NUTS_PASS(nng_dialer_close(d)); + NUTS_PASS(nng_listener_close(l)); + + // Make sure socket wide defaults apply. 
+#ifndef NNG_ELIDE_DEPRECATED + NUTS_PASS(nng_socket_set_bool(s, NNG_OPT_TCP_KEEPALIVE, false)); + v = true; + NUTS_PASS(nng_socket_get_bool(s, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v == false); + NUTS_PASS(nng_socket_set_bool(s, NNG_OPT_TCP_KEEPALIVE, true)); + NUTS_PASS(nng_dialer_create(&d, s, addr)); + NUTS_PASS(nng_dialer_get_bool(d, NNG_OPT_TCP_KEEPALIVE, &v)); + NUTS_TRUE(v); +#endif + NUTS_CLOSE(s); +} + +void +test_tcp_recv_max(void) +{ + char msg[256]; + char buf[256]; + nng_socket s0; + nng_socket s1; + nng_listener l; + size_t sz; + char *addr; + + NUTS_ADDR(addr, "tcp"); + + NUTS_OPEN(s0); + NUTS_PASS(nng_socket_set_ms(s0, NNG_OPT_RECVTIMEO, 100)); + NUTS_PASS(nng_socket_set_size(s0, NNG_OPT_RECVMAXSZ, 200)); + NUTS_PASS(nng_listener_create(&l, s0, addr)); + NUTS_PASS(nng_socket_get_size(s0, NNG_OPT_RECVMAXSZ, &sz)); + NUTS_TRUE(sz == 200); + NUTS_PASS(nng_listener_set_size(l, NNG_OPT_RECVMAXSZ, 100)); + NUTS_PASS(nng_listener_start(l, 0)); + + NUTS_OPEN(s1); + NUTS_PASS(nng_dial(s1, addr, NULL, 0)); + NUTS_PASS(nng_send(s1, msg, 95, 0)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 100)); + NUTS_PASS(nng_recv(s0, buf, &sz, 0)); + NUTS_TRUE(sz == 95); + NUTS_PASS(nng_send(s1, msg, 150, 0)); + NUTS_FAIL(nng_recv(s0, buf, &sz, 0), NNG_ETIMEDOUT); + NUTS_PASS(nng_close(s0)); + NUTS_CLOSE(s1); +} + +NUTS_TESTS = { + + { "tcp wild card connect fail", test_tcp_wild_card_connect_fail }, + { "tcp wild card bind", test_tcp_wild_card_bind }, + { "tcp port zero bind", test_tcp_port_zero_bind }, + { "tcp local address connect", test_tcp_local_address_connect }, + { "tcp bad local interface", test_tcp_bad_local_interface }, + { "tcp non-local address", test_tcp_non_local_address }, + { "tcp malformed address", test_tcp_malformed_address }, + { "tcp no delay option", test_tcp_no_delay_option }, + { "tcp keep alive option", test_tcp_keep_alive_option }, + { "tcp recv max", test_tcp_recv_max }, + { NULL, NULL }, +};
\ No newline at end of file diff --git a/src/sp/transport/tls/CMakeLists.txt b/src/sp/transport/tls/CMakeLists.txt new file mode 100644 index 00000000..82f24c79 --- /dev/null +++ b/src/sp/transport/tls/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +# Copyright 2018 Capitar IT Group BV <info@capitar.com> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. +# + +# TLS transport +nng_directory(tls) + +nng_sources_if(NNG_TRANSPORT_TLS tls.c) +nng_headers_if(NNG_TRANSPORT_TLS nng/transport/tls/tls.h) +nng_defines_if(NNG_TRANSPORT_TLS NNG_TRANSPORT_TLS)
\ No newline at end of file diff --git a/src/sp/transport/tls/tls.c b/src/sp/transport/tls/tls.c new file mode 100644 index 00000000..b6623733 --- /dev/null +++ b/src/sp/transport/tls/tls.c @@ -0,0 +1,1292 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// Copyright 2019 Devolutions <info@devolutions.net> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <stdbool.h> +#include <string.h> + +#include "core/nng_impl.h" + +#include "nng/supplemental/tls/tls.h" +#include "nng/transport/tls/tls.h" + +// TLS over TCP transport. Platform specific TCP operations must be +// supplied as well, and uses the supplemental TLS v1.2 code. It is not +// an accident that this very closely resembles the TCP transport itself. + +typedef struct tlstran_ep tlstran_ep; +typedef struct tlstran_dialer tlstran_dialer; +typedef struct tlstran_listener tlstran_listener; +typedef struct tlstran_pipe tlstran_pipe; + +// tlstran_pipe is one end of a TLS connection. +struct tlstran_pipe { + nng_stream * tls; + nni_pipe * npipe; + uint16_t peer; + uint16_t proto; + size_t rcvmax; + bool closed; + nni_list_node node; + nni_list sendq; + nni_list recvq; + tlstran_ep * ep; + nni_sockaddr sa; + nni_atomic_flag reaped; + nni_reap_node reap; + uint8_t txlen[sizeof(uint64_t)]; + uint8_t rxlen[sizeof(uint64_t)]; + size_t gottxhead; + size_t gotrxhead; + size_t wanttxhead; + size_t wantrxhead; + nni_aio * txaio; + nni_aio * rxaio; + nni_aio * negoaio; + nni_msg * rxmsg; + nni_mtx mtx; +}; + +// Stuff that is common to both dialers and listeners. 
+struct tlstran_ep { + nni_mtx mtx; + uint16_t proto; + size_t rcvmax; + bool started; + bool closed; + bool fini; + int refcnt; + int authmode; + nni_url * url; + nni_list pipes; + nni_reap_node reap; + nng_stream_dialer * dialer; + nng_stream_listener *listener; + nni_aio * useraio; + nni_aio * connaio; + nni_aio * timeaio; + nni_list busypipes; // busy pipes -- ones passed to socket + nni_list waitpipes; // pipes waiting to match to socket + nni_list negopipes; // pipes busy negotiating + const char * host; + nng_sockaddr src; + nng_sockaddr sa; + nni_stat_item st_rcv_max; +}; + +static void tlstran_pipe_send_start(tlstran_pipe *); +static void tlstran_pipe_recv_start(tlstran_pipe *); +static void tlstran_pipe_send_cb(void *); +static void tlstran_pipe_recv_cb(void *); +static void tlstran_pipe_nego_cb(void *); +static void tlstran_ep_fini(void *); +static void tlstran_pipe_fini(void *); + +static nni_reap_list tlstran_ep_reap_list = { + .rl_offset = offsetof(tlstran_ep, reap), + .rl_func = tlstran_ep_fini, +}; + +static nni_reap_list tlstran_pipe_reap_list = { + .rl_offset = offsetof(tlstran_pipe, reap), + .rl_func = tlstran_pipe_fini, +}; + +static int +tlstran_init(void) +{ + return (0); +} + +static void +tlstran_fini(void) +{ +} + +static void +tlstran_pipe_close(void *arg) +{ + tlstran_pipe *p = arg; + + nni_aio_close(p->rxaio); + nni_aio_close(p->txaio); + nni_aio_close(p->negoaio); + + nng_stream_close(p->tls); +} + +static void +tlstran_pipe_stop(void *arg) +{ + tlstran_pipe *p = arg; + + nni_aio_stop(p->rxaio); + nni_aio_stop(p->txaio); + nni_aio_stop(p->negoaio); +} + +static int +tlstran_pipe_init(void *arg, nni_pipe *npipe) +{ + tlstran_pipe *p = arg; + p->npipe = npipe; + return (0); +} + +static void +tlstran_pipe_fini(void *arg) +{ + tlstran_pipe *p = arg; + tlstran_ep * ep; + + tlstran_pipe_stop(p); + if ((ep = p->ep) != NULL) { + nni_mtx_lock(&ep->mtx); + nni_list_node_remove(&p->node); + ep->refcnt--; + if (ep->fini && (ep->refcnt == 0)) { + 
nni_reap(&tlstran_ep_reap_list, ep); + } + nni_mtx_unlock(&ep->mtx); + } + nni_aio_free(p->rxaio); + nni_aio_free(p->txaio); + nni_aio_free(p->negoaio); + nng_stream_free(p->tls); + nni_msg_free(p->rxmsg); + NNI_FREE_STRUCT(p); +} + +static int +tlstran_pipe_alloc(tlstran_pipe **pipep) +{ + tlstran_pipe *p; + int rv; + + if ((p = NNI_ALLOC_STRUCT(p)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&p->mtx); + + if (((rv = nni_aio_alloc(&p->txaio, tlstran_pipe_send_cb, p)) != 0) || + ((rv = nni_aio_alloc(&p->rxaio, tlstran_pipe_recv_cb, p)) != 0) || + ((rv = nni_aio_alloc(&p->negoaio, tlstran_pipe_nego_cb, p)) != + 0)) { + tlstran_pipe_fini(p); + return (rv); + } + nni_aio_list_init(&p->recvq); + nni_aio_list_init(&p->sendq); + nni_atomic_flag_reset(&p->reaped); + + *pipep = p; + return (0); +} + +static void +tlstran_pipe_reap(tlstran_pipe *p) +{ + if (!nni_atomic_flag_test_and_set(&p->reaped)) { + if (p->tls != NULL) { + nng_stream_close(p->tls); + } + nni_reap(&tlstran_pipe_reap_list, p); + } +} + +static void +tlstran_ep_match(tlstran_ep *ep) +{ + nni_aio * aio; + tlstran_pipe *p; + + if (((aio = ep->useraio) == NULL) || + ((p = nni_list_first(&ep->waitpipes)) == NULL)) { + return; + } + nni_list_remove(&ep->waitpipes, p); + nni_list_append(&ep->busypipes, p); + ep->useraio = NULL; + p->rcvmax = ep->rcvmax; + nni_aio_set_output(aio, 0, p); + nni_aio_finish(aio, 0, 0); +} + +static void +tlstran_pipe_nego_cb(void *arg) +{ + tlstran_pipe *p = arg; + tlstran_ep * ep = p->ep; + nni_aio * aio = p->negoaio; + nni_aio * uaio; + int rv; + + nni_mtx_lock(&ep->mtx); + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + // We start transmitting before we receive. 
+ if (p->gottxhead < p->wanttxhead) { + p->gottxhead += nni_aio_count(aio); + } else if (p->gotrxhead < p->wantrxhead) { + p->gotrxhead += nni_aio_count(aio); + } + + if (p->gottxhead < p->wanttxhead) { + nni_iov iov; + iov.iov_len = p->wanttxhead - p->gottxhead; + iov.iov_buf = &p->txlen[p->gottxhead]; + nni_aio_set_iov(aio, 1, &iov); + // send it down... + nng_stream_send(p->tls, aio); + nni_mtx_unlock(&ep->mtx); + return; + } + if (p->gotrxhead < p->wantrxhead) { + nni_iov iov; + iov.iov_len = p->wantrxhead - p->gotrxhead; + iov.iov_buf = &p->rxlen[p->gotrxhead]; + nni_aio_set_iov(aio, 1, &iov); + nng_stream_recv(p->tls, aio); + nni_mtx_unlock(&ep->mtx); + return; + } + // We have both sent and received the headers. Lets check the + // receive side header. + if ((p->rxlen[0] != 0) || (p->rxlen[1] != 'S') || + (p->rxlen[2] != 'P') || (p->rxlen[3] != 0) || (p->rxlen[6] != 0) || + (p->rxlen[7] != 0)) { + rv = NNG_EPROTO; + goto error; + } + + NNI_GET16(&p->rxlen[4], p->peer); + + // We are all ready now. We put this in the wait list, and + // then try to run the matcher. + nni_list_remove(&ep->negopipes, p); + nni_list_append(&ep->waitpipes, p); + + tlstran_ep_match(ep); + nni_mtx_unlock(&ep->mtx); + + return; + +error: + nng_stream_close(p->tls); + + if ((uaio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(uaio, rv); + } + nni_mtx_unlock(&ep->mtx); + tlstran_pipe_reap(p); +} + +static void +tlstran_pipe_send_cb(void *arg) +{ + tlstran_pipe *p = arg; + int rv; + nni_aio * aio; + size_t n; + nni_msg * msg; + nni_aio * txaio = p->txaio; + + nni_mtx_lock(&p->mtx); + aio = nni_list_first(&p->sendq); + + if ((rv = nni_aio_result(txaio)) != 0) { + // Intentionally we do not queue up another transfer. + // There's an excellent chance that the pipe is no longer + // usable, with a partial transfer. + // The protocol should see this error, and close the + // pipe itself, we hope. 
+ nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + nni_pipe_bump_error(p->npipe, rv); + return; + } + + n = nni_aio_count(txaio); + nni_aio_iov_advance(txaio, n); + if (nni_aio_iov_count(txaio) > 0) { + nng_stream_send(p->tls, txaio); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + tlstran_pipe_send_start(p); + + msg = nni_aio_get_msg(aio); + n = nni_msg_len(msg); + nni_pipe_bump_tx(p->npipe, n); + nni_mtx_unlock(&p->mtx); + nni_aio_set_msg(aio, NULL); + nni_msg_free(msg); + nni_aio_finish_sync(aio, 0, n); +} + +static void +tlstran_pipe_recv_cb(void *arg) +{ + tlstran_pipe *p = arg; + nni_aio * aio; + int rv; + size_t n; + nni_msg * msg; + nni_aio * rxaio = p->rxaio; + + nni_mtx_lock(&p->mtx); + aio = nni_list_first(&p->recvq); + + if ((rv = nni_aio_result(p->rxaio)) != 0) { + goto recv_error; + } + + n = nni_aio_count(rxaio); + nni_aio_iov_advance(rxaio, n); + if (nni_aio_iov_count(rxaio) > 0) { + // Was this a partial read? If so then resubmit for the rest. + nng_stream_recv(p->tls, rxaio); + nni_mtx_unlock(&p->mtx); + return; + } + + // If we don't have a message yet, we were reading the TCP message + // header, which is just the length. This tells us the size of the + // message to allocate and how much more to expect. + if (p->rxmsg == NULL) { + uint64_t len; + // We should have gotten a message header. + NNI_GET64(p->rxlen, len); + + // Make sure the message payload is not too big. If it is + // the caller will shut down the pipe. + if ((len > p->rcvmax) && (p->rcvmax > 0)) { + rv = NNG_EMSGSIZE; + goto recv_error; + } + + if ((rv = nni_msg_alloc(&p->rxmsg, (size_t) len)) != 0) { + goto recv_error; + } + + // Submit the rest of the data for a read -- we want to + // read the entire message now. 
+		if (len != 0) {
+			nni_iov iov;
+			iov.iov_buf = nni_msg_body(p->rxmsg);
+			iov.iov_len = (size_t) len;
+			nni_aio_set_iov(rxaio, 1, &iov);
+
+			nng_stream_recv(p->tls, rxaio);
+			nni_mtx_unlock(&p->mtx);
+			return;
+		}
+	}
+
+	// We read a message completely. Let the user know the good news.
+	nni_aio_list_remove(aio);
+	msg      = p->rxmsg;
+	p->rxmsg = NULL;
+	n        = nni_msg_len(msg);
+	// Kick off the next queued receive, if any, before dropping the lock.
+	if (!nni_list_empty(&p->recvq)) {
+		tlstran_pipe_recv_start(p);
+	}
+	nni_pipe_bump_rx(p->npipe, n);
+	nni_mtx_unlock(&p->mtx);
+
+	nni_aio_set_msg(aio, msg);
+	nni_aio_finish_sync(aio, 0, n);
+	return;
+
+recv_error:
+	nni_aio_list_remove(aio);
+	// Discard any partially received message.
+	msg      = p->rxmsg;
+	p->rxmsg = NULL;
+	nni_pipe_bump_error(p->npipe, rv);
+	// Intentionally, we do not queue up another receive.
+	// The protocol should notice this error and close the pipe.
+	nni_mtx_unlock(&p->mtx);
+	nni_msg_free(msg);
+	nni_aio_finish_error(aio, rv);
+}
+
+// Cancellation handler for a user aio queued on the pipe's send queue.
+static void
+tlstran_pipe_send_cancel(nni_aio *aio, void *arg, int rv)
+{
+	tlstran_pipe *p = arg;
+
+	nni_mtx_lock(&p->mtx);
+	if (!nni_aio_list_active(aio)) {
+		nni_mtx_unlock(&p->mtx);
+		return;
+	}
+	// If this is being sent, then cancel the pending transfer.
+	// The callback on the txaio will cause the user aio to
+	// be canceled too.
+	if (nni_list_first(&p->sendq) == aio) {
+		nni_aio_abort(p->txaio, rv);
+		nni_mtx_unlock(&p->mtx);
+		return;
+	}
+	// Not in flight yet: just drop it from the queue and fail it.
+	nni_aio_list_remove(aio);
+	nni_mtx_unlock(&p->mtx);
+
+	nni_aio_finish_error(aio, rv);
+}
+
+// Begin transmitting the message at the head of the send queue.  The wire
+// format is a 64-bit length prefix (NNI_PUT64 into p->txlen) followed by
+// the SP message header and body; at most three iovs are needed.
+// Caller must hold p->mtx.
+static void
+tlstran_pipe_send_start(tlstran_pipe *p)
+{
+	nni_aio *txaio;
+	nni_aio *aio;
+	nni_msg *msg;
+	int      niov;
+	nni_iov  iov[3];
+	uint64_t len;
+
+	if ((aio = nni_list_first(&p->sendq)) == NULL) {
+		return;
+	}
+
+	msg = nni_aio_get_msg(aio);
+	len = nni_msg_len(msg) + nni_msg_header_len(msg);
+
+	NNI_PUT64(p->txlen, len);
+
+	txaio             = p->txaio;
+	niov              = 0;
+	iov[niov].iov_buf = p->txlen;
+	iov[niov].iov_len = sizeof(p->txlen);
+	niov++;
+	// Only add iovs for header/body when they are non-empty.
+	if (nni_msg_header_len(msg) > 0) {
+		iov[niov].iov_buf = nni_msg_header(msg);
+		iov[niov].iov_len = nni_msg_header_len(msg);
+		niov++;
+	}
+	if (nni_msg_len(msg) > 0) {
+		iov[niov].iov_buf = nni_msg_body(msg);
+		iov[niov].iov_len = nni_msg_len(msg);
+		niov++;
+	}
+
+	nni_aio_set_iov(txaio, niov, iov);
+	nng_stream_send(p->tls, txaio);
+}
+
+// Queue a user send aio; transmission starts immediately if the queue
+// was empty (subsequent aios are started by the send completion callback).
+static void
+tlstran_pipe_send(void *arg, nni_aio *aio)
+{
+	tlstran_pipe *p = arg;
+	int           rv;
+
+	if (nni_aio_begin(aio) != 0) {
+		return;
+	}
+	nni_mtx_lock(&p->mtx);
+	if ((rv = nni_aio_schedule(aio, tlstran_pipe_send_cancel, p)) != 0) {
+		nni_mtx_unlock(&p->mtx);
+		nni_aio_finish_error(aio, rv);
+		return;
+	}
+	nni_list_append(&p->sendq, aio);
+	if (nni_list_first(&p->sendq) == aio) {
+		tlstran_pipe_send_start(p);
+	}
+	nni_mtx_unlock(&p->mtx);
+}
+
+// Cancellation handler for a user aio queued on the pipe's receive queue.
+static void
+tlstran_pipe_recv_cancel(nni_aio *aio, void *arg, int rv)
+{
+	tlstran_pipe *p = arg;
+
+	nni_mtx_lock(&p->mtx);
+	if (!nni_aio_list_active(aio)) {
+		nni_mtx_unlock(&p->mtx);
+		return;
+	}
+	// If receive in progress, then cancel the pending transfer.
+	// The callback on the rxaio will cause the user aio to
+	// be canceled too.
+ if (nni_list_first(&p->recvq) == aio) { + nni_aio_abort(p->rxaio, rv); + nni_mtx_unlock(&p->mtx); + return; + } + nni_aio_list_remove(aio); + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); +} + +static void +tlstran_pipe_recv_start(tlstran_pipe *p) +{ + nni_aio *aio; + nni_iov iov; + NNI_ASSERT(p->rxmsg == NULL); + + // Schedule a read of the IPC header. + aio = p->rxaio; + iov.iov_buf = p->rxlen; + iov.iov_len = sizeof(p->rxlen); + nni_aio_set_iov(aio, 1, &iov); + + nng_stream_recv(p->tls, aio); +} + +static void +tlstran_pipe_recv(void *arg, nni_aio *aio) +{ + tlstran_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, tlstran_pipe_recv_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + + nni_aio_list_append(&p->recvq, aio); + if (nni_list_first(&p->recvq) == aio) { + tlstran_pipe_recv_start(p); + } + nni_mtx_unlock(&p->mtx); +} + +static uint16_t +tlstran_pipe_peer(void *arg) +{ + tlstran_pipe *p = arg; + + return (p->peer); +} + +static void +tlstran_pipe_start(tlstran_pipe *p, nng_stream *conn, tlstran_ep *ep) +{ + nni_iov iov; + + ep->refcnt++; + + p->tls = conn; + p->ep = ep; + p->proto = ep->proto; + + p->txlen[0] = 0; + p->txlen[1] = 'S'; + p->txlen[2] = 'P'; + p->txlen[3] = 0; + NNI_PUT16(&p->txlen[4], p->proto); + NNI_PUT16(&p->txlen[6], 0); + + p->gotrxhead = 0; + p->gottxhead = 0; + p->wantrxhead = 8; + p->wanttxhead = 8; + iov.iov_len = 8; + iov.iov_buf = &p->txlen[0]; + nni_aio_set_iov(p->negoaio, 1, &iov); + nni_list_append(&ep->negopipes, p); + + nni_aio_set_timeout(p->negoaio, 10000); // 10 sec timeout to negotiate + nng_stream_send(p->tls, p->negoaio); +} + +static void +tlstran_ep_fini(void *arg) +{ + tlstran_ep *ep = arg; + + nni_mtx_lock(&ep->mtx); + ep->fini = true; + if (ep->refcnt != 0) { + nni_mtx_unlock(&ep->mtx); + return; + } + nni_mtx_unlock(&ep->mtx); + nni_aio_stop(ep->timeaio); + 
nni_aio_stop(ep->connaio); + nng_stream_dialer_free(ep->dialer); + nng_stream_listener_free(ep->listener); + nni_aio_free(ep->timeaio); + nni_aio_free(ep->connaio); + + nni_mtx_fini(&ep->mtx); + NNI_FREE_STRUCT(ep); +} + +static void +tlstran_ep_close(void *arg) +{ + tlstran_ep * ep = arg; + tlstran_pipe *p; + + nni_mtx_lock(&ep->mtx); + ep->closed = true; + nni_aio_close(ep->timeaio); + + if (ep->dialer != NULL) { + nng_stream_dialer_close(ep->dialer); + } + if (ep->listener != NULL) { + nng_stream_listener_close(ep->listener); + } + NNI_LIST_FOREACH (&ep->negopipes, p) { + tlstran_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->waitpipes, p) { + tlstran_pipe_close(p); + } + NNI_LIST_FOREACH (&ep->busypipes, p) { + tlstran_pipe_close(p); + } + if (ep->useraio != NULL) { + nni_aio_finish_error(ep->useraio, NNG_ECLOSED); + ep->useraio = NULL; + } + nni_mtx_unlock(&ep->mtx); +} + +// This parses off the optional source address that this transport uses. +// The special handling of this URL format is quite honestly an historical +// mistake, which we would remove if we could. +static int +tlstran_url_parse_source(nni_url *url, nng_sockaddr *sa, const nni_url *surl) +{ + int af; + char * semi; + char * src; + size_t len; + int rv; + nni_aio *aio; + + // We modify the URL. This relies on the fact that the underlying + // transport does not free this, so we can just use references. 
+
+	url->u_scheme   = surl->u_scheme;
+	url->u_port     = surl->u_port;
+	url->u_hostname = surl->u_hostname;
+
+	// No ';' separator means no source address was given; zero the
+	// sockaddr so the caller sees NNG_AF_UNSPEC and skips binding.
+	if ((semi = strchr(url->u_hostname, ';')) == NULL) {
+		memset(sa, 0, sizeof(*sa));
+		return (0);
+	}
+
+	// Split "<src>;<host>": resolve the part before the ';' as the
+	// local source address, and keep the remainder as the hostname.
+	len             = (size_t)(semi - url->u_hostname);
+	url->u_hostname = semi + 1;
+
+	if (strcmp(surl->u_scheme, "tls+tcp") == 0) {
+		af = NNG_AF_UNSPEC;
+	} else if (strcmp(surl->u_scheme, "tls+tcp4") == 0) {
+		af = NNG_AF_INET;
+	} else if (strcmp(surl->u_scheme, "tls+tcp6") == 0) {
+		af = NNG_AF_INET6;
+	} else {
+		return (NNG_EADDRINVAL);
+	}
+
+	if ((src = nni_alloc(len + 1)) == NULL) {
+		return (NNG_ENOMEM);
+	}
+	memcpy(src, surl->u_hostname, len);
+	src[len] = '\0';
+
+	if ((rv = nni_aio_alloc(&aio, NULL, NULL)) != 0) {
+		nni_free(src, len + 1);
+		return (rv);
+	}
+
+	// Synchronous resolution of the source address.
+	nni_resolv_ip(src, "0", af, 1, sa, aio);
+	nni_aio_wait(aio);
+	// NOTE(review): the resolver result is never consulted -- rv still
+	// holds 0 from the successful nni_aio_alloc above, so a failed name
+	// resolution is silently ignored and *sa may be left unset.  Confirm
+	// whether "rv = nni_aio_result(aio);" was intended before the frees.
+	nni_aio_free(aio);
+	nni_free(src, len + 1);
+	return (rv);
+}
+
+// Listener back-off timer callback: when the cool-down sleep (scheduled
+// from the accept error path via nng_sleep_aio) expires normally, resume
+// accepting connections.
+static void
+tlstran_timer_cb(void *arg)
+{
+	tlstran_ep *ep = arg;
+	if (nni_aio_result(ep->timeaio) == 0) {
+		nng_stream_listener_accept(ep->listener, ep->connaio);
+	}
+}
+
+// Accept completion: wrap the new stream in a pipe, start SP negotiation,
+// and immediately post another accept.
+static void
+tlstran_accept_cb(void *arg)
+{
+	tlstran_ep *  ep  = arg;
+	nni_aio *     aio = ep->connaio;
+	tlstran_pipe *p;
+	int           rv;
+	nng_stream *  conn;
+
+	nni_mtx_lock(&ep->mtx);
+
+	if ((rv = nni_aio_result(aio)) != 0) {
+		goto error;
+	}
+
+	conn = nni_aio_get_output(aio, 0);
+	if ((rv = tlstran_pipe_alloc(&p)) != 0) {
+		nng_stream_free(conn);
+		goto error;
+	}
+
+	if (ep->closed) {
+		tlstran_pipe_fini(p);
+		nng_stream_free(conn);
+		rv = NNG_ECLOSED;
+		goto error;
+	}
+	tlstran_pipe_start(p, conn, ep);
+	nng_stream_listener_accept(ep->listener, ep->connaio);
+	nni_mtx_unlock(&ep->mtx);
+	return;
+
+error:
+	// When an error here occurs, let's send a notice up to the consumer.
+	// That way it can be reported properly.
+ if ((aio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + switch (rv) { + + case NNG_ENOMEM: + case NNG_ENOFILES: + // We need to cool down here, to avoid spinning. + nng_sleep_aio(10, ep->timeaio); + break; + + default: + // Start another accept. This is done because we want to + // ensure that TLS negotiations are disconnected from + // the upper layer accept logic. + if (!ep->closed) { + nng_stream_listener_accept(ep->listener, ep->connaio); + } + break; + } + nni_mtx_unlock(&ep->mtx); +} + +static void +tlstran_dial_cb(void *arg) +{ + tlstran_ep * ep = arg; + nni_aio * aio = ep->connaio; + tlstran_pipe *p; + int rv; + nng_stream * conn; + + if ((rv = nni_aio_result(aio)) != 0) { + goto error; + } + + conn = nni_aio_get_output(aio, 0); + if ((rv = tlstran_pipe_alloc(&p)) != 0) { + nng_stream_free(conn); + goto error; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + tlstran_pipe_fini(p); + nng_stream_free(conn); + rv = NNG_ECLOSED; + nni_mtx_unlock(&ep->mtx); + goto error; + } else { + tlstran_pipe_start(p, conn, ep); + } + nni_mtx_unlock(&ep->mtx); + return; + +error: + // Error connecting. We need to pass this straight back to the user. 
+ nni_mtx_lock(&ep->mtx); + if ((aio = ep->useraio) != NULL) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static int +tlstran_ep_init(tlstran_ep **epp, nng_url *url, nni_sock *sock) +{ + tlstran_ep *ep; + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&ep->mtx); + NNI_LIST_INIT(&ep->busypipes, tlstran_pipe, node); + NNI_LIST_INIT(&ep->waitpipes, tlstran_pipe, node); + NNI_LIST_INIT(&ep->negopipes, tlstran_pipe, node); + + ep->proto = nni_sock_proto_id(sock); + ep->url = url; + +#ifdef NNG_ENABLE_STATS + static const nni_stat_info rcv_max_info = { + .si_name = "rcv_max", + .si_desc = "maximum receive size", + .si_type = NNG_STAT_LEVEL, + .si_unit = NNG_UNIT_BYTES, + .si_atomic = true, + }; + nni_stat_init(&ep->st_rcv_max, &rcv_max_info); +#endif + + *epp = ep; + return (0); +} + +static int +tlstran_ep_init_dialer(void **dp, nni_url *url, nni_dialer *ndialer) +{ + tlstran_ep * ep; + int rv; + nng_sockaddr srcsa; + nni_sock * sock = nni_dialer_sock(ndialer); + nni_url myurl; + + // Check for invalid URL components. 
+	if ((strlen(url->u_path) != 0) && (strcmp(url->u_path, "/") != 0)) {
+		return (NNG_EADDRINVAL);
+	}
+	if ((url->u_fragment != NULL) || (url->u_userinfo != NULL) ||
+	    (url->u_query != NULL) || (strlen(url->u_hostname) == 0) ||
+	    (strlen(url->u_port) == 0)) {
+		return (NNG_EADDRINVAL);
+	}
+
+	// Extract an optional "<src>;<host>" source address (see
+	// tlstran_url_parse_source); myurl aliases pieces of url.
+	if ((rv = tlstran_url_parse_source(&myurl, &srcsa, url)) != 0) {
+		return (rv);
+	}
+
+	// NOTE(review): if nni_aio_alloc fails after tlstran_ep_init has
+	// succeeded, ep is returned without being freed (no tlstran_ep_fini
+	// on this path) -- this looks like a leak; confirm.
+	if (((rv = tlstran_ep_init(&ep, url, sock)) != 0) ||
+	    ((rv = nni_aio_alloc(&ep->connaio, tlstran_dial_cb, ep)) != 0)) {
+		return (rv);
+	}
+	ep->authmode = NNG_TLS_AUTH_MODE_REQUIRED;
+
+	// NOTE(review): rv is always 0 at this point (both earlier failure
+	// paths return), so the leading (rv != 0) test is dead code.
+	if ((rv != 0) ||
+	    ((rv = nng_stream_dialer_alloc_url(&ep->dialer, &myurl)) != 0)) {
+		tlstran_ep_fini(ep);
+		return (rv);
+	}
+	// If a source address was supplied, bind the dialer to it.
+	if ((srcsa.s_family != NNG_AF_UNSPEC) &&
+	    ((rv = nni_stream_dialer_set(ep->dialer, NNG_OPT_LOCADDR, &srcsa,
+	          sizeof(srcsa), NNI_TYPE_SOCKADDR)) != 0)) {
+		tlstran_ep_fini(ep);
+		return (rv);
+	}
+#ifdef NNG_ENABLE_STATS
+	nni_dialer_add_stat(ndialer, &ep->st_rcv_max);
+#endif
+	*dp = ep;
+	return (0);
+}
+
+// Create the endpoint state for a listener, resolving the bind address
+// synchronously up front.
+static int
+tlstran_ep_init_listener(void **lp, nni_url *url, nni_listener *nlistener)
+{
+	tlstran_ep *ep;
+	int         rv;
+	uint16_t    af;
+	char *      host = url->u_hostname;
+	nni_aio *   aio;
+	nni_sock *  sock = nni_listener_sock(nlistener);
+
+	// Map the URL scheme to an address family.
+	if (strcmp(url->u_scheme, "tls+tcp") == 0) {
+		af = NNG_AF_UNSPEC;
+	} else if (strcmp(url->u_scheme, "tls+tcp4") == 0) {
+		af = NNG_AF_INET;
+	} else if (strcmp(url->u_scheme, "tls+tcp6") == 0) {
+		af = NNG_AF_INET6;
+	} else {
+		return (NNG_EADDRINVAL);
+	}
+
+	// Check for invalid URL components.
+ if ((strlen(url->u_path) != 0) && (strcmp(url->u_path, "/") != 0)) { + return (NNG_EADDRINVAL); + } + if ((url->u_fragment != NULL) || (url->u_userinfo != NULL) || + (url->u_query != NULL)) { + return (NNG_EADDRINVAL); + } + if (((rv = tlstran_ep_init(&ep, url, sock)) != 0) || + ((rv = nni_aio_alloc(&ep->connaio, tlstran_accept_cb, ep)) != 0) || + ((rv = nni_aio_alloc(&ep->timeaio, tlstran_timer_cb, ep)) != 0)) { + return (rv); + } + + ep->authmode = NNG_TLS_AUTH_MODE_NONE; + + if (strlen(host) == 0) { + host = NULL; + } + + // XXX: We are doing lookup at listener initialization. There is + // a valid argument that this should be done at bind time, but that + // would require making bind asynchronous. In some ways this would + // be worse than the cost of just waiting here. We always recommend + // using local IP addresses rather than names when possible. + + if ((rv = nni_aio_alloc(&aio, NULL, NULL)) != 0) { + tlstran_ep_fini(ep); + return (rv); + } + nni_resolv_ip(host, url->u_port, af, true, &ep->sa, aio); + nni_aio_wait(aio); + rv = nni_aio_result(aio); + nni_aio_free(aio); + + if ((rv != 0) || + ((rv = nng_stream_listener_alloc_url(&ep->listener, url)) != 0) || + ((rv = nni_stream_listener_set(ep->listener, NNG_OPT_TLS_AUTH_MODE, + &ep->authmode, sizeof(ep->authmode), NNI_TYPE_INT32)) != + 0)) { + tlstran_ep_fini(ep); + return (rv); + } +#ifdef NNG_ENABLE_STATS + nni_listener_add_stat(nlistener, &ep->st_rcv_max); +#endif + *lp = ep; + return (0); +} + +static void +tlstran_ep_cancel(nni_aio *aio, void *arg, int rv) +{ + tlstran_ep *ep = arg; + nni_mtx_lock(&ep->mtx); + if (ep->useraio == aio) { + ep->useraio = NULL; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&ep->mtx); +} + +static void +tlstran_ep_connect(void *arg, nni_aio *aio) +{ + tlstran_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + 
if (ep->useraio != NULL) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_EBUSY); + return; + } + if ((rv = nni_aio_schedule(aio, tlstran_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->useraio = aio; + + nng_stream_dialer_dial(ep->dialer, ep->connaio); + nni_mtx_unlock(&ep->mtx); +} + +static int +tlstran_ep_bind(void *arg) +{ + tlstran_ep *ep = arg; + int rv; + + nni_mtx_lock(&ep->mtx); + rv = nng_stream_listener_listen(ep->listener); + nni_mtx_unlock(&ep->mtx); + + return (rv); +} + +static void +tlstran_ep_accept(void *arg, nni_aio *aio) +{ + tlstran_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&ep->mtx); + if (ep->closed) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if (ep->useraio != NULL) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, NNG_EBUSY); + return; + } + if ((rv = nni_aio_schedule(aio, tlstran_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&ep->mtx); + nni_aio_finish_error(aio, rv); + return; + } + ep->useraio = aio; + if (!ep->started) { + ep->started = true; + nng_stream_listener_accept(ep->listener, ep->connaio); + } else { + tlstran_ep_match(ep); + } + nni_mtx_unlock(&ep->mtx); +} + +static int +tlstran_ep_set_recvmaxsz(void *arg, const void *v, size_t sz, nni_type t) +{ + tlstran_ep *ep = arg; + size_t val; + int rv; + if ((rv = nni_copyin_size(&val, v, sz, 0, NNI_MAXSZ, t)) == 0) { + tlstran_pipe *p; + nni_mtx_lock(&ep->mtx); + ep->rcvmax = val; + NNI_LIST_FOREACH (&ep->waitpipes, p) { + p->rcvmax = val; + } + NNI_LIST_FOREACH (&ep->negopipes, p) { + p->rcvmax = val; + } + NNI_LIST_FOREACH (&ep->busypipes, p) { + p->rcvmax = val; + } + nni_mtx_unlock(&ep->mtx); +#ifdef NNG_ENABLE_STATS + nni_stat_set_value(&ep->st_rcv_max, val); +#endif + } + return (rv); +} + +static int +tlstran_ep_get_recvmaxsz(void *arg, void *v, size_t *szp, nni_type t) +{ + tlstran_ep *ep = arg; + int rv; + 
nni_mtx_lock(&ep->mtx); + rv = nni_copyout_size(ep->rcvmax, v, szp, t); + nni_mtx_unlock(&ep->mtx); + return (rv); +} + +static int +tlstran_ep_get_url(void *arg, void *v, size_t *szp, nni_type t) +{ + tlstran_ep *ep = arg; + char * s; + int rv; + int port = 0; + + if (ep->listener != NULL) { + (void) nng_stream_listener_get_int( + ep->listener, NNG_OPT_TCP_BOUND_PORT, &port); + } + if ((rv = nni_url_asprintf_port(&s, ep->url, port)) == 0) { + rv = nni_copyout_str(s, v, szp, t); + nni_strfree(s); + } + return (rv); +} + +static const nni_option tlstran_pipe_opts[] = { + // terminate list + { + .o_name = NULL, + }, +}; + +static int +tlstran_pipe_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + tlstran_pipe *p = arg; + int rv; + + if ((rv = nni_stream_get(p->tls, name, buf, szp, t)) == NNG_ENOTSUP) { + rv = nni_getopt(tlstran_pipe_opts, name, p, buf, szp, t); + } + return (rv); +} + +static nni_tran_pipe_ops tlstran_pipe_ops = { + .p_init = tlstran_pipe_init, + .p_fini = tlstran_pipe_fini, + .p_stop = tlstran_pipe_stop, + .p_send = tlstran_pipe_send, + .p_recv = tlstran_pipe_recv, + .p_close = tlstran_pipe_close, + .p_peer = tlstran_pipe_peer, + .p_getopt = tlstran_pipe_getopt, +}; + +static nni_option tlstran_ep_options[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = tlstran_ep_get_recvmaxsz, + .o_set = tlstran_ep_set_recvmaxsz, + }, + { + .o_name = NNG_OPT_URL, + .o_get = tlstran_ep_get_url, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static int +tlstran_dialer_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + int rv; + tlstran_ep *ep = arg; + + rv = nni_stream_dialer_get(ep->dialer, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(tlstran_ep_options, name, ep, buf, szp, t); + } + return (rv); +} + +static int +tlstran_dialer_setopt( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + int rv; + tlstran_ep *ep = arg; + + rv = 
nni_stream_dialer_set( + ep != NULL ? ep->dialer : NULL, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(tlstran_ep_options, name, ep, buf, sz, t); + } + return (rv); +} + +static int +tlstran_listener_get( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + int rv; + tlstran_ep *ep = arg; + + rv = nni_stream_listener_get(ep->listener, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(tlstran_ep_options, name, ep, buf, szp, t); + } + return (rv); +} + +static int +tlstran_listener_set( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + int rv; + tlstran_ep *ep = arg; + + rv = nni_stream_listener_set( + ep != NULL ? ep->listener : NULL, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(tlstran_ep_options, name, ep, buf, sz, t); + } + return (rv); +} + +static nni_tran_dialer_ops tlstran_dialer_ops = { + .d_init = tlstran_ep_init_dialer, + .d_fini = tlstran_ep_fini, + .d_connect = tlstran_ep_connect, + .d_close = tlstran_ep_close, + .d_getopt = tlstran_dialer_getopt, + .d_setopt = tlstran_dialer_setopt, +}; + +static nni_tran_listener_ops tlstran_listener_ops = { + .l_init = tlstran_ep_init_listener, + .l_fini = tlstran_ep_fini, + .l_bind = tlstran_ep_bind, + .l_accept = tlstran_ep_accept, + .l_close = tlstran_ep_close, + .l_getopt = tlstran_listener_get, + .l_setopt = tlstran_listener_set, +}; + +static nni_tran tls_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tls+tcp", + .tran_dialer = &tlstran_dialer_ops, + .tran_listener = &tlstran_listener_ops, + .tran_pipe = &tlstran_pipe_ops, + .tran_init = tlstran_init, + .tran_fini = tlstran_fini, +}; + +static nni_tran tls4_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tls+tcp4", + .tran_dialer = &tlstran_dialer_ops, + .tran_listener = &tlstran_listener_ops, + .tran_pipe = &tlstran_pipe_ops, + .tran_init = tlstran_init, + .tran_fini = tlstran_fini, +}; + +static nni_tran tls6_tran = { + 
.tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "tls+tcp6", + .tran_dialer = &tlstran_dialer_ops, + .tran_listener = &tlstran_listener_ops, + .tran_pipe = &tlstran_pipe_ops, + .tran_init = tlstran_init, + .tran_fini = tlstran_fini, +}; + +int +nng_tls_register(void) +{ + int rv; + if (((rv = nni_tran_register(&tls_tran)) != 0) || + ((rv = nni_tran_register(&tls4_tran)) != 0) || + ((rv = nni_tran_register(&tls6_tran)) != 0)) { + return (rv); + } + return (0); +} diff --git a/src/sp/transport/ws/CMakeLists.txt b/src/sp/transport/ws/CMakeLists.txt new file mode 100644 index 00000000..6e409b43 --- /dev/null +++ b/src/sp/transport/ws/CMakeLists.txt @@ -0,0 +1,24 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +# Copyright 2018 Capitar IT Group BV <info@capitar.com> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. +# + +# WebSocket transport +nng_directory(ws) + +if (NNG_TRANSPORT_WS OR NNG_TRANSPORT_WSS) + set(WS_ON ON) +endif() + +nng_defines_if(NNG_TRANSPORT_WS NNG_TRANSPORT_WS) +nng_defines_if(NNG_TRANSPORT_WSS NNG_TRANSPORT_WSS) +nng_sources_if(WS_ON websocket.c) +nng_headers_if(WS_ON nng/transport/ws/websocket.h) +nng_test_if(WS_ON ws_test) + + diff --git a/src/sp/transport/ws/README.adoc b/src/sp/transport/ws/README.adoc new file mode 100644 index 00000000..e3101297 --- /dev/null +++ b/src/sp/transport/ws/README.adoc @@ -0,0 +1,38 @@ += websocket transport + +This transport provides support for SP over websocket using TCP or TLS. +When using TCP, it is compatible with the libnanomsg legacy transport. +It also is compatible with mangos (both TCP and TLS). + +TLS support requires the mbedTLS library. 
+
+We set the "protocol" such as "pair.sp.nanomsg.org" in the
+Sec-WebSocket-Protocol field -- the client sets it to the server's
+protocol - i.e. the protocol that the server speaks. For example,
+if the server is a REP, then a REQ client would send "rep.sp.nanomsg.org".
+
+The server sends the same value (its own), per the WebSocket specs. (Note
+that the client's protocol is never sent, but assumed to be complementary
+to the protocol in the Sec-WebSocket-Protocol field.)
+
+Each SP message is a WebSocket message.
+
+WebSocket is defined in RFC 6455.
+
+== Design
+
+We unfortunately need to implement our own design for this -- the only
+reasonable client library would be libcurl, and there is a dearth of
+suitable server libraries. Since we don't have to support full HTTP, but
+just the initial handshake, this isn't too tragic.
+
+== Multiple Server Sockets
+
+In order to support multiple server sockets listening on the same port,
+the application must be long-lived. We will set up a listener on the
+configured TCP (or TLS) port, and examine the PATH supplied in the GET.
+This will be used to match against the URL requested, and if the URL
+matches we will create the appropriate pipe.
+
+If no server endpoint at that address can be found, we return an
+HTTP error, and close the socket.
diff --git a/src/sp/transport/ws/websocket.c b/src/sp/transport/ws/websocket.c
new file mode 100644
index 00000000..3f73f47f
--- /dev/null
+++ b/src/sp/transport/ws/websocket.c
@@ -0,0 +1,740 @@
+//
+// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech>
+// Copyright 2018 Capitar IT Group BV <info@capitar.com>
+// Copyright 2019 Devolutions <info@devolutions.net>
+//
+// This software is supplied under the terms of the MIT License, a
+// copy of which should be located in the distribution where this
+// file was obtained (LICENSE.txt). A copy of the license may also be
+// found online at https://opensource.org/licenses/MIT.
+// + +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +#include "core/nng_impl.h" +#include "supplemental/websocket/websocket.h" + +#include <nng/supplemental/tls/tls.h> +#include <nng/transport/ws/websocket.h> + +typedef struct ws_dialer ws_dialer; +typedef struct ws_listener ws_listener; +typedef struct ws_pipe ws_pipe; + +struct ws_dialer { + uint16_t peer; // remote protocol + nni_list aios; + nni_mtx mtx; + nni_aio * connaio; + nng_stream_dialer *dialer; + bool started; +}; + +struct ws_listener { + uint16_t peer; // remote protocol + nni_list aios; + nni_mtx mtx; + nni_aio * accaio; + nng_stream_listener *listener; + bool started; +}; + +struct ws_pipe { + nni_mtx mtx; + bool closed; + uint16_t peer; + nni_aio * user_txaio; + nni_aio * user_rxaio; + nni_aio * txaio; + nni_aio * rxaio; + nng_stream *ws; +}; + +static void +wstran_pipe_send_cb(void *arg) +{ + ws_pipe *p = arg; + nni_aio *taio; + nni_aio *uaio; + + nni_mtx_lock(&p->mtx); + taio = p->txaio; + uaio = p->user_txaio; + p->user_txaio = NULL; + + if (uaio != NULL) { + int rv; + if ((rv = nni_aio_result(taio)) != 0) { + nni_aio_finish_error(uaio, rv); + } else { + nni_aio_finish(uaio, 0, 0); + } + } + nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_recv_cb(void *arg) +{ + ws_pipe *p = arg; + nni_aio *raio = p->rxaio; + nni_aio *uaio; + int rv; + + nni_mtx_lock(&p->mtx); + uaio = p->user_rxaio; + p->user_rxaio = NULL; + if ((rv = nni_aio_result(raio)) != 0) { + if (uaio != NULL) { + nni_aio_finish_error(uaio, rv); + } + } else { + nni_msg *msg = nni_aio_get_msg(raio); + if (uaio != NULL) { + nni_aio_finish_msg(uaio, msg); + } else { + nni_msg_free(msg); + } + } + nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_recv_cancel(nni_aio *aio, void *arg, int rv) +{ + ws_pipe *p = arg; + nni_mtx_lock(&p->mtx); + if (p->user_rxaio != aio) { + nni_mtx_unlock(&p->mtx); + return; + } + p->user_rxaio = NULL; + nni_aio_abort(p->rxaio, rv); + nni_aio_finish_error(aio, rv); + 
nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_recv(void *arg, nni_aio *aio) +{ + ws_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, wstran_pipe_recv_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + p->user_rxaio = aio; + nng_stream_recv(p->ws, p->rxaio); + nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_send_cancel(nni_aio *aio, void *arg, int rv) +{ + ws_pipe *p = arg; + nni_mtx_lock(&p->mtx); + if (p->user_txaio != aio) { + nni_mtx_unlock(&p->mtx); + return; + } + p->user_txaio = NULL; + nni_aio_abort(p->txaio, rv); + nni_aio_finish_error(aio, rv); + nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_send(void *arg, nni_aio *aio) +{ + ws_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&p->mtx); + if ((rv = nni_aio_schedule(aio, wstran_pipe_send_cancel, p)) != 0) { + nni_mtx_unlock(&p->mtx); + nni_aio_finish_error(aio, rv); + return; + } + p->user_txaio = aio; + nni_aio_set_msg(p->txaio, nni_aio_get_msg(aio)); + nni_aio_set_msg(aio, NULL); + + nng_stream_send(p->ws, p->txaio); + nni_mtx_unlock(&p->mtx); +} + +static void +wstran_pipe_stop(void *arg) +{ + ws_pipe *p = arg; + + nni_aio_stop(p->rxaio); + nni_aio_stop(p->txaio); +} + +static int +wstran_pipe_init(void *arg, nni_pipe *pipe) +{ + NNI_ARG_UNUSED(arg); + NNI_ARG_UNUSED(pipe); + return (0); +} + +static void +wstran_pipe_fini(void *arg) +{ + ws_pipe *p = arg; + + nni_aio_free(p->rxaio); + nni_aio_free(p->txaio); + + nng_stream_free(p->ws); + nni_mtx_fini(&p->mtx); + NNI_FREE_STRUCT(p); +} + +static void +wstran_pipe_close(void *arg) +{ + ws_pipe *p = arg; + + nni_aio_close(p->rxaio); + nni_aio_close(p->txaio); + + nni_mtx_lock(&p->mtx); + nng_stream_close(p->ws); + nni_mtx_unlock(&p->mtx); +} + +static int +wstran_pipe_alloc(ws_pipe **pipep, void *ws) +{ + ws_pipe *p; + int rv; + + if ((p = NNI_ALLOC_STRUCT(p)) == 
NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&p->mtx); + + // Initialize AIOs. + if (((rv = nni_aio_alloc(&p->txaio, wstran_pipe_send_cb, p)) != 0) || + ((rv = nni_aio_alloc(&p->rxaio, wstran_pipe_recv_cb, p)) != 0)) { + wstran_pipe_fini(p); + return (rv); + } + p->ws = ws; + + *pipep = p; + return (0); +} + +static uint16_t +wstran_pipe_peer(void *arg) +{ + ws_pipe *p = arg; + + return (p->peer); +} + +static int +ws_listener_bind(void *arg) +{ + ws_listener *l = arg; + int rv; + + if ((rv = nng_stream_listener_listen(l->listener)) == 0) { + l->started = true; + } + return (rv); +} + +static void +ws_listener_cancel(nni_aio *aio, void *arg, int rv) +{ + ws_listener *l = arg; + + nni_mtx_lock(&l->mtx); + if (nni_aio_list_active(aio)) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&l->mtx); +} + +static void +wstran_listener_accept(void *arg, nni_aio *aio) +{ + ws_listener *l = arg; + int rv; + + // We already bound, so we just need to look for an available + // pipe (created by the handler), and match it. + // Otherwise we stick the AIO in the accept list. 
+ if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&l->mtx); + if ((rv = nni_aio_schedule(aio, ws_listener_cancel, l)) != 0) { + nni_mtx_unlock(&l->mtx); + nni_aio_finish_error(aio, rv); + return; + } + nni_list_append(&l->aios, aio); + if (aio == nni_list_first(&l->aios)) { + nng_stream_listener_accept(l->listener, l->accaio); + } + nni_mtx_unlock(&l->mtx); +} + +static void +wstran_dialer_cancel(nni_aio *aio, void *arg, int rv) +{ + ws_dialer *d = arg; + + nni_mtx_lock(&d->mtx); + if (nni_aio_list_active(aio)) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&d->mtx); +} + +static void +wstran_dialer_connect(void *arg, nni_aio *aio) +{ + ws_dialer *d = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + + nni_mtx_lock(&d->mtx); + if ((rv = nni_aio_schedule(aio, wstran_dialer_cancel, d)) != 0) { + nni_mtx_unlock(&d->mtx); + nni_aio_finish_error(aio, rv); + return; + } + NNI_ASSERT(nni_list_empty(&d->aios)); + d->started = true; + nni_list_append(&d->aios, aio); + nng_stream_dialer_dial(d->dialer, d->connaio); + nni_mtx_unlock(&d->mtx); +} + +static const nni_option ws_pipe_options[] = { + // terminate list + { + .o_name = NULL, + } +}; + +static int +wstran_pipe_getopt( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ws_pipe *p = arg; + int rv; + + if ((rv = nni_stream_get(p->ws, name, buf, szp, t)) == NNG_ENOTSUP) { + rv = nni_getopt(ws_pipe_options, name, p, buf, szp, t); + } + return (rv); +} + +static nni_tran_pipe_ops ws_pipe_ops = { + .p_init = wstran_pipe_init, + .p_fini = wstran_pipe_fini, + .p_stop = wstran_pipe_stop, + .p_send = wstran_pipe_send, + .p_recv = wstran_pipe_recv, + .p_close = wstran_pipe_close, + .p_peer = wstran_pipe_peer, + .p_getopt = wstran_pipe_getopt, +}; + +static void +wstran_dialer_fini(void *arg) +{ + ws_dialer *d = arg; + + nni_aio_stop(d->connaio); + nng_stream_dialer_free(d->dialer); + nni_aio_free(d->connaio); + nni_mtx_fini(&d->mtx); + 
NNI_FREE_STRUCT(d); +} + +static void +wstran_listener_fini(void *arg) +{ + ws_listener *l = arg; + + nni_aio_stop(l->accaio); + nng_stream_listener_free(l->listener); + nni_aio_free(l->accaio); + nni_mtx_fini(&l->mtx); + NNI_FREE_STRUCT(l); +} + +static void +wstran_connect_cb(void *arg) +{ + ws_dialer * d = arg; + ws_pipe * p; + nni_aio * caio = d->connaio; + nni_aio * uaio; + int rv; + nng_stream *ws = NULL; + + nni_mtx_lock(&d->mtx); + if (nni_aio_result(caio) == 0) { + ws = nni_aio_get_output(caio, 0); + } + if ((uaio = nni_list_first(&d->aios)) == NULL) { + // The client stopped caring about this! + nng_stream_free(ws); + nni_mtx_unlock(&d->mtx); + return; + } + nni_aio_list_remove(uaio); + NNI_ASSERT(nni_list_empty(&d->aios)); + if ((rv = nni_aio_result(caio)) != 0) { + nni_aio_finish_error(uaio, rv); + } else if ((rv = wstran_pipe_alloc(&p, ws)) != 0) { + nng_stream_free(ws); + nni_aio_finish_error(uaio, rv); + } else { + p->peer = d->peer; + + nni_aio_set_output(uaio, 0, p); + nni_aio_finish(uaio, 0, 0); + } + nni_mtx_unlock(&d->mtx); +} + +static void +wstran_dialer_close(void *arg) +{ + ws_dialer *d = arg; + + nni_aio_close(d->connaio); + nng_stream_dialer_close(d->dialer); +} + +static void +wstran_listener_close(void *arg) +{ + ws_listener *l = arg; + + nni_aio_close(l->accaio); + nng_stream_listener_close(l->listener); +} + +static void +wstran_accept_cb(void *arg) +{ + ws_listener *l = arg; + nni_aio * aaio = l->accaio; + nni_aio * uaio; + int rv; + + nni_mtx_lock(&l->mtx); + uaio = nni_list_first(&l->aios); + if ((rv = nni_aio_result(aaio)) != 0) { + if (uaio != NULL) { + nni_aio_list_remove(uaio); + nni_aio_finish_error(uaio, rv); + } + } else { + nng_stream *ws = nni_aio_get_output(aaio, 0); + if (uaio != NULL) { + ws_pipe *p; + // Make a pipe + nni_aio_list_remove(uaio); + if ((rv = wstran_pipe_alloc(&p, ws)) != 0) { + nng_stream_close(ws); + nni_aio_finish_error(uaio, rv); + } else { + p->peer = l->peer; + + nni_aio_set_output(uaio, 0, p); + 
nni_aio_finish(uaio, 0, 0); + } + } + } + if (!nni_list_empty(&l->aios)) { + nng_stream_listener_accept(l->listener, aaio); + } + nni_mtx_unlock(&l->mtx); +} + +static int +wstran_dialer_init(void **dp, nng_url *url, nni_dialer *ndialer) +{ + ws_dialer *d; + nni_sock * s = nni_dialer_sock(ndialer); + int rv; + char name[64]; + + if ((d = NNI_ALLOC_STRUCT(d)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&d->mtx); + + nni_aio_list_init(&d->aios); + + d->peer = nni_sock_peer_id(s); + + snprintf( + name, sizeof(name), "%s.sp.nanomsg.org", nni_sock_peer_name(s)); + + if (((rv = nni_ws_dialer_alloc(&d->dialer, url)) != 0) || + ((rv = nni_aio_alloc(&d->connaio, wstran_connect_cb, d)) != 0) || + ((rv = nng_stream_dialer_set_bool( + d->dialer, NNI_OPT_WS_MSGMODE, true)) != 0) || + ((rv = nng_stream_dialer_set_string( + d->dialer, NNG_OPT_WS_PROTOCOL, name)) != 0)) { + wstran_dialer_fini(d); + return (rv); + } + + *dp = d; + return (0); +} + +static int +wstran_listener_init(void **lp, nng_url *url, nni_listener *listener) +{ + ws_listener *l; + int rv; + nni_sock * s = nni_listener_sock(listener); + char name[64]; + + if ((l = NNI_ALLOC_STRUCT(l)) == NULL) { + return (NNG_ENOMEM); + } + nni_mtx_init(&l->mtx); + + nni_aio_list_init(&l->aios); + + l->peer = nni_sock_peer_id(s); + + snprintf( + name, sizeof(name), "%s.sp.nanomsg.org", nni_sock_proto_name(s)); + + if (((rv = nni_ws_listener_alloc(&l->listener, url)) != 0) || + ((rv = nni_aio_alloc(&l->accaio, wstran_accept_cb, l)) != 0) || + ((rv = nng_stream_listener_set_bool( + l->listener, NNI_OPT_WS_MSGMODE, true)) != 0) || + ((rv = nng_stream_listener_set_string( + l->listener, NNG_OPT_WS_PROTOCOL, name)) != 0)) { + wstran_listener_fini(l); + return (rv); + } + *lp = l; + return (0); +} + +static int +wstran_init(void) +{ + return (0); +} + +static void +wstran_fini(void) +{ +} + +static const nni_option wstran_ep_opts[] = { + // terminate list + { + .o_name = NULL, + }, +}; + +static int +wstran_dialer_getopt( + 
void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ws_dialer *d = arg; + int rv; + + rv = nni_stream_dialer_get(d->dialer, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(wstran_ep_opts, name, d, buf, szp, t); + } + return (rv); +} + +static int +wstran_dialer_setopt( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + ws_dialer *d = arg; + int rv; + + rv = nni_stream_dialer_set(d->dialer, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(wstran_ep_opts, name, d, buf, sz, t); + } + return (rv); +} + +static int +wstran_listener_get( + void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + ws_listener *l = arg; + int rv; + + rv = nni_stream_listener_get(l->listener, name, buf, szp, t); + if (rv == NNG_ENOTSUP) { + rv = nni_getopt(wstran_ep_opts, name, l, buf, szp, t); + } + return (rv); +} + +static int +wstran_listener_set( + void *arg, const char *name, const void *buf, size_t sz, nni_type t) +{ + ws_listener *l = arg; + int rv; + + rv = nni_stream_listener_set(l->listener, name, buf, sz, t); + if (rv == NNG_ENOTSUP) { + rv = nni_setopt(wstran_ep_opts, name, l, buf, sz, t); + } + return (rv); +} + +static nni_tran_dialer_ops ws_dialer_ops = { + .d_init = wstran_dialer_init, + .d_fini = wstran_dialer_fini, + .d_connect = wstran_dialer_connect, + .d_close = wstran_dialer_close, + .d_setopt = wstran_dialer_setopt, + .d_getopt = wstran_dialer_getopt, +}; + +static nni_tran_listener_ops ws_listener_ops = { + .l_init = wstran_listener_init, + .l_fini = wstran_listener_fini, + .l_bind = ws_listener_bind, + .l_accept = wstran_listener_accept, + .l_close = wstran_listener_close, + .l_setopt = wstran_listener_set, + .l_getopt = wstran_listener_get, +}; + +static nni_tran ws_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "ws", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = 
wstran_fini, +}; + +static nni_tran ws4_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "ws4", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = wstran_fini, +}; + +static nni_tran ws6_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "ws6", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = wstran_fini, +}; + +int +nng_ws_register(void) +{ + int rv; + if (((rv = nni_tran_register(&ws_tran)) != 0) || + ((rv = nni_tran_register(&ws4_tran)) != 0) || + ((rv = nni_tran_register(&ws6_tran)) != 0)) { + return (rv); + } + + return (0); +} + +#ifdef NNG_TRANSPORT_WSS + +static nni_tran wss_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "wss", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = wstran_fini, +}; + +static nni_tran wss4_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "wss4", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = wstran_fini, +}; + +static nni_tran wss6_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "wss6", + .tran_dialer = &ws_dialer_ops, + .tran_listener = &ws_listener_ops, + .tran_pipe = &ws_pipe_ops, + .tran_init = wstran_init, + .tran_fini = wstran_fini, +}; + +int +nng_wss_register(void) +{ + int rv; + if (((rv = nni_tran_register(&wss_tran)) != 0) || + ((rv = nni_tran_register(&wss4_tran)) != 0) || + ((rv = nni_tran_register(&wss6_tran)) != 0)) { + return (rv); + } + + return (0); +} + +#else + +int +nng_wss_register(void) +{ + return (0); +} + +#endif // NNG_TRANSPORT_WSS diff --git a/src/sp/transport/ws/ws_test.c b/src/sp/transport/ws/ws_test.c new file mode 100644 index 
00000000..7cbcd9d7 --- /dev/null +++ b/src/sp/transport/ws/ws_test.c @@ -0,0 +1,181 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Cody Piersall <cody.piersall@gmail.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <nuts.h> + +static void +test_ws_url_path_filters(void) +{ + nng_socket s1; + nng_socket s2; + char addr[NNG_MAXADDRLEN]; + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + + nuts_scratch_addr("ws", sizeof(addr), addr); + NUTS_PASS(nng_listen(s1, addr, NULL, 0)); + + // Now dial again; we just remove the last character, + // which makes the path different, so the listener must refuse it. + addr[strlen(addr) - 1] = '\0'; + NUTS_FAIL(nng_dial(s2, addr, NULL, 0), NNG_ECONNREFUSED); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +} + +static void +test_wild_card_port(void) +{ + nng_socket s1; + nng_socket s2; + nng_socket s3; + nng_socket s4; + nng_socket s5; + nng_socket s6; + + nng_listener l1; + nng_listener l2; + nng_listener l3; + int port1; + int port2; + int port3; + char ws_url[128]; + NUTS_OPEN(s1); + NUTS_OPEN(s2); + NUTS_OPEN(s3); + NUTS_OPEN(s4); + NUTS_OPEN(s5); + NUTS_OPEN(s6); + NUTS_PASS(nng_listen(s1, "ws://127.0.0.1:0/one", &l1, 0)); + NUTS_PASS( + nng_listener_get_int(l1, NNG_OPT_TCP_BOUND_PORT, &port1)); + NUTS_TRUE(port1 != 0); + snprintf(ws_url, sizeof(ws_url), "ws4://127.0.0.1:%d/two", port1); + NUTS_PASS(nng_listen(s2, ws_url, &l2, 0)); + NUTS_PASS( + nng_listener_get_int(l2, NNG_OPT_TCP_BOUND_PORT, &port2)); + NUTS_TRUE(port2 != 0); + NUTS_TRUE(port1 == port2); + // Now try a different wild card port. 
+ NUTS_PASS(nng_listen(s3, "ws4://127.0.0.1:0/three", &l3, 0)); + NUTS_PASS( + nng_listener_get_int(l3, NNG_OPT_TCP_BOUND_PORT, &port3)); + NUTS_TRUE(port3 != 0); + NUTS_TRUE(port3 != port1); + + // Let's make sure we can dial each one (a distinct + // client socket per listener). + snprintf(ws_url, sizeof(ws_url), "ws://127.0.0.1:%d/one", port1); + NUTS_PASS(nng_dial(s4, ws_url, NULL, 0)); + snprintf(ws_url, sizeof(ws_url), "ws://127.0.0.1:%d/two", port2); + NUTS_PASS(nng_dial(s5, ws_url, NULL, 0)); + snprintf(ws_url, sizeof(ws_url), "ws://127.0.0.1:%d/three", port3); + NUTS_PASS(nng_dial(s6, ws_url, NULL, 0)); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); + NUTS_CLOSE(s3); + NUTS_CLOSE(s4); + NUTS_CLOSE(s5); + NUTS_CLOSE(s6); +} + +static void +test_wild_card_host(void) +{ + nng_socket s1; + nng_socket s2; + char addr[NNG_MAXADDRLEN]; + uint16_t port; + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + + port = nuts_next_port(); + + // we use ws4 to ensure 127.0.0.1 binding + snprintf(addr, sizeof(addr), "ws4://*:%u/test", port); + NUTS_PASS(nng_listen(s1, addr, NULL, 0)); + nng_msleep(100); + + snprintf(addr, sizeof(addr), "ws://127.0.0.1:%u/test", port); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +} + +static void +test_empty_host(void) +{ + nng_socket s1; + nng_socket s2; + char addr[NNG_MAXADDRLEN]; + uint16_t port; + + NUTS_OPEN(s1); + NUTS_OPEN(s2); + + port = nuts_next_port(); + + // we use ws4 to ensure 127.0.0.1 binding + snprintf(addr, sizeof(addr), "ws4://:%u/test", port); + NUTS_PASS(nng_listen(s1, addr, NULL, 0)); + nng_msleep(100); + + snprintf(addr, sizeof(addr), "ws://127.0.0.1:%u/test", port); + NUTS_PASS(nng_dial(s2, addr, NULL, 0)); + + NUTS_CLOSE(s1); + NUTS_CLOSE(s2); +} + +void +test_ws_recv_max(void) +{ + char msg[256]; + char buf[256]; + nng_socket s0; + nng_socket s1; + nng_listener l; + size_t sz; + char *addr; + + NUTS_ADDR(addr, "ws"); + NUTS_OPEN(s0); + NUTS_PASS(nng_socket_set_ms(s0, NNG_OPT_RECVTIMEO, 100)); + NUTS_PASS(nng_socket_set_size(s0, NNG_OPT_RECVMAXSZ, 
200)); + NUTS_PASS(nng_listener_create(&l, s0, addr)); + NUTS_PASS(nng_socket_get_size(s0, NNG_OPT_RECVMAXSZ, &sz)); + NUTS_TRUE(sz == 200); + NUTS_PASS(nng_listener_set_size(l, NNG_OPT_RECVMAXSZ, 100)); + NUTS_PASS(nng_listener_start(l, 0)); + + NUTS_OPEN(s1); + NUTS_PASS(nng_dial(s1, addr, NULL, 0)); + NUTS_PASS(nng_send(s1, msg, 95, 0)); + NUTS_PASS(nng_socket_set_ms(s1, NNG_OPT_SENDTIMEO, 100)); + NUTS_PASS(nng_recv(s0, buf, &sz, 0)); + NUTS_TRUE(sz == 95); + NUTS_PASS(nng_send(s1, msg, 150, 0)); + NUTS_FAIL(nng_recv(s0, buf, &sz, 0), NNG_ETIMEDOUT); + NUTS_CLOSE(s0); + NUTS_CLOSE(s1); +} + +TEST_LIST = { + { "ws url path filters", test_ws_url_path_filters }, + { "ws wild card port", test_wild_card_port }, + { "ws wild card host", test_wild_card_host }, + { "ws empty host", test_empty_host }, + { "ws recv max", test_ws_recv_max }, + { NULL, NULL }, +};
\ No newline at end of file diff --git a/src/sp/transport/zerotier/CMakeLists.txt b/src/sp/transport/zerotier/CMakeLists.txt new file mode 100644 index 00000000..903b7f56 --- /dev/null +++ b/src/sp/transport/zerotier/CMakeLists.txt @@ -0,0 +1,37 @@ +# +# Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +# Copyright 2018 Capitar IT Group BV <info@capitar.com> +# +# This software is supplied under the terms of the MIT License, a +# copy of which should be located in the distribution where this +# file was obtained (LICENSE.txt). A copy of the license may also be +# found online at https://opensource.org/licenses/MIT. +# + +# ZeroTier protocol + +option (NNG_TRANSPORT_ZEROTIER "Enable ZeroTier transport (requires libzerotiercore)." OFF) +mark_as_advanced(NNG_TRANSPORT_ZEROTIER) + +nng_directory(zerotier) + +if (NNG_TRANSPORT_ZEROTIER) + + # NB: As we wind up linking libzerotiercore.a into the application, + # this means that your application will *also* need to either be licensed + # under the GPLv3, or you will need to have a commercial license from + # ZeroTier permitting its use elsewhere. + + message(WARNING " + ************************************************************ + Linking against zerotiercore changes license terms. + Consult a lawyer and the license files for details. + ************************************************************") + + find_package(zerotiercore REQUIRED) + + nng_link_libraries(zerotiercore::zerotiercore) + nng_defines(NNG_TRANSPORT_ZEROTIER) + nng_sources(zerotier.c zthash.c) + nng_headers(nng/transport/zerotier/zerotier.h) +endif() diff --git a/src/sp/transport/zerotier/zerotier.c b/src/sp/transport/zerotier/zerotier.c new file mode 100644 index 00000000..896add29 --- /dev/null +++ b/src/sp/transport/zerotier/zerotier.c @@ -0,0 +1,3241 @@ +// +// Copyright 2020 Staysail Systems, Inc. 
<info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/nng_impl.h" +#include "zthash.h" + +#include "nng/transport/zerotier/zerotier.h" + +#include <zerotiercore/ZeroTierOne.h> + +// ZeroTier Transport. This sits on the ZeroTier L2 network, which itself +// is implemented on top of UDP. This requires the 3rd party +// libzerotiercore library (which is GPLv3!) and platform specific UDP +// functionality to be built in. Note that care must be taken to link +// dynamically if one wishes to avoid making your entire application GPL3. +// (Alternatively ZeroTier offers commercial licenses which may prevent +// this particular problem.) This implementation does not make use of +// certain advanced capabilities in ZeroTier such as more sophisticated +// route management and TCP fallback. You need to have connectivity +// to the Internet to use this. (Or at least to your Planetary root.) +// +// Because ZeroTier takes a while to establish connectivity, it is even +// more important that applications using the ZeroTier transport not +// assume that a connection will be immediately available. It can take +// quite a few seconds for peer-to-peer connectivity to be established. +// +// The ZeroTier transport was funded by Capitar IT Group, BV. +// +// This transport is highly experimental. + +// ZeroTier and UDP are connectionless, but nng is designed around +// connection oriented paradigms. An "unreliable" connection is created +// on top using our own network protocol. The details of this are +// documented in the RFC. 
+ +// Every participant has an "address", which is a 64-bit value constructed +// using the ZT node number in the upper 40-bits, and a 24-bit port number +// in the lower bits. We elect to operate primarily on these addresses, +// but the wire protocol relies on just conveying the 24-bit port along +// with the MAC address (from which the ZT node number can be derived, +// given the network ID.) + +typedef struct zt_pipe zt_pipe; +typedef struct zt_ep zt_ep; +typedef struct zt_node zt_node; +typedef struct zt_frag zt_frag; +typedef struct zt_fraglist zt_fraglist; + +// Port numbers are stored as 24-bit values in network byte order. +#define ZT_GET24(ptr, v) \ + v = (((uint32_t)((uint8_t)(ptr)[0])) << 16) + \ + (((uint32_t)((uint8_t)(ptr)[1])) << 8) + \ + (((uint32_t)(uint8_t)(ptr)[2])) + +#define ZT_PUT24(ptr, u) \ + do { \ + (ptr)[0] = (uint8_t)(((uint32_t)(u)) >> 16); \ + (ptr)[1] = (uint8_t)(((uint32_t)(u)) >> 8); \ + (ptr)[2] = (uint8_t)((uint32_t)(u)); \ + } while (0) + +static const uint16_t zt_ethertype = 0x901; +static const uint8_t zt_version = 0x01; +static const uint32_t zt_ephemeral = 0x800000u; // start of ephemeral ports +static const uint32_t zt_max_port = 0xffffffu; // largest port +static const uint32_t zt_port_mask = 0xffffffu; // mask of valid ports +static const uint32_t zt_port_shift = 24; +static const int zt_conn_tries = 240; // max connect attempts +static const nng_duration zt_conn_time = 500; // between attempts (msec) +static const int zt_ping_tries = 10; // max keepalive attempts +static const nng_duration zt_ping_time = 60000; // keepalive time (msec) + +// These are compile time tunables for now. 
+enum zt_tunables { + zt_listenq = 128, // backlog queue length + zt_listen_expire = 10000, // maximum time in backlog (msec) + zt_rcv_bufsize = 4096, // max UDP recv + zt_udp_sendq = 16, // outgoing UDP queue length + zt_recvq = 2, // max pending recv (per pipe) + zt_recv_stale = 1000, // frags older than are stale (msec) +}; + +enum zt_op_codes { + zt_op_data = 0x00, // data, final fragment + zt_op_conn_req = 0x10, // connect request + zt_op_conn_ack = 0x12, // connect accepted + zt_op_disc_req = 0x20, // disconnect request (no ack) + zt_op_ping = 0x30, // ping request + zt_op_pong = 0x32, // ping response + zt_op_error = 0x40, // error response +}; + +enum zt_offsets { + zt_offset_op = 0x00, + zt_offset_flags = 0x01, + zt_offset_version = 0x02, // protocol version number (2 bytes) + zt_offset_zero1 = 0x04, // reserved, must be zero (1 byte) + zt_offset_dst_port = 0x05, // destination port (3 bytes) + zt_offset_zero2 = 0x08, // reserved, must be zero (1 byte) + zt_offset_src_port = 0x09, // source port number (3 bytes) + zt_offset_creq_proto = 0x0C, // SP protocol number (2 bytes) + zt_offset_cack_proto = 0x0C, // SP protocol number (2 bytes) + zt_offset_err_code = 0x0C, // error code (1 byte) + zt_offset_err_msg = 0x0D, // error message (string) + zt_offset_data_id = 0x0C, // message ID (2 bytes) + zt_offset_data_fragsz = 0x0E, // fragment size + zt_offset_data_frag = 0x10, // fragment number, first is 1 (2 bytes) + zt_offset_data_nfrag = 0x12, // total fragments (2 bytes) + zt_offset_data_data = 0x14, // user payload + zt_size_headers = 0x0C, // size of headers + zt_size_conn_req = 0x0E, // size of conn_req (connect request) + zt_size_conn_ack = 0x0E, // size of conn_ack (connect reply) + zt_size_disc_req = 0x0C, // size of disc_req (disconnect) + zt_size_ping = 0x0C, // size of ping request + zt_size_pong = 0x0C, // size of ping reply + zt_size_data = 0x14, // size of data message (w/o payload) +}; + +enum zt_errors { + zt_err_refused = 0x01, // Connection 
refused + zt_err_notconn = 0x02, // Connection does not exist + zt_err_wrongsp = 0x03, // SP protocol mismatch + zt_err_proto = 0x04, // Other protocol error + zt_err_msgsize = 0x05, // Message too large + zt_err_unknown = 0x06, // Other errors +}; + +// This node structure is wrapped around the ZT_node; this allows us to +// have multiple endpoints referencing the same ZT_node, but also to +// support different nodes (identities) based on different home dirs. +// This means we need to stick these on a global linked list, manage +// them with a reference count, and uniquely identify them using the +// homedir. +struct zt_node { + char zn_path[NNG_MAXADDRLEN]; // ought to be sufficient + nni_file_lockh *zn_flock; + ZT_Node * zn_znode; + uint64_t zn_self; + nni_list_node zn_link; + bool zn_closed; + nni_plat_udp * zn_udp4; + nni_plat_udp * zn_udp6; + nni_list zn_eplist; + nni_list zn_plist; + zt_hash * zn_ports; + zt_hash * zn_eps; + zt_hash * zn_lpipes; + zt_hash * zn_rpipes; + nni_aio * zn_rcv4_aio; + uint8_t * zn_rcv4_buf; + nng_sockaddr zn_rcv4_addr; + nni_aio * zn_rcv6_aio; + uint8_t * zn_rcv6_buf; + nng_sockaddr zn_rcv6_addr; + nni_thr zn_bgthr; + int64_t zn_bgtime; + nni_cv zn_bgcv; + nni_cv zn_snd6_cv; +}; + +// The fragment list is used to keep track of incoming received +// fragments for reassembly into a complete message. 
+struct zt_fraglist { + nni_time fl_time; // time first frag was received + uint32_t fl_msgid; // message id + int fl_ready; // we have all messages + size_t fl_fragsz; + unsigned int fl_nfrags; + uint8_t * fl_missing; + size_t fl_missingsz; + nni_msg * fl_msg; +}; + +struct zt_pipe { + nni_list_node zp_link; + zt_node * zp_ztn; + nni_pipe * zp_npipe; + uint64_t zp_nwid; + uint64_t zp_laddr; + uint64_t zp_raddr; + uint16_t zp_peer; + uint16_t zp_proto; + uint16_t zp_next_msgid; + size_t zp_rcvmax; + size_t zp_mtu; + nni_aio * zp_user_rxaio; + nni_time zp_last_recv; + zt_fraglist zp_recvq[zt_recvq]; + int zp_ping_try; + int zp_ping_tries; + bool zp_closed; + nni_duration zp_ping_time; + nni_aio * zp_ping_aio; + uint8_t * zp_send_buf; + nni_atomic_flag zp_reaped; + nni_reap_item zp_reap; +}; + +typedef struct zt_creq zt_creq; +struct zt_creq { + uint64_t cr_expire; + uint64_t cr_raddr; + uint16_t cr_proto; +}; + +struct zt_ep { + nni_list_node ze_link; + char ze_home[NNG_MAXADDRLEN]; // should be enough + zt_node * ze_ztn; + uint64_t ze_nwid; + bool ze_running; + uint64_t ze_raddr; // remote node address + uint64_t ze_laddr; // local node address + uint16_t ze_proto; + size_t ze_rcvmax; + nni_aio * ze_aio; + nni_aio * ze_creq_aio; + bool ze_creq_active; + int ze_creq_try; + nni_list ze_aios; + int ze_mtu; + int ze_ping_tries; + nni_duration ze_ping_time; + nni_duration ze_conn_time; + int ze_conn_tries; + + // Incoming connection requests (server only). We only + // only have "accepted" requests -- that is we won't have an + // established connection/pipe unless the application calls + // accept. Since the "application" is our library, that should + // be pretty much as fast we can run. + zt_creq ze_creqs[zt_listenq]; + int ze_creq_head; + int ze_creq_tail; + nni_dialer * ze_ndialer; + nni_listener *ze_nlistener; +}; + +// Locking strategy. At present the ZeroTier core is not reentrant or fully +// threadsafe. (We expect this will be fixed.) 
Furthermore, there are +// some significant challenges in dealing with locks associated with the +// callbacks, etc. So we take a big-hammer approach, and just use a single +// global lock for everything. We hold this lock when calling into the +// ZeroTier framework. Since ZeroTier has no independent threads, that +// means that it will always hold this lock in its core, and the lock will +// also be held automatically in any of our callbacks. We never hold any +// other locks across ZeroTier core calls. We may not acquire the global +// lock in callbacks (they will already have it held). Any other locks +// can be acquired as long as they are not held during calls into ZeroTier. +// +// This will have a detrimental impact on performance, but to be completely +// honest we don't think anyone will be using the ZeroTier transport in +// performance critical applications; scalability may become a factor for +// large servers sitting in a ZeroTier hub situation. (Then again, since +// only the zerotier processing is single threaded, it may not +// be that much of a bottleneck -- really depends on how expensive these +// operations are. We can use lockstat or other lock-hotness tools to +// check for this later.) 
+ +static nni_mtx zt_lk; +static nni_list zt_nodes; + +static void zt_ep_send_conn_req(zt_ep *); +static void zt_ep_conn_req_cb(void *); +static void zt_ep_doaccept(zt_ep *); +static void zt_pipe_dorecv(zt_pipe *); +static int zt_pipe_alloc(zt_pipe **, zt_ep *, uint64_t, uint64_t, bool); +static void zt_pipe_ping_cb(void *); +static void zt_fraglist_clear(zt_fraglist *); +static void zt_fraglist_free(zt_fraglist *); +static void zt_virtual_recv(ZT_Node *, void *, void *, uint64_t, void **, + uint64_t, uint64_t, unsigned int, unsigned int, const void *, + unsigned int); +static void zt_pipe_start_ping(zt_pipe *); + +static int64_t +zt_now(void) +{ + // We return msec + return ((int64_t) nni_clock()); +} + +static void +zt_bgthr(void *arg) +{ + zt_node *ztn = arg; + int64_t now; + + nni_mtx_lock(&zt_lk); + for (;;) { + now = zt_now(); + + if (ztn->zn_closed) { + break; + } + + if (now < ztn->zn_bgtime) { + nni_cv_until(&ztn->zn_bgcv, (nni_time) ztn->zn_bgtime); + continue; + } + + ztn->zn_bgtime = 0; + ZT_Node_processBackgroundTasks(ztn->zn_znode, NULL, now, &now); + + ztn->zn_bgtime = now; + } + nni_mtx_unlock(&zt_lk); +} + +static void +zt_node_resched(zt_node *ztn, int64_t msec) +{ + if (msec > ztn->zn_bgtime && ztn->zn_bgtime != 0) { + return; + } + ztn->zn_bgtime = msec; + nni_cv_wake1(&ztn->zn_bgcv); +} + +static void +zt_node_rcv4_cb(void *arg) +{ + zt_node * ztn = arg; + nni_aio * aio = ztn->zn_rcv4_aio; + struct sockaddr_storage sa; + struct sockaddr_in * sin; + nng_sockaddr_in * nsin; + int64_t now; + + if (nni_aio_result(aio) != 0) { + // Outside of memory exhaustion, we can't really think + // of any reason for this to legitimately fail. + // Arguably we should inject a fallback delay, but for + // now we just carry on. 
+ return; + } + + memset(&sa, 0, sizeof(sa)); + sin = (void *) &sa; + nsin = &ztn->zn_rcv4_addr.s_in; + sin->sin_family = AF_INET; + sin->sin_port = nsin->sa_port; + sin->sin_addr.s_addr = nsin->sa_addr; + + nni_mtx_lock(&zt_lk); + now = zt_now(); + + // We are not going to perform any validation of the data; we + // just pass this straight into the ZeroTier core. + // XXX: CHECK THIS, if it fails then we have a fatal error with + // the znode, and have to shut everything down. + ZT_Node_processWirePacket(ztn->zn_znode, NULL, now, 0, (void *) &sa, + ztn->zn_rcv4_buf, nni_aio_count(aio), &now); + + // Schedule background work + zt_node_resched(ztn, now); + + // Schedule another receive. + if (ztn->zn_udp4 != NULL) { + nni_iov iov; + iov.iov_buf = ztn->zn_rcv4_buf; + iov.iov_len = zt_rcv_bufsize; + nni_aio_set_iov(aio, 1, &iov); + + nni_aio_set_input(aio, 0, &ztn->zn_rcv4_addr); + + nni_plat_udp_recv(ztn->zn_udp4, aio); + } + nni_mtx_unlock(&zt_lk); +} + +static void +zt_node_rcv6_cb(void *arg) +{ + zt_node * ztn = arg; + nni_aio * aio = ztn->zn_rcv6_aio; + struct sockaddr_storage sa; + struct sockaddr_in6 * sin6; + struct nng_sockaddr_in6 *nsin6; + int64_t now; + + if (nni_aio_result(aio) != 0) { + // Outside of memory exhaustion, we can't really think + // of any reason for this to legitimately fail. + // Arguably we should inject a fallback delay, but for + // now we just carry on. + return; + } + + memset(&sa, 0, sizeof(sa)); + sin6 = (void *) &sa; + nsin6 = &ztn->zn_rcv6_addr.s_in6; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = nsin6->sa_port; + memcpy(&sin6->sin6_addr, nsin6->sa_addr, 16); + + nni_mtx_lock(&zt_lk); + now = (uint64_t) zt_now(); // msec + + // We are not going to perform any validation of the data; we + // just pass this straight into the ZeroTier core. 
+ ZT_Node_processWirePacket(ztn->zn_znode, NULL, now, 0, (void *) &sa, + ztn->zn_rcv6_buf, nni_aio_count(aio), &now); + + // Schedule background work + zt_node_resched(ztn, now); + + // Schedule another receive. + if (ztn->zn_udp6 != NULL) { + nni_iov iov; + iov.iov_buf = ztn->zn_rcv6_buf; + iov.iov_len = zt_rcv_bufsize; + nni_aio_set_iov(aio, 1, &iov); + nni_aio_set_input(aio, 0, &ztn->zn_rcv6_addr); + nni_plat_udp_recv(ztn->zn_udp6, aio); + } + nni_mtx_unlock(&zt_lk); +} + +static uint64_t +zt_mac_to_node(uint64_t mac, uint64_t nwid) +{ + uint64_t node; + // This extracts a node address from a mac address. The + // network ID is mixed in, and has to be extricated. + // The node ID is located in the lower 40 bits, and is scrambled + // against the nwid. + node = mac & 0xffffffffffull; + node ^= ((nwid >> 8) & 0xff) << 32; + node ^= ((nwid >> 16) & 0xff) << 24; + node ^= ((nwid >> 24) & 0xff) << 16; + node ^= ((nwid >> 32) & 0xff) << 8; + node ^= (nwid >> 40) & 0xff; + return (node); +} + +static uint64_t +zt_node_to_mac(uint64_t node, uint64_t nwid) +{ + uint64_t mac; + // We use LSB of network ID, and make sure that we clear + // multicast and set local administration -- this is the first + // octet of the 48 bit mac address. We also avoid 0x52, which + // is known to be used in KVM, libvirt, etc. + mac = ((uint8_t)(nwid & 0xfe) | 0x02); + if (mac == 0x52) { + mac = 0x32; + } + mac <<= 40; + mac |= node; + // The rest of the network ID is XOR'd in, in reverse byte + // order. 
+ mac ^= ((nwid >> 8) & 0xff) << 32; + mac ^= ((nwid >> 16) & 0xff) << 24; + mac ^= ((nwid >> 24) & 0xff) << 16; + mac ^= ((nwid >> 32) & 0xff) << 8; + mac ^= (nwid >> 40) & 0xff; + return (mac); +} + +static int +zt_result(enum ZT_ResultCode rv) +{ + switch (rv) { + case ZT_RESULT_OK: + return (0); + case ZT_RESULT_OK_IGNORED: + return (0); + case ZT_RESULT_FATAL_ERROR_OUT_OF_MEMORY: + return (NNG_ENOMEM); + case ZT_RESULT_FATAL_ERROR_DATA_STORE_FAILED: + return (NNG_EPERM); + case ZT_RESULT_FATAL_ERROR_INTERNAL: + return (NNG_EINTERNAL); + case ZT_RESULT_ERROR_NETWORK_NOT_FOUND: + return (NNG_EADDRINVAL); + case ZT_RESULT_ERROR_UNSUPPORTED_OPERATION: + return (NNG_ENOTSUP); + case ZT_RESULT_ERROR_BAD_PARAMETER: + return (NNG_EINVAL); + default: + return (NNG_ETRANERR + (int) rv); + } +} + +// ZeroTier Node API callbacks +static int +zt_virtual_config(ZT_Node *node, void *userptr, void *thr, uint64_t nwid, + void **netptr, enum ZT_VirtualNetworkConfigOperation op, + const ZT_VirtualNetworkConfig *config) +{ + zt_node *ztn = userptr; + zt_ep * ep; + + NNI_ARG_UNUSED(thr); + NNI_ARG_UNUSED(netptr); + + NNI_ASSERT(node == ztn->zn_znode); + + // Maybe we don't have to create taps or anything like that. + // We do get our mac and MTUs from this, so there's that. + switch (op) { + case ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_UP: + case ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_CONFIG_UPDATE: + + // We only really care about changes to the MTU. From + // an API perspective the MAC could change, but that + // cannot really happen because the node identity and + // the nwid are fixed. + NNI_LIST_FOREACH (&ztn->zn_eplist, ep) { + NNI_ASSERT(nwid == config->nwid); + if (ep->ze_nwid != config->nwid) { + continue; + } + ep->ze_mtu = config->mtu; + } + break; + case ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_DESTROY: + case ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_DOWN: + // XXX: tear down endpoints? 
	default:
		break;
	}
	return (0);
}

// zt_send modifies the start of the supplied buffer to update the
// message headers with protocol specific details (version, port numbers,
// etc.) and then sends it over the virtual network.
static void
zt_send(zt_node *ztn, uint64_t nwid, uint8_t op, uint64_t raddr,
    uint64_t laddr, uint8_t *data, size_t len)
{
	uint64_t srcmac = zt_node_to_mac(laddr >> 24, nwid);
	uint64_t dstmac = zt_node_to_mac(raddr >> 24, nwid);
	int64_t  now    = zt_now();

	NNI_ASSERT(len >= zt_size_headers);
	data[zt_offset_op]    = op;
	data[zt_offset_flags] = 0;
	data[zt_offset_zero1] = 0;
	data[zt_offset_zero2] = 0;
	NNI_PUT16(data + zt_offset_version, zt_version);
	ZT_PUT24(data + zt_offset_dst_port, raddr & zt_port_mask);
	ZT_PUT24(data + zt_offset_src_port, laddr & zt_port_mask);

	(void) ZT_Node_processVirtualNetworkFrame(ztn->zn_znode, NULL, now,
	    nwid, srcmac, dstmac, zt_ethertype, 0, data, len, &now);

	zt_node_resched(ztn, now);
}

// zt_send_err sends an error frame (code plus NUL-terminated message)
// to the given peer address.
static void
zt_send_err(zt_node *ztn, uint64_t nwid, uint64_t raddr, uint64_t laddr,
    uint8_t err, const char *msg)
{
	uint8_t data[128];

	NNI_ASSERT((strlen(msg) + zt_offset_err_msg) < sizeof(data));

	data[zt_offset_err_code] = err;
	nni_strlcpy((char *) data + zt_offset_err_msg, msg,
	    sizeof(data) - zt_offset_err_msg);

	zt_send(ztn, nwid, zt_op_error, raddr, laddr, data,
	    strlen(msg) + zt_offset_err_msg);
}

// Convenience wrapper: send an error frame using the pipe's addresses.
static void
zt_pipe_send_err(zt_pipe *p, uint8_t err, const char *msg)
{
	zt_send_err(p->zp_ztn, p->zp_nwid, p->zp_raddr, p->zp_laddr, err, msg);
}

// Send a disconnect request to the peer (payload content is unused).
static void
zt_pipe_send_disc_req(zt_pipe *p)
{
	uint8_t data[zt_size_disc_req];

	zt_send(p->zp_ztn, p->zp_nwid, zt_op_disc_req, p->zp_raddr,
	    p->zp_laddr, data, sizeof(data));
}

// Send a keep-alive ping to the peer.
static void
zt_pipe_send_ping(zt_pipe *p)
{
	uint8_t data[zt_size_ping];

	zt_send(p->zp_ztn, p->zp_nwid, zt_op_ping, p->zp_raddr, p->zp_laddr,
	    data, sizeof(data));
}

// Send a pong in reply to a ping.
static void
zt_pipe_send_pong(zt_pipe *p)
{
	// NOTE(review): buffer is sized zt_size_ping -- presumably ping and
	// pong frames share a size; confirm against the header definitions.
	uint8_t data[zt_size_ping];

	zt_send(p->zp_ztn, p->zp_nwid, zt_op_pong, p->zp_raddr, p->zp_laddr,
	    data, sizeof(data));
}

// Send a connection acknowledgment carrying our protocol number.
static void
zt_pipe_send_conn_ack(zt_pipe *p)
{
	uint8_t data[zt_size_conn_ack];

	NNI_PUT16(data + zt_offset_cack_proto, p->zp_proto);
	zt_send(p->zp_ztn, p->zp_nwid, zt_op_conn_ack, p->zp_raddr,
	    p->zp_laddr, data, sizeof(data));
}

// Send a connection request carrying the dialer's protocol number.
static void
zt_ep_send_conn_req(zt_ep *ep)
{
	uint8_t data[zt_size_conn_req];

	NNI_PUT16(data + zt_offset_creq_proto, ep->ze_proto);
	zt_send(ep->ze_ztn, ep->ze_nwid, zt_op_conn_req, ep->ze_raddr,
	    ep->ze_laddr, data, sizeof(data));
}

// zt_ep_recv_conn_ack handles a conn ack arriving at a dialing endpoint:
// it validates the frame, creates the pipe, and completes the dial aio.
static void
zt_ep_recv_conn_ack(zt_ep *ep, uint64_t raddr, const uint8_t *data, size_t len)
{
	zt_node *ztn = ep->ze_ztn;
	nni_aio *aio = ep->ze_creq_aio;
	zt_pipe *p;
	int      rv;

	if (ep->ze_ndialer == NULL) {
		// Only dialers expect conn acks.
		zt_send_err(ztn, ep->ze_nwid, raddr, ep->ze_laddr,
		    zt_err_proto, "Inappropriate operation");
		return;
	}

	if (len != zt_size_conn_ack) {
		zt_send_err(ztn, ep->ze_nwid, raddr, ep->ze_laddr,
		    zt_err_proto, "Bad message length");
		return;
	}

	if (ep->ze_creq_try == 0) {
		// No connection request outstanding; stale ack.
		return;
	}

	// Do we already have a matching pipe?  If so, we can discard
	// the operation.  This should not happen, since we normally,
	// deregister the endpoint when we create the pipe.
	if ((zt_hash_find(ztn->zn_lpipes, ep->ze_laddr, (void **) &p)) == 0) {
		return;
	}

	if ((rv = zt_pipe_alloc(&p, ep, raddr, ep->ze_laddr, false)) != 0) {
		// We couldn't create the pipe, just drop it.
		nni_aio_finish_error(aio, rv);
		return;
	}
	NNI_GET16(data + zt_offset_cack_proto, p->zp_peer);

	// Reset the address of the endpoint, so that the next call to
	// ep_connect will bind a new one -- we are using this one for the
	// pipe.
	zt_hash_remove(ztn->zn_eps, ep->ze_laddr);
	ep->ze_laddr = 0;

	nni_aio_set_output(aio, 0, p);
	nni_aio_finish(aio, 0, 0);
}

// zt_ep_recv_conn_req handles an inbound connection request at a
// listening endpoint, queueing it on the listenq for acceptors.
static void
zt_ep_recv_conn_req(zt_ep *ep, uint64_t raddr, const uint8_t *data, size_t len)
{
	zt_node *ztn = ep->ze_ztn;
	zt_pipe *p;
	int      i;

	if (ep->ze_nlistener == NULL) {
		// Only listeners accept connection requests.
		zt_send_err(ztn, ep->ze_nwid, raddr, ep->ze_laddr,
		    zt_err_proto, "Inappropriate operation");
		return;
	}
	if (len != zt_size_conn_req) {
		zt_send_err(ztn, ep->ze_nwid, raddr, ep->ze_laddr,
		    zt_err_proto, "Bad message length");
		return;
	}

	// If we already have created a pipe for this connection
	// then just reply the conn ack.
	if ((zt_hash_find(ztn->zn_rpipes, raddr, (void **) &p)) == 0) {
		zt_pipe_send_conn_ack(p);
		return;
	}

	// We may already have a connection request queued (if this was
	// a resend for example); if that's the case we just ignore
	// this one.
	for (i = ep->ze_creq_tail; i != ep->ze_creq_head; i++) {
		if (ep->ze_creqs[i % zt_listenq].cr_raddr == raddr) {
			return;
		}
	}
	// We may already have filled our listenq, in which case we just drop.
	if ((ep->ze_creq_tail + zt_listenq) == ep->ze_creq_head) {
		// We have taken as many as we can, so just drop it.
		return;
	}

	// Record the connection request, and then process any
	// pending acceptors.
	i = ep->ze_creq_head % zt_listenq;

	NNI_GET16(data + zt_offset_creq_proto, ep->ze_creqs[i].cr_proto);
	ep->ze_creqs[i].cr_raddr  = raddr;
	ep->ze_creqs[i].cr_expire = nni_clock() + zt_listen_expire;
	ep->ze_creq_head++;

	zt_ep_doaccept(ep);
}

// zt_ep_recv_error handles an error frame addressed to the endpoint,
// failing an outstanding connection request if one is active.
static void
zt_ep_recv_error(zt_ep *ep, const uint8_t *data, size_t len)
{
	int code;

	// Most of the time we don't care about errors.  The exception here
	// is that when we have an outstanding CON_REQ, we would like to
	// process that appropriately.

	if (ep->ze_ndialer == NULL) {
		// Not a dialer.  Drop it.
		return;
	}

	if (len < zt_offset_err_msg) {
		// Malformed error frame.
		return;
	}

	code = data[zt_offset_err_code];
	// Map wire error codes to NNG error numbers.
	switch (code) {
	case zt_err_refused:
		code = NNG_ECONNREFUSED;
		break;
	case zt_err_notconn:
		code = NNG_ECLOSED;
		break;
	case zt_err_wrongsp:
		code = NNG_EPROTO;
		break;
	default:
		code = NNG_ETRANERR;
		break;
	}

	if (ep->ze_creq_active) {
		ep->ze_creq_try    = 0;
		ep->ze_creq_active = 0;
		nni_aio_finish_error(ep->ze_creq_aio, code);
	}
}

// zt_ep_virtual_recv dispatches a frame that arrived for an endpoint
// (rather than an established pipe).
static void
zt_ep_virtual_recv(
    zt_ep *ep, uint8_t op, uint64_t raddr, const uint8_t *data, size_t len)
{
	// Only listeners should be receiving.  Dialers receive on the pipe,
	// rather than the endpoint.  The only message that endpoints can
	// receive are connection requests.
	switch (op) {
	case zt_op_conn_req:
		zt_ep_recv_conn_req(ep, raddr, data, len);
		return;
	case zt_op_conn_ack:
		zt_ep_recv_conn_ack(ep, raddr, data, len);
		return;
	case zt_op_error:
		zt_ep_recv_error(ep, data, len);
		return;
	default:
		zt_send_err(ep->ze_ztn, ep->ze_nwid, raddr, ep->ze_laddr,
		    zt_err_proto, "Bad operation");
		return;
	}
}

// zt_pipe_close_err closes the pipe locally with the given NNG error,
// optionally sending an error frame (code/msg) to the peer.
// NB: caller holds zt_lk.
static void
zt_pipe_close_err(zt_pipe *p, int err, uint8_t code, const char *msg)
{
	nni_aio *aio;
	if ((aio = p->zp_user_rxaio) != NULL) {
		p->zp_user_rxaio = NULL;
		nni_aio_finish_error(aio, err);
	}
	nni_aio_close(p->zp_ping_aio);
	p->zp_closed = true;
	if (msg != NULL) {
		zt_pipe_send_err(p, code, msg);
	}
}

// zt_pipe_recv_data reassembles inbound data fragments into messages.
static void
zt_pipe_recv_data(zt_pipe *p, const uint8_t *data, size_t len)
{
	uint16_t     msgid;
	uint16_t     fragno;
	uint16_t     nfrags;
	size_t       fragsz;
	zt_fraglist *fl;
	int          i;
	int          slot;
	uint8_t      bit;
	uint8_t *    body;

	if (len < zt_size_data) {
		// Runt frame.  Drop it and close pipe with a protocol error.
+ zt_pipe_close_err(p, NNG_EPROTO, zt_err_proto, "Runt frame"); + return; + } + + NNI_GET16(data + zt_offset_data_id, msgid); + NNI_GET16(data + zt_offset_data_fragsz, fragsz); + NNI_GET16(data + zt_offset_data_frag, fragno); + NNI_GET16(data + zt_offset_data_nfrag, nfrags); + len -= zt_offset_data_data; + data += zt_offset_data_data; + + // Check for cases where message size is clearly too large. Note + // that we only can catch the case where a message is larger by + // more than a fragment, since the final fragment may be shorter, + // and we won't know that until we receive it. + if ((p->zp_rcvmax > 0) && + ((nfrags * fragsz) >= (p->zp_rcvmax + fragsz))) { + // Discard, as the forwarder might be on the other side + // of a device. This is gentler than just shutting the pipe + // down. Sending a remote error might be polite, but since + // most peers will close the pipe on such an error, we + // simply silently discard it. + return; + } + + // We run the recv logic once, to clear stale fragment entries. + zt_pipe_dorecv(p); + + // Find a suitable fragment slot. + slot = -1; + for (i = 0; i < zt_recvq; i++) { + fl = &p->zp_recvq[i]; + // This was our message ID, we always use it. + if (msgid == fl->fl_msgid) { + slot = i; + break; + } + + if (slot < 0) { + slot = i; + } else if (fl->fl_time < p->zp_recvq[slot].fl_time) { + // This has an earlier expiration, so lets choose it. + slot = i; + } + } + + NNI_ASSERT(slot >= 0); + + fl = &p->zp_recvq[slot]; + if (fl->fl_msgid != msgid) { + // First fragment we've received for this message (but might + // not be first fragment for message!) + zt_fraglist_clear(fl); + + if (nni_msg_alloc(&fl->fl_msg, nfrags * fragsz) != 0) { + // Out of memory. We don't close the pipe, but + // just fail to receive the message. Bump a stat? + return; + } + + fl->fl_nfrags = nfrags; + fl->fl_fragsz = fragsz; + fl->fl_msgid = msgid; + fl->fl_time = nni_clock(); + + // Set the missing mask. 
+ memset(fl->fl_missing, 0xff, nfrags / 8); + fl->fl_missing[nfrags / 8] |= ((1 << (nfrags % 8)) - 1); + } + if ((nfrags != fl->fl_nfrags) || (fragsz != fl->fl_fragsz) || + (fragno >= nfrags) || (fragsz == 0) || (nfrags == 0) || + ((fragno != (nfrags - 1)) && (len != fragsz))) { + // Protocol error, message parameters changed. + zt_pipe_close_err( + p, NNG_EPROTO, zt_err_proto, "Invalid message parameters"); + zt_fraglist_clear(fl); + return; + } + + bit = (uint8_t)(1 << (fragno % 8)); + if ((fl->fl_missing[fragno / 8] & bit) == 0) { + // We've already got this fragment, ignore it. We don't + // bother to check for changed data. + return; + } + + fl->fl_missing[fragno / 8] &= ~(bit); + body = nni_msg_body(fl->fl_msg); + body += fragno * fragsz; + memcpy(body, data, len); + if (fragno == (nfrags - 1)) { + // Last frag, maybe shorten the message. + nni_msg_chop(fl->fl_msg, (fragsz - len)); + if ((nni_msg_len(fl->fl_msg) > p->zp_rcvmax) && + (p->zp_rcvmax > 0)) { + // Strict enforcement of max recv. + zt_fraglist_clear(fl); + // Just discard the message. + return; + } + } + + for (i = 0; i < ((nfrags + 7) / 8); i++) { + if (fl->fl_missing[i]) { + return; + } + } + + // We got all fragments... try to send it up. + fl->fl_ready = 1; + zt_pipe_dorecv(p); +} + +static void +zt_pipe_recv_ping(zt_pipe *p, const uint8_t *data, size_t len) +{ + NNI_ARG_UNUSED(data); + + if (len != zt_size_ping) { + zt_pipe_send_err(p, zt_err_proto, "Incorrect ping size"); + return; + } + zt_pipe_send_pong(p); +} + +static void +zt_pipe_recv_pong(zt_pipe *p, const uint8_t *data, size_t len) +{ + NNI_ARG_UNUSED(data); + + if (len != zt_size_pong) { + zt_pipe_send_err(p, zt_err_proto, "Incorrect pong size"); + } +} + +static void +zt_pipe_recv_disc_req(zt_pipe *p, const uint8_t *data, size_t len) +{ + nni_aio *aio; + NNI_ARG_UNUSED(data); + NNI_ARG_UNUSED(len); + + // NB: lock held already. + // Don't bother to check the length, going to disconnect anyway. 
	if ((aio = p->zp_user_rxaio) != NULL) {
		p->zp_user_rxaio = NULL;
		p->zp_closed     = true;
		nni_aio_finish_error(aio, NNG_ECLOSED);
	}
}

// zt_pipe_recv_error handles an error frame on an established pipe by
// failing any pending receive and marking the pipe closed.
static void
zt_pipe_recv_error(zt_pipe *p, const uint8_t *data, size_t len)
{
	nni_aio *aio;
	NNI_ARG_UNUSED(data);
	NNI_ARG_UNUSED(len);

	// Perhaps we should log an error message, but at the end of
	// the day, the details are just not that interesting.
	if ((aio = p->zp_user_rxaio) != NULL) {
		p->zp_user_rxaio = NULL;
		p->zp_closed     = true;
		nni_aio_finish_error(aio, NNG_ETRANERR);
	}
}

// This function is called when we have determined that a frame has
// arrived for a pipe.  The remote and local addresses were both
// matched by the caller.
static void
zt_pipe_virtual_recv(zt_pipe *p, uint8_t op, const uint8_t *data, size_t len)
{
	// We got data, so update our recv time.
	p->zp_last_recv = nni_clock();
	p->zp_ping_try  = 0;

	switch (op) {
	case zt_op_data:
		zt_pipe_recv_data(p, data, len);
		return;
	case zt_op_disc_req:
		zt_pipe_recv_disc_req(p, data, len);
		return;
	case zt_op_ping:
		zt_pipe_recv_ping(p, data, len);
		return;
	case zt_op_pong:
		zt_pipe_recv_pong(p, data, len);
		return;
	case zt_op_error:
		zt_pipe_recv_error(p, data, len);
		return;
	case zt_op_conn_req:
		// Peer retransmitted its request; re-ack it.
		zt_pipe_send_conn_ack(p);
		return;
	}
}

// This function is called when a frame arrives on the
// *virtual* network.
static void
zt_virtual_recv(ZT_Node *node, void *userptr, void *thr, uint64_t nwid,
    void **netptr, uint64_t srcmac, uint64_t dstmac, unsigned int ethertype,
    unsigned int vlanid, const void *payload, unsigned int len)
{
	zt_node *      ztn = userptr;
	uint8_t        op;
	const uint8_t *data = payload;
	uint16_t       version;
	uint32_t       rport;
	uint32_t       lport;
	zt_ep *        ep;
	zt_pipe *      p;
	uint64_t       raddr;
	uint64_t       laddr;

	NNI_ARG_UNUSED(node);
	NNI_ARG_UNUSED(thr);
	NNI_ARG_UNUSED(netptr);

	// Discard frames that are not ours, too short, or that have the
	// reserved header bytes set.
	if ((ethertype != zt_ethertype) || (len < zt_size_headers) ||
	    (data[zt_offset_flags] != 0) || (data[zt_offset_zero1] != 0) ||
	    (data[zt_offset_zero2] != 0)) {
		return;
	}
	NNI_GET16(data + zt_offset_version, version);
	if (version != zt_version) {
		return;
	}
	if (vlanid != 0) { // for now we only use vlan 0.
		return;
	}

	op = data[zt_offset_op];

	ZT_GET24(data + zt_offset_dst_port, lport);
	ZT_GET24(data + zt_offset_src_port, rport);

	// Addresses combine a 40-bit node ID with a 24-bit port.
	raddr = zt_mac_to_node(srcmac, nwid);
	raddr <<= 24;
	raddr |= rport;

	laddr = zt_mac_to_node(dstmac, nwid);
	laddr <<= 24;
	laddr |= lport;

	// NB: We are holding the zt_lock.

	// Look up a pipe, but also we use this chance to check that
	// the source address matches what the pipe was established with.
	// If the pipe does not match then we nak it.  Note that pipes can
	// appear on the znode twice (loopback), so we have to be careful
	// to check the entire set of parameters, and to check for server
	// vs. client pipes separately.

	// If its a local address match on a client pipe, process it.
	if ((zt_hash_find(ztn->zn_lpipes, laddr, (void *) &p) == 0) &&
	    (p->zp_nwid == nwid) && (p->zp_raddr == raddr)) {
		zt_pipe_virtual_recv(p, op, data, len);
		return;
	}

	// If its a remote address match on a server pipe, process it.
	if ((zt_hash_find(ztn->zn_rpipes, raddr, (void *) &p) == 0) &&
	    (p->zp_nwid == nwid) && (p->zp_laddr == laddr)) {
		zt_pipe_virtual_recv(p, op, data, len);
		return;
	}

	// No pipe, so look for an endpoint.
	if ((zt_hash_find(ztn->zn_eps, laddr, (void **) &ep) == 0) &&
	    (ep->ze_nwid == nwid)) {
		// direct this to an endpoint.
		zt_ep_virtual_recv(ep, op, raddr, data, len);
		return;
	}

	// We have a request for which we have no listener, and no
	// pipe.  For some of these we send back a NAK, but for others
	// we just drop the frame.
	switch (op) {
	case zt_op_conn_req:
		// No listener.  Connection refused.
		zt_send_err(ztn, nwid, raddr, laddr, zt_err_refused,
		    "Connection refused");
		return;
	case zt_op_data:
	case zt_op_ping:
	case zt_op_conn_ack:
		zt_send_err(ztn, nwid, raddr, laddr, zt_err_notconn,
		    "Connection not found");
		break;
	case zt_op_error:
	case zt_op_pong:
	case zt_op_disc_req:
	default:
		// Just drop these.
		break;
	}
}

// zt_event_cb receives core node events; we currently take no action
// on any of them.
static void
zt_event_cb(ZT_Node *node, void *userptr, void *thr, enum ZT_Event event,
    const void *payload)
{
	NNI_ARG_UNUSED(node);
	NNI_ARG_UNUSED(userptr);
	NNI_ARG_UNUSED(thr);
	NNI_ARG_UNUSED(payload);

	switch (event) {
	case ZT_EVENT_ONLINE:  // Connected to the virtual net.
	case ZT_EVENT_UP:      // Node initialized (may not be connected).
	case ZT_EVENT_DOWN:    // Teardown of the node.
	case ZT_EVENT_OFFLINE: // Removal of the node from the net.
	case ZT_EVENT_TRACE:   // Local trace events.
		// printf("TRACE: %s\n", (const char *) payload);
		break;
	case ZT_EVENT_REMOTE_TRACE: // Remote trace, not supported.
	default:
		break;
	}
}

// File name templates for persisted state objects, indexed by
// ZT_StateObjectType.  NULL entries are never persisted.
static const char *zt_files[] = {
	// clang-format off
	NULL, // none, i.e. not used at all
	"identity.public",
	"identity.secret",
	"planet",
	"moon.%llx",
	NULL, // peer, e.g. peers.d/<ID> -- we don't persist this
	"network.%llx",
	// clang-format on
};

// In-memory fallback for state objects when no home directory is set.
static struct {
	size_t len;
	void * data;
} zt_ephemeral_state[ZT_STATE_OBJECT_NETWORK_CONFIG + 1];

// zt_state_put persists (or deletes, when len < 0) a core state object,
// either to the node's home directory or to ephemeral memory.
static void
zt_state_put(ZT_Node *node, void *userptr, void *thr,
    enum ZT_StateObjectType objtype, const uint64_t objid[2], const void *data,
    int len)
{
	zt_node *   ztn = userptr;
	char *      path;
	const char *template;
	char        fname[32];

	NNI_ARG_UNUSED(node);
	NNI_ARG_UNUSED(thr);
	NNI_ARG_UNUSED(objid); // only use global files

	if ((objtype > ZT_STATE_OBJECT_NETWORK_CONFIG) ||
	    ((template = zt_files[(int) objtype]) == NULL)) {
		return;
	}

	(void) snprintf(fname, sizeof(fname), template,
	    (unsigned long long) objid[0], (unsigned long long) objid[1]);

	// If we have no valid path, then we just use ephemeral data.
	// Note that for moons, and so forth, we wind up just storing them
	// all in the same place, but it does not matter since we don't
	// really persist them anyway.
+ if (strlen(ztn->zn_path) == 0) { + void * ndata = NULL; + void * odata = zt_ephemeral_state[objtype].data; + size_t olen = zt_ephemeral_state[objtype].len; + if ((len >= 0) && ((ndata = nni_alloc(len)) != NULL)) { + memcpy(ndata, data, len); + zt_ephemeral_state[objtype].data = ndata; + zt_ephemeral_state[objtype].len = len; + } else if (len < 0) { + zt_ephemeral_state[objtype].data = NULL; + zt_ephemeral_state[objtype].len = 0; + } + + if (olen > 0) { + nni_free(odata, olen); + } + return; + } + + if ((path = nni_file_join(ztn->zn_path, fname)) == NULL) { + return; + } + + if (len < 0) { + (void) nni_file_delete(path); + } else { + (void) nni_file_put(path, data, len); + } + nni_strfree(path); +} + +static int +zt_state_get(ZT_Node *node, void *userptr, void *thr, + enum ZT_StateObjectType objtype, const uint64_t objid[2], void *data, + unsigned int len) +{ + zt_node *ztn = userptr; + char * path; + char fname[32]; + const char *template; + size_t sz; + void * buf; + + NNI_ARG_UNUSED(node); + NNI_ARG_UNUSED(thr); + NNI_ARG_UNUSED(objid); // we only use global files + + if ((objtype > ZT_STATE_OBJECT_NETWORK_CONFIG) || + ((template = zt_files[(int) objtype]) == NULL)) { + return (-1); + } + snprintf(fname, sizeof(fname), template, objid[0], objid[1]); + + // If no base directory, we are using ephemeral data. 
	if (strlen(ztn->zn_path) == 0) {
		if (zt_ephemeral_state[objtype].data == NULL) {
			return (-1);
		}
		if (zt_ephemeral_state[objtype].len > len) {
			// Caller's buffer is too small.
			return (-1);
		}
		len = zt_ephemeral_state[objtype].len;
		memcpy(data, zt_ephemeral_state[objtype].data, len);
		return (len);
	}

	if ((path = nni_file_join(ztn->zn_path, fname)) == NULL) {
		return (-1);
	}

	if (nni_file_get(path, &buf, &sz) != 0) {
		nni_strfree(path);
		return (-1);
	}
	nni_strfree(path);
	if (sz > len) {
		nni_free(buf, sz);
		return (-1);
	}
	memcpy(data, buf, sz);
	nni_free(buf, sz);
	return ((int) sz);
}

// Header prepended to each outbound UDP payload buffer; keeps the
// neutral socket address alive for the duration of the aio.
typedef struct zt_send_hdr {
	nni_sockaddr sa;
	size_t       len;
} zt_send_hdr;

// This function is called when ZeroTier desires to send a
// physical frame.  The data is a UDP payload, the rest of the
// payload should be set over vanilla UDP.
static int
zt_wire_packet_send(ZT_Node *node, void *userptr, void *thr, int64_t socket,
    const struct sockaddr_storage *remaddr, const void *data, unsigned int len,
    unsigned int ttl)
{
	nni_aio *            aio;
	nni_sockaddr         addr;
	struct sockaddr_in * sin  = (void *) remaddr;
	struct sockaddr_in6 *sin6 = (void *) remaddr;
	zt_node *            ztn  = userptr;
	nni_plat_udp *       udp;
	uint8_t *            buf;
	zt_send_hdr *        hdr;
	nni_iov              iov;

	NNI_ARG_UNUSED(node);
	NNI_ARG_UNUSED(thr);
	NNI_ARG_UNUSED(socket);
	NNI_ARG_UNUSED(ttl);

	// Kind of unfortunate, but we have to convert the
	// sockaddr to a neutral form, and then back again in
	// the platform layer.
	switch (sin->sin_family) {
	case AF_INET:
		addr.s_in.sa_family = NNG_AF_INET;
		addr.s_in.sa_port   = sin->sin_port;
		addr.s_in.sa_addr   = sin->sin_addr.s_addr;
		udp                 = ztn->zn_udp4;
		break;
	case AF_INET6:
		addr.s_in6.sa_family = NNG_AF_INET6;
		addr.s_in6.sa_port   = sin6->sin6_port;
		udp                  = ztn->zn_udp6;
		memcpy(addr.s_in6.sa_addr, sin6->sin6_addr.s6_addr, 16);
		break;
	default:
		// No way to understand the address.
		return (-1);
	}

	if (nni_aio_alloc(&aio, NULL, NULL) != 0) {
		// Out of memory
		return (-1);
	}
	if ((buf = nni_alloc(sizeof(*hdr) + len)) == NULL) {
		nni_aio_free(aio);
		return (-1);
	}

	// The header lives at the front of the allocation; the UDP
	// payload immediately follows it.
	hdr = (void *) buf;
	buf += sizeof(*hdr);

	memcpy(buf, data, len);
	nni_aio_set_data(aio, 0, hdr);
	hdr->sa  = addr;
	hdr->len = len;
	nni_aio_set_input(aio, 0, &hdr->sa);

	iov.iov_buf = buf;
	iov.iov_len = len;
	nni_aio_set_iov(aio, 1, &iov);

	// This should be non-blocking/best-effort, so while
	// not great that we're holding the lock, also not tragic.
	nni_plat_udp_send(udp, aio);

	// UDP sending is "fast" on all platforms -- given that its
	// best effort only, this will complete immediately, resulting
	// in either a message on the wire, or a discarded frame.  We don't
	// care which.  (There may be a few thread context switches, but
	// none of them are going to have to wait for some unbounded time.)
	nni_aio_wait(aio);
	nni_aio_free(aio);
	nni_free(hdr, hdr->len + sizeof(*hdr));

	return (0);
}

// Callback table handed to ZT_Node_new.
static struct ZT_Node_Callbacks zt_callbacks = {
	.version                      = 0,
	.statePutFunction             = zt_state_put,
	.stateGetFunction             = zt_state_get,
	.wirePacketSendFunction       = zt_wire_packet_send,
	.virtualNetworkFrameFunction  = zt_virtual_recv,
	.virtualNetworkConfigFunction = zt_virtual_config,
	.eventCallback                = zt_event_cb,
	.pathCheckFunction            = NULL,
	.pathLookupFunction           = NULL,
};

// zt_node_destroy tears down a node: stops I/O, joins the background
// thread, deletes the core node, and releases all resources.
static void
zt_node_destroy(zt_node *ztn)
{
	nni_aio_stop(ztn->zn_rcv4_aio);
	nni_aio_stop(ztn->zn_rcv6_aio);

	// Wait for background thread to exit!
	nni_thr_fini(&ztn->zn_bgthr);

	if (ztn->zn_znode != NULL) {
		ZT_Node_delete(ztn->zn_znode);
	}

	if (ztn->zn_udp4 != NULL) {
		nni_plat_udp_close(ztn->zn_udp4);
	}
	if (ztn->zn_udp6 != NULL) {
		nni_plat_udp_close(ztn->zn_udp6);
	}

	if (ztn->zn_rcv4_buf != NULL) {
		nni_free(ztn->zn_rcv4_buf, zt_rcv_bufsize);
	}
	if (ztn->zn_rcv6_buf != NULL) {
		nni_free(ztn->zn_rcv6_buf, zt_rcv_bufsize);
	}
	if (ztn->zn_flock != NULL) {
		nni_file_unlock(ztn->zn_flock);
	}
	nni_aio_free(ztn->zn_rcv4_aio);
	nni_aio_free(ztn->zn_rcv6_aio);
	// NOTE(review): zn_ports is initialized in zt_node_create but not
	// finalized here -- looks like a leak of that hash; verify.
	zt_hash_fini(ztn->zn_eps);
	zt_hash_fini(ztn->zn_lpipes);
	zt_hash_fini(ztn->zn_rpipes);
	nni_cv_fini(&ztn->zn_bgcv);
	NNI_FREE_STRUCT(ztn);
}

// zt_node_create allocates and starts a node rooted at the given home
// directory path ("" means fully ephemeral state).
static int
zt_node_create(zt_node **ztnp, const char *path)
{
	zt_node *          ztn;
	nng_sockaddr       sa4;
	nng_sockaddr       sa6;
	int                rv;
	enum ZT_ResultCode zrv;
	nni_iov            iov;

	// XXX: Right now we depend on having both IPv6 and IPv4 available.
	// Probably we should support coping with the lack of either of them.

	// We want to bind to any address we can (for now).
	memset(&sa4, 0, sizeof(sa4));
	sa4.s_in.sa_family = NNG_AF_INET;
	memset(&sa6, 0, sizeof(sa6));
	sa6.s_in6.sa_family = NNG_AF_INET6;

	if ((ztn = NNI_ALLOC_STRUCT(ztn)) == NULL) {
		return (NNG_ENOMEM);
	}
	NNI_LIST_INIT(&ztn->zn_eplist, zt_ep, ze_link);
	NNI_LIST_INIT(&ztn->zn_plist, zt_pipe, zp_link);
	nni_cv_init(&ztn->zn_bgcv, &zt_lk);
	// NOTE(review): nni_aio_alloc return values are ignored here --
	// a NULL aio on ENOMEM would fault later; verify this is safe.
	nni_aio_alloc(&ztn->zn_rcv4_aio, zt_node_rcv4_cb, ztn);
	nni_aio_alloc(&ztn->zn_rcv6_aio, zt_node_rcv6_cb, ztn);

	if (((ztn->zn_rcv4_buf = nni_alloc(zt_rcv_bufsize)) == NULL) ||
	    ((ztn->zn_rcv6_buf = nni_alloc(zt_rcv_bufsize)) == NULL)) {
		zt_node_destroy(ztn);
		return (NNG_ENOMEM);
	}
	if (((rv = zt_hash_init(&ztn->zn_ports)) != 0) ||
	    ((rv = zt_hash_init(&ztn->zn_eps)) != 0) ||
	    ((rv = zt_hash_init(&ztn->zn_lpipes)) != 0) ||
	    ((rv = zt_hash_init(&ztn->zn_rpipes)) != 0) ||
	    ((rv = nni_thr_init(&ztn->zn_bgthr, zt_bgthr, ztn)) != 0) ||
	    ((rv = nni_plat_udp_open(&ztn->zn_udp4, &sa4)) != 0) ||
	    ((rv = nni_plat_udp_open(&ztn->zn_udp6, &sa6)) != 0)) {
		zt_node_destroy(ztn);
		return (rv);
	}
	nni_thr_set_name(&ztn->zn_bgthr, "nng:zt");

	// A non-empty home directory is locked to keep two processes
	// from sharing (and corrupting) the same node state.
	if (strlen(path) > 0) {
		char *lkfile;
		if ((lkfile = nni_file_join(path, "lock")) == NULL) {
			zt_node_destroy(ztn);
			return (NNG_ENOMEM);
		}

		if ((rv = nni_file_lock(lkfile, &ztn->zn_flock)) != 0) {
			zt_node_destroy(ztn);
			nni_strfree(lkfile);
			return (rv);
		}
		nni_strfree(lkfile);
	}

	// Setup for dynamic ephemeral port allocations.  We
	// set the range to allow for ephemeral ports, but not
	// higher than the max port, and starting with an
	// initial random value.  Note that this should give us
	// about 8 million possible ephemeral ports.
	zt_hash_limits(ztn->zn_ports, zt_ephemeral, zt_max_port,
	    (nni_random() % (zt_max_port - zt_ephemeral)) + zt_ephemeral);

	nni_strlcpy(ztn->zn_path, path, sizeof(ztn->zn_path));
	zrv = ZT_Node_new(&ztn->zn_znode, ztn, NULL, &zt_callbacks, zt_now());
	if (zrv != ZT_RESULT_OK) {
		zt_node_destroy(ztn);
		return (zt_result(zrv));
	}

	nni_list_append(&zt_nodes, ztn);

	ztn->zn_self = ZT_Node_address(ztn->zn_znode);

	nni_thr_run(&ztn->zn_bgthr);

	// Schedule an initial background run.
	zt_node_resched(ztn, 1);

	// Schedule receive
	iov.iov_buf = ztn->zn_rcv4_buf;
	iov.iov_len = zt_rcv_bufsize;
	nni_aio_set_iov(ztn->zn_rcv4_aio, 1, &iov);
	nni_aio_set_input(ztn->zn_rcv4_aio, 0, &ztn->zn_rcv4_addr);
	iov.iov_buf = ztn->zn_rcv6_buf;
	iov.iov_len = zt_rcv_bufsize;
	nni_aio_set_iov(ztn->zn_rcv6_aio, 1, &iov);
	nni_aio_set_input(ztn->zn_rcv6_aio, 0, &ztn->zn_rcv6_addr);

	nni_plat_udp_recv(ztn->zn_udp4, ztn->zn_rcv4_aio);
	nni_plat_udp_recv(ztn->zn_udp6, ztn->zn_rcv6_aio);

	*ztnp = ztn;
	return (0);
}

// zt_walk_moons is a file-walk callback that orbits any moon whose
// definition (moon.<hexid>) is found in the node's home directory.
static int
zt_walk_moons(const char *path, void *arg)
{
	zt_node *   ztn = arg;
	const char *bn  = nni_file_basename(path);
	char *      end;
	uint64_t    moonid;

	if (strncmp(bn, "moon.", 5) != 0) {
		return (NNI_FILE_WALK_CONTINUE);
	}
	if (((moonid = (uint64_t) strtoull(bn + 5, &end, 16)) != 0) &&
	    (*end == '\0')) {
		ZT_Node_orbit(ztn->zn_znode, NULL, moonid, 0);
	}
	return (NNI_FILE_WALK_CONTINUE);
}

// zt_node_find locates (or creates) the node for the endpoint's home
// directory, attaches the endpoint to it, and joins its network.
static int
zt_node_find(zt_ep *ep)
{
	zt_node *                ztn;
	int                      rv;
	ZT_VirtualNetworkConfig *cf;

	NNI_LIST_FOREACH (&zt_nodes, ztn) {
		if (strcmp(ep->ze_home, ztn->zn_path) == 0) {
			goto done;
		}
	}

	// We didn't find a node, so make one.  And try to
	// initialize it.
	if ((rv = zt_node_create(&ztn, ep->ze_home)) != 0) {
		return (rv);
	}

	// Load moons
	if (strlen(ep->ze_home) != 0) {
		(void) nni_file_walk(ep->ze_home, zt_walk_moons, ztn,
		    NNI_FILE_WALK_FILES_ONLY | NNI_FILE_WALK_SHALLOW);
	}

done:

	ep->ze_ztn = ztn;
	if (nni_list_node_active(&ep->ze_link)) {
		nni_list_node_remove(&ep->ze_link);
	}
	nni_list_append(&ztn->zn_eplist, ep);

	(void) ZT_Node_join(ztn->zn_znode, ep->ze_nwid, ztn, NULL);

	// If the network is already configured, pick up its MTU now.
	if ((cf = ZT_Node_networkConfig(ztn->zn_znode, ep->ze_nwid)) != NULL) {
		NNI_ASSERT(cf->nwid == ep->ze_nwid);
		ep->ze_mtu = cf->mtu;
		ZT_Node_freeQueryResult(ztn->zn_znode, cf);
	}

	return (0);
}

// Transport-wide initialization: set up the global lock and node list.
static int
zt_tran_init(void)
{
	nni_mtx_init(&zt_lk);
	NNI_LIST_INIT(&zt_nodes, zt_node, zn_link);
	return (0);
}

// Transport-wide teardown: destroy all nodes and free ephemeral state.
static void
zt_tran_fini(void)
{
	zt_node *ztn;

	nni_mtx_lock(&zt_lk);
	while ((ztn = nni_list_first(&zt_nodes)) != 0) {
		nni_list_remove(&zt_nodes, ztn);
		ztn->zn_closed = true;
		nni_cv_wake(&ztn->zn_bgcv);
		// Drop the lock while destroying (destroy joins the
		// background thread, which takes the lock itself).
		nni_mtx_unlock(&zt_lk);

		zt_node_destroy(ztn);

		nni_mtx_lock(&zt_lk);
	}
	nni_mtx_unlock(&zt_lk);

	for (int i = 0; i <= ZT_STATE_OBJECT_NETWORK_CONFIG; i++) {
		if (zt_ephemeral_state[i].len > 0) {
			nni_free(zt_ephemeral_state[i].data,
			    zt_ephemeral_state[i].len);
		}
	}
	NNI_ASSERT(nni_list_empty(&zt_nodes));
	nni_mtx_fini(&zt_lk);
}

// Option validators (no state is written; they only check values).
static int
zt_check_recvmaxsz(const void *v, size_t sz, nni_type t)
{
	return (nni_copyin_size(NULL, v, sz, 0, NNI_MAXSZ, t));
}

static int
zt_check_orbit(const void *v, size_t sz, nni_type t)
{
	NNI_ARG_UNUSED(v);
	if ((t != NNI_TYPE_UINT64) && (t != NNI_TYPE_OPAQUE)) {
		return (NNG_EBADTYPE);
	}
	// Either a moon ID alone, or a moon ID plus a seed node.
	if (sz != sizeof(uint64_t) && sz != sizeof(uint64_t) * 2) {
		return (NNG_EINVAL);
	}
	return (0);
}

static int
zt_check_deorbit(const void *v, size_t sz, nni_type t)
{
	return (nni_copyin_u64(NULL, v, sz, t));
}

static int
zt_check_string(const void *v, size_t sz, nni_type t)
{
	size_t len;

	if ((t != NNI_TYPE_OPAQUE) && (t != NNI_TYPE_STRING)) {
		return (NNG_EBADTYPE);
	}
	// Must be NUL terminated within sz, and fit in an address.
	len = nni_strnlen(v, sz);
	if ((len >= sz) || (len >= NNG_MAXADDRLEN)) {
		return (NNG_EINVAL);
	}
	return (0);
}

static int
zt_check_time(const void *v, size_t sz, nni_type t)
{
	return (nni_copyin_ms(NULL, v, sz, t));
}

static int
zt_check_tries(const void *v, size_t sz, nni_type t)
{
	return (nni_copyin_int(NULL, v, sz, 0, 1000000, t));
}

// zt_pipe_close marks the pipe closed, aborts any pending receive, and
// asks the peer to disconnect.
static void
zt_pipe_close(void *arg)
{
	zt_pipe *p = arg;
	nni_aio *aio;

	nni_mtx_lock(&zt_lk);
	p->zp_closed = true;
	nni_aio_close(p->zp_ping_aio);
	if ((aio = p->zp_user_rxaio) != NULL) {
		p->zp_user_rxaio = NULL;
		nni_aio_finish_error(aio, NNG_ECLOSED);
	}
	zt_pipe_send_disc_req(p);
	nni_mtx_unlock(&zt_lk);
}

// zt_pipe_init records the owning nni_pipe.
static int
zt_pipe_init(void *arg, nni_pipe *npipe)
{
	zt_pipe *p = arg;
	p->zp_npipe = npipe;
	return (0);
}

// zt_pipe_fini releases a pipe: deregisters its addresses and frees
// all of its buffers.
static void
zt_pipe_fini(void *arg)
{
	zt_pipe *p   = arg;
	zt_node *ztn = p->zp_ztn;

	nni_aio_free(p->zp_ping_aio);

	// This tosses the connection details and all state.
	nni_mtx_lock(&zt_lk);
	zt_hash_remove(ztn->zn_ports, p->zp_laddr & zt_port_mask);
	zt_hash_remove(ztn->zn_lpipes, p->zp_laddr);
	zt_hash_remove(ztn->zn_rpipes, p->zp_raddr);
	nni_mtx_unlock(&zt_lk);

	for (int i = 0; i < zt_recvq; i++) {
		zt_fraglist_free(&p->zp_recvq[i]);
	}
	nni_free(p->zp_send_buf, ZT_MAX_MTU);
	NNI_FREE_STRUCT(p);
}

// zt_pipe_reap schedules asynchronous destruction, exactly once.
static void
zt_pipe_reap(zt_pipe *p)
{
	if (!nni_atomic_flag_test_and_set(&p->zp_reaped)) {
		nni_reap(&p->zp_reap, zt_pipe_fini, p);
	}
}

// zt_pipe_alloc creates a pipe for an established connection between
// raddr and laddr, registering it on the listener (rpipes) or dialer
// (lpipes) side as appropriate.
static int
zt_pipe_alloc(
    zt_pipe **pipep, zt_ep *ep, uint64_t raddr, uint64_t laddr, bool listener)
{
	zt_pipe *p;
	int      rv;
	zt_node *ztn = ep->ze_ztn;
	int      i;
	size_t   maxfrag;
	size_t   maxfrags = 0;

	if ((p = NNI_ALLOC_STRUCT(p)) == NULL) {
		return (NNG_ENOMEM);
	}
	if ((p->zp_send_buf = nni_alloc(ZT_MAX_MTU)) == NULL) {
		NNI_FREE_STRUCT(p);
		return (NNG_ENOMEM);
	}
	p->zp_ztn        = ztn;
	p->zp_raddr      = raddr;
	p->zp_laddr      = laddr;
	p->zp_proto      = ep->ze_proto;
	p->zp_nwid       = ep->ze_nwid;
	p->zp_mtu        = ep->ze_mtu;
	p->zp_rcvmax     = ep->ze_rcvmax;
	p->zp_ping_tries = ep->ze_ping_tries;
	p->zp_ping_time  = ep->ze_ping_time;
	p->zp_next_msgid = (uint16_t) nni_random();
	p->zp_ping_try   = 0;
	nni_atomic_flag_reset(&p->zp_reaped);

	if (listener) {
		// listener
		rv = zt_hash_insert(ztn->zn_rpipes, raddr, p);
	} else {
		// dialer
		rv = zt_hash_insert(ztn->zn_lpipes, laddr, p);
	}
	if ((rv != 0) ||
	    ((rv = nni_aio_alloc(&p->zp_ping_aio, zt_pipe_ping_cb, p)) != 0)) {
		zt_pipe_reap(p);
		return (rv);
	}

	// The largest fragment we can accept on this pipe.  The MTU is
	// configurable by the network administrator.  Probably ZT would
	// pass a larger one (up to MAXMTU), but we honor the network
	// administration's configuration.
	maxfrag = p->zp_mtu - zt_offset_data_data;

	// The largest fragment count we can accept on this pipe.
	// This is rounded up to account for alignment.
	if (p->zp_rcvmax > 0) {
		maxfrags = (p->zp_rcvmax + (maxfrag - 1)) / maxfrag;
	}

	// Fragment numbers are 16 bits on the wire; clamp (and treat an
	// unlimited rcvmax as the 16-bit maximum).
	if ((maxfrags > 0xffff) || (maxfrags == 0)) {
		maxfrags = 0xffff;
	}

	for (i = 0; i < zt_recvq; i++) {
		zt_fraglist *fl  = &p->zp_recvq[i];
		fl->fl_time      = NNI_TIME_ZERO;
		fl->fl_msgid     = 0;
		fl->fl_ready     = 0;
		fl->fl_missingsz = (maxfrags + 7) / 8;
		fl->fl_missing   = nni_alloc(fl->fl_missingsz);
		if (fl->fl_missing == NULL) {
			zt_pipe_reap(p);
			return (NNG_ENOMEM);
		}
	}

	*pipep = p;
	return (0);
}

// zt_pipe_send fragments the message (header first, then body) and
// sends each fragment synchronously over the virtual network.
static void
zt_pipe_send(void *arg, nni_aio *aio)
{
	// As we are sending UDP, and there is no callback to worry
	// about, we just go ahead and send out a stream of messages
	// synchronously.
	zt_pipe *p    = arg;
	uint8_t *data = p->zp_send_buf;
	size_t   offset;
	uint16_t id;
	uint16_t nfrags;
	uint16_t fragno;
	size_t   fragsz;
	size_t   bytes;
	nni_msg *m;

	if (nni_aio_begin(aio) != 0) {
		return;
	}
	if ((m = nni_aio_get_msg(aio)) == NULL) {
		nni_aio_finish_error(aio, NNG_EINVAL);
		return;
	}

	nni_mtx_lock(&zt_lk);

	if (p->zp_closed) {
		nni_mtx_unlock(&zt_lk);
		nni_aio_finish_error(aio, NNG_ECLOSED);
		return;
	}

	fragsz = p->zp_mtu - zt_offset_data_data;
	NNI_ASSERT(fragsz < 0x10000); // Because zp_mtu is 16 bits

	bytes = nni_msg_header_len(m) + nni_msg_len(m);
	if (bytes >= (0xfffe * fragsz)) {
		nni_aio_finish_error(aio, NNG_EMSGSIZE);
		nni_mtx_unlock(&zt_lk);
		return;
	}
	// above check means nfrags will fit in 16-bits.
	nfrags = (uint16_t)((bytes + (fragsz - 1)) / fragsz);

	// get the next message ID, but skip 0
	if ((id = p->zp_next_msgid++) == 0) {
		id = p->zp_next_msgid++;
	}

	offset = 0;
	fragno = 0;
	do {
		uint8_t *dest    = data + zt_offset_data_data;
		size_t   room    = fragsz;
		size_t   fraglen = 0;
		size_t   len;

		// Prepend the header first.
		if ((len = nni_msg_header_len(m)) > 0) {
			if (len > fragsz) {
				// This shouldn't happen!  SP headers are
				// supposed to be quite small.
				nni_aio_finish_error(aio, NNG_EMSGSIZE);
				nni_mtx_unlock(&zt_lk);
				return;
			}
			memcpy(dest, nni_msg_header(m), len);
			dest += len;
			room -= len;
			offset += len;
			fraglen += len;
			// Clear the header so only the first fragment
			// carries it.
			nni_msg_header_clear(m);
		}

		len = nni_msg_len(m);
		if (len > room) {
			len = room;
		}
		memcpy(dest, nni_msg_body(m), len);

		nng_msg_trim(m, len);
		NNI_PUT16(data + zt_offset_data_id, id);
		NNI_PUT16(data + zt_offset_data_fragsz, (uint16_t) fragsz);
		NNI_PUT16(data + zt_offset_data_frag, fragno);
		NNI_PUT16(data + zt_offset_data_nfrag, nfrags);
		offset += len;
		fraglen += len;
		fragno++;
		zt_send(p->zp_ztn, p->zp_nwid, zt_op_data, p->zp_raddr,
		    p->zp_laddr, data, fraglen + zt_offset_data_data);
	} while (nni_msg_len(m) != 0);
	nni_mtx_unlock(&zt_lk);

	// NB, We never bothered to call nn_aio_sched, because we run this
	// synchronously, relying on UDP to simply discard messages if we
	// cannot deliver them.  This means that pipe send operations with
	// this transport are not cancellable.

	nni_aio_set_msg(aio, NULL);
	nni_msg_free(m);
	nni_aio_finish(aio, 0, offset);
}

// zt_pipe_cancel_recv aborts a pending user receive with rv.
static void
zt_pipe_cancel_recv(nni_aio *aio, void *arg, int rv)
{
	zt_pipe *p = arg;
	nni_mtx_lock(&zt_lk);
	if (p->zp_user_rxaio == aio) {
		p->zp_user_rxaio = NULL;
		nni_aio_finish_error(aio, rv);
	}
	nni_mtx_unlock(&zt_lk);
}

// zt_fraglist_clear resets a reassembly slot, dropping any partial
// message.
static void
zt_fraglist_clear(zt_fraglist *fl)
{
	nni_msg *msg;

	fl->fl_ready = 0;
	fl->fl_msgid = 0;
	fl->fl_time  = NNI_TIME_ZERO;
	if ((msg = fl->fl_msg) != NULL) {
		fl->fl_msg = NULL;
		nni_msg_free(msg);
	}
	memset(fl->fl_missing, 0, fl->fl_missingsz);
}

// zt_fraglist_free clears a slot and releases its missing-bitmap.
static void
zt_fraglist_free(zt_fraglist *fl)
{
	zt_fraglist_clear(fl);
	nni_free(fl->fl_missing, fl->fl_missingsz);
	fl->fl_missing = NULL;
}

// zt_pipe_dorecv delivers any fully-reassembled message to the pending
// user receive, discarding stale partial reassemblies along the way.
static void
zt_pipe_dorecv(zt_pipe *p)
{
	nni_aio *aio = p->zp_user_rxaio;
	nni_time now = nni_clock();

	if (aio == NULL) {
		return;
	}

	for (int i = 0; i < zt_recvq; i++) {
		zt_fraglist *fl = &p->zp_recvq[i];
		nni_msg *    msg;

		if (now > (fl->fl_time + zt_recv_stale)) {
			// fragment list is stale, clean it.
			zt_fraglist_clear(fl);
			continue;
		}
		if (!fl->fl_ready) {
			continue;
		}

		// Got data.  Let's pass it up.
+ msg = fl->fl_msg; + fl->fl_msg = NULL; + NNI_ASSERT(msg != NULL); + + p->zp_user_rxaio = NULL; + nni_aio_finish_msg(aio, msg); + zt_fraglist_clear(fl); + return; + } +} + +static void +zt_pipe_recv(void *arg, nni_aio *aio) +{ + zt_pipe *p = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&zt_lk); + if (p->zp_closed) { + nni_mtx_unlock(&zt_lk); + nni_aio_finish_error(aio, NNG_ECLOSED); + return; + } + if ((rv = nni_aio_schedule(aio, zt_pipe_cancel_recv, p)) != 0) { + nni_mtx_unlock(&zt_lk); + nni_aio_finish_error(aio, rv); + return; + } + p->zp_user_rxaio = aio; + zt_pipe_dorecv(p); + nni_mtx_unlock(&zt_lk); +} + +static uint16_t +zt_pipe_peer(void *arg) +{ + zt_pipe *pipe = arg; + + return (pipe->zp_peer); +} + +static int +zt_get_nw_status(zt_node *ztn, uint64_t nwid, int *statusp) +{ + ZT_VirtualNetworkConfig *vcfg; + int status; + + vcfg = ZT_Node_networkConfig(ztn->zn_znode, nwid); + if (vcfg == NULL) { + return (NNG_ECLOSED); + } + switch (vcfg->status) { + case ZT_NETWORK_STATUS_REQUESTING_CONFIGURATION: + status = NNG_ZT_STATUS_CONFIG; + break; + case ZT_NETWORK_STATUS_OK: + status = NNG_ZT_STATUS_UP; + break; + case ZT_NETWORK_STATUS_ACCESS_DENIED: + status = NNG_ZT_STATUS_DENIED; + break; + case ZT_NETWORK_STATUS_NOT_FOUND: + status = NNG_ZT_STATUS_NOTFOUND; + break; + case ZT_NETWORK_STATUS_PORT_ERROR: + status = NNG_ZT_STATUS_ERROR; + break; + case ZT_NETWORK_STATUS_CLIENT_TOO_OLD: + status = NNG_ZT_STATUS_OBSOLETE; + break; + default: + status = NNG_ZT_STATUS_UNKNOWN; + break; + } + ZT_Node_freeQueryResult(ztn->zn_znode, vcfg); + + *statusp = status; + return (0); +} + +static int +zt_get_nw_name(zt_node *ztn, uint64_t nwid, void *buf, size_t *szp, nni_type t) +{ + ZT_VirtualNetworkConfig *vcfg; + int rv; + + vcfg = ZT_Node_networkConfig(ztn->zn_znode, nwid); + if (vcfg == NULL) { + return (NNG_ECLOSED); + } + + rv = nni_copyout_str(vcfg->name, buf, szp, t); + ZT_Node_freeQueryResult(ztn->zn_znode, vcfg); + + return 
(rv); +} + +static int +zt_pipe_get_recvmaxsz(void *arg, void *buf, size_t *szp, nni_type t) +{ + zt_pipe *p = arg; + return (nni_copyout_size(p->zp_rcvmax, buf, szp, t)); +} + +static int +zt_pipe_get_nwid(void *arg, void *buf, size_t *szp, nni_type t) +{ + zt_pipe *p = arg; + return (nni_copyout_u64(p->zp_nwid, buf, szp, t)); +} + +static int +zt_pipe_get_node(void *arg, void *buf, size_t *szp, nni_type t) +{ + zt_pipe *p = arg; + return (nni_copyout_u64(p->zp_laddr >> 24, buf, szp, t)); +} + +static void +zt_pipe_ping_cb(void *arg) +{ + zt_pipe *p = arg; + nni_aio *aio = p->zp_ping_aio; + int rv; + + if ((rv = nni_aio_result(aio)) != 0) { + // We were canceled. That means we're done. + return; + } + nni_mtx_lock(&zt_lk); + if (p->zp_closed || aio == NULL || (p->zp_ping_tries == 0) || + (p->zp_ping_time == NNG_DURATION_INFINITE) || + (p->zp_ping_time == NNG_DURATION_ZERO)) { + nni_mtx_unlock(&zt_lk); + return; + } + if (p->zp_ping_try > p->zp_ping_tries) { + // Ping count exceeded; the other side is AFK. + // Close the pipe, but no need to send a reason to the peer. + zt_pipe_close_err(p, NNG_ECLOSED, 0, NULL); + nni_mtx_unlock(&zt_lk); + return; + } + + if (nni_clock() > (p->zp_last_recv + p->zp_ping_time)) { + p->zp_ping_try++; + zt_pipe_send_ping(p); + } + + nni_sleep_aio(p->zp_ping_time, aio); // Schedule a recheck. + nni_mtx_unlock(&zt_lk); +} + +static void +zt_pipe_start_ping(zt_pipe *p) +{ + // send a gratuitous ping, and start the ping interval timer. 
+ if ((p->zp_ping_tries > 0) && (p->zp_ping_time != NNG_DURATION_ZERO) && + (p->zp_ping_time != NNG_DURATION_INFINITE)) { + p->zp_ping_try = 0; + zt_pipe_send_ping(p); + nni_sleep_aio(p->zp_ping_time, p->zp_ping_aio); + } +} + +static void +zt_ep_fini(void *arg) +{ + zt_ep *ep = arg; + nni_aio_stop(ep->ze_creq_aio); + nni_aio_free(ep->ze_creq_aio); + NNI_FREE_STRUCT(ep); +} + +static int +zt_parsehex(const char **sp, uint64_t *valp, bool wildok) +{ + int n; + const char *s = *sp; + char c; + uint64_t v; + + if (wildok && *s == '*') { + *valp = 0; + s++; + *sp = s; + return (0); + } + + for (v = 0, n = 0; (n < 16) && isxdigit(c = tolower(*s)); n++, s++) { + v *= 16; + if (isdigit(c)) { + v += (c - '0'); + } else { + v += ((c - 'a') + 10); + } + } + + *sp = s; + *valp = v; + return (n ? 0 : NNG_EINVAL); +} + +static int +zt_parsedec(const char **sp, uint64_t *valp) +{ + int n; + const char *s = *sp; + char c; + uint64_t v; + + for (v = 0, n = 0; (n < 20) && isdigit(c = *s); n++, s++) { + v *= 10; + v += (c - '0'); + } + *sp = s; + *valp = v; + return (n ? 0 : NNG_EINVAL); +} + +static int +zt_ep_init(void **epp, nni_url *url, nni_sock *sock, nni_dialer *ndialer, + nni_listener *nlistener) +{ + zt_ep * ep; + uint64_t node; + uint64_t port; + int rv; + const char *h; + + if ((ep = NNI_ALLOC_STRUCT(ep)) == NULL) { + return (NNG_ENOMEM); + } + + ep->ze_mtu = ZT_MIN_MTU; + ep->ze_aio = NULL; + ep->ze_ping_tries = zt_ping_tries; + ep->ze_ping_time = zt_ping_time; + ep->ze_conn_time = zt_conn_time; + ep->ze_conn_tries = zt_conn_tries; + ep->ze_proto = nni_sock_proto_id(sock); + ep->ze_ndialer = ndialer; + ep->ze_nlistener = nlistener; + + nni_aio_list_init(&ep->ze_aios); + + rv = nni_aio_alloc(&ep->ze_creq_aio, zt_ep_conn_req_cb, ep); + if (rv != 0) { + zt_ep_fini(ep); + return (rv); + } + + // Our URL format is: + // + // zt://<nodeid>.<nwid>:<port> + // + // The port must be specified, but may be zero. The nodeid + // may be '*' to refer to ourself. 
There may be a trailing slash + // which will be ignored. + + h = url->u_hostname; + if (((strlen(url->u_path) == 1) && (url->u_path[0] != '/')) || + (strlen(url->u_path) > 1) || (url->u_fragment != NULL) || + (url->u_query != NULL) || (url->u_userinfo != NULL) || + (zt_parsehex(&h, &node, true) != 0) || (*h++ != '.') || + (zt_parsehex(&h, &ep->ze_nwid, false) != 0) || + (node > 0xffffffffffull)) { + return (NNG_EADDRINVAL); + } + h = url->u_port; + if ((zt_parsedec(&h, &port) != 0) || (port > zt_max_port)) { + return (NNG_EADDRINVAL); + } + + // Parse the URL. + if (nlistener != NULL) { + // listener + ep->ze_laddr = node; + ep->ze_laddr <<= 24; + ep->ze_laddr |= port; + ep->ze_raddr = 0; + ep->ze_nlistener = nlistener; + } else { + // dialer + if (port == 0) { + return (NNG_EADDRINVAL); + } + ep->ze_raddr = node; + ep->ze_raddr <<= 24; + ep->ze_raddr |= port; + ep->ze_laddr = 0; + ep->ze_ndialer = ndialer; + } + + *epp = ep; + return (0); +} + +static int +zt_dialer_init(void **epp, nni_url *url, nni_dialer *d) +{ + return (zt_ep_init(epp, url, nni_dialer_sock(d), d, NULL)); +} + +static int +zt_listener_init(void **epp, nni_url *url, nni_listener *l) +{ + return (zt_ep_init(epp, url, nni_listener_sock(l), NULL, l)); +} + +static void +zt_ep_close(void *arg) +{ + zt_ep * ep = arg; + zt_node *ztn; + nni_aio *aio; + + nni_aio_abort(ep->ze_creq_aio, NNG_ECLOSED); + + // Cancel any outstanding user operation(s) - they should have + // been aborted by the above cancellation, but we need to be + // sure, as the cancellation callback may not have run yet. + + nni_mtx_lock(&zt_lk); + while ((aio = nni_list_first(&ep->ze_aios)) != NULL) { + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, NNG_ECLOSED); + } + + // Endpoint framework guarantees to only call us once, + // and to not call other things while we are closed. + ztn = ep->ze_ztn; + // If we're on the ztn node list, pull us off. 
+ if (ztn != NULL) { + nni_list_node_remove(&ep->ze_link); + zt_hash_remove(ztn->zn_ports, ep->ze_laddr & zt_port_mask); + zt_hash_remove(ztn->zn_eps, ep->ze_laddr); + } + + nni_mtx_unlock(&zt_lk); +} + +static int +zt_ep_bind_locked(zt_ep *ep) +{ + int rv; + uint64_t port; + uint64_t node; + zt_node *ztn; + + // If we haven't already got a ZT node, get one. + if ((ztn = ep->ze_ztn) == NULL) { + if ((rv = zt_node_find(ep)) != 0) { + return (rv); + } + ztn = ep->ze_ztn; + } + + node = ep->ze_laddr >> 24; + if ((node != 0) && (node != ztn->zn_self)) { + // User requested node id, but it doesn't match our + // own. + return (NNG_EADDRINVAL); + } + + if ((ep->ze_laddr & zt_port_mask) == 0) { + // ask for an ephemeral port + if ((rv = zt_hash_alloc(ztn->zn_ports, &port, ep)) != 0) { + return (rv); + } + NNI_ASSERT(port & zt_ephemeral); + } else { + void *conflict; + // make sure port requested is free. + port = ep->ze_laddr & zt_port_mask; + + if (zt_hash_find(ztn->zn_ports, port, &conflict) == 0) { + return (NNG_EADDRINUSE); + } + if ((rv = zt_hash_insert(ztn->zn_ports, port, ep)) != 0) { + return (rv); + } + } + NNI_ASSERT(port <= zt_max_port); + NNI_ASSERT(port > 0); + + ep->ze_laddr = ztn->zn_self; + ep->ze_laddr <<= 24; + ep->ze_laddr |= port; + ep->ze_running = true; + + if ((rv = zt_hash_insert(ztn->zn_eps, ep->ze_laddr, ep)) != 0) { + zt_hash_remove(ztn->zn_ports, port); + return (rv); + } + + return (0); +} + +static int +zt_ep_bind(void *arg) +{ + int rv; + zt_ep *ep = arg; + + nni_mtx_lock(&zt_lk); + rv = zt_ep_bind_locked(ep); + nni_mtx_unlock(&zt_lk); + + return (rv); +} + +static void +zt_ep_cancel(nni_aio *aio, void *arg, int rv) +{ + zt_ep *ep = arg; + + nni_mtx_lock(&zt_lk); + if (nni_aio_list_active(aio)) { + if (ep->ze_aio != NULL) { + nni_aio_abort(ep->ze_aio, rv); + } + nni_aio_list_remove(aio); + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&zt_lk); +} + +static void +zt_ep_doaccept(zt_ep *ep) +{ + // Call with ep lock held. 
+ nni_time now; + zt_pipe *p; + int rv; + + now = nni_clock(); + // Consume any timedout connect requests. + while (ep->ze_creq_tail != ep->ze_creq_head) { + zt_creq creq; + nni_aio *aio; + + creq = ep->ze_creqs[ep->ze_creq_tail % zt_listenq]; + // Discard old connection requests. + if (creq.cr_expire < now) { + ep->ze_creq_tail++; + continue; + } + + if ((aio = nni_list_first(&ep->ze_aios)) == NULL) { + // No outstanding accept. We're done. + break; + } + + // We have both conn request, and a place to accept it. + + // Advance the tail. + ep->ze_creq_tail++; + + // We remove this AIO. This keeps it from being canceled. + nni_aio_list_remove(aio); + + rv = zt_pipe_alloc(&p, ep, creq.cr_raddr, ep->ze_laddr, true); + if (rv != 0) { + zt_send_err(ep->ze_ztn, ep->ze_nwid, creq.cr_raddr, + ep->ze_laddr, zt_err_unknown, + "Failed creating pipe"); + nni_aio_finish_error(aio, rv); + continue; + } + p->zp_peer = creq.cr_proto; + zt_pipe_send_conn_ack(p); + zt_pipe_start_ping(p); + nni_aio_set_output(aio, 0, p); + nni_aio_finish(aio, 0, 0); + } +} + +static void +zt_ep_accept(void *arg, nni_aio *aio) +{ + zt_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + nni_mtx_lock(&zt_lk); + if ((rv = nni_aio_schedule(aio, zt_ep_cancel, ep)) != 0) { + nni_mtx_unlock(&zt_lk); + nni_aio_finish_error(aio, rv); + return; + } + nni_aio_list_append(&ep->ze_aios, aio); + zt_ep_doaccept(ep); + nni_mtx_unlock(&zt_lk); +} + +static void +zt_ep_conn_req_cancel(nni_aio *aio, void *arg, int rv) +{ + zt_ep *ep = arg; + // We don't have much to do here. The AIO will have been + // canceled as a result of the "parent" AIO canceling. 
+ nni_mtx_lock(&zt_lk); + if (ep->ze_creq_active) { + ep->ze_creq_active = false; + nni_aio_finish_error(aio, rv); + } + nni_mtx_unlock(&zt_lk); +} + +static void +zt_ep_conn_req_cb(void *arg) +{ + zt_ep * ep = arg; + zt_pipe *p; + nni_aio *aio = ep->ze_creq_aio; + nni_aio *uaio; + int rv; + + nni_mtx_lock(&zt_lk); + + ep->ze_creq_active = false; + switch ((rv = nni_aio_result(aio))) { + case 0: + p = nni_aio_get_output(aio, 0); + // Already canceled, or already handled? + if ((uaio = nni_list_first(&ep->ze_aios)) != NULL) { + nni_aio_list_remove(uaio); + zt_pipe_start_ping(p); + nni_aio_set_output(uaio, 0, p); + nni_aio_finish(uaio, 0, 0); + } else { + // We have a pipe, but nowhere to stick it. + // Just discard it. + zt_pipe_fini(p); + } + ep->ze_creq_try = 0; + break; + + case NNG_ETIMEDOUT: + if ((ep->ze_creq_try > ep->ze_conn_tries) && + (ep->ze_conn_tries > 0)) { + // Final timeout attempt. + if ((uaio = nni_list_first(&ep->ze_aios)) != NULL) { + nni_aio_list_remove(uaio); + nni_aio_finish_error(uaio, rv); + // reset the counter. + ep->ze_creq_try = 0; + } + } + break; + + default: + // Failed hard? + if ((uaio = nni_list_first(&ep->ze_aios)) != NULL) { + nni_aio_list_remove(uaio); + nni_aio_finish_error(uaio, rv); + } + ep->ze_creq_try = 0; + break; + } + + if (nni_list_first(&ep->ze_aios) != NULL) { + nni_aio_set_timeout(aio, ep->ze_conn_time); + if (nni_aio_begin(aio) == 0) { + rv = nni_aio_schedule(aio, zt_ep_conn_req_cancel, ep); + if (rv != 0) { + nni_aio_finish_error(aio, rv); + } else { + ep->ze_creq_active = true; + ep->ze_creq_try++; + zt_ep_send_conn_req(ep); + } + } + } + + nni_mtx_unlock(&zt_lk); +} + +static void +zt_ep_connect(void *arg, nni_aio *aio) +{ + zt_ep *ep = arg; + int rv; + + if (nni_aio_begin(aio) != 0) { + return; + } + // We bind locally. We'll use the address later when we give + // it to the pipe, but this allows us to receive the initial + // ack back from the server. (This gives us an ephemeral + // address to work with.) 
+ nni_mtx_lock(&zt_lk); + + // Clear the port so we get an ephemeral port. + ep->ze_laddr &= ~((uint64_t) zt_port_mask); + + if ((rv = zt_ep_bind_locked(ep)) != 0) { + nni_aio_finish_error(aio, rv); + nni_mtx_unlock(&zt_lk); + return; + } + + if ((ep->ze_raddr >> 24) == 0) { + ep->ze_raddr |= (ep->ze_ztn->zn_self << zt_port_shift); + } + if ((rv = nni_aio_schedule(aio, zt_ep_cancel, ep)) != 0) { + nni_aio_finish_error(aio, rv); + nni_mtx_unlock(&zt_lk); + return; + } + nni_aio_list_append(&ep->ze_aios, aio); + ep->ze_running = true; + + nni_aio_set_timeout(ep->ze_creq_aio, ep->ze_conn_time); + if (nni_aio_begin(ep->ze_creq_aio) == 0) { + rv = nni_aio_schedule( + ep->ze_creq_aio, zt_ep_conn_req_cancel, ep); + if (rv != 0) { + nni_aio_finish_error(ep->ze_creq_aio, rv); + } else { + // Send out the first connect message; if not + // yet attached to network message will be dropped. + ep->ze_creq_try = 1; + ep->ze_creq_active = true; + zt_ep_send_conn_req(ep); + } + } + nni_mtx_unlock(&zt_lk); +} + +static int +zt_ep_set_recvmaxsz(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep *ep = arg; + size_t val; + int rv; + + if ((rv = nni_copyin_size(&val, data, sz, 0, NNI_MAXSZ, t)) == 0) { + nni_mtx_lock(&zt_lk); + ep->ze_rcvmax = val; + nni_mtx_unlock(&zt_lk); + } + return (rv); +} + +static int +zt_ep_get_recvmaxsz(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + nni_mtx_lock(&zt_lk); + rv = nni_copyout_size(ep->ze_rcvmax, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_set_home(void *arg, const void *data, size_t sz, nni_type t) +{ + int rv; + zt_ep *ep = arg; + + if ((rv = zt_check_string(data, sz, t)) == 0) { + nni_mtx_lock(&zt_lk); + if (ep->ze_running) { + rv = NNG_ESTATE; + } else { + nni_strlcpy(ep->ze_home, data, sizeof(ep->ze_home)); + if ((rv = zt_node_find(ep)) != 0) { + ep->ze_ztn = NULL; + } + } + nni_mtx_unlock(&zt_lk); + } + + return (rv); +} + +static int +zt_ep_get_home(void 
*arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + rv = nni_copyout_str(ep->ze_home, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_get_url(void *arg, void *data, size_t *szp, nni_type t) +{ + char ustr[64]; // more than plenty + zt_ep * ep = arg; + uint64_t addr; + + nni_mtx_lock(&zt_lk); + addr = ep->ze_nlistener != NULL ? ep->ze_laddr : ep->ze_raddr; + snprintf(ustr, sizeof(ustr), "zt://%llx.%llx:%u", + (unsigned long long) addr >> zt_port_shift, + (unsigned long long) ep->ze_nwid, + (unsigned) (addr & zt_port_mask)); + nni_mtx_unlock(&zt_lk); + return (nni_copyout_str(ustr, data, szp, t)); +} + +static int +zt_ep_set_orbit(void *arg, const void *data, size_t sz, nni_type t) +{ + uint64_t moonid; + uint64_t peerid; + zt_ep * ep = arg; + int rv; + enum ZT_ResultCode zrv; + + if ((t != NNI_TYPE_UINT64) && (t != NNI_TYPE_OPAQUE)) { + return (NNG_EBADTYPE); + } + if (sz == sizeof(uint64_t)) { + memcpy(&moonid, data, sizeof(moonid)); + peerid = 0; + } else if (sz == sizeof(uint64_t) * 2) { + memcpy(&moonid, data, sizeof(moonid)); + memcpy(&peerid, ((char *) data) + sizeof(uint64_t), + sizeof(peerid)); + } else { + return (NNG_EINVAL); + } + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + zrv = ZT_Node_orbit(ep->ze_ztn->zn_znode, NULL, moonid, peerid); + nni_mtx_unlock(&zt_lk); + + return (zt_result(zrv)); +} + +static int +zt_ep_set_deorbit(void *arg, const void *data, size_t sz, nni_type t) +{ + uint64_t moonid; + zt_ep * ep = arg; + int rv; + + if ((rv = nni_copyin_u64(&moonid, data, sz, t)) == 0) { + enum ZT_ResultCode zrv; + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + zrv = ZT_Node_deorbit(ep->ze_ztn->zn_znode, NULL, moonid); + nni_mtx_unlock(&zt_lk); + rv = zt_result(zrv); + } + return (rv); +} + +static 
int +zt_ep_set_add_local_addr(void *arg, const void *data, size_t sz, nni_type t) +{ + nng_sockaddr sa; + zt_ep * ep = arg; + int rv; + + if ((rv = nni_copyin_sockaddr(&sa, data, sz, t)) == 0) { + enum ZT_ResultCode zrv; + zt_node * ztn; + struct sockaddr_storage ss; + struct sockaddr_in * sin; + struct sockaddr_in6 * sin6; + + memset(&ss, 0, sizeof(ss)); + switch (sa.s_family) { + case NNG_AF_INET: + sin = (void *) &ss; + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = sa.s_in.sa_addr; + sin->sin_port = 0; + break; + case NNG_AF_INET6: + sin6 = (void *) &ss; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + memcpy(&sin6->sin6_addr, sa.s_in6.sa_addr, 16); + break; + default: + return (NNG_EINVAL); + } + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + ztn = ep->ze_ztn; + zrv = ZT_Node_addLocalInterfaceAddress(ztn->zn_znode, &ss); + nni_mtx_unlock(&zt_lk); + rv = zt_result(zrv); + } + return (rv); +} + +static int +zt_ep_set_clear_local_addrs(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep *ep = arg; + int rv; + NNI_ARG_UNUSED(data); + NNI_ARG_UNUSED(sz); + NNI_ARG_UNUSED(t); + + ZT_Node *zn; + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + zn = ep->ze_ztn; + ZT_Node_clearLocalInterfaceAddresses(zn); + nni_mtx_unlock(&zt_lk); + return (0); +} + +static int +zt_ep_get_node(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + + rv = nni_copyout_u64(ep->ze_ztn->zn_self, data, szp, t); + + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_get_nwid(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = 
zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + rv = nni_copyout_u64(ep->ze_nwid, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_get_nw_name(void *arg, void *buf, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + rv = zt_get_nw_name(ep->ze_ztn, ep->ze_nwid, buf, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_get_nw_status(void *arg, void *buf, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + int status; + + nni_mtx_lock(&zt_lk); + if ((ep->ze_ztn == NULL) && ((rv = zt_node_find(ep)) != 0)) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + if ((rv = zt_get_nw_status(ep->ze_ztn, ep->ze_nwid, &status)) != 0) { + nni_mtx_unlock(&zt_lk); + return (rv); + } + nni_mtx_unlock(&zt_lk); + return (nni_copyout_int(status, buf, szp, t)); +} + +static int +zt_ep_set_ping_time(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep * ep = arg; + nng_duration val; + int rv; + + if ((rv = nni_copyin_ms(&val, data, sz, t)) == 0) { + nni_mtx_lock(&zt_lk); + ep->ze_ping_time = val; + nni_mtx_unlock(&zt_lk); + } + return (rv); +} + +static int +zt_ep_get_ping_time(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + rv = nni_copyout_ms(ep->ze_ping_time, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_set_ping_tries(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep *ep = arg; + int val; + int rv; + + if ((rv = nni_copyin_int(&val, data, sz, 0, 1000000, t)) == 0) { + nni_mtx_lock(&zt_lk); + ep->ze_ping_tries = val; + nni_mtx_unlock(&zt_lk); + } + return (rv); +} + +static int +zt_ep_get_ping_tries(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + rv = nni_copyout_int(ep->ze_ping_tries, data, 
szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_set_conn_time(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep * ep = arg; + nng_duration val; + int rv; + + if ((rv = nni_copyin_ms(&val, data, sz, t)) == 0) { + nni_mtx_lock(&zt_lk); + ep->ze_conn_time = val; + nni_mtx_unlock(&zt_lk); + } + return (rv); +} + +static int +zt_ep_get_conn_time(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + rv = nni_copyout_ms(ep->ze_conn_time, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_set_conn_tries(void *arg, const void *data, size_t sz, nni_type t) +{ + zt_ep *ep = arg; + int val; + int rv; + + if ((rv = nni_copyin_int(&val, data, sz, 0, 1000000, t)) == 0) { + nni_mtx_lock(&zt_lk); + ep->ze_conn_tries = val; + nni_mtx_unlock(&zt_lk); + } + return (rv); +} + +static int +zt_ep_get_conn_tries(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep *ep = arg; + int rv; + + nni_mtx_lock(&zt_lk); + rv = nni_copyout_int(ep->ze_conn_tries, data, szp, t); + nni_mtx_unlock(&zt_lk); + return (rv); +} + +static int +zt_ep_get_locaddr(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_ep * ep = arg; + nng_sockaddr sa; + + memset(&sa, 0, sizeof(sa)); + sa.s_zt.sa_family = NNG_AF_ZT; + nni_mtx_lock(&zt_lk); + sa.s_zt.sa_nwid = ep->ze_nwid; + sa.s_zt.sa_nodeid = ep->ze_laddr >> zt_port_shift; + sa.s_zt.sa_port = ep->ze_laddr & zt_port_mask; + nni_mtx_unlock(&zt_lk); + return (nni_copyout_sockaddr(&sa, data, szp, t)); +} + +static int +zt_pipe_get_locaddr(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_pipe * p = arg; + nng_sockaddr sa; + + memset(&sa, 0, sizeof(sa)); + sa.s_zt.sa_family = NNG_AF_ZT; + sa.s_zt.sa_nwid = p->zp_nwid; + sa.s_zt.sa_nodeid = p->zp_laddr >> zt_port_shift; + sa.s_zt.sa_port = p->zp_laddr & zt_port_mask; + return (nni_copyout_sockaddr(&sa, data, szp, t)); +} + +static int +zt_pipe_get_remaddr(void *arg, void *data, size_t 
*szp, nni_type t) +{ + zt_pipe * p = arg; + nng_sockaddr sa; + + memset(&sa, 0, sizeof(sa)); + sa.s_zt.sa_family = NNG_AF_ZT; + sa.s_zt.sa_nwid = p->zp_nwid; + sa.s_zt.sa_nodeid = p->zp_raddr >> zt_port_shift; + sa.s_zt.sa_port = p->zp_raddr & zt_port_mask; + return (nni_copyout_sockaddr(&sa, data, szp, t)); +} + +static int +zt_pipe_get_mtu(void *arg, void *data, size_t *szp, nni_type t) +{ + zt_pipe *p = arg; + return (nni_copyout_size(p->zp_mtu, data, szp, t)); +} + +static const nni_option zt_pipe_options[] = { + { + .o_name = NNG_OPT_LOCADDR, + .o_get = zt_pipe_get_locaddr, + }, + { + .o_name = NNG_OPT_REMADDR, + .o_get = zt_pipe_get_remaddr, + }, + { + .o_name = NNG_OPT_ZT_MTU, + .o_get = zt_pipe_get_mtu, + }, + { + .o_name = NNG_OPT_ZT_NWID, + .o_get = zt_pipe_get_nwid, + }, + { + .o_name = NNG_OPT_ZT_NODE, + .o_get = zt_pipe_get_node, + }, + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = zt_pipe_get_recvmaxsz, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static int +zt_pipe_getopt(void *arg, const char *name, void *buf, size_t *szp, nni_type t) +{ + zt_pipe *p = arg; + return (nni_getopt(zt_pipe_options, name, p, buf, szp, t)); +} + +static nni_tran_pipe_ops zt_pipe_ops = { + .p_init = zt_pipe_init, + .p_fini = zt_pipe_fini, + .p_send = zt_pipe_send, + .p_recv = zt_pipe_recv, + .p_close = zt_pipe_close, + .p_peer = zt_pipe_peer, + .p_getopt = zt_pipe_getopt, +}; + +static nni_option zt_dialer_options[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = zt_ep_get_recvmaxsz, + .o_set = zt_ep_set_recvmaxsz, + }, + { + .o_name = NNG_OPT_URL, + .o_get = zt_ep_get_url, + }, + { + .o_name = NNG_OPT_ZT_HOME, + .o_get = zt_ep_get_home, + .o_set = zt_ep_set_home, + }, + { + .o_name = NNG_OPT_ZT_NODE, + .o_get = zt_ep_get_node, + }, + { + .o_name = NNG_OPT_ZT_NWID, + .o_get = zt_ep_get_nwid, + }, + { + .o_name = NNG_OPT_ZT_NETWORK_STATUS, + .o_get = zt_ep_get_nw_status, + }, + { + .o_name = NNG_OPT_ZT_NETWORK_NAME, + .o_get = zt_ep_get_nw_name, + }, + { + 
.o_name = NNG_OPT_ZT_PING_TIME, + .o_get = zt_ep_get_ping_time, + .o_set = zt_ep_set_ping_time, + }, + { + .o_name = NNG_OPT_ZT_PING_TRIES, + .o_get = zt_ep_get_ping_tries, + .o_set = zt_ep_set_ping_tries, + }, + { + .o_name = NNG_OPT_ZT_CONN_TIME, + .o_get = zt_ep_get_conn_time, + .o_set = zt_ep_set_conn_time, + }, + { + .o_name = NNG_OPT_ZT_CONN_TRIES, + .o_get = zt_ep_get_conn_tries, + .o_set = zt_ep_set_conn_tries, + }, + { + .o_name = NNG_OPT_ZT_ORBIT, + .o_set = zt_ep_set_orbit, + }, + { + .o_name = NNG_OPT_ZT_DEORBIT, + .o_set = zt_ep_set_deorbit, + }, + { + .o_name = NNG_OPT_ZT_ADD_LOCAL_ADDR, + .o_set = zt_ep_set_add_local_addr, + }, + { + .o_name = NNG_OPT_ZT_CLEAR_LOCAL_ADDRS, + .o_set = zt_ep_set_clear_local_addrs, + }, + + // terminate list + { + .o_name = NULL, + }, +}; + +static nni_option zt_listener_options[] = { + { + .o_name = NNG_OPT_RECVMAXSZ, + .o_get = zt_ep_get_recvmaxsz, + .o_set = zt_ep_set_recvmaxsz, + }, + { + .o_name = NNG_OPT_URL, + .o_get = zt_ep_get_url, + }, + { + .o_name = NNG_OPT_ZT_HOME, + .o_get = zt_ep_get_home, + .o_set = zt_ep_set_home, + }, + { + .o_name = NNG_OPT_ZT_NODE, + .o_get = zt_ep_get_node, + }, + { + .o_name = NNG_OPT_ZT_NWID, + .o_get = zt_ep_get_nwid, + }, + { + .o_name = NNG_OPT_ZT_NETWORK_STATUS, + .o_get = zt_ep_get_nw_status, + }, + { + .o_name = NNG_OPT_ZT_NETWORK_NAME, + .o_get = zt_ep_get_nw_name, + }, + { + .o_name = NNG_OPT_ZT_PING_TIME, + .o_get = zt_ep_get_ping_time, + .o_set = zt_ep_set_ping_time, + }, + { + .o_name = NNG_OPT_ZT_PING_TRIES, + .o_get = zt_ep_get_ping_tries, + .o_set = zt_ep_set_ping_tries, + }, + { + .o_name = NNG_OPT_ZT_ORBIT, + .o_set = zt_ep_set_orbit, + }, + { + .o_name = NNG_OPT_ZT_DEORBIT, + .o_set = zt_ep_set_deorbit, + }, + { + .o_name = NNG_OPT_LOCADDR, + .o_get = zt_ep_get_locaddr, + }, + // terminate list + { + .o_name = NULL, + }, +}; + +static nni_tran_dialer_ops zt_dialer_ops = { + .d_init = zt_dialer_init, + .d_fini = zt_ep_fini, + .d_connect = zt_ep_connect, + .d_close 
= zt_ep_close, + .d_options = zt_dialer_options, +}; + +static nni_tran_listener_ops zt_listener_ops = { + .l_init = zt_listener_init, + .l_fini = zt_ep_fini, + .l_bind = zt_ep_bind, + .l_accept = zt_ep_accept, + .l_close = zt_ep_close, + .l_options = zt_listener_options, +}; + +// This is the ZeroTier transport linkage, and should be the +// only global symbol in this entire file. +static struct nni_tran zt_tran = { + .tran_version = NNI_TRANSPORT_VERSION, + .tran_scheme = "zt", + .tran_dialer = &zt_dialer_ops, + .tran_listener = &zt_listener_ops, + .tran_pipe = &zt_pipe_ops, + .tran_init = zt_tran_init, + .tran_fini = zt_tran_fini, +}; + +int +nng_zt_register(void) +{ + return (nni_tran_register(&zt_tran)); +} diff --git a/src/sp/transport/zerotier/zthash.c b/src/sp/transport/zerotier/zthash.c new file mode 100644 index 00000000..ca46b373 --- /dev/null +++ b/src/sp/transport/zerotier/zthash.c @@ -0,0 +1,302 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. 
+// + +#include "core/nng_impl.h" +#include "zthash.h" + +struct zt_hash_entry { + uint64_t key; + void * val; + uint32_t skips; +}; + +int +zt_hash_init(zt_hash **hp) +{ + zt_hash *h; + + if ((h = NNI_ALLOC_STRUCT(h)) == NULL) { + return (NNG_ENOMEM); + } + h->ih_entries = NULL; + h->ih_count = 0; + h->ih_load = 0; + h->ih_cap = 0; + h->ih_maxload = 0; + h->ih_minload = 0; // never shrink below this + h->ih_minval = 0; + h->ih_maxval = 0xffffffff; + h->ih_dynval = 0; + + *hp = h; + return (0); +} + +void +zt_hash_fini(zt_hash *h) +{ + if (h != NULL) { + if (h->ih_entries != NULL) { + NNI_FREE_STRUCTS(h->ih_entries, h->ih_cap); + h->ih_entries = NULL; + h->ih_cap = h->ih_count = 0; + h->ih_load = h->ih_minload = h->ih_maxload = 0; + } + + NNI_FREE_STRUCT(h); + } +} + +void +zt_hash_limits(zt_hash *h, uint64_t minval, uint64_t maxval, uint64_t start) +{ + if (start < minval) { + start = minval; + } + if (start > maxval) { + start = maxval; + } + + h->ih_minval = minval; + h->ih_maxval = maxval; + h->ih_dynval = start; + NNI_ASSERT(minval < maxval); + NNI_ASSERT(start >= minval); + NNI_ASSERT(start <= maxval); +} + +// Inspired by Python dict implementation. This probe will visit every +// cell. We always hash consecutively assigned IDs. +#define ZT_HASH_NEXT(h, j) ((((j) *5) + 1) & (h->ih_cap - 1)) +#define ZT_HASH_INDEX(h, j) ((j) & (h->ih_cap - 1)) + +static size_t +zt_hash_find_index(zt_hash *h, uint64_t id) +{ + size_t index; + size_t start; + if (h->ih_count == 0) { + return ((size_t) -1); + } + + index = ZT_HASH_INDEX(h, id); + start = index; + for (;;) { + // The value of ihe_key is only valid if ihe_val is not NULL. 
+ if ((h->ih_entries[index].key == id) && + (h->ih_entries[index].val != NULL)) { + return (index); + } + if (h->ih_entries[index].skips == 0) { + return ((size_t) -1); + } + index = ZT_HASH_NEXT(h, index); + + if (index == start) { + break; + } + } + + return ((size_t) -1); +} + +int +zt_hash_find(zt_hash *h, uint64_t id, void **vp) +{ + size_t index; + if ((index = zt_hash_find_index(h, id)) == (size_t) -1) { + return (NNG_ENOENT); + } + *vp = h->ih_entries[index].val; + return (0); +} + +static int +zt_hash_resize(zt_hash *h) +{ + size_t newsize; + size_t oldsize; + zt_hash_entry *newents; + zt_hash_entry *oldents; + uint32_t i; + + if ((h->ih_load < h->ih_maxload) && (h->ih_load >= h->ih_minload)) { + // No resize needed. + return (0); + } + + oldsize = h->ih_cap; + + newsize = 8; + while (newsize < (h->ih_count * 2)) { + newsize *= 2; + } + if (newsize == oldsize) { + // Same size. + return (0); + } + + oldents = h->ih_entries; + newents = NNI_ALLOC_STRUCTS(newents, newsize); + if (newents == NULL) { + return (NNG_ENOMEM); + } + + h->ih_entries = newents; + h->ih_cap = newsize; + h->ih_load = 0; + if (newsize > 8) { + h->ih_minload = newsize / 8; + h->ih_maxload = newsize * 2 / 3; + } else { + h->ih_minload = 0; + h->ih_maxload = 5; + } + for (i = 0; i < oldsize; i++) { + size_t index; + if (oldents[i].val == NULL) { + continue; + } + index = oldents[i].key & (newsize - 1); + for (;;) { + // Increment the load unconditionally. It counts + // once for every item stored, plus once for each + // hashing operation we use to store the item (i.e. + // one for the item, plus once for each rehash.) + h->ih_load++; + if (newents[index].val == NULL) { + // As we are hitting this entry for the first + // time, it won't have any skips. 
+ NNI_ASSERT(newents[index].skips == 0); + newents[index].val = oldents[i].val; + newents[index].key = oldents[i].key; + break; + } + newents[index].skips++; + index = ZT_HASH_NEXT(h, index); + } + } + if (oldsize != 0) { + NNI_FREE_STRUCTS(oldents, oldsize); + } + return (0); +} + +int +zt_hash_remove(zt_hash *h, uint64_t id) +{ + size_t index; + size_t probe; + + if ((index = zt_hash_find_index(h, id)) == (size_t) -1) { + return (NNG_ENOENT); + } + + // Now we have found the index where the object exists. We are going + // to restart the search, until the index matches, to decrement the + // skips counter. + probe = (int) ZT_HASH_INDEX(h, id); + + for (;;) { + zt_hash_entry *entry; + // The load was increased once each hashing operation we used + // to place the the item. Decrement it accordingly. + h->ih_load--; + entry = &h->ih_entries[probe]; + if (probe == index) { + entry->val = NULL; + entry->key = 0; + break; + } + NNI_ASSERT(entry->skips > 0); + entry->skips--; + probe = ZT_HASH_NEXT(h, probe); + } + + h->ih_count--; + + // Shrink -- but it's ok if we can't. + (void) zt_hash_resize(h); + + return (0); +} + +int +zt_hash_insert(zt_hash *h, uint64_t id, void *val) +{ + size_t index; + zt_hash_entry *ent; + + // Try to resize -- if we don't need to, this will be a no-op. + if (zt_hash_resize(h) != 0) { + return (NNG_ENOMEM); + } + + // If it already exists, just overwrite the old value. + if ((index = zt_hash_find_index(h, id)) != (size_t) -1) { + ent = &h->ih_entries[index]; + ent->val = val; + return (0); + } + + index = ZT_HASH_INDEX(h, id); + for (;;) { + ent = &h->ih_entries[index]; + + // Increment the load count. We do this each time time we + // rehash. This may over-count items that collide on the + // same rehashing, but this should just cause a table to + // grow sooner, which is probably a good thing. + h->ih_load++; + if (ent->val == NULL) { + h->ih_count++; + ent->key = id; + ent->val = val; + return (0); + } + // Record the skip count. 
This being non-zero informs + // that a rehash will be necessary. Without this we + // would need to scan the entire hash for the match. + ent->skips++; + index = ZT_HASH_NEXT(h, index); + } +} + +int +zt_hash_alloc(zt_hash *h, uint64_t *idp, void *val) +{ + uint64_t id; + int rv; + + NNI_ASSERT(val != NULL); + + if (h->ih_count > (h->ih_maxval - h->ih_minval)) { + // Really more like ENOSPC.. the table is filled to max. + return (NNG_ENOMEM); + } + + for (;;) { + id = h->ih_dynval; + h->ih_dynval++; + if (h->ih_dynval > h->ih_maxval) { + h->ih_dynval = h->ih_minval; + } + + if (zt_hash_find_index(h, id) == (size_t) -1) { + break; + } + } + + rv = zt_hash_insert(h, id, val); + if (rv == 0) { + *idp = id; + } + return (rv); +} diff --git a/src/sp/transport/zerotier/zthash.h b/src/sp/transport/zerotier/zthash.h new file mode 100644 index 00000000..249eabbf --- /dev/null +++ b/src/sp/transport/zerotier/zthash.h @@ -0,0 +1,43 @@ +// +// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech> +// Copyright 2018 Capitar IT Group BV <info@capitar.com> +// +// This software is supplied under the terms of the MIT License, a +// copy of which should be located in the distribution where this +// file was obtained (LICENSE.txt). A copy of the license may also be +// found online at https://opensource.org/licenses/MIT. +// + +#ifndef ZT_HASH_H +#define ZT_HASH_H + +#include <stdint.h> + +// This code is derived from id hash, but supports 64-bit IDs. + +typedef struct zt_hash zt_hash; +typedef struct zt_hash_entry zt_hash_entry; + +// NB: These details are entirely private to the hash implementation. +// They are provided here to facilitate inlining in structures. 
+struct zt_hash { + size_t ih_cap; + size_t ih_count; + size_t ih_load; + size_t ih_minload; // considers placeholders + size_t ih_maxload; + uint64_t ih_minval; + uint64_t ih_maxval; + uint64_t ih_dynval; + zt_hash_entry *ih_entries; +}; + +extern int zt_hash_init(zt_hash **); +extern void zt_hash_fini(zt_hash *); +extern void zt_hash_limits(zt_hash *, uint64_t, uint64_t, uint64_t); +extern int zt_hash_find(zt_hash *, uint64_t, void **); +extern int zt_hash_remove(zt_hash *, uint64_t); +extern int zt_hash_insert(zt_hash *, uint64_t, void *); +extern int zt_hash_alloc(zt_hash *, uint64_t *, void *); + +#endif // CORE_IDHASH_H |
