diff options
| author | Garrett D'Amore <garrett@damore.org> | 2018-04-18 20:38:00 -0700 |
|---|---|---|
| committer | Garrett D'Amore <garrett@damore.org> | 2018-04-20 07:34:16 -0700 |
| commit | 5902d02ad0a056a146231568f1293ffbcd59f61c (patch) | |
| tree | be38584c02d703ec2322ab941d4d723c752fe187 /src/core/aio.c | |
| parent | 40542e7af0f5003d7ad67876ea580a59174031ca (diff) | |
| download | nng-5902d02ad0a056a146231568f1293ffbcd59f61c.tar.gz nng-5902d02ad0a056a146231568f1293ffbcd59f61c.tar.bz2 nng-5902d02ad0a056a146231568f1293ffbcd59f61c.zip | |
fixes #346 nng_recv() sometimes acts on null `msg` pointer
This closes a fundamental flaw in the way aio structures were
handled. In paticular, aio expiration could race ahead, and
fire before the aio was properly registered by the provider.
This ultimately led to the possibility of duplicate completions
on the same aio.
The solution involved breaking up nni_aio_start into two functions.
nni_aio_begin (which can be run outside of external locks) simply
validates that nni_aio_fini() has not been called, and clears certain
fields in the aio to make it ready for use by the provider.
nni_aio_schedule does the work to register the aio with the expiration
thread, and should only be called when the aio is actually scheduled
for asynchronous completion. nni_aio_schedule_verify does the same thing,
but returns NNG_ETIMEDOUT if the aio has a zero length timeout.
This change has a small negative performance impact. We have plans to
rectify that by converting nni_aio_begin to use a locklesss flag for
the aio->a_fini bit.
While we were here, we fixed some error paths in the POSIX subsystem,
which would have returned incorrect error codes, and we made some
optmizations in the message queues to reduce conditionals while holding
locks in the hot code path.
Diffstat (limited to 'src/core/aio.c')
| -rw-r--r-- | src/core/aio.c | 52 |
1 files changed, 36 insertions, 16 deletions
diff --git a/src/core/aio.c b/src/core/aio.c index fac62f12..388e6677 100644 --- a/src/core/aio.c +++ b/src/core/aio.c @@ -49,9 +49,9 @@ static nni_list nni_aio_expire_aios; // // In order to guard against aio reuse during teardown, we set a fini // flag. Any attempt to initialize for a new operation after that point -// will fail and the caller will get NNG_ESTATE indicating this. The -// provider that calls nni_aio_start() MUST check the return value, and -// if it comes back nonzero (NNG_ESTATE) then it must simply discard the +// will fail and the caller will get NNG_ECANCELED indicating this. The +// provider that calls nni_aio_begin() MUST check the return value, and +// if it comes back nonzero (NNG_ECANCELED) then it must simply discard the // request and return. // An nni_aio is an async I/O handle. @@ -184,7 +184,7 @@ nni_aio_fini_cb(nni_aio *aio) // nni_aio_stop cancels any oustanding operation, and waits for the // callback to complete, if still running. It also marks the AIO as -// stopped, preventing further calls to nni_aio_start from succeeding. +// stopped, preventing further calls to nni_aio_begin from succeeding. // To correctly tear down an AIO, call stop, and make sure any other // calles are not also stopped, before calling nni_aio_fini to release // actual memory. @@ -298,14 +298,11 @@ nni_aio_wait(nni_aio *aio) } int -nni_aio_start(nni_aio *aio, nni_aio_cancelfn cancelfn, void *data) +nni_aio_begin(nni_aio *aio) { - nni_time now = nni_clock(); - nni_mtx_lock(&nni_aio_lk); - + // We should not reschedule anything at this point. if (aio->a_fini) { - // We should not reschedule anything at this point. aio->a_active = false; aio->a_result = NNG_ECANCELED; nni_mtx_unlock(&nni_aio_lk); @@ -315,34 +312,52 @@ nni_aio_start(nni_aio *aio, nni_aio_cancelfn cancelfn, void *data) aio->a_pend = false; aio->a_result = 0; aio->a_count = 0; - aio->a_prov_cancel = cancelfn; - aio->a_prov_data = data; + aio->a_prov_cancel = NULL; + aio->a_prov_data = NULL; aio->a_active = true; - for (unsigned i = 0; i < NNI_NUM_ELEMENTS(aio->a_outputs); i++) { aio->a_outputs[i] = NULL; } + nni_mtx_unlock(&nni_aio_lk); + return (0); +} +void +nni_aio_schedule(nni_aio *aio, nni_aio_cancelfn cancelfn, void *data) +{ if (!aio->a_sleep) { // Convert the relative timeout to an absolute timeout. switch (aio->a_timeout) { case NNG_DURATION_ZERO: - aio->a_expire = NNI_TIME_ZERO; + aio->a_expire = nni_clock(); break; case NNG_DURATION_INFINITE: case NNG_DURATION_DEFAULT: aio->a_expire = NNI_TIME_NEVER; break; default: - aio->a_expire = now + aio->a_timeout; + aio->a_expire = nni_clock() + aio->a_timeout; break; } } + nni_mtx_lock(&nni_aio_lk); + aio->a_prov_cancel = cancelfn; + aio->a_prov_data = data; if (aio->a_expire != NNI_TIME_NEVER) { nni_aio_expire_add(aio); } nni_mtx_unlock(&nni_aio_lk); +} + +int +nni_aio_schedule_verify(nni_aio *aio, nni_aio_cancelfn cancelfn, void *data) +{ + + if ((!aio->a_sleep) && (aio->a_timeout == NNG_DURATION_ZERO)) { + return (NNG_ETIMEDOUT); + } + nni_aio_schedule(aio, cancelfn, data); return (0); } @@ -651,6 +666,9 @@ nni_aio_iov_advance(nni_aio *aio, size_t n) void nni_sleep_aio(nng_duration ms, nng_aio *aio) { + if (nni_aio_begin(aio) != 0) { + return; + } switch (aio->a_timeout) { case NNG_DURATION_DEFAULT: case NNG_DURATION_INFINITE: @@ -661,13 +679,15 @@ nni_sleep_aio(nng_duration ms, nng_aio *aio) // then let it still wake up early, but with NNG_ETIMEDOUT. if (ms > aio->a_timeout) { aio->a_sleep = false; - (void) nni_aio_start(aio, NULL, NULL); + (void) nni_aio_schedule(aio, NULL, NULL); return; } } aio->a_sleep = true; aio->a_expire = nni_clock() + ms; - (void) nni_aio_start(aio, NULL, NULL); + + // There is no cancellation, apart from just unexpiring. + nni_aio_schedule(aio, NULL, NULL); } void |
