From e623dedab28a1fec6270c05f9643e68bfb98b7c3 Mon Sep 17 00:00:00 2001 From: Garrett D'Amore Date: Sat, 25 Nov 2023 17:35:35 -0800 Subject: fixes #1523 rare SEGV in sub nni_list_remove Credit goes to Wu Xuan (@willwu1217) for diagnosing and proposing a fix as part of #1695. This change takes a revised approach that avoids adding extra memory by reusing the reap node; it is also slightly faster, as we do not need to update both pointers in the linked list. As part of this, a new internal API, nni_aio_completions, is introduced. In all likelihood we will be able to use this to solve some similar crashes in other areas of the code. --- src/core/aio.c | 33 +++++++++++++++++++++++++++++++++ src/core/aio.h | 26 +++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'src/core') diff --git a/src/core/aio.c b/src/core/aio.c index 564e91a3..e849b33d 100644 --- a/src/core/aio.c +++ b/src/core/aio.c @@ -508,6 +508,39 @@ nni_aio_list_active(nni_aio *aio) return (nni_list_node_active(&aio->a_prov_node)); } +// completions list. +// Implementation note: in order to avoid wasting space, we +// reuse the reap node -- which will be inactive here. 
+void +nni_aio_completions_init(nni_aio_completions *clp) +{ + *clp = NULL; +} + +void +nni_aio_completions_add(nni_aio_completions *clp, nni_aio *aio, int result, size_t count) +{ + NNI_ASSERT(!nni_aio_list_active(aio)); + aio->a_reap_node.rn_next = *clp; + aio->a_result = result; + aio->a_count = count; + *clp = aio; +} + +void +nni_aio_completions_run(nni_aio_completions *clp) +{ + nni_aio *aio; + nni_aio *cl = *clp; + *clp = NULL; + + while ((aio = cl) != NULL) { + cl = (void *)aio->a_reap_node.rn_next; + aio->a_reap_node.rn_next = NULL; + nni_aio_finish_sync(aio, aio->a_result, aio->a_count); + } +} + static void nni_aio_expire_add(nni_aio *aio) { diff --git a/src/core/aio.h b/src/core/aio.h index 6315e90c..a2ebf70a 100644 --- a/src/core/aio.h +++ b/src/core/aio.h @@ -1,5 +1,5 @@ // -// Copyright 2022 Staysail Systems, Inc. +// Copyright 2023 Staysail Systems, Inc. // Copyright 2018 Capitar IT Group BV // // This software is supplied under the terms of the MIT License, a @@ -166,6 +166,30 @@ extern int nni_aio_schedule(nni_aio *, nni_aio_cancel_fn, void *); extern void nni_sleep_aio(nni_duration, nni_aio *); +// nni_aio_completions is a list used to hold aios after removing them +// from an active work queue, so that their completions can +// be run in a deferred manner. These lists are simple, and intended +// to be used as local variables. It's important to initialize the +// list before using it. Also, any AIO added to a completion list must +// not be in active use anywhere. +typedef void *nni_aio_completions; + +// nni_aio_completions_init just initializes a completions list. +// This just sets the pointed value to NULL. +extern void nni_aio_completions_init(nni_aio_completions *); + +// nni_aio_completions_run runs nni_aio_finish_sync for all the aio objects +// that have been added to the completions. The result code and count used +// are those supplied in nni_aio_completions_add. Callers should not hold +// locks when calling this. 
+extern void nni_aio_completions_run(nni_aio_completions *); + +// nni_aio_completions_add adds an aio (with the result code and length as +// appropriate) to the completion list. This should be done while the +// appropriate lock is held. The aio must not be scheduled. +extern void nni_aio_completions_add(nni_aio_completions *, nni_aio *, + int, size_t); + extern int nni_aio_sys_init(void); extern void nni_aio_sys_fini(void); -- cgit v1.2.3-70-g09d2