From bd084078815f1481ec9fb2a308f54451833ec9ba Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 18 Oct 2024 14:58:37 -0600 Subject: [PATCH 01/11] Adjust cachelines for the user-facing version of the Nemesis queue too. --- include/qt_queue.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/qt_queue.h b/include/qt_queue.h index ae8812fb4..805469e1d 100644 --- a/include/qt_queue.h +++ b/include/qt_queue.h @@ -16,12 +16,13 @@ typedef struct qthread_queue_node_s { typedef struct qthread_queue_NEMESIS_s { /* The First Cacheline */ void *head; + uint8_t pad1[CACHELINE_WIDTH - sizeof(void *)]; void *tail; - uint8_t pad1[CACHELINE_WIDTH - (2 * sizeof(void *))]; + uint8_t pad2[CACHELINE_WIDTH - sizeof(void *)]; /* The Second Cacheline */ aligned_t length; void *shadow_head; - uint8_t pad2[CACHELINE_WIDTH - sizeof(void *) - sizeof(aligned_t)]; + uint8_t pad3[CACHELINE_WIDTH - sizeof(void *) - sizeof(aligned_t)]; } qthread_queue_NEMESIS_t; typedef struct qthread_queue_nosync_s { From 304724aa2fb8225d48622acfc51f5ff7f3e5ca17 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 18 Oct 2024 15:09:25 -0600 Subject: [PATCH 02/11] Make the queue length in the user-facing nemesis queue atomic. --- include/qt_queue.h | 2 +- src/queue.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/qt_queue.h b/include/qt_queue.h index 805469e1d..3a999e689 100644 --- a/include/qt_queue.h +++ b/include/qt_queue.h @@ -20,7 +20,7 @@ typedef struct qthread_queue_NEMESIS_s { void *tail; uint8_t pad2[CACHELINE_WIDTH - sizeof(void *)]; /* The Second Cacheline */ - aligned_t length; + _Atomic aligned_t length; void *shadow_head; uint8_t pad3[CACHELINE_WIDTH - sizeof(void *) - sizeof(aligned_t)]; } qthread_queue_NEMESIS_t; diff --git a/src/queue.c b/src/queue.c index a074b423f..09db478dc 100644 --- a/src/queue.c +++ b/src/queue.c @@ -54,7 +54,8 @@ qthread_queue_t API_FUNC qthread_queue_create(uint8_t flags, aligned_t length) { aligned_t API_FUNC qthread_queue_length(qthread_queue_t q) { assert(q); switch (q->type) { - case NEMESIS_LENGTH: return q->q.nemesis.length; + case NEMESIS_LENGTH: + return atomic_load_explicit(&q->q.nemesis.length, memory_order_relaxed); case CAPPED: return q->q.capped.membercount; default: return 0; } @@ -78,7 +79,8 @@ void INTERNAL qthread_queue_internal_enqueue(qthread_queue_t q, qthread_t *t) { break; case NEMESIS_LENGTH: qthread_queue_internal_NEMESIS_enqueue(&q->q.nemesis, t); - qthread_incr(&q->q.nemesis.length, 1); + atomic_fetch_add_explicit( + &q->q.nemesis.length, 1ull, memory_order_relaxed); break; case CAPPED: qthread_queue_internal_capped_enqueue(&q->q.capped, t); break; case MTS: QTHREAD_TRAP(); @@ -110,7 +112,8 @@ int API_FUNC qthread_queue_release_one(qthread_queue_t q) { break; case NEMESIS_LENGTH: t = qthread_queue_internal_NEMESIS_dequeue(&q->q.nemesis); - qthread_incr(&q->q.nemesis.length, (aligned_t)-1); + atomic_fetch_add_explicit( + &q->q.nemesis.length, (aligned_t)-1, memory_order_relaxed); break; case CAPPED: t = qthread_queue_internal_capped_dequeue(&q->q.capped); break; default: QTHREAD_TRAP(); @@ -142,13 +145,15 @@ int API_FUNC qthread_queue_release_all(qthread_queue_t q) { } break; case NEMESIS_LENGTH: { - aligned_t const count = q->q.nemesis.length; + aligned_t const count = + atomic_load_explicit(&q->q.nemesis.length, memory_order_relaxed); for (aligned_t c = 0; c < count; c++) { t = qthread_queue_internal_NEMESIS_dequeue(&q->q.nemesis); assert(t); if (t) { qthread_queue_internal_launch(t, shep); } } - qthread_incr(&q->q.nemesis.length, -count); + atomic_fetch_add_explicit( + &q->q.nemesis.length, -count, memory_order_relaxed); break; } case CAPPED: { From 273fdf29a30302206c92f3946dcaad65b42ff191 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 18 Oct 2024 15:17:47 -0600 Subject: [PATCH 03/11] Make one of the static variables static in the queue test to avoid a race condition found by thread sanitizer. --- test/basics/queue.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/basics/queue.c b/test/basics/queue.c index f8c459414..0cb249a62 100644 --- a/test/basics/queue.c +++ b/test/basics/queue.c @@ -1,17 +1,20 @@ -#include "argparsing.h" #include -#include +#include #include #include -aligned_t threads_in = 0; +#include "qthread/qthread.h" + +#include "argparsing.h" + +_Atomic aligned_t threads_in = 0; aligned_t awoke = 0; int THREADS_ENQUEUED = 100; qthread_queue_t the_queue; static aligned_t tobequeued(void *arg) { - qthread_incr(&threads_in, 1); + atomic_fetch_add_explicit(&threads_in, 1, memory_order_relaxed); // iprintf("\tOne thread about to join the queue...\n"); qthread_queue_join(the_queue); // iprintf("\tAwoke from the queue! %p\n", qthread_retloc()); @@ -54,7 +57,7 @@ int main(int argc, char *argv[]) { ret = qthread_readFF(NULL, &return_value); test_check(ret == QTHREAD_SUCCESS); - test_check(threads_in == 1); + test_check(atomic_load_explicit(&threads_in, memory_order_relaxed) == 1); test_check(awoke == 1); test_check(qthread_queue_length(the_queue) == 0); // This relies on approximate estimates, so it's not reliable to test here. @@ -64,7 +67,7 @@ int main(int argc, char *argv[]) { iprintf("---------------------------------------------------------\n"); iprintf("\tMULTI THREAD TEST\n\n"); - threads_in = 0; + atomic_store_explicit(&threads_in, 0, memory_order_relaxed); awoke = 0; aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED); iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED); @@ -76,7 +79,8 @@ int main(int argc, char *argv[]) { iprintf("2/6 Waiting for %u threads to queue themselves...\n", THREADS_ENQUEUED); while (qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield(); - test_check(threads_in == THREADS_ENQUEUED); + test_check(atomic_load_explicit(&threads_in, memory_order_relaxed) == + THREADS_ENQUEUED); test_check(qthread_readstate(NODE_BUSYNESS) == 1); iprintf("3/6 Releasing a single thread...\n"); From 62992cdb1c577c979c040430e8d2598258d753cf Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 18 Oct 2024 15:40:08 -0600 Subject: [PATCH 04/11] Fix various thread sanitizer error regarding race conditions in the old user-facing queue implementation. --- include/qt_queue.h | 10 +++++----- src/queue.c | 50 +++++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/include/qt_queue.h b/include/qt_queue.h index 3a999e689..31da4c196 100644 --- a/include/qt_queue.h +++ b/include/qt_queue.h @@ -9,15 +9,15 @@ */ typedef struct qthread_queue_node_s { - struct qthread_queue_node_s *next; + struct qthread_queue_node_s *_Atomic next; qthread_t *thread; } qthread_queue_node_t; typedef struct qthread_queue_NEMESIS_s { /* The First Cacheline */ - void *head; + void *_Atomic head; uint8_t pad1[CACHELINE_WIDTH - sizeof(void *)]; - void *tail; + void *_Atomic tail; uint8_t pad2[CACHELINE_WIDTH - sizeof(void *)]; /* The Second Cacheline */ _Atomic aligned_t length; @@ -26,8 +26,8 @@ typedef struct qthread_queue_NEMESIS_s { } qthread_queue_NEMESIS_t; typedef struct qthread_queue_nosync_s { - qthread_queue_node_t *head; - qthread_queue_node_t *tail; + qthread_queue_node_t *_Atomic head; + qthread_queue_node_t *_Atomic tail; } qthread_queue_nosync_t; typedef struct qthread_queue_capped_s { diff --git a/src/queue.c b/src/queue.c index 09db478dc..109c1eb20 100644 --- a/src/queue.c +++ b/src/queue.c @@ -204,13 +204,16 @@ void INTERNAL qthread_queue_internal_nosync_enqueue(qthread_queue_nosync_t *q, assert(t); node->thread = t; - node->next = NULL; - if (q->tail == NULL) { - q->head = node; + atomic_store_explicit(&node->next, NULL, memory_order_relaxed); + if (atomic_load_explicit(&q->tail, memory_order_relaxed) == NULL) { + atomic_store_explicit(&q->head, node, memory_order_relaxed); } else { - q->tail->next = node; + atomic_store_explicit( + &atomic_load_explicit(&q->tail, memory_order_relaxed)->next, + node, + memory_order_relaxed); } - q->tail = node; + atomic_store_explicit(&q->tail, node, memory_order_relaxed); } qthread_t INTERNAL * @@ -220,9 +223,12 @@ qthread_queue_internal_nosync_dequeue(qthread_queue_nosync_t *q) { assert(q); - node = q->head; + node = atomic_load_explicit(&q->head, memory_order_relaxed); if (node) { - q->head = node->next; + atomic_store_explicit( + &q->head, + atomic_load_explicit(&node->next, memory_order_relaxed), + memory_order_relaxed); t = node->thread; FREE_TQNODE(node); } @@ -236,37 +242,41 @@ void INTERNAL qthread_queue_internal_NEMESIS_enqueue(qthread_queue_NEMESIS_t *q, node = ALLOC_TQNODE(); assert(node != NULL); node->thread = t; - node->next = NULL; + atomic_store_explicit(&node->next, NULL, memory_order_relaxed); prev = qt_internal_atomic_swap_ptr((void **)&(q->tail), node); if (prev == NULL) { - q->head = node; + atomic_store_explicit(&q->head, node, memory_order_relaxed); } else { - prev->next = node; + atomic_store_explicit(&prev->next, node, memory_order_relaxed); } } qthread_t INTERNAL * qthread_queue_internal_NEMESIS_dequeue(qthread_queue_NEMESIS_t *q) { if (!q->shadow_head) { - if (!q->head) { return NULL; } - q->shadow_head = q->head; - q->head = NULL; + if (!atomic_load_explicit(&q->head, memory_order_relaxed)) { return NULL; } + q->shadow_head = atomic_load_explicit(&q->head, memory_order_relaxed); + atomic_store_explicit(&q->head, NULL, memory_order_relaxed); } qthread_queue_node_t *const dequeued = q->shadow_head; if (dequeued != NULL) { - if (dequeued->next != NULL) { - q->shadow_head = dequeued->next; - dequeued->next = NULL; + if (atomic_load_explicit(&dequeued->next, memory_order_relaxed) != NULL) { + q->shadow_head = + atomic_load_explicit(&dequeued->next, memory_order_relaxed); + atomic_store_explicit(&dequeued->next, NULL, memory_order_relaxed); } else { qthread_queue_node_t *old; q->shadow_head = NULL; - old = qthread_cas_ptr(&(q->tail), dequeued, NULL); + old = qthread_cas_ptr((void **)&(q->tail), dequeued, NULL); if (old != dequeued) { - while (dequeued->next == NULL) SPINLOCK_BODY(); - q->shadow_head = dequeued->next; - dequeued->next = NULL; + while (atomic_load_explicit(&dequeued->next, memory_order_relaxed) == + NULL) + SPINLOCK_BODY(); + q->shadow_head = + atomic_load_explicit(&dequeued->next, memory_order_relaxed); + atomic_store_explicit(&dequeued->next, NULL, memory_order_relaxed); } } qthread_t *retval = dequeued->thread; From 32a66b599d05cb3f22c4d831e90347100ab8ced8 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 18 Oct 2024 15:41:52 -0600 Subject: [PATCH 05/11] Skip thread sanitizer tests that currently don't pass due to lack of sanitizer interop in the threadqueues. It's better to at least test what we have working to avoid regressions. --- .circleci/config.yml | 22 +++++++++++++++++++++- .github/workflows/CI.yml | 8 +++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dadb14514..3c3fb1c66 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -96,7 +96,9 @@ jobs: make -j2 make tests -j2 - run: - command: timeout --foreground -k 10s 4m make check + command: | + if [[ "<< parameters.sanitizer >>" == "thread" ]]; then cd test/basics; fi + timeout --foreground -k 10s 4m make check no_output_timeout: 120s arm_acfl: @@ -238,6 +240,24 @@ workflows: - scheduler: distrib topology: hwloc sanitizer: memory + - scheduler: sherwood + topology: 'no' + sanitizer: thread + - scheduler: sherwood + topology: hwloc + sanitizer: thread + - scheduler: sherwood + topology: binders + sanitizer: thread + - scheduler: distrib + topology: 'no' + sanitizer: thread + - scheduler: distrib + topology: hwloc + sanitizer: thread + - scheduler: distrib + topology: binders + sanitizer: thread - arm_acfl: matrix: parameters: diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bf4d1baae..0d10e2527 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -245,6 +245,10 @@ jobs: topology: hwloc - sanitizer: memory topology: binders + - sanitizer: thread + scheduler: sherwood + - sanitizer: thread + scheduler: distrib env: CC: clang-19 CXX: clang++-19 @@ -276,7 +280,9 @@ jobs: make -j2 make tests -j2 - name: make check - run: timeout -k 10s --foreground 8m make check + run: | + if [[ "${{ matrix.sanitizer }}" == "thread" ]]; then cd test/basics; fi + timeout -k 10s --foreground 8m make check timeout-minutes: 9 linux-thorough: From 2d3f185a9a21bb6852738b52ea292d0752c3fdde Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 10:05:09 -0600 Subject: [PATCH 06/11] Print out the test suite logs on failure in CI. --- .circleci/config.yml | 13 +++++++------ .cirrus.yml | 8 ++++---- .github/workflows/CI.yml | 17 +++++++++-------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3c3fb1c66..71c68ed0a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,7 @@ jobs: make -j2 make tests -j2 - run: - command: timeout --foreground -k 10s 2m make check + command: timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 180s arm_clang: @@ -58,7 +58,7 @@ jobs: make -j2 make tests -j2 - run: - command: timeout --foreground -k 10s 2m make check + command: timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 180s arm_sanitizers: @@ -97,8 +97,9 @@ jobs: make tests -j2 - run: command: | + export QTHREADS_DIR="$(pwd)" if [[ "<< parameters.sanitizer >>" == "thread" ]]; then cd test/basics; fi - timeout --foreground -k 10s 4m make check + timeout --foreground -k 10s 4m make check || ( cd $QTHREADS_DIR && cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 120s arm_acfl: @@ -136,7 +137,7 @@ jobs: - run: command: | export PATH=$PATH:/opt/arm/arm-linux-compiler-24.04_Ubuntu-22.04/bin - timeout --foreground -k 10s 4m make check + timeout --foreground -k 10s 4m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 180s nvc: @@ -175,7 +176,7 @@ jobs: make tests -j2 - run: command: | - timeout --foreground -k 10s 4m make check + timeout --foreground -k 10s 4m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 180s musl: @@ -199,7 +200,7 @@ jobs: make -j2 make tests -j2 - run: - command: make check + command: make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) no_output_timeout: 180s workflows: diff --git a/.cirrus.yml b/.cirrus.yml index 7154de16b..8ca9b0e8f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -45,7 +45,7 @@ osx_m1_task: # commented example for how to get a backtrace from CI usign lldb on OSX: #echo "settings set target.process.stop-on-exec false" > ~/.lldbinit #QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 lldb bash --batch --one-line 'process launch' --one-line-on-crash 'bt' --one-line-on-crash 'quit' -- test/basics/hello_world - gtimeout --foreground 3m make check + gtimeout --foreground 3m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) freebsd_task: freebsd_instance: @@ -68,7 +68,7 @@ freebsd_task: make -j$CIRRUS_CPU make tests -j$CIRRUS_CPU test_script: | - gtimeout --foreground -k 10s 2m make check + gtimeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) arm_linux_task: arm_container: @@ -112,7 +112,7 @@ arm_linux_task: make -j$CIRRUS_CPU make tests -j$CIRRUS_CPU test_script: | - timeout --foreground -k 10s 2m make check + timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) arm_linux_clang_task: arm_container: @@ -168,5 +168,5 @@ arm_linux_clang_task: make -j$CIRRUS_CPU make tests -j$CIRRUS_CPU test_script: | - timeout --foreground -k 10s 2m make check + timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0d10e2527..426f3753f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -35,7 +35,7 @@ jobs: make -j2 make tests -j2 - name: make check - run: timeout -k 10s --foreground 3m make check + run: timeout -k 10s --foreground 3m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 4 linux-clang: @@ -77,7 +77,7 @@ jobs: make -j2 make tests -j2 - name: make check - run: timeout -k 10s --foreground 6m make check + run: timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 7 linux-icx: @@ -116,7 +116,7 @@ jobs: - name: make check run: | source /opt/intel/oneapi/setvars.sh - timeout -k 10s --foreground 6m make check + timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 7 linux-icc: @@ -157,7 +157,7 @@ jobs: - name: make check run: | source /opt/intel/oneapi/setvars.sh - timeout -k 10s --foreground 6m make check + timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 7 linux-aocc: @@ -192,7 +192,7 @@ jobs: make tests -j2 - name: make check run: | - timeout -k 10s --foreground 6m make check + timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 7 mac: @@ -228,7 +228,7 @@ jobs: # commented example for how to get a backtrace from CI usign lldb on OSX: #echo "settings set target.process.stop-on-exec false" > ~/.lldbinit #QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 lldb bash --batch --one-line 'process launch' --one-line-on-crash 'bt' --one-line-on-crash 'quit' -- test/basics/hello_world - gtimeout -k 10s --foreground 8m make check + gtimeout -k 10s --foreground 8m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 9 sanitizers: @@ -281,8 +281,9 @@ jobs: make tests -j2 - name: make check run: | + export QTHREADS_DIR="$(pwd)" if [[ "${{ matrix.sanitizer }}" == "thread" ]]; then cd test/basics; fi - timeout -k 10s --foreground 8m make check + timeout -k 10s --foreground 8m make check || ( cd $QTHREADS_DIR && cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 9 linux-thorough: @@ -325,7 +326,7 @@ jobs: make -j2 make tests -j2 - name: make check - run: timeout -k 10s --foreground 6m make check + run: timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) timeout-minutes: 7 clang-format: From 048912ae4ea26d1c80da9314cfda032a868757d4 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 10:07:41 -0600 Subject: [PATCH 07/11] Increase timeout limit for cirrus arm-linux builds. --- .cirrus.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 8ca9b0e8f..eb3b1620b 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -112,7 +112,7 @@ arm_linux_task: make -j$CIRRUS_CPU make tests -j$CIRRUS_CPU test_script: | - timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) + timeout --foreground -k 10s 5m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) arm_linux_clang_task: arm_container: @@ -168,5 +168,5 @@ arm_linux_clang_task: make -j$CIRRUS_CPU make tests -j$CIRRUS_CPU test_script: | - timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) + timeout --foreground -k 10s 5m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 ) From 25e94bd23a0488cbeb0e1f557bedb11e782c326d Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 10:58:03 -0600 Subject: [PATCH 08/11] Fix another thread sanitizer warning regarding the nemesis advisory queuelen. --- src/threadqueues/nemesis_threadqueues.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/threadqueues/nemesis_threadqueues.c b/src/threadqueues/nemesis_threadqueues.c index cee6ffbcd..85a27c5c0 100644 --- a/src/threadqueues/nemesis_threadqueues.c +++ b/src/threadqueues/nemesis_threadqueues.c @@ -58,7 +58,7 @@ struct _qt_threadqueue { alignas(CACHELINE_WIDTH) NEMESIS_queue q; /* the following is for estimating a queue's "busy" level, and is not * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; + _Atomic saligned_t advisory_queuelen; #ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE uint32_t frustration; QTHREAD_COND_DECL(trigger); @@ -180,7 +180,8 @@ void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */ if (node) { qthread_t *retval = node->thread; assert(atomic_load_explicit(&node->next, memory_order_relaxed) == NULL); - (void)qthread_incr(&(q->advisory_queuelen), (aligned_t)-1); + atomic_fetch_add_explicit( + &q->advisory_queuelen, (aligned_t)-1, memory_order_relaxed); FREE_TQNODE(node); qthread_thread_free(retval); } else { @@ -231,7 +232,7 @@ void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, } else { atomic_store_explicit(&prev->next, node, memory_order_relaxed); } - (void)qthread_incr(&(q->advisory_queuelen), 1); + atomic_fetch_add_explicit(&q->advisory_queuelen, 1, memory_order_relaxed); #ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE /* awake waiter */ /* Yes, this needs to be here, to prevent reading frustration being hoisted @@ -256,7 +257,7 @@ void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{ */ assert(q); - return q->advisory_queuelen; + return atomic_load_explicit(&q->advisory_queuelen, memory_order_relaxed); } /*}}} */ qthread_t INTERNAL * @@ -297,7 +298,8 @@ qt_scheduler_get_thread(qt_threadqueue_t *q, } assert(node); assert(atomic_load_explicit(&node->next, memory_order_relaxed) == NULL); - (void)qthread_incr(&(q->advisory_queuelen), (aligned_t)-1); + atomic_fetch_add_explicit( + &q->advisory_queuelen, (aligned_t)-1, memory_order_relaxed); retval = node->thread; FREE_TQNODE(node); return retval; From 2945351be4b9a30601798bc64861113b2f4a76ad Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 11:10:28 -0600 Subject: [PATCH 09/11] Fix another thread sanitizer error regarding detecting a worker's current qthread. --- include/qt_shepherd_innards.h | 2 +- src/qthread.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/qt_shepherd_innards.h b/include/qt_shepherd_innards.h index 19b073a0e..f2833227c 100644 --- a/include/qt_shepherd_innards.h +++ b/include/qt_shepherd_innards.h @@ -36,7 +36,7 @@ struct qthread_worker_s { qthread_shepherd_t *shepherd; struct qthread_s **nostealbuffer; struct qthread_s **stealbuffer; - qthread_t *current; + qthread_t *_Atomic current; qthread_worker_id_t unique_id; qthread_worker_id_t worker_id; qthread_worker_id_t packed_worker_id; diff --git a/src/qthread.c b/src/qthread.c index 9ede8ab69..7cf8d63db 100644 --- a/src/qthread.c +++ b/src/qthread.c @@ -334,7 +334,7 @@ static void *qthread_master(void *arg) { qt_threadqueue_t *threadqueue; qt_threadqueue_private_t *localqueue = NULL; qthread_t *t; - qthread_t **current; + qthread_t *_Atomic *current; int done = 0; assert(me != NULL); @@ -444,8 +444,9 @@ static void *qthread_master(void *arg) { #endif qthread_exec(t, &my_context); - t = *current; // necessary for direct-swap sanity - *current = NULL; // neessary for "queue sanity" + t = *current; // necessary for direct-swap sanity + atomic_store_explicit( + current, NULL, memory_order_relaxed); // neessary for "queue sanity" /* now clean up, based on the thread's state */ switch (atomic_load_explicit(&t->thread_state, memory_order_relaxed)) { @@ -1340,7 +1341,8 @@ qthread_readstate(const enum introspective_state type) { /*{{{ */ sum += qt_threadqueue_advisory_queuelen(sheps[s].ready); qthread_worker_t const *wkrs = sheps[s].workers; for (qthread_worker_id_t w = 0; w < qlib->nworkerspershep; w++) { - sum += (wkrs[w].current != NULL); + sum += (atomic_load_explicit(&wkrs[w].current, + memory_order_relaxed) != NULL); } } return sum; @@ -1351,7 +1353,8 @@ qthread_readstate(const enum introspective_state type) { /*{{{ */ for (qthread_shepherd_id_t s = 0; s < qlib->nshepherds; s++) { qthread_worker_t const *wkrs = sheps[s].workers; for (qthread_worker_id_t w = 0; w < qlib->nworkerspershep; w++) { - count += (wkrs[w].current != NULL); + count += (atomic_load_explicit(&wkrs[w].current, + memory_order_relaxed) != NULL); } } return count; From 138c8f53863c7e2a12620dfe3b997b41eb333e93 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 11:18:33 -0600 Subject: [PATCH 10/11] Fix yet another minor race condition in the non-threadqueue queue data structure. --- include/qt_queue.h | 2 +- src/queue.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/qt_queue.h b/include/qt_queue.h index 31da4c196..a89c86efe 100644 --- a/include/qt_queue.h +++ b/include/qt_queue.h @@ -10,7 +10,7 @@ typedef struct qthread_queue_node_s { struct qthread_queue_node_s *_Atomic next; - qthread_t *thread; + qthread_t *_Atomic thread; } qthread_queue_node_t; typedef struct qthread_queue_NEMESIS_s { diff --git a/src/queue.c b/src/queue.c index 109c1eb20..e4357a38b 100644 --- a/src/queue.c +++ b/src/queue.c @@ -203,7 +203,7 @@ void INTERNAL qthread_queue_internal_nosync_enqueue(qthread_queue_nosync_t *q, assert(q); assert(t); - node->thread = t; + atomic_store_explicit(&node->thread, t, memory_order_relaxed); atomic_store_explicit(&node->next, NULL, memory_order_relaxed); if (atomic_load_explicit(&q->tail, memory_order_relaxed) == NULL) { atomic_store_explicit(&q->head, node, memory_order_relaxed); @@ -229,7 +229,7 @@ qthread_queue_internal_nosync_dequeue(qthread_queue_nosync_t *q) { &q->head, atomic_load_explicit(&node->next, memory_order_relaxed), memory_order_relaxed); - t = node->thread; + t = atomic_load_explicit(&node->thread, memory_order_relaxed); FREE_TQNODE(node); } return t; @@ -241,7 +241,7 @@ void INTERNAL qthread_queue_internal_NEMESIS_enqueue(qthread_queue_NEMESIS_t *q, node = ALLOC_TQNODE(); assert(node != NULL); - node->thread = t; + atomic_store_explicit(&node->thread, t, memory_order_relaxed); atomic_store_explicit(&node->next, NULL, memory_order_relaxed); prev = qt_internal_atomic_swap_ptr((void **)&(q->tail), node); @@ -279,7 +279,8 @@ qthread_queue_internal_NEMESIS_dequeue(qthread_queue_NEMESIS_t *q) { atomic_store_explicit(&dequeued->next, NULL, memory_order_relaxed); } } - qthread_t *retval = dequeued->thread; + qthread_t *retval = + atomic_load_explicit(&dequeued->thread, memory_order_relaxed); FREE_TQNODE(dequeued); return retval; } else { From e3e1f8f9fb3683c11341ce925238b33bdeffd2e0 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 21 Oct 2024 11:40:41 -0600 Subject: [PATCH 11/11] Disable the tsan/nemesis hwloc and binders builds in CI since we're getting un-suppress-able false positives there too. --- .github/workflows/CI.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 426f3753f..b0d250f4c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -249,6 +249,10 @@ jobs: scheduler: sherwood - sanitizer: thread scheduler: distrib + - sanitizer: thread + topology: hwloc + - sanitizer: thread + topology: binders env: CC: clang-19 CXX: clang++-19