diff --git a/.circleci/config.yml b/.circleci/config.yml
index dadb14514..71c68ed0a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -27,7 +27,7 @@ jobs:
           make -j2
           make tests -j2
       - run:
-          command: timeout --foreground -k 10s 2m make check
+          command: timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 180s
 
   arm_clang:
@@ -58,7 +58,7 @@ jobs:
           make -j2
           make tests -j2
       - run:
-          command: timeout --foreground -k 10s 2m make check
+          command: timeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 180s
 
   arm_sanitizers:
@@ -96,7 +96,10 @@ jobs:
           make -j2
           make tests -j2
       - run:
-          command: timeout --foreground -k 10s 4m make check
+          command: |
+              export QTHREADS_DIR="$(pwd)"
+              if [[ "<< parameters.sanitizer >>" == "thread" ]]; then cd test/basics; fi
+              timeout --foreground -k 10s 4m make check || ( cd $QTHREADS_DIR && cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 120s
 
   arm_acfl:
@@ -134,7 +137,7 @@ jobs:
       - run:
           command: |
               export PATH=$PATH:/opt/arm/arm-linux-compiler-24.04_Ubuntu-22.04/bin
-              timeout --foreground -k 10s 4m make check
+              timeout --foreground -k 10s 4m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 180s
 
   nvc:
@@ -173,7 +176,7 @@ jobs:
           make tests -j2
       - run:
           command: |
-              timeout --foreground -k 10s 4m make check
+              timeout --foreground -k 10s 4m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 180s
 
   musl:
@@ -197,7 +200,7 @@ jobs:
           make -j2
           make tests -j2
       - run:
-          command: make check
+          command: make check  || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
           no_output_timeout: 180s
 
 workflows:
@@ -238,6 +241,24 @@ workflows:
               - scheduler: distrib
                 topology: hwloc
                 sanitizer: memory
+              - scheduler: sherwood
+                topology: 'no'
+                sanitizer: thread
+              - scheduler: sherwood
+                topology: hwloc
+                sanitizer: thread
+              - scheduler: sherwood
+                topology: binders
+                sanitizer: thread
+              - scheduler: distrib
+                topology: 'no'
+                sanitizer: thread
+              - scheduler: distrib
+                topology: hwloc
+                sanitizer: thread
+              - scheduler: distrib
+                topology: binders
+                sanitizer: thread
       - arm_acfl:
           matrix:
             parameters:
diff --git a/.cirrus.yml b/.cirrus.yml
index 7154de16b..eb3b1620b 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -45,7 +45,7 @@ osx_m1_task:
-    # commented example for how to get a backtrace from CI usign lldb on OSX:
+    # commented example for how to get a backtrace from CI using lldb on OSX:
     #echo "settings set target.process.stop-on-exec false" > ~/.lldbinit
     #QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 lldb bash --batch --one-line 'process launch' --one-line-on-crash 'bt' --one-line-on-crash 'quit' -- test/basics/hello_world
-    gtimeout --foreground 3m make check
+    gtimeout --foreground 3m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
 
 freebsd_task:
   freebsd_instance:
@@ -68,7 +68,7 @@ freebsd_task:
     make -j$CIRRUS_CPU
     make tests -j$CIRRUS_CPU
   test_script: |
-    gtimeout --foreground -k 10s 2m make check
+    gtimeout --foreground -k 10s 2m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
 
 arm_linux_task:
   arm_container:
@@ -112,7 +112,7 @@ arm_linux_task:
     make -j$CIRRUS_CPU
     make tests -j$CIRRUS_CPU
   test_script: |
-    timeout --foreground -k 10s 2m make check
+    timeout --foreground -k 10s 5m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
 
 arm_linux_clang_task:
   arm_container:
@@ -168,5 +168,5 @@ arm_linux_clang_task:
     make -j$CIRRUS_CPU
     make tests -j$CIRRUS_CPU
   test_script: |
-    timeout --foreground -k 10s 2m make check
+    timeout --foreground -k 10s 5m make check  || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
 
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index bf4d1baae..b0d250f4c 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -35,7 +35,7 @@ jobs:
         make -j2
         make tests -j2
     - name: make check
-      run: timeout -k 10s --foreground 3m make check
+      run: timeout -k 10s --foreground 3m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 4
 
   linux-clang:
@@ -77,7 +77,7 @@ jobs:
         make -j2
         make tests -j2
     - name: make check
-      run: timeout -k 10s --foreground 6m make check
+      run: timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 7
 
   linux-icx:
@@ -116,7 +116,7 @@ jobs:
     - name: make check
       run: |
         source /opt/intel/oneapi/setvars.sh
-        timeout -k 10s --foreground 6m make check
+        timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 7
 
   linux-icc:
@@ -157,7 +157,7 @@ jobs:
     - name: make check
       run: |
         source /opt/intel/oneapi/setvars.sh
-        timeout -k 10s --foreground 6m make check
+        timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 7
 
   linux-aocc:
@@ -192,7 +192,7 @@ jobs:
         make tests -j2
     - name: make check
       run: |
-        timeout -k 10s --foreground 6m make check
+        timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 7
 
   mac:
@@ -228,7 +228,7 @@ jobs:
-        # commented example for how to get a backtrace from CI usign lldb on OSX:
+        # commented example for how to get a backtrace from CI using lldb on OSX:
         #echo "settings set target.process.stop-on-exec false" > ~/.lldbinit
         #QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 lldb bash --batch --one-line 'process launch' --one-line-on-crash 'bt' --one-line-on-crash 'quit' -- test/basics/hello_world
-        gtimeout -k 10s --foreground 8m make check
+        gtimeout -k 10s --foreground 8m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 9
 
   sanitizers:
@@ -245,6 +245,14 @@ jobs:
             topology: hwloc
           - sanitizer: memory
             topology: binders
+          - sanitizer: thread
+            scheduler: sherwood
+          - sanitizer: thread
+            scheduler: distrib
+          - sanitizer: thread
+            topology: hwloc
+          - sanitizer: thread
+            topology: binders
     env:
       CC: clang-19
       CXX: clang++-19
@@ -276,7 +284,10 @@ jobs:
         make -j2
         make tests -j2
     - name: make check
-      run: timeout -k 10s --foreground 8m make check
+      run: |
+        export QTHREADS_DIR="$(pwd)"
+        if [[ "${{ matrix.sanitizer }}" == "thread" ]]; then cd test/basics; fi
+        timeout -k 10s --foreground 8m make check || ( cd $QTHREADS_DIR && cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 9
 
   linux-thorough:
@@ -319,7 +330,7 @@ jobs:
         make -j2
         make tests -j2
     - name: make check
-      run: timeout -k 10s --foreground 6m make check
+      run: timeout -k 10s --foreground 6m make check || ( cat test/basics/test-suite.log && cat test/features/test-suite.log && cat test/stress/test-suite.log && exit 1 )
       timeout-minutes: 7
 
   clang-format:
diff --git a/include/qt_queue.h b/include/qt_queue.h
index ae8812fb4..a89c86efe 100644
--- a/include/qt_queue.h
+++ b/include/qt_queue.h
@@ -9,24 +9,25 @@
  */
 
 typedef struct qthread_queue_node_s {
-  struct qthread_queue_node_s *next;
-  qthread_t *thread;
+  struct qthread_queue_node_s *_Atomic next;
+  qthread_t *_Atomic thread;
 } qthread_queue_node_t;
 
 typedef struct qthread_queue_NEMESIS_s {
-  /* The First Cacheline */
-  void *head;
-  void *tail;
-  uint8_t pad1[CACHELINE_WIDTH - (2 * sizeof(void *))];
+  /* The First and Second Cachelines: head and tail, each padded separately */
+  void *_Atomic head;
+  uint8_t pad1[CACHELINE_WIDTH - sizeof(void *)];
+  void *_Atomic tail;
+  uint8_t pad2[CACHELINE_WIDTH - sizeof(void *)];
-  /* The Second Cacheline */
-  aligned_t length;
+  /* The Third Cacheline */
+  _Atomic aligned_t length;
   void *shadow_head;
-  uint8_t pad2[CACHELINE_WIDTH - sizeof(void *) - sizeof(aligned_t)];
+  uint8_t pad3[CACHELINE_WIDTH - sizeof(void *) - sizeof(aligned_t)];
 } qthread_queue_NEMESIS_t;
 
 typedef struct qthread_queue_nosync_s {
-  qthread_queue_node_t *head;
-  qthread_queue_node_t *tail;
+  qthread_queue_node_t *_Atomic head;
+  qthread_queue_node_t *_Atomic tail;
 } qthread_queue_nosync_t;
 
 typedef struct qthread_queue_capped_s {
diff --git a/include/qt_shepherd_innards.h b/include/qt_shepherd_innards.h
index 19b073a0e..f2833227c 100644
--- a/include/qt_shepherd_innards.h
+++ b/include/qt_shepherd_innards.h
@@ -36,7 +36,7 @@ struct qthread_worker_s {
   qthread_shepherd_t *shepherd;
   struct qthread_s **nostealbuffer;
   struct qthread_s **stealbuffer;
-  qthread_t *current;
+  qthread_t *_Atomic current;
   qthread_worker_id_t unique_id;
   qthread_worker_id_t worker_id;
   qthread_worker_id_t packed_worker_id;
diff --git a/src/qthread.c b/src/qthread.c
index 9ede8ab69..7cf8d63db 100644
--- a/src/qthread.c
+++ b/src/qthread.c
@@ -334,7 +334,7 @@ static void *qthread_master(void *arg) {
   qt_threadqueue_t *threadqueue;
   qt_threadqueue_private_t *localqueue = NULL;
   qthread_t *t;
-  qthread_t **current;
+  qthread_t *_Atomic *current;
   int done = 0;
 
   assert(me != NULL);
@@ -444,8 +444,9 @@ static void *qthread_master(void *arg) {
 #endif
         qthread_exec(t, &my_context);
 
-        t = *current;    // necessary for direct-swap sanity
-        *current = NULL; // neessary for "queue sanity"
+        t = *current; // necessary for direct-swap sanity
+        atomic_store_explicit(
+          current, NULL, memory_order_relaxed); // necessary for "queue sanity"
 
         /* now clean up, based on the thread's state */
         switch (atomic_load_explicit(&t->thread_state, memory_order_relaxed)) {
@@ -1340,7 +1341,8 @@ qthread_readstate(const enum introspective_state type) { /*{{{ */
         sum += qt_threadqueue_advisory_queuelen(sheps[s].ready);
         qthread_worker_t const *wkrs = sheps[s].workers;
         for (qthread_worker_id_t w = 0; w < qlib->nworkerspershep; w++) {
-          sum += (wkrs[w].current != NULL);
+          sum += (atomic_load_explicit(&wkrs[w].current,
+                                       memory_order_relaxed) != NULL);
         }
       }
       return sum;
@@ -1351,7 +1353,8 @@ qthread_readstate(const enum introspective_state type) { /*{{{ */
       for (qthread_shepherd_id_t s = 0; s < qlib->nshepherds; s++) {
         qthread_worker_t const *wkrs = sheps[s].workers;
         for (qthread_worker_id_t w = 0; w < qlib->nworkerspershep; w++) {
-          count += (wkrs[w].current != NULL);
+          count += (atomic_load_explicit(&wkrs[w].current,
+                                         memory_order_relaxed) != NULL);
         }
       }
       return count;
diff --git a/src/queue.c b/src/queue.c
index a074b423f..e4357a38b 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -54,7 +54,8 @@ qthread_queue_t API_FUNC qthread_queue_create(uint8_t flags, aligned_t length) {
 aligned_t API_FUNC qthread_queue_length(qthread_queue_t q) {
   assert(q);
   switch (q->type) {
-    case NEMESIS_LENGTH: return q->q.nemesis.length;
+    case NEMESIS_LENGTH:
+      return atomic_load_explicit(&q->q.nemesis.length, memory_order_relaxed);
     case CAPPED: return q->q.capped.membercount;
     default: return 0;
   }
@@ -78,7 +79,8 @@ void INTERNAL qthread_queue_internal_enqueue(qthread_queue_t q, qthread_t *t) {
       break;
     case NEMESIS_LENGTH:
       qthread_queue_internal_NEMESIS_enqueue(&q->q.nemesis, t);
-      qthread_incr(&q->q.nemesis.length, 1);
+      atomic_fetch_add_explicit(
+        &q->q.nemesis.length, 1ull, memory_order_relaxed);
       break;
     case CAPPED: qthread_queue_internal_capped_enqueue(&q->q.capped, t); break;
     case MTS: QTHREAD_TRAP();
@@ -110,7 +112,8 @@ int API_FUNC qthread_queue_release_one(qthread_queue_t q) {
       break;
     case NEMESIS_LENGTH:
       t = qthread_queue_internal_NEMESIS_dequeue(&q->q.nemesis);
-      qthread_incr(&q->q.nemesis.length, (aligned_t)-1);
+      atomic_fetch_add_explicit(
+        &q->q.nemesis.length, (aligned_t)-1, memory_order_relaxed);
       break;
     case CAPPED: t = qthread_queue_internal_capped_dequeue(&q->q.capped); break;
     default: QTHREAD_TRAP();
@@ -142,13 +145,15 @@ int API_FUNC qthread_queue_release_all(qthread_queue_t q) {
       }
       break;
     case NEMESIS_LENGTH: {
-      aligned_t const count = q->q.nemesis.length;
+      aligned_t const count =
+        atomic_load_explicit(&q->q.nemesis.length, memory_order_relaxed);
       for (aligned_t c = 0; c < count; c++) {
         t = qthread_queue_internal_NEMESIS_dequeue(&q->q.nemesis);
         assert(t);
         if (t) { qthread_queue_internal_launch(t, shep); }
       }
-      qthread_incr(&q->q.nemesis.length, -count);
+      atomic_fetch_add_explicit(
+        &q->q.nemesis.length, -count, memory_order_relaxed);
       break;
     }
     case CAPPED: {
@@ -198,14 +203,17 @@ void INTERNAL qthread_queue_internal_nosync_enqueue(qthread_queue_nosync_t *q,
   assert(q);
   assert(t);
 
-  node->thread = t;
-  node->next = NULL;
-  if (q->tail == NULL) {
-    q->head = node;
+  atomic_store_explicit(&node->thread, t, memory_order_relaxed);
+  atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
+  if (atomic_load_explicit(&q->tail, memory_order_relaxed) == NULL) {
+    atomic_store_explicit(&q->head, node, memory_order_relaxed);
   } else {
-    q->tail->next = node;
+    atomic_store_explicit(
+      &atomic_load_explicit(&q->tail, memory_order_relaxed)->next,
+      node,
+      memory_order_relaxed);
   }
-  q->tail = node;
+  atomic_store_explicit(&q->tail, node, memory_order_relaxed);
 }
 
 qthread_t INTERNAL *
@@ -215,10 +223,13 @@ qthread_queue_internal_nosync_dequeue(qthread_queue_nosync_t *q) {
 
   assert(q);
 
-  node = q->head;
+  node = atomic_load_explicit(&q->head, memory_order_relaxed);
   if (node) {
-    q->head = node->next;
-    t = node->thread;
+    atomic_store_explicit(
+      &q->head,
+      atomic_load_explicit(&node->next, memory_order_relaxed),
+      memory_order_relaxed);
+    t = atomic_load_explicit(&node->thread, memory_order_relaxed);
     FREE_TQNODE(node);
   }
   return t;
@@ -230,41 +241,46 @@ void INTERNAL qthread_queue_internal_NEMESIS_enqueue(qthread_queue_NEMESIS_t *q,
 
   node = ALLOC_TQNODE();
   assert(node != NULL);
-  node->thread = t;
-  node->next = NULL;
+  atomic_store_explicit(&node->thread, t, memory_order_relaxed);
+  atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
 
   prev = qt_internal_atomic_swap_ptr((void **)&(q->tail), node);
   if (prev == NULL) {
-    q->head = node;
+    atomic_store_explicit(&q->head, node, memory_order_relaxed);
   } else {
-    prev->next = node;
+    atomic_store_explicit(&prev->next, node, memory_order_relaxed);
   }
 }
 
 qthread_t INTERNAL *
 qthread_queue_internal_NEMESIS_dequeue(qthread_queue_NEMESIS_t *q) {
   if (!q->shadow_head) {
-    if (!q->head) { return NULL; }
-    q->shadow_head = q->head;
-    q->head = NULL;
+    if (!atomic_load_explicit(&q->head, memory_order_relaxed)) { return NULL; }
+    q->shadow_head = atomic_load_explicit(&q->head, memory_order_relaxed);
+    atomic_store_explicit(&q->head, NULL, memory_order_relaxed);
   }
 
   qthread_queue_node_t *const dequeued = q->shadow_head;
   if (dequeued != NULL) {
-    if (dequeued->next != NULL) {
-      q->shadow_head = dequeued->next;
-      dequeued->next = NULL;
+    if (atomic_load_explicit(&dequeued->next, memory_order_relaxed) != NULL) {
+      q->shadow_head =
+        atomic_load_explicit(&dequeued->next, memory_order_relaxed);
+      atomic_store_explicit(&dequeued->next, NULL, memory_order_relaxed);
     } else {
       qthread_queue_node_t *old;
       q->shadow_head = NULL;
-      old = qthread_cas_ptr(&(q->tail), dequeued, NULL);
+      old = qthread_cas_ptr((void **)&(q->tail), dequeued, NULL);
       if (old != dequeued) {
-        while (dequeued->next == NULL) SPINLOCK_BODY();
-        q->shadow_head = dequeued->next;
-        dequeued->next = NULL;
+        while (atomic_load_explicit(&dequeued->next, memory_order_relaxed) ==
+               NULL)
+          SPINLOCK_BODY();
+        q->shadow_head =
+          atomic_load_explicit(&dequeued->next, memory_order_relaxed);
+        atomic_store_explicit(&dequeued->next, NULL, memory_order_relaxed);
       }
     }
-    qthread_t *retval = dequeued->thread;
+    qthread_t *retval =
+      atomic_load_explicit(&dequeued->thread, memory_order_relaxed);
     FREE_TQNODE(dequeued);
     return retval;
   } else {
diff --git a/src/threadqueues/nemesis_threadqueues.c b/src/threadqueues/nemesis_threadqueues.c
index cee6ffbcd..85a27c5c0 100644
--- a/src/threadqueues/nemesis_threadqueues.c
+++ b/src/threadqueues/nemesis_threadqueues.c
@@ -58,7 +58,7 @@ struct _qt_threadqueue {
   alignas(CACHELINE_WIDTH) NEMESIS_queue q;
   /* the following is for estimating a queue's "busy" level, and is not
    * guaranteed accurate (that would be a race condition) */
-  saligned_t advisory_queuelen;
+  _Atomic saligned_t advisory_queuelen;
 #ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE
   uint32_t frustration;
   QTHREAD_COND_DECL(trigger);
@@ -180,7 +180,8 @@ void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */
     if (node) {
       qthread_t *retval = node->thread;
       assert(atomic_load_explicit(&node->next, memory_order_relaxed) == NULL);
-      (void)qthread_incr(&(q->advisory_queuelen), (aligned_t)-1);
+      atomic_fetch_add_explicit(
+        &q->advisory_queuelen, (saligned_t)-1, memory_order_relaxed);
       FREE_TQNODE(node);
       qthread_thread_free(retval);
     } else {
@@ -231,7 +232,7 @@ void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q,
   } else {
     atomic_store_explicit(&prev->next, node, memory_order_relaxed);
   }
-  (void)qthread_incr(&(q->advisory_queuelen), 1);
+  atomic_fetch_add_explicit(&q->advisory_queuelen, 1, memory_order_relaxed);
 #ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE
   /* awake waiter */
   /* Yes, this needs to be here, to prevent reading frustration being hoisted
@@ -256,7 +257,7 @@ void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q,
 ssize_t INTERNAL
 qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{ */
   assert(q);
-  return q->advisory_queuelen;
+  return atomic_load_explicit(&q->advisory_queuelen, memory_order_relaxed);
 } /*}}} */
 
 qthread_t INTERNAL *
@@ -297,7 +298,8 @@ qt_scheduler_get_thread(qt_threadqueue_t *q,
   }
   assert(node);
   assert(atomic_load_explicit(&node->next, memory_order_relaxed) == NULL);
-  (void)qthread_incr(&(q->advisory_queuelen), (aligned_t)-1);
+  atomic_fetch_add_explicit(
+    &q->advisory_queuelen, (saligned_t)-1, memory_order_relaxed);
   retval = node->thread;
   FREE_TQNODE(node);
   return retval;
diff --git a/test/basics/queue.c b/test/basics/queue.c
index f8c459414..0cb249a62 100644
--- a/test/basics/queue.c
+++ b/test/basics/queue.c
@@ -1,17 +1,20 @@
-#include "argparsing.h"
 #include <assert.h>
-#include <qthread/qthread.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
 
-aligned_t threads_in = 0;
+#include "qthread/qthread.h"
+
+#include "argparsing.h"
+
+_Atomic aligned_t threads_in = 0;
 aligned_t awoke = 0;
 int THREADS_ENQUEUED = 100;
 
 qthread_queue_t the_queue;
 
 static aligned_t tobequeued(void *arg) {
-  qthread_incr(&threads_in, 1);
+  atomic_fetch_add_explicit(&threads_in, 1, memory_order_relaxed);
   // iprintf("\tOne thread about to join the queue...\n");
   qthread_queue_join(the_queue);
   // iprintf("\tAwoke from the queue! %p\n", qthread_retloc());
@@ -54,7 +57,7 @@ int main(int argc, char *argv[]) {
   ret = qthread_readFF(NULL, &return_value);
   test_check(ret == QTHREAD_SUCCESS);
 
-  test_check(threads_in == 1);
+  test_check(atomic_load_explicit(&threads_in, memory_order_relaxed) == 1);
   test_check(awoke == 1);
   test_check(qthread_queue_length(the_queue) == 0);
   // This relies on approximate estimates, so it's not reliable to test here.
@@ -64,7 +67,7 @@ int main(int argc, char *argv[]) {
   iprintf("---------------------------------------------------------\n");
   iprintf("\tMULTI THREAD TEST\n\n");
 
-  threads_in = 0;
+  atomic_store_explicit(&threads_in, 0, memory_order_relaxed);
   awoke = 0;
   aligned_t *retvals = malloc(sizeof(aligned_t) * THREADS_ENQUEUED);
   iprintf("1/6 Spawning %u threads to be queued...\n", THREADS_ENQUEUED);
@@ -76,7 +79,8 @@ int main(int argc, char *argv[]) {
   iprintf("2/6 Waiting for %u threads to queue themselves...\n",
           THREADS_ENQUEUED);
   while (qthread_queue_length(the_queue) != THREADS_ENQUEUED) qthread_yield();
-  test_check(threads_in == THREADS_ENQUEUED);
+  test_check(atomic_load_explicit(&threads_in, memory_order_relaxed) ==
+             THREADS_ENQUEUED);
   test_check(qthread_readstate(NODE_BUSYNESS) == 1);
 
   iprintf("3/6 Releasing a single thread...\n");