From cec8b42cb391f48e578ed7860d749d6735f6ef8e Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 20 Sep 2023 20:34:35 +0200
Subject: [PATCH] MEDIUM: logs: atomically check and update the log sample
 index

The log server lock is pretty visible in perf top when using log samples
because it's taken for each server in turn while trying to validate and
update the log server's index. Let's change this for a CAS, since we have
the index and the range at hand now. This allow us to remove the logsrv
lock.

The test on 4 servers now shows a 3.7 times improvement thanks to much
lower contention. Without log sampling a test producing 4.4M logs/s
delivers 4.4M logs/s at 21 CPUs used, everything spent in the kernel.
After enabling 4 samples (1:4, 2:4, 3:4 and 4:4), the throughput would
previously drop to 1.13M log/s with 37 CPUs used and 75% spent in
process_send_log(). Now with this change, 4.25M logs/s are emitted,
using 26 CPUs and 22% in process_send_log(). That's a 3.7x throughput
improvement for a 30% global CPU usage reduction, but in practice it
mostly shows that the performance drop caused by having samples is much
less noticeable (each of the 4 servers has its index updated for each
log).

Note that in order to even avoid incrementing an index for each log srv
that is consulted, it would be more convenient to have a single index
per frontend and apply the modulus on each log server in turn to see if
the range has to be updated. It would then only perform one write per
range switch. However the place where this is done doesn't have access
to a frontend, so some changes would need to be performed for this, and
it would require to update the current range independently in each
logsrv, which is not necessarily easier since we don't know yet if we
can commit it.
---
 include/haproxy/log-t.h |  1 -
 src/log.c               | 43 ++++++++++++++++++++---------------------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/include/haproxy/log-t.h b/include/haproxy/log-t.h
index a503ef9afd62..467d31ca5f57 100644
--- a/include/haproxy/log-t.h
+++ b/include/haproxy/log-t.h
@@ -235,7 +235,6 @@ struct logsrv {
                 char *file;                     /* file where the logsrv appears */
                 int line;                       /* line where the logsrv appears */
         } conf;
-	__decl_thread(HA_SPINLOCK_T lock);
 };
 
 #endif /* _HAPROXY_LOG_T_H */
diff --git a/src/log.c b/src/log.c
index a03172bb7376..b40ac1eda528 100644
--- a/src/log.c
+++ b/src/log.c
@@ -770,7 +770,6 @@ struct logsrv *dup_logsrv(struct logsrv *def)
 	cpy->ring_name = NULL;
 	cpy->conf.file = NULL;
 	LIST_INIT(&cpy->list);
-	HA_SPIN_INIT(&cpy->lock);
 
 	/* special members */
 	if (def->ring_name) {
@@ -1006,7 +1005,7 @@ int parse_logsrv(char **args, struct list *logsrvs, int do_del, const char *file
 
 		cur_arg += 2;
 	}
-	HA_SPIN_INIT(&logsrv->lock);
+
 	/* parse the facility */
 	logsrv->facility = get_log_facility(args[cur_arg]);
 	if (logsrv->facility < 0) {
@@ -1831,28 +1830,28 @@ void process_send_log(struct list *logsrvs, int level, int facility,
 		if (logsrv->lb.smp_rgs) {
 			struct smp_log_range *smp_rg;
 			uint next_idx, curr_rg;
-			ullong curr_rg_idx;
-
-			HA_SPIN_LOCK(LOGSRV_LOCK, &logsrv->lock);
-			curr_rg_idx = logsrv->lb.curr_rg_idx;
-			next_idx = (curr_rg_idx & 0xFFFFFFFFU) + 1;
-			curr_rg  = curr_rg_idx >> 32;
-			smp_rg = &logsrv->lb.smp_rgs[curr_rg];
-
-			/* check if the index we're going to take is within range  */
-			in_range = smp_rg->low <= next_idx && next_idx <= smp_rg->high;
-			if (in_range) {
-				/* Let's consume this range. */
-				if (next_idx == smp_rg->high) {
-					/* If consumed, let's select the next range. */
-					curr_rg = (curr_rg + 1) % logsrv->lb.smp_rgs_sz;
+			ullong curr_rg_idx, next_rg_idx;
+
+			curr_rg_idx = _HA_ATOMIC_LOAD(&logsrv->lb.curr_rg_idx);
+			do {
+				next_idx = (curr_rg_idx & 0xFFFFFFFFU) + 1;
+				curr_rg  = curr_rg_idx >> 32;
+				smp_rg = &logsrv->lb.smp_rgs[curr_rg];
+
+				/* check if the index we're going to take is within range  */
+				in_range = smp_rg->low <= next_idx && next_idx <= smp_rg->high;
+				if (in_range) {
+					/* Let's consume this range. */
+					if (next_idx == smp_rg->high) {
+						/* If consumed, let's select the next range. */
+						curr_rg = (curr_rg + 1) % logsrv->lb.smp_rgs_sz;
+					}
 				}
-			}
 
-			next_idx = next_idx % logsrv->lb.smp_sz;
-			curr_rg_idx = ((ullong)curr_rg << 32) + next_idx;
-			logsrv->lb.curr_rg_idx = curr_rg_idx;
-			HA_SPIN_UNLOCK(LOGSRV_LOCK, &logsrv->lock);
+				next_idx = next_idx % logsrv->lb.smp_sz;
+				next_rg_idx = ((ullong)curr_rg << 32) + next_idx;
+			} while (!_HA_ATOMIC_CAS(&logsrv->lb.curr_rg_idx, &curr_rg_idx, next_rg_idx) &&
+				 __ha_cpu_relax());
 		}
 		if (in_range)
 			__do_send_log(logsrv, ++nblogger,  MAX(level, logsrv->minlvl),