From cec8b42cb391f48e578ed7860d749d6735f6ef8e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 20 Sep 2023 20:34:35 +0200 Subject: [PATCH] MEDIUM: logs: atomically check and update the log sample index The log server lock is pretty visible in perf top when using log samples because it's taken for each server in turn while trying to validate and update the log server's index. Let's change this for a CAS, since we have the index and the range at hand now. This allow us to remove the logsrv lock. The test on 4 servers now shows a 3.7 times improvement thanks to much lower contention. Without log sampling a test producing 4.4M logs/s delivers 4.4M logs/s at 21 CPUs used, everything spent in the kernel. After enabling 4 samples (1:4, 2:4, 3:4 and 4:4), the throughput would previously drop to 1.13M log/s with 37 CPUs used and 75% spent in process_send_log(). Now with this change, 4.25M logs/s are emitted, using 26 CPUs and 22% in process_send_log(). That's a 3.7x throughput improvement for a 30% global CPU usage reduction, but in practice it mostly shows that the performance drop caused by having samples is much less noticeable (each of the 4 servers has its index updated for each log). Note that in order to even avoid incrementing an index for each log srv that is consulted, it would be more convenient to have a single index per frontend and apply the modulus on each log server in turn to see if the range has to be updated. It would then only perform one write per range switch. However the place where this is done doesn't have access to a frontend, so some changes would need to be performed for this, and it would require to update the current range independently in each logsrv, which is not necessarily easier since we don't know yet if we can commit it. --- include/haproxy/log-t.h | 1 - src/log.c | 43 ++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/include/haproxy/log-t.h b/include/haproxy/log-t.h index a503ef9afd62..467d31ca5f57 100644 --- a/include/haproxy/log-t.h +++ b/include/haproxy/log-t.h @@ -235,7 +235,6 @@ struct logsrv { char *file; /* file where the logsrv appears */ int line; /* line where the logsrv appears */ } conf; - __decl_thread(HA_SPINLOCK_T lock); }; #endif /* _HAPROXY_LOG_T_H */ diff --git a/src/log.c b/src/log.c index a03172bb7376..b40ac1eda528 100644 --- a/src/log.c +++ b/src/log.c @@ -770,7 +770,6 @@ struct logsrv *dup_logsrv(struct logsrv *def) cpy->ring_name = NULL; cpy->conf.file = NULL; LIST_INIT(&cpy->list); - HA_SPIN_INIT(&cpy->lock); /* special members */ if (def->ring_name) { @@ -1006,7 +1005,7 @@ int parse_logsrv(char **args, struct list *logsrvs, int do_del, const char *file cur_arg += 2; } - HA_SPIN_INIT(&logsrv->lock); + /* parse the facility */ logsrv->facility = get_log_facility(args[cur_arg]); if (logsrv->facility < 0) { @@ -1831,28 +1830,28 @@ void process_send_log(struct list *logsrvs, int level, int facility, if (logsrv->lb.smp_rgs) { struct smp_log_range *smp_rg; uint next_idx, curr_rg; - ullong curr_rg_idx; - - HA_SPIN_LOCK(LOGSRV_LOCK, &logsrv->lock); - curr_rg_idx = logsrv->lb.curr_rg_idx; - next_idx = (curr_rg_idx & 0xFFFFFFFFU) + 1; - curr_rg = curr_rg_idx >> 32; - smp_rg = &logsrv->lb.smp_rgs[curr_rg]; - - /* check if the index we're going to take is within range */ - in_range = smp_rg->low <= next_idx && next_idx <= smp_rg->high; - if (in_range) { - /* Let's consume this range. */ - if (next_idx == smp_rg->high) { - /* If consumed, let's select the next range. */ - curr_rg = (curr_rg + 1) % logsrv->lb.smp_rgs_sz; + ullong curr_rg_idx, next_rg_idx; + + curr_rg_idx = _HA_ATOMIC_LOAD(&logsrv->lb.curr_rg_idx); + do { + next_idx = (curr_rg_idx & 0xFFFFFFFFU) + 1; + curr_rg = curr_rg_idx >> 32; + smp_rg = &logsrv->lb.smp_rgs[curr_rg]; + + /* check if the index we're going to take is within range */ + in_range = smp_rg->low <= next_idx && next_idx <= smp_rg->high; + if (in_range) { + /* Let's consume this range. */ + if (next_idx == smp_rg->high) { + /* If consumed, let's select the next range. */ + curr_rg = (curr_rg + 1) % logsrv->lb.smp_rgs_sz; + } } - } - next_idx = next_idx % logsrv->lb.smp_sz; - curr_rg_idx = ((ullong)curr_rg << 32) + next_idx; - logsrv->lb.curr_rg_idx = curr_rg_idx; - HA_SPIN_UNLOCK(LOGSRV_LOCK, &logsrv->lock); + next_idx = next_idx % logsrv->lb.smp_sz; + next_rg_idx = ((ullong)curr_rg << 32) + next_idx; + } while (!_HA_ATOMIC_CAS(&logsrv->lb.curr_rg_idx, &curr_rg_idx, next_rg_idx) && + __ha_cpu_relax()); } if (in_range) __do_send_log(logsrv, ++nblogger, MAX(level, logsrv->minlvl),