Skip to content

Commit

Permalink
OPTIM: vars: use a cebtree instead of a list for variable names
Browse files Browse the repository at this point in the history
Configs involving many variables can start to eat a lot of CPU in name
lookups. The reason is that the names themselves are dynamic in that
they are relative to dynamic objects (sessions, streams, etc), so
there's no fixed index for example. The current implementation relies
on a standard linked list, and in order to speed up lookups and avoid
comparing strings, only a 64-bit hash of the variable's name is stored
and compared everywhere.

But with just 100 variables and 1000 accesses in a config, it's clearly
visible that variable name lookup can reach 56% CPU with a config
generated this way:

  for i in {0..100}; do
    printf "\thttp-request set-var(txn.var%04d) int(%d)" $i $i;
    for j in {1..10}; do [ $i -lt $j ] || printf ",add(txn.var%04d)" $((i-j)); done;
    echo;
  done

The performance and a 4-core skylake 4.4 GHz reaches 85k RPS with a perf
profile showing:

  Samples: 170K of event 'cycles', Event count (approx.): 142378815419
  Overhead  Shared Object            Symbol
    56.39%  haproxy                  [.] var_to_smp
     6.65%  haproxy                  [.] var_set.part.0
     5.76%  haproxy                  [.] sample_process_cnv
     3.23%  haproxy                  [.] sample_conv_var2smp
     2.88%  haproxy                  [.] sample_conv_arith_add
     2.33%  haproxy                  [.] __pool_alloc
     2.19%  haproxy                  [.] action_store
     2.13%  haproxy                  [.] vars_get_by_desc
     1.87%  haproxy                  [.] smp_dup

[above, var_to_smp() calls var_get() under the read lock].

By switching to a binary tree, the cost is significantly lower, the
performance reaches 117k RPS (+37%) with this profile:

  Samples: 170K of event 'cycles', Event count (approx.): 142323631229
  Overhead  Shared Object            Symbol
    40.22%  haproxy                  [.] cebu64_lookup
     7.12%  haproxy                  [.] sample_process_cnv
     6.15%  haproxy                  [.] var_to_smp
     4.75%  haproxy                  [.] cebu64_insert
     3.79%  haproxy                  [.] sample_conv_var2smp
     3.40%  haproxy                  [.] cebu64_delete
     3.10%  haproxy                  [.] sample_conv_arith_add
     2.36%  haproxy                  [.] action_store
     2.32%  haproxy                  [.] __pool_alloc
     2.08%  haproxy                  [.] vars_get_by_desc
     1.96%  haproxy                  [.] smp_dup
     1.75%  haproxy                  [.] var_set.part.0
     1.74%  haproxy                  [.] cebu64_first
     1.07%  [kernel]                 [k] aq_hw_read_reg
     1.03%  haproxy                  [.] pool_put_to_cache
     1.00%  haproxy                  [.] sample_process

The performance lowers a bit earlier than with the list however. What
can be seen is that the performance maintains a plateau till 25 vars,
starts degrading a little bit for the tree while it remains stable till
28 vars for the list. Then both cross at 42 vars and the list continues
to degrade doing a hyperbole while the tree resists better. The biggest
loss is at around 32 variables where the list stays 10% higher.

Regardless, given the extremely narrow band where the list is better, it
looks relevant to switch to this in order to preserve the almost linear
performance of large setups. For example at 1000 variables and 10k
lookups, the tree is 18 times faster than the list.

In addition this reduces the size of the struct vars by 8 bytes since
there's a single pointer, though it could make sense to re-invest them
into a secondary head for example.
  • Loading branch information
wtarreau committed Sep 15, 2024
1 parent a0205f9 commit 47ec7c6
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 19 deletions.
7 changes: 4 additions & 3 deletions include/haproxy/vars-t.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <haproxy/sample_data-t.h>
#include <haproxy/thread-t.h>
#include <import/cebtree.h>

/* flags used when setting/clearing variables */
#define VF_CREATEONLY 0x00000001 // do nothing if the variable already exists
Expand All @@ -48,7 +49,7 @@ enum vars_scope {
};

struct vars {
struct list head;
struct ceb_node *name_root;
enum vars_scope scope;
unsigned int size;
__decl_thread(HA_RWLOCK_T rwlock);
Expand All @@ -64,8 +65,8 @@ struct var_desc {
};

struct var {
struct list l; /* Used for chaining vars. */
uint64_t name_hash; /* XXH3() of the variable's name */
struct ceb_node node; /* Used for chaining vars. */
uint64_t name_hash; /* XXH3() of the variable's name, must be just after node */
uint flags; // VF_*
/* 32-bit hole here */
struct sample_data data; /* data storage. */
Expand Down
12 changes: 8 additions & 4 deletions include/haproxy/vars.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#ifndef _HAPROXY_VARS_H
#define _HAPROXY_VARS_H

#include <import/cebu64_tree.h>

#include <haproxy/api-t.h>
#include <haproxy/session-t.h>
#include <haproxy/stream-t.h>
Expand All @@ -34,7 +36,7 @@ struct arg;

void vars_init_head(struct vars *vars, enum vars_scope scope);
void var_accounting_diff(struct vars *vars, struct session *sess, struct stream *strm, int size);
unsigned int var_clear(struct var *var, int force);
unsigned int var_clear(struct vars *vars, struct var *var, int force);
void vars_prune_per_sess(struct vars *vars);
int var_set(const struct var_desc *desc, struct sample *smp, uint flags);
int var_unset(const struct var_desc *desc, struct sample *smp);
Expand Down Expand Up @@ -78,11 +80,13 @@ static inline void vars_rdunlock(struct vars *vars)
*/
static inline void vars_prune(struct vars *vars, struct session *sess, struct stream *strm)
{
struct var *var, *tmp;
struct ceb_node *node;
struct var *var;
unsigned int size = 0;

list_for_each_entry_safe(var, tmp, &vars->head, l) {
size += var_clear(var, 1);
while ((node = cebu64_first(&vars->name_root))) {
var = container_of(node, struct var, node);
size += var_clear(vars, var, 1);
}

if (!size)
Expand Down
29 changes: 17 additions & 12 deletions src/vars.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <haproxy/vars.h>
#include <haproxy/xxhash.h>

#include <import/cebu64_tree.h>


/* This contains a pool of struct vars */
DECLARE_STATIC_POOL(var_pool, "vars", sizeof(struct var));
Expand Down Expand Up @@ -172,7 +174,7 @@ static int var_accounting_add(struct vars *vars, struct session *sess, struct st
* using. If the variable is marked "VF_PERMANENT", the sample_data is only
* reset to SMP_T_ANY unless <force> is non nul. Returns the freed size.
*/
unsigned int var_clear(struct var *var, int force)
unsigned int var_clear(struct vars *vars, struct var *var, int force)
{
unsigned int size = 0;

Expand All @@ -188,7 +190,7 @@ unsigned int var_clear(struct var *var, int force)
var->data.type = SMP_T_ANY;

if (!(var->flags & VF_PERMANENT) || force) {
LIST_DELETE(&var->l);
cebu64_delete(&vars->name_root, &var->node);
pool_free(var_pool, var);
size += sizeof(struct var);
}
Expand All @@ -200,11 +202,13 @@ unsigned int var_clear(struct var *var, int force)
*/
void vars_prune_per_sess(struct vars *vars)
{
struct var *var, *tmp;
struct ceb_node *node;
struct var *var;
unsigned int size = 0;

list_for_each_entry_safe(var, tmp, &vars->head, l) {
size += var_clear(var, 1);
while ((node = cebu64_first(&vars->name_root))) {
var = container_of(node, struct var, node);
size += var_clear(vars, var, 1);
}

if (!size)
Expand All @@ -219,7 +223,7 @@ void vars_prune_per_sess(struct vars *vars)
/* This function initializes a variables list head */
void vars_init_head(struct vars *vars, enum vars_scope scope)
{
LIST_INIT(&vars->head);
vars->name_root = NULL;
vars->scope = scope;
vars->size = 0;
HA_RWLOCK_INIT(&vars->rwlock);
Expand Down Expand Up @@ -327,11 +331,12 @@ static int vars_fill_desc(const char *name, int len, struct var_desc *desc, char
*/
static struct var *var_get(struct vars *vars, uint64_t name_hash)
{
struct var *var;
struct ceb_node *node;

node = cebu64_lookup(&vars->name_root, name_hash);
if (node)
return container_of(node, struct var, node);

list_for_each_entry(var, &vars->head, l)
if (var->name_hash == name_hash)
return var;
return NULL;
}

Expand Down Expand Up @@ -428,10 +433,10 @@ int var_set(const struct var_desc *desc, struct sample *smp, uint flags)
var = pool_alloc(var_pool);
if (!var)
goto unlock;
LIST_APPEND(&vars->head, &var->l);
var->name_hash = desc->name_hash;
var->flags = flags & VF_PERMANENT;
var->data.type = SMP_T_ANY;
cebu64_insert(&vars->name_root, &var->node);
}

/* A variable of type SMP_T_ANY is considered as unset (either created
Expand Down Expand Up @@ -561,7 +566,7 @@ int var_unset(const struct var_desc *desc, struct sample *smp)
vars_wrlock(vars);
var = var_get(vars, desc->name_hash);
if (var) {
size = var_clear(var, 0);
size = var_clear(vars, var, 0);
var_accounting_diff(vars, smp->sess, smp->strm, -size);
}
vars_wrunlock(vars);
Expand Down

0 comments on commit 47ec7c6

Please sign in to comment.