From e694079df77db8a860ce1ecf4e7156e3eb4da1c8 Mon Sep 17 00:00:00 2001 From: Hy3n4 Date: Tue, 31 Oct 2023 08:08:22 +0100 Subject: [PATCH] feat(slo): ability to use metric labels Now we are able to use labels in the recording rules Signed-off-by: Hy3n4 --- internal/controller/openslo/slo_controller.go | 45 ++++++++++++++----- internal/utils/common_utils.go | 41 ++++++++++++++--- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/internal/controller/openslo/slo_controller.go b/internal/controller/openslo/slo_controller.go index 78b1af8..ec2efee 100644 --- a/internal/controller/openslo/slo_controller.go +++ b/internal/controller/openslo/slo_controller.go @@ -205,7 +205,11 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1. var totalRule monitoringv1.Rule var goodRule monitoringv1.Rule var badRule monitoringv1.Rule + var ratioRule monitoringv1.Rule defaultRateWindow := "1m" + burnRateTimeWindows := []string{"1h", "6h", "3d"} + l := utils.LabelGeneratorParams{Slo: slo, Sli: sli} + m := utils.MetricLabelParams{Slo: slo, Sli: sli} // for now, total and good are required. bad is optional and is calculated as (total - good) if not provided // TODO: validate that the SLO budgeting method is Occurrences and that the SLIs are all ratio metrics in other case throw an error @@ -214,7 +218,7 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1. sli.Spec.RatioMetric.Total.MetricSource.Spec, defaultRateWindow, )) - totalRule.Labels = utils.GenerateMetricLabels(slo, sli) + totalRule.Labels = l.NewMetricLabelGenerator() monitoringRules = append(monitoringRules, totalRule) @@ -223,21 +227,21 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1. sli.Spec.RatioMetric.Good.MetricSource.Spec, defaultRateWindow, )) - goodRule.Labels = utils.GenerateMetricLabels(slo, sli) + goodRule.Labels = l.NewMetricLabelGenerator() monitoringRules = append(monitoringRules, goodRule) - basicRuleQuery := fmt.Sprintf("(1-%s) * %s[%s:%s] - (%s[%s:%s] - %s[%s:%s])", + basicRuleQuery := fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - (sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))", slo.Spec.Objectives[0].Target, totalRule.Record, + m.NewMetricLabelCompiler(), slo.Spec.TimeWindow[0].Duration, - defaultRateWindow, totalRule.Record, + m.NewMetricLabelCompiler(), slo.Spec.TimeWindow[0].Duration, - defaultRateWindow, goodRule.Record, + m.NewMetricLabelCompiler(), slo.Spec.TimeWindow[0].Duration, - defaultRateWindow, ) if sli.Spec.RatioMetric.Bad != (openslov1.MetricSpec{}) { @@ -246,15 +250,15 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1. sli.Spec.RatioMetric.Bad.MetricSource.Spec, defaultRateWindow, )) - badRule.Labels = utils.GenerateMetricLabels(slo, sli) - basicRuleQuery = fmt.Sprintf("(1-%s) * %s[%s:%s] - %s[%s:%s])", + badRule.Labels = l.NewMetricLabelGenerator() + basicRuleQuery = fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))", slo.Spec.Objectives[0].Target, totalRule.Record, + m.NewMetricLabelCompiler(), slo.Spec.TimeWindow[0].Duration, - defaultRateWindow, badRule.Expr.StrVal, + m.NewMetricLabelCompiler(), slo.Spec.TimeWindow[0].Duration, - defaultRateWindow, ) monitoringRules = append(monitoringRules, badRule) } @@ -262,12 +266,29 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1. mRule := monitoringv1.Rule{ Record: fmt.Sprint("osko_error_budget_available"), Expr: intstr.Parse(fmt.Sprint(basicRuleQuery)), - Labels: utils.GenerateMetricLabels(slo, sli), + Labels: l.NewMetricLabelGenerator(), } monitoringRules = append(monitoringRules, mRule) - // Calculate Error ratios for 1h, 6h + // Calculate Error ratios for 1h, 6h, 3d + for _, timeWindow := range burnRateTimeWindows { + l.TimeWindow = timeWindow + ratioRule.Record = fmt.Sprintf("osko_sli_ratio") + ratioRule.Expr = intstr.Parse(fmt.Sprintf("(sum(increase(%s{%s}[%s]))-sum(increase(%s{%s}[%s])))/sum(increase(%s{%s}[%s]))", + totalRule.Record, + m.NewMetricLabelCompiler(), + timeWindow, + goodRule.Record, + m.NewMetricLabelCompiler(), + timeWindow, + totalRule.Record, + m.NewMetricLabelCompiler(), + timeWindow, + )) + ratioRule.Labels = l.NewMetricLabelGenerator() + monitoringRules = append(monitoringRules, ratioRule) + } rule := &monitoringv1.PrometheusRule{ TypeMeta: metav1.TypeMeta{ diff --git a/internal/utils/common_utils.go b/internal/utils/common_utils.go index 6c4b96c..9683526 100644 --- a/internal/utils/common_utils.go +++ b/internal/utils/common_utils.go @@ -9,6 +9,19 @@ import ( "time" ) +type LabelGeneratorParams struct { + Slo *openslov1.SLO + Sli *openslov1.SLI + TimeWindow string +} + +type MetricLabelParams struct { + Slo *openslov1.SLO + Sli *openslov1.SLI + TimeWindow string + Labels map[string]string +} + // UpdateCondition checks if the condition of the given type is already in the slice // if the condition already exists and has the same status, return the unmodified conditions // if the condition exists and has a different status, remove it and add the new one @@ -69,12 +82,30 @@ func ExtractMetricNameFromQuery(query string) string { return subStr } -func GenerateMetricLabels(slo *openslov1.SLO, sli *openslov1.SLI) map[string]string { +func (m MetricLabelParams) NewMetricLabelCompiler() string { + window := string(m.Slo.Spec.TimeWindow[0].Duration) + if m.TimeWindow != "" { + window = m.TimeWindow + } + + labelString := `sli_name="` + m.Sli.Name + `", slo_name="` + m.Slo.Name + `", service="` + m.Slo.Spec.Service + `", window="` + window + `"` + for k, v := range m.Labels { + labelString += `, ` + k + `="` + v + `"` + } + + return labelString +} + +func (l LabelGeneratorParams) NewMetricLabelGenerator() map[string]string { + window := string(l.Slo.Spec.TimeWindow[0].Duration) + if l.TimeWindow != "" { + window = l.TimeWindow + } return map[string]string{ - "sli_name": sli.Name, - "slo_name": slo.Name, - "service": slo.Spec.Service, - "window": string(slo.Spec.TimeWindow[0].Duration), + "sli_name": l.Sli.Name, + "slo_name": l.Slo.Name, + "service": l.Slo.Spec.Service, + "window": window, } }