Skip to content

Commit

Permalink
feat(slo): ability to use metric labels
Browse files Browse the repository at this point in the history
Now we are able to use labels in the recording rules
Signed-off-by: Hy3n4 <[email protected]>
  • Loading branch information
Hy3n4 committed Oct 31, 2023
1 parent e8e5b49 commit e694079
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 17 deletions.
45 changes: 33 additions & 12 deletions internal/controller/openslo/slo_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,11 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.
var totalRule monitoringv1.Rule
var goodRule monitoringv1.Rule
var badRule monitoringv1.Rule
var ratioRule monitoringv1.Rule
defaultRateWindow := "1m"
burnRateTimeWindows := []string{"1h", "6h", "3d"}
l := utils.LabelGeneratorParams{Slo: slo, Sli: sli}
m := utils.MetricLabelParams{Slo: slo, Sli: sli}

// for now, total and good are required. bad is optional and is calculated as (total - good) if not provided
// TODO: validate that the SLO budgeting method is Occurrences and that all SLIs are ratio metrics; otherwise return an error
Expand All @@ -214,7 +218,7 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.
sli.Spec.RatioMetric.Total.MetricSource.Spec,
defaultRateWindow,
))
totalRule.Labels = utils.GenerateMetricLabels(slo, sli)
totalRule.Labels = l.NewMetricLabelGenerator()

monitoringRules = append(monitoringRules, totalRule)

Expand All @@ -223,21 +227,21 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.
sli.Spec.RatioMetric.Good.MetricSource.Spec,
defaultRateWindow,
))
goodRule.Labels = utils.GenerateMetricLabels(slo, sli)
goodRule.Labels = l.NewMetricLabelGenerator()

monitoringRules = append(monitoringRules, goodRule)

basicRuleQuery := fmt.Sprintf("(1-%s) * %s[%s:%s] - (%s[%s:%s] - %s[%s:%s])",
basicRuleQuery := fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - (sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))",
slo.Spec.Objectives[0].Target,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
defaultRateWindow,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
defaultRateWindow,
goodRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
defaultRateWindow,
)

if sli.Spec.RatioMetric.Bad != (openslov1.MetricSpec{}) {
Expand All @@ -246,28 +250,45 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.
sli.Spec.RatioMetric.Bad.MetricSource.Spec,
defaultRateWindow,
))
badRule.Labels = utils.GenerateMetricLabels(slo, sli)
basicRuleQuery = fmt.Sprintf("(1-%s) * %s[%s:%s] - %s[%s:%s])",
badRule.Labels = l.NewMetricLabelGenerator()
basicRuleQuery = fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))",
slo.Spec.Objectives[0].Target,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
defaultRateWindow,
badRule.Expr.StrVal,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
defaultRateWindow,
)
monitoringRules = append(monitoringRules, badRule)
}

mRule := monitoringv1.Rule{
Record: fmt.Sprint("osko_error_budget_available"),
Expr: intstr.Parse(fmt.Sprint(basicRuleQuery)),
Labels: utils.GenerateMetricLabels(slo, sli),
Labels: l.NewMetricLabelGenerator(),
}

monitoringRules = append(monitoringRules, mRule)

// Calculate Error ratios for 1h, 6h
// Calculate Error ratios for 1h, 6h, 3d
for _, timeWindow := range burnRateTimeWindows {
l.TimeWindow = timeWindow
ratioRule.Record = fmt.Sprintf("osko_sli_ratio")
ratioRule.Expr = intstr.Parse(fmt.Sprintf("(sum(increase(%s{%s}[%s]))-sum(increase(%s{%s}[%s])))/sum(increase(%s{%s}[%s]))",
totalRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
goodRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
totalRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
))
ratioRule.Labels = l.NewMetricLabelGenerator()
monitoringRules = append(monitoringRules, ratioRule)
}

rule := &monitoringv1.PrometheusRule{
TypeMeta: metav1.TypeMeta{
Expand Down
41 changes: 36 additions & 5 deletions internal/utils/common_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@ import (
"time"
)

// LabelGeneratorParams carries the inputs NewMetricLabelGenerator uses to
// build the label map attached to a generated rule.
type LabelGeneratorParams struct {
// Slo supplies the slo_name and service labels, and the default window
// value (Spec.TimeWindow[0].Duration) used when TimeWindow is empty.
Slo *openslov1.SLO
// Sli supplies the sli_name label.
Sli *openslov1.SLI
// TimeWindow, when non-empty, overrides the window label value.
TimeWindow string
}

// MetricLabelParams carries the inputs NewMetricLabelCompiler uses to render
// a label-matcher string that is placed between the braces of a PromQL
// selector.
type MetricLabelParams struct {
// Slo supplies the slo_name and service values, and the default window
// value (Spec.TimeWindow[0].Duration) used when TimeWindow is empty.
Slo *openslov1.SLO
// Sli supplies the sli_name value.
Sli *openslov1.SLI
// TimeWindow, when non-empty, overrides the window value.
TimeWindow string
// Labels holds extra name/value pairs appended after the built-in ones.
Labels map[string]string
}

// UpdateCondition checks if the condition of the given type is already in the slice
// if the condition already exists and has the same status, return the unmodified conditions
// if the condition exists and has a different status, remove it and add the new one
Expand Down Expand Up @@ -69,12 +82,30 @@ func ExtractMetricNameFromQuery(query string) string {
return subStr
}

func GenerateMetricLabels(slo *openslov1.SLO, sli *openslov1.SLI) map[string]string {
// NewMetricLabelCompiler renders the label matchers for a PromQL selector as
// a comma-separated list of name="value" pairs, without the surrounding
// braces.
//
// The built-in sli_name, slo_name, service and window matchers come first,
// followed by the entries of m.Labels in ascending key order, so the same
// input always produces the same string regardless of map iteration order.
//
// The window value is m.TimeWindow when it is set; otherwise it is the
// duration of the SLO's first time window, or empty when the SLO declares no
// time window at all. Label values are inserted verbatim (no escaping).
func (m MetricLabelParams) NewMetricLabelCompiler() string {
	// Resolve the override first so an SLO without time windows does not
	// cause an index-out-of-range panic.
	window := m.TimeWindow
	if window == "" && len(m.Slo.Spec.TimeWindow) > 0 {
		window = string(m.Slo.Spec.TimeWindow[0].Duration)
	}

	labelString := `sli_name="` + m.Sli.Name + `", slo_name="` + m.Slo.Name + `", service="` + m.Slo.Spec.Service + `", window="` + window + `"`

	keys := make([]string, 0, len(m.Labels))
	for k := range m.Labels {
		keys = append(keys, k)
	}
	// Insertion sort: label sets are tiny, and sorting in place keeps this
	// block free of any additional package dependency.
	for i := 1; i < len(keys); i++ {
		for j := i; j > 0 && keys[j] < keys[j-1]; j-- {
			keys[j], keys[j-1] = keys[j-1], keys[j]
		}
	}

	for _, k := range keys {
		labelString += `, ` + k + `="` + m.Labels[k] + `"`
	}

	return labelString
}

// NewMetricLabelGenerator builds the label map attached to a generated rule.
//
// The map contains sli_name, slo_name, service and window. The window value
// is l.TimeWindow when it is set; otherwise it is the duration of the SLO's
// first time window, or empty when the SLO declares no time window at all.
func (l LabelGeneratorParams) NewMetricLabelGenerator() map[string]string {
	// Resolve the override first so an SLO without time windows does not
	// cause an index-out-of-range panic.
	window := l.TimeWindow
	if window == "" && len(l.Slo.Spec.TimeWindow) > 0 {
		window = string(l.Slo.Spec.TimeWindow[0].Duration)
	}

	// Each key appears exactly once and is read through the receiver.
	return map[string]string{
		"sli_name": l.Sli.Name,
		"slo_name": l.Slo.Name,
		"service":  l.Slo.Spec.Service,
		"window":   window,
	}
}

Expand Down

0 comments on commit e694079

Please sign in to comment.