Skip to content

Commit

Permalink
Added a rule for a health check alert (#630) (#689)
Browse files Browse the repository at this point in the history
* Fix quote escape problem with Prometheus rules

Signed-off-by: Antonio Mindov <[email protected]>
  • Loading branch information
rokn authored Aug 12, 2022
1 parent 8ac0f47 commit 9ead80c
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 45 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/mainnet-deploy-to-vms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ jobs:
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_READ_ONLY }}" >> .env
echo '${{ secrets.ENV_READ_ONLY }}' >> .env
- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_MAINNET_DAVE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_MAINNET_BRIDGE_CONFIG }}" > ./bridge.yml && \
echo "${{ secrets.APP_MAINNET_ALERT_CONFIG }}" > ./monitoring/alertmanager/config.yml && \
echo "${{ secrets.APP_MAINNET_GRAFANA_CONFIG_ENV }}" > ./monitoring/grafana/config-overrides.env && \
echo "${{ secrets.APP_MAINNET_PROMETHEUS_CONFIG }}" > ./monitoring/prometheus/prometheus.yml && \
echo "${{ secrets.APP_MAINNET_PROMETHEUS_RULES_CONFIG }}" > ./monitoring/prometheus/rules.yml
echo '${{ secrets.APP_MAINNET_DAVE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_MAINNET_BRIDGE_CONFIG }}' > ./bridge.yml && \
echo '${{ secrets.APP_MAINNET_ALERT_CONFIG }}' > ./monitoring/alertmanager/config.yml && \
echo '${{ secrets.APP_MAINNET_GRAFANA_CONFIG_ENV }}' > ./monitoring/grafana/config-overrides.env && \
echo '${{ secrets.APP_MAINNET_PROMETHEUS_CONFIG }}' > ./monitoring/prometheus/prometheus.yml && \
echo '${{ secrets.APP_MAINNET_PROMETHEUS_RULES_CONFIG }}' > ./monitoring/prometheus/rules.yml
- name: Copy files
run: |
Expand Down
40 changes: 20 additions & 20 deletions .github/workflows/testnet-deploy-to-vms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env
- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_ALICE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_ALICE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml
- name: Copy files
run: |
Expand All @@ -58,14 +58,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env
- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_BOB_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_BOB_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml
- name: Copy files
run: |
Expand All @@ -92,14 +92,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env
- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_CAROL_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_CAROL_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml
- name: Copy files
run: |
Expand All @@ -126,18 +126,18 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env
- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_DAVE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml && \
echo "${{ secrets.APP_TESTNET_ALERT_CONFIG }}" > ./monitoring/alertmanager/config.yml && \
echo "${{ secrets.APP_TESTNET_GRAFANA_CONFIG_ENV }}" > ./monitoring/grafana/config-overrides.env && \
echo "${{ secrets.APP_TESTNET_PROMETHEUS_CONFIG }}" > ./monitoring/prometheus/prometheus.yml && \
echo "${{ secrets.APP_TESTNET_PROMETHEUS_RULES_CONFIG }}" > ./monitoring/prometheus/rules.yml
echo '${{ secrets.APP_TESTNET_DAVE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml && \
echo '${{ secrets.APP_TESTNET_ALERT_CONFIG }}' > ./monitoring/alertmanager/config.yml && \
echo '${{ secrets.APP_TESTNET_GRAFANA_CONFIG_ENV }}' > ./monitoring/grafana/config-overrides.env && \
echo '${{ secrets.APP_TESTNET_PROMETHEUS_CONFIG }}' > ./monitoring/prometheus/prometheus.yml && \
echo '${{ secrets.APP_TESTNET_PROMETHEUS_RULES_CONFIG }}' > ./monitoring/prometheus/rules.yml
- name: Copy files
run: |
Expand Down
48 changes: 40 additions & 8 deletions examples/three-validators/monitoring/prometheus/rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: "Fee account amount: {{ $value }} HBAR"
#
Expand All @@ -18,8 +18,8 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: "Operator account amount: {{ $value }} HBAR"
#
Expand All @@ -31,8 +31,40 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'critical'
# group: 'validators'
# repeat_interval: 'long'
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Participation Rate: {{ $value }} %"
# description: "Participation Rate: {{ $value }} %"
# # Health-check rules. Set each threshold according to the number of validators.
# - alert: HealthyValidatorsMinor
# # Condition for alerting
# expr: count_validators_alive < 3
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "minor"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsWarning
# # Condition for alerting
# expr: count_validators_alive < 2
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "warning"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsCritical
# # Condition for alerting
# expr: count_validators_alive < 1
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Healthy validators: {{ $value }}"
52 changes: 42 additions & 10 deletions monitoring/prometheus/rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: 'Fee account amount: {{ $value }} HBAR'
# description: "Fee account amount: {{ $value }} HBAR"
#
# - alert: LowOperatorAccountAmount
# # Condition for alerting
# expr: operator_account_amount < 100
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: 'Operator account amount: {{ $value }} HBAR'
# description: "Operator account amount: {{ $value }} HBAR"
#
# - name: validators
# rules:
Expand All @@ -31,8 +31,40 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'critical'
# group: 'validators'
# repeat_interval: 'long'
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: 'Participation Rate: {{ $value }} %'
# description: "Participation Rate: {{ $value }} %"
# # Health-check rules. Set each threshold according to the number of validators.
# - alert: HealthyValidatorsMinor
# # Condition for alerting
# expr: count_validators_alive < 9
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "minor"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsWarning
# # Condition for alerting
# expr: count_validators_alive < 7
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "warning"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsCritical
# # Condition for alerting
# expr: count_validators_alive < 5
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Healthy validators: {{ $value }}"

0 comments on commit 9ead80c

Please sign in to comment.