Skip to content
This repository has been archived by the owner on Jan 11, 2021. It is now read-only.

[W.I.P] Node monitoring #42

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/kube.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ class Kubernetes {
: this.getAllPodsInCluster();
}

async getNodes() {
let nodes = await this.kube.nodes.get();

return nodes.items;
}

async getContainerStatuses() {
let pods = await this.getWatchedPods();
let out = [];
Expand Down
2 changes: 1 addition & 1 deletion src/monitors/index.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
module.exports = [require('./waitingpods'), require('./longnotready')];
module.exports = [require('./waitingpods'), require('./longnotready'), require('./nodehealth')];
30 changes: 27 additions & 3 deletions src/monitors/longnotready.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class PodLongNotReady extends EventEmitter {
// Initialise monitor state: the configured not-ready threshold and the alert-tracking map.
constructor() {
super();
// Threshold read from the `not_ready_min_time` config key.
this.minimumTime = config.get('not_ready_min_time');
// Pods for which an alert has been emitted, keyed by pod name; checkRecovery()
// consults and clears these entries.
this.alerted = {};
}

start() {
Expand All @@ -23,7 +24,7 @@ class PodLongNotReady extends EventEmitter {
let pods = await kube.getWatchedPods();

for (let pod of pods) {
let messageProps = {};
this.messageProps = {};
let annotations = pod.metadata.annotations;
if (annotations) {
// Ignore pod if the annotation is set and evaluates to true
Expand All @@ -45,16 +46,19 @@ class PodLongNotReady extends EventEmitter {
);

if (readyStatus.length === 0) {
this.checkRecovery(pod, readyStatus);
continue;
}

readyStatus = readyStatus[0];

if (readyStatus.status === 'True') {
this.checkRecovery(pod, readyStatus);
continue;
}

if (readyStatus.reason === 'PodCompleted') {
this.checkRecovery(pod, readyStatus);
continue;
}

Expand All @@ -74,6 +78,8 @@ class PodLongNotReady extends EventEmitter {
key = pod.metadata.ownerReferences[0].name;
}

this.messageProps._key = key;

this.emit('message', {
fallback: `Pod ${pod.metadata.namespace}/${
pod.metadata.name
Expand All @@ -83,8 +89,26 @@ class PodLongNotReady extends EventEmitter {
pod.metadata.name
}: ${readyStatus.reason || 'Pod not ready'}`,
text: readyStatus.message || 'Pod not ready',
_key: key,
...messageProps,
...this.messageProps,
});
this.alerted[pod.metadata.name] = pod;
}
}

checkRecovery(item, readyStatus) {
if(this.alerted[item.metadata.name]) {
delete this.alerted[item.metadata.name]
this.emit('message', {
fallback: `Pod ${item.metadata.namespace}/${
item.metadata.name
} is ready: ${readyStatus.reason} - ${readyStatus.message}`,
color: 'good',
title: `${item.metadata.namespace}/${
item.metadata.name
}: ${readyStatus.reason || 'Pod is ready'}`,
text: readyStatus.message || 'Pod is ready',
...this.messageProps,
_key: this.messageProps._key + "recovery"
});
}
}
Expand Down
68 changes: 68 additions & 0 deletions src/monitors/nodehealth.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
const EventEmitter = require('events');
const config = require('config');
const kube = require('../kube');

/**
 * Monitor that polls the cluster's nodes and reports node conditions.
 *
 * Emits `message` events:
 *  - color `danger` when a condition is unhealthy and its heartbeat is stale;
 *  - color `good` once a previously alerted condition is healthy again or its
 *    heartbeat is fresh again.
 *
 * Alerted conditions are tracked in `this.alerted`, keyed by
 * `<node name><condition type>`.
 */
class NodeStatus extends EventEmitter {
  constructor() {
    super();
    // Conditions currently alerted on, keyed by node name + condition type.
    this.alerted = {};
  }

  /**
   * Start polling every `interval` (config key) milliseconds.
   *
   * @returns {NodeStatus} this instance, for chaining.
   */
  start() {
    setInterval(() => {
      // check() is async; without a handler a failed API call would surface
      // as an unhandled promise rejection.
      this.check().catch((err) => {
        console.error('Node health check failed:', err);
      });
    }, config.get('interval'));

    return this;
  }

  /**
   * Inspect every condition of every node once and emit alert / recovery
   * messages as appropriate.
   *
   * @returns {Promise<void>}
   */
  async check() {
    const nodes = await kube.getNodes();

    // A heartbeat newer than one polling interval plus a 15 s grace period
    // counts as fresh.
    const heartbeatGraceMs = 15000;
    const freshAfter = Date.now() - config.get('interval') - heartbeatGraceMs;

    for (const item of nodes) {
      // Tolerate nodes that carry no conditions.
      const conditions = (item.status && item.status.conditions) || [];

      for (const status of conditions) {
        if (this.isHealthy(status)) {
          this.checkRecovery(item, status);
          continue;
        }

        // NOTE(review): an unhealthy condition whose heartbeat is still fresh
        // takes the recovery path, so conditions reported by a live kubelet
        // are never alerted on. Kept as-is; confirm this is intended.
        if (new Date(status.lastHeartbeatTime).getTime() > freshAfter) {
          this.checkRecovery(item, status);
          continue;
        }

        this.emit('message', this.buildMessage(item, status, 'danger', ''));
        this.alerted[item.metadata.name + status.type] = status;
      }
    }
  }

  /**
   * Emit a recovery message if the given condition was previously alerted on,
   * and forget the alert.
   *
   * @param {object} item - Node object (`metadata.name` is read).
   * @param {object} status - One entry of the node's `status.conditions`.
   */
  checkRecovery(item, status) {
    const alertKey = item.metadata.name + status.type;
    if (!this.alerted[alertKey]) {
      return;
    }

    delete this.alerted[alertKey];
    this.emit('message', this.buildMessage(item, status, 'good', 'recovered'));
  }

  /**
   * Decide whether a node condition represents a healthy state.
   *
   * `Ready` is healthy only when its status is "True"; "Unknown" (which the
   * Kubernetes docs describe as the node controller not having heard from the
   * node) is unhealthy. Every other condition type is healthy when "False".
   *
   * @param {object} status - One entry of the node's `status.conditions`.
   * @returns {boolean}
   */
  isHealthy(status) {
    return status.type === 'Ready'
      ? status.status === 'True'
      : status.status === 'False';
  }

  /**
   * Build the message payload shared by alerts and recoveries.
   *
   * @param {object} item - Node object.
   * @param {object} status - Node condition being reported.
   * @param {string} color - Message colour (`danger` or `good`).
   * @param {string} keySuffix - Appended to the message `_key`.
   * @returns {object} payload for the `message` event.
   */
  buildMessage(item, status, color, keySuffix) {
    return {
      fallback: `Node ${item.metadata.name} condition ${status.type} entered status ${status.status} (${status.message})`,
      color,
      title: `${status.type} on ${item.metadata.name} is ${status.status}`,
      text: `Node ${status.type} has reason *${status.reason}*\n\`\`\`${
        status.message
      }\`\`\`\nLast seen ${status.lastHeartbeatTime}\nLast transition ${status.lastTransitionTime}`,
      mrkdwn_in: ['text'],
      _key: item.metadata.name + status.type + keySuffix,
    };
  }
}

// Factory export: creates a NodeStatus monitor, starts its polling timer and
// returns the started instance (start() returns `this`).
module.exports = () => new NodeStatus().start();
47 changes: 36 additions & 11 deletions src/monitors/waitingpods.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class PodStatus extends EventEmitter {
// Initialise monitor state: ignored waiting reasons and the alert-tracking map.
constructor() {
super();
// Waiting reasons that check() skips instead of alerting on.
this.blacklistReason = ['ContainerCreating', 'PodInitializing'];
// Container status items an alert has been emitted for, keyed by `item.name`;
// checkRecovery() consults and clears these entries.
this.alerted = {};
}

start() {
Expand All @@ -20,7 +21,7 @@ class PodStatus extends EventEmitter {
let containers = await kube.getContainerStatuses();

for (let item of containers) {
let messageProps = {};
this.messageProps = {};
let annotations = item.pod.metadata.annotations;
if (annotations) {
// Ignore pod if the annotation is set and evaluates to true
Expand All @@ -29,17 +30,10 @@ class PodStatus extends EventEmitter {
}

if (annotations['kube-slack/slack-channel']) {
messageProps['channel'] = annotations['kube-slack/slack-channel'];
this.messageProps['channel'] = annotations['kube-slack/slack-channel'];
}
}

if (!item.state.waiting) {
continue;
}
if (this.blacklistReason.includes(item.state.waiting.reason)) {
continue;
}

let key = item.pod.metadata.name;

if (
Expand All @@ -48,6 +42,16 @@ class PodStatus extends EventEmitter {
) {
key = item.pod.metadata.ownerReferences[0].name;
}
this.messageProps._key = key;

if (!item.state.waiting) {
this.checkRecovery(item)
continue;
}
if (this.blacklistReason.includes(item.state.waiting.reason)) {
this.checkRecovery(item)
continue;
}

this.emit('message', {
fallback: `Container ${item.pod.metadata.namespace}/${
Expand All @@ -63,9 +67,30 @@ class PodStatus extends EventEmitter {
item.state.waiting.message
}\`\`\``,
mrkdwn_in: ['text'],
_key: key,
...messageProps,
...this.messageProps
});
this.alerted[item.name] = item;
}
}

checkRecovery(item) {
if(this.alerted[item.name] && item.ready && this.alerted[item.name].restartCount == item.restartCount) {
delete this.alerted[item.name]
this.emit('message', {
fallback: `Container ${item.pod.metadata.namespace}/${
item.pod.metadata.name
}/${item.name} ready`,
color: 'good',
title: `${item.pod.metadata.namespace}/${item.pod.metadata.name}/${
item.name
}`,
text: `Container entered status *${item.pod.status.phase}*\n${item.restartCount} restart${item.restartCount == 1 ? '' : 's'}`,
mrkdwn_in: ['text'],
...this.messageProps,
_key: this.messageProps._key + "recovery"
});
} else if(this.alerted[item.name]) {
this.alerted[item.name] = item;
}
}
}
Expand Down