mirror of
https://github.com/argoproj/argo-cd.git
synced 2026-02-20 01:28:45 +01:00
feat: Add health checks for Coralogix (#23853)
Signed-off-by: Daniel Leinov <daniellei@jfrog.com> Co-authored-by: Pasha Kostohrys <pasha.kostohrys@gmail.com>
This commit is contained in:
21
resource_customizations/coralogix.com/Alert/health.lua
Normal file
21
resource_customizations/coralogix.com/Alert/health.lua
Normal file
@@ -0,0 +1,21 @@
|
||||
-- Health check for the Coralogix Alert custom resource.
--
-- Reads the resource from `obj` (not defined in this script; expected to be
-- supplied by the caller) and returns a table with `status` and `message`:
--   * "Healthy"     when a RemoteSynced condition has status "True"
--   * "Degraded"    when a RemoteSynced condition has status "False"; the
--                   message is copied from that condition
--   * "Progressing" in every other case (no status, no conditions, no
--                   RemoteSynced condition, or any other condition status)
-- The first RemoteSynced condition with a "True"/"False" status decides.
hs = {
  status = "Progressing",
  message = "Waiting for status to be updated",
}

-- Nothing reported yet: keep the default Progressing result.
local resourceStatus = obj.status
if resourceStatus == nil or resourceStatus.conditions == nil then
  return hs
end

for _, cond in ipairs(resourceStatus.conditions) do
  local isRemoteSynced = cond.type == "RemoteSynced"

  if isRemoteSynced and cond.status == "True" then
    hs.status = "Healthy"
    hs.message = "Resource is ready"
    return hs
  end

  if isRemoteSynced and cond.status == "False" then
    hs.status = "Degraded"
    -- Surface the condition's own message as the failure reason.
    hs.message = cond.message
    return hs
  end
end

-- No decisive RemoteSynced condition found: still Progressing.
return hs
|
||||
16
resource_customizations/coralogix.com/Alert/health_test.yaml
Normal file
16
resource_customizations/coralogix.com/Alert/health_test.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
tests:
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: "error on extracting alert properties: failed to expand notification
|
||||
group: failed to expand webhooks settings: failed to expand webhook
|
||||
setting: failed to expand integration: failed to convert name to
|
||||
integration ID: webhook critical-alerts-webhook not found"
|
||||
inputPath: testdata/degraded_alert.yaml
|
||||
- healthStatus:
|
||||
status: Progressing
|
||||
message: "Waiting for status to be updated"
|
||||
inputPath: testdata/progressing_alert.yaml
|
||||
- healthStatus:
|
||||
status: Healthy
|
||||
message: "Resource is ready"
|
||||
inputPath: testdata/healthy_alert.yaml
|
||||
58
resource_customizations/coralogix.com/Alert/testdata/degraded_alert.yaml
vendored
Normal file
58
resource_customizations/coralogix.com/Alert/testdata/degraded_alert.yaml
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
apiVersion: coralogix.com/v1beta1
|
||||
kind: Alert
|
||||
metadata:
|
||||
name: bitbucketcontainernotrunning-test
|
||||
spec:
|
||||
alertType:
|
||||
metricThreshold:
|
||||
metricFilter:
|
||||
promql: >-
|
||||
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||
missingValues:
|
||||
replaceWithZero: true
|
||||
rules:
|
||||
- condition:
|
||||
conditionType: moreThan
|
||||
forOverPct: 100
|
||||
ofTheLast:
|
||||
specificValue: 5m
|
||||
threshold: 0
|
||||
override:
|
||||
priority: p1
|
||||
description: >-
|
||||
Bitbucket one of the container is not running
|
||||
entityLabels:
|
||||
app: bitbucket
|
||||
name: Bitbucketcontainernotrunning-test
|
||||
notificationGroup:
|
||||
groupByKeys:
|
||||
- pod
|
||||
webhooks:
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: opsgenie-example
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: critical-alerts-webhook
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
priority: p1
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: '2025-07-17T07:39:54Z'
|
||||
message: >-
|
||||
error on extracting alert properties: failed to expand notification
|
||||
group: failed to expand webhooks settings: failed to expand webhook
|
||||
setting: failed to expand integration: failed to convert name to
|
||||
integration ID: webhook critical-alerts-webhook not found
|
||||
observedGeneration: 1
|
||||
reason: RemoteCreationFailed
|
||||
status: 'False'
|
||||
type: RemoteSynced
|
||||
|
||||
53
resource_customizations/coralogix.com/Alert/testdata/healthy_alert.yaml
vendored
Normal file
53
resource_customizations/coralogix.com/Alert/testdata/healthy_alert.yaml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
apiVersion: coralogix.com/v1beta1
|
||||
kind: Alert
|
||||
metadata:
|
||||
name: bitbucketcontainernotrunning-test
|
||||
spec:
|
||||
alertType:
|
||||
metricThreshold:
|
||||
metricFilter:
|
||||
promql: >-
|
||||
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||
missingValues:
|
||||
replaceWithZero: true
|
||||
rules:
|
||||
- condition:
|
||||
conditionType: moreThan
|
||||
forOverPct: 100
|
||||
ofTheLast:
|
||||
specificValue: 5m
|
||||
threshold: 0
|
||||
override:
|
||||
priority: p1
|
||||
description: >-
|
||||
Bitbucket one of the container is not running
|
||||
entityLabels:
|
||||
app: bitbucket
|
||||
name: Bitbucketcontainernotrunning-test
|
||||
notificationGroup:
|
||||
groupByKeys:
|
||||
- pod
|
||||
webhooks:
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: opsgenie-example
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: critical-alerts-webhook
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
priority: p1
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: '2025-07-17T07:39:55Z'
|
||||
message: Remote resource synced
|
||||
observedGeneration: 3
|
||||
reason: RemoteSyncedSuccessfully
|
||||
status: 'True'
|
||||
type: RemoteSynced
|
||||
46
resource_customizations/coralogix.com/Alert/testdata/progressing_alert.yaml
vendored
Normal file
46
resource_customizations/coralogix.com/Alert/testdata/progressing_alert.yaml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
apiVersion: coralogix.com/v1beta1
|
||||
kind: Alert
|
||||
metadata:
|
||||
name: bitbucketcontainernotrunning-test
|
||||
spec:
|
||||
alertType:
|
||||
metricThreshold:
|
||||
metricFilter:
|
||||
promql: >-
|
||||
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||
missingValues:
|
||||
replaceWithZero: true
|
||||
rules:
|
||||
- condition:
|
||||
conditionType: moreThan
|
||||
forOverPct: 100
|
||||
ofTheLast:
|
||||
specificValue: 5m
|
||||
threshold: 0
|
||||
override:
|
||||
priority: p1
|
||||
description: >-
|
||||
Bitbucket one of the container is not running
|
||||
entityLabels:
|
||||
app: bitbucket
|
||||
name: Bitbucketcontainernotrunning-test
|
||||
notificationGroup:
|
||||
groupByKeys:
|
||||
- pod
|
||||
webhooks:
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: opsgenie-example
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
- integration:
|
||||
integrationRef:
|
||||
backendRef:
|
||||
name: critical-alerts-webhook
|
||||
notifyOn: triggeredAndResolved
|
||||
retriggeringPeriod:
|
||||
minutes: 60
|
||||
priority: p1
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
-- Health check for a Coralogix custom resource.
-- NOTE(review): this script's file header is not visible here; the
-- neighbouring test files suggest it is the RecordingRuleGroupSet check -
-- confirm.
--
-- Reads the resource from `obj` (not defined in this script; expected to be
-- supplied by the caller) and returns a table with `status` and `message`:
--   * "Healthy"     when a RemoteSynced condition has status "True"
--   * "Degraded"    when a RemoteSynced condition has status "False"; the
--                   message is copied from that condition
--   * "Progressing" in every other case (no status, no conditions, no
--                   RemoteSynced condition, or any other condition status)
-- The first RemoteSynced condition with a "True"/"False" status decides.
hs = {
  status = "Progressing",
  message = "Waiting for status to be updated",
}

-- Nothing reported yet: keep the default Progressing result.
local resourceStatus = obj.status
if resourceStatus == nil or resourceStatus.conditions == nil then
  return hs
end

for _, cond in ipairs(resourceStatus.conditions) do
  local isRemoteSynced = cond.type == "RemoteSynced"

  if isRemoteSynced and cond.status == "True" then
    hs.status = "Healthy"
    hs.message = "Resource is ready"
    return hs
  end

  if isRemoteSynced and cond.status == "False" then
    hs.status = "Degraded"
    -- Surface the condition's own message as the failure reason.
    hs.message = cond.message
    return hs
  end
end

-- No decisive RemoteSynced condition found: still Progressing.
return hs
|
||||
@@ -0,0 +1,41 @@
|
||||
tests:
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: >-
|
||||
error on creating remote recordingRuleGroupSet: SDK API error from /com.coralogixapis.metrics_rule_manager.v1.RuleGroupSets/Create for feature group recording-rules: rpc error: code = InvalidArgument desc = {
|
||||
"groups": {
|
||||
"0": {
|
||||
"rules": {
|
||||
"0": {
|
||||
"record": [
|
||||
{
|
||||
"code": "length",
|
||||
"message": null,
|
||||
"params": {
|
||||
"value": "",
|
||||
"min": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"code": "invalid_promql",
|
||||
"message": "SingleExpr: unexpected token ; want \"\"(\", \"{\", \"-\", \"+\"\"",
|
||||
"params": {
|
||||
"value": ""
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inputPath: testdata/degraded_recording_rule.yaml
|
||||
- healthStatus:
|
||||
status: Progressing
|
||||
message: "Waiting for status to be updated"
|
||||
inputPath: testdata/progressing_recording_rule.yaml
|
||||
- healthStatus:
|
||||
status: Healthy
|
||||
message: "Resource is ready"
|
||||
inputPath: testdata/healthy_recording_rule.yaml
|
||||
@@ -0,0 +1,47 @@
|
||||
apiVersion: coralogix.com/v1alpha1
|
||||
kind: RecordingRuleGroupSet
|
||||
metadata:
|
||||
name: rules
|
||||
spec:
|
||||
groups:
|
||||
- name: k8s_rules
|
||||
rules:
|
||||
- expr: >-
|
||||
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||
by (namespace)
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: "2025-07-17T14:41:18Z"
|
||||
message: |-
|
||||
error on creating remote recordingRuleGroupSet: SDK API error from /com.coralogixapis.metrics_rule_manager.v1.RuleGroupSets/Create for feature group recording-rules: rpc error: code = InvalidArgument desc = {
|
||||
"groups": {
|
||||
"0": {
|
||||
"rules": {
|
||||
"0": {
|
||||
"record": [
|
||||
{
|
||||
"code": "length",
|
||||
"message": null,
|
||||
"params": {
|
||||
"value": "",
|
||||
"min": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"code": "invalid_promql",
|
||||
"message": "SingleExpr: unexpected token ; want \"\"(\", \"{\", \"-\", \"+\"\"",
|
||||
"params": {
|
||||
"value": ""
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
observedGeneration: 1
|
||||
reason: RemoteCreationFailed
|
||||
status: "False"
|
||||
type: RemoteSynced
|
||||
21
resource_customizations/coralogix.com/RecordingRuleGroupSet/testdata/healthy_recording_rule.yaml
vendored
Normal file
21
resource_customizations/coralogix.com/RecordingRuleGroupSet/testdata/healthy_recording_rule.yaml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
apiVersion: coralogix.com/v1alpha1
|
||||
kind: RecordingRuleGroupSet
|
||||
metadata:
|
||||
name: rules
|
||||
spec:
|
||||
groups:
|
||||
- name: k8s_rules
|
||||
rules:
|
||||
- expr: >-
|
||||
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||
by (namespace)
|
||||
record: 'namespace:container_cpu_usage_seconds_total:sum_rate'
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: '2025-05-27T08:49:26Z'
|
||||
message: Remote resource synced
|
||||
observedGeneration: 3
|
||||
reason: RemoteSyncedSuccessfully
|
||||
status: 'True'
|
||||
type: RemoteSynced
|
||||
@@ -0,0 +1,13 @@
|
||||
apiVersion: coralogix.com/v1alpha1
|
||||
kind: RecordingRuleGroupSet
|
||||
metadata:
|
||||
name: rules
|
||||
spec:
|
||||
groups:
|
||||
- name: k8s_rules
|
||||
rules:
|
||||
- expr: >-
|
||||
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||
by (namespace)
|
||||
record: 'namespace:container_cpu_usage_seconds_total:sum_rate'
|
||||
Reference in New Issue
Block a user