mirror of
https://github.com/argoproj/argo-cd.git
synced 2026-02-20 01:28:45 +01:00
feat: update health checks for Numaflow resources (#25698)
Signed-off-by: Dillen Padhiar <dillen_padhiar@intuit.com>
This commit is contained in:
@@ -6,7 +6,7 @@ actions["force-promote"] = {
|
||||
|
||||
-- force-promote
|
||||
local forcePromote = false
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress") then
|
||||
if obj.metadata.labels ~= nil and (obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress" or obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "trial") then
|
||||
forcePromote = true
|
||||
end
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/force-promote"] == "true") then
|
||||
|
||||
@@ -11,11 +11,14 @@ if obj.status ~= nil then
|
||||
end
|
||||
end
|
||||
|
||||
progressiveFailure = (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/progressive-result-state"] == "failed")
|
||||
if obj.metadata.generation == obj.status.observedGeneration then
|
||||
if (healthy ~= {} and healthy.status == "False") or obj.status.phase == "Failed" then
|
||||
if (healthy ~= {} and healthy.status == "False") or obj.status.phase == "Failed" or progressiveFailure then
|
||||
hs.status = "Degraded"
|
||||
if obj.status.phase == "Failed" then
|
||||
hs.message = obj.status.message
|
||||
elseif progressiveFailure then
|
||||
hs.message = "Failed progressive upgrade"
|
||||
else
|
||||
hs.message = healthy.message
|
||||
end
|
||||
|
||||
@@ -10,4 +10,8 @@ tests:
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: "Waiting for 3 pods to be ready...\n"
|
||||
inputPath: testdata/degraded.yaml
|
||||
inputPath: testdata/degraded.yaml
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: "Failed progressive upgrade"
|
||||
inputPath: testdata/degraded-progressive.yaml
|
||||
@@ -0,0 +1,80 @@
|
||||
apiVersion: numaflow.numaproj.io/v1alpha1
|
||||
kind: InterStepBufferService
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/last-applied-configuration: |
|
||||
{"apiVersion":"numaflow.numaproj.io/v1alpha1","kind":"InterStepBufferService","metadata":{"annotations":{},"name":"default","namespace":"numaflow-system"},"spec":{"jetstream":{"persistence":{"volumeSize":"3Gi"},"version":"latest"}}}
|
||||
creationTimestamp: "2024-10-08T18:21:09Z"
|
||||
finalizers:
|
||||
- isbsvc-controller
|
||||
generation: 1
|
||||
name: default
|
||||
namespace: numaflow-system
|
||||
resourceVersion: "357862"
|
||||
uid: e175db66-3918-4ef8-993d-12b37eb9a964
|
||||
labels:
|
||||
numaplane.numaproj.io/progressive-result-state: "failed"
|
||||
spec:
|
||||
jetstream:
|
||||
persistence:
|
||||
volumeSize: 3Gi
|
||||
replicas: 3
|
||||
version: latest
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: "2024-10-08T18:21:53Z"
|
||||
message: |
|
||||
partitioned roll out complete: 3 new pods have been updated...
|
||||
reason: Healthy
|
||||
status: "True"
|
||||
type: ChildrenResourcesHealthy
|
||||
- lastTransitionTime: "2024-10-08T18:21:53Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: Configured
|
||||
- lastTransitionTime: "2024-10-08T18:21:53Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: Deployed
|
||||
config:
|
||||
jetstream:
|
||||
auth:
|
||||
basic:
|
||||
password:
|
||||
key: client-auth-password
|
||||
name: isbsvc-default-js-client-auth
|
||||
user:
|
||||
key: client-auth-user
|
||||
name: isbsvc-default-js-client-auth
|
||||
streamConfig: |
|
||||
consumer:
|
||||
ackwait: 60s
|
||||
maxackpending: 25000
|
||||
otbucket:
|
||||
history: 1
|
||||
maxbytes: 0
|
||||
maxvaluesize: 0
|
||||
replicas: 3
|
||||
storage: 0
|
||||
ttl: 3h
|
||||
procbucket:
|
||||
history: 1
|
||||
maxbytes: 0
|
||||
maxvaluesize: 0
|
||||
replicas: 3
|
||||
storage: 0
|
||||
ttl: 72h
|
||||
stream:
|
||||
duplicates: 60s
|
||||
maxage: 72h
|
||||
maxbytes: -1
|
||||
maxmsgs: 100000
|
||||
replicas: 3
|
||||
retention: 0
|
||||
storage: 0
|
||||
url: nats://isbsvc-default-js-svc.numaflow-system.svc:4222
|
||||
observedGeneration: 1
|
||||
phase: Running
|
||||
type: jetstream
|
||||
@@ -45,7 +45,7 @@ end
|
||||
|
||||
-- force-promote
|
||||
local forcePromote = false
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress") then
|
||||
if obj.metadata.labels ~= nil and (obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress" or obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "trial") then
|
||||
forcePromote = true
|
||||
end
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/force-promote"] == "true") then
|
||||
|
||||
@@ -10,11 +10,14 @@ if obj.status ~= nil then
|
||||
end
|
||||
end
|
||||
|
||||
progressiveFailure = (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/progressive-result-state"] == "failed")
|
||||
if obj.metadata.generation == obj.status.observedGeneration then
|
||||
if (healthy ~= {} and healthy.status == "False") or obj.status.phase == "Failed" then
|
||||
if (healthy ~= {} and healthy.status == "False") or (obj.status.phase == "Failed") or progressiveFailure then
|
||||
hs.status = "Degraded"
|
||||
if obj.status.phase == "Failed" then
|
||||
hs.message = obj.status.message
|
||||
elseif progressiveFailure then
|
||||
hs.message = "Failed progressive upgrade"
|
||||
else
|
||||
hs.message = "Subresources are unhealthy"
|
||||
end
|
||||
|
||||
@@ -14,4 +14,8 @@ tests:
|
||||
- healthStatus:
|
||||
status: Healthy
|
||||
message: "MonoVertex is paused"
|
||||
inputPath: testdata/paused.yaml
|
||||
inputPath: testdata/paused.yaml
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: "Failed progressive upgrade"
|
||||
inputPath: testdata/degraded-progressive.yaml
|
||||
62
resource_customizations/numaflow.numaproj.io/MonoVertex/testdata/degraded-progressive.yaml
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
apiVersion: numaflow.numaproj.io/v1alpha1
|
||||
kind: MonoVertex
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/last-applied-configuration: |
|
||||
{"apiVersion":"numaflow.numaproj.io/v1alpha1","kind":"MonoVertex","metadata":{"annotations":{},"name":"simple-mono-vertex","namespace":"numaflow-system"},"spec":{"sink":{"udsink":{"container":{"image":"quay.io/numaio/numaflow-java/simple-sink:stable"}}},"source":{"transformer":{"container":{"image":"quay.io/numaio/numaflow-rs/source-transformer-now:stable"}},"udsource":{"container":{"image":"quay.io/numaio/numaflow-java/source-simple-source:stable"}}}}}
|
||||
creationTimestamp: "2024-10-08T20:34:32Z"
|
||||
generation: 1
|
||||
name: simple-mono-vertex
|
||||
namespace: numaflow-system
|
||||
resourceVersion: "367420"
|
||||
uid: 7bc9291a-9c80-4ec1-8b06-46fac8f7e507
|
||||
labels:
|
||||
numaplane.numaproj.io/progressive-result-state: "failed"
|
||||
spec:
|
||||
lifecycle:
|
||||
desiredPhase: Running
|
||||
replicas: 1
|
||||
sink:
|
||||
udsink:
|
||||
container:
|
||||
image: quay.io/numaio/numaflow-java/simple-sink:stable
|
||||
source:
|
||||
transformer:
|
||||
container:
|
||||
image: quay.io/numaio/numaflow-rs/source-transformer-now:stable
|
||||
udsource:
|
||||
container:
|
||||
image: quay.io/numaio/numaflow-java/source-simple-source:stable
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 25%
|
||||
type: RollingUpdate
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: "2024-10-08T20:34:36Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: DaemonHealthy
|
||||
- lastTransitionTime: "2024-10-08T20:34:32Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: Deployed
|
||||
- lastTransitionTime: "2024-10-08T21:58:03Z"
|
||||
message: All pods are healthy
|
||||
reason: Running
|
||||
status: "True"
|
||||
type: PodsHealthy
|
||||
currentHash: 8ed34d9058faa60997ee13083ccb3d80691df37b45a34eaa347af99f237e8df6
|
||||
desiredReplicas: 1
|
||||
lastScaledAt: "2024-10-08T20:34:32Z"
|
||||
lastUpdated: "2024-10-08T21:58:13Z"
|
||||
observedGeneration: 1
|
||||
phase: Running
|
||||
readyReplicas: 1
|
||||
replicas: 1
|
||||
selector: app.kubernetes.io/component=mono-vertex,numaflow.numaproj.io/mono-vertex-name=simple-mono-vertex
|
||||
updateHash: 8ed34d9058faa60997ee13083ccb3d80691df37b45a34eaa347af99f237e8df6
|
||||
updatedReadyReplicas: 1
|
||||
updatedReplicas: 1
|
||||
@@ -45,7 +45,7 @@ end
|
||||
|
||||
-- force-promote
|
||||
local forcePromote = false
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress") then
|
||||
if obj.metadata.labels ~= nil and (obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "in-progress" or obj.metadata.labels["numaplane.numaproj.io/upgrade-state"] == "trial") then
|
||||
forcePromote = true
|
||||
end
|
||||
if (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/force-promote"] == "true") then
|
||||
|
||||
@@ -10,11 +10,14 @@ if obj.status ~= nil then
|
||||
end
|
||||
end
|
||||
|
||||
progressiveFailure = (obj.metadata.labels ~= nil and obj.metadata.labels["numaplane.numaproj.io/progressive-result-state"] == "failed")
|
||||
if obj.metadata.generation == obj.status.observedGeneration then
|
||||
if (healthy ~= {} and healthy.status == "False") or obj.status.phase == "Failed" then
|
||||
if (healthy ~= {} and healthy.status == "False") or (obj.status.phase == "Failed") or progressiveFailure then
|
||||
hs.status = "Degraded"
|
||||
if obj.status.phase == "Failed" then
|
||||
hs.message = obj.status.message
|
||||
elseif progressiveFailure then
|
||||
hs.message = "Failed progressive upgrade"
|
||||
else
|
||||
hs.message = "Subresources are unhealthy"
|
||||
end
|
||||
|
||||
@@ -14,4 +14,8 @@ tests:
|
||||
- healthStatus:
|
||||
status: Healthy
|
||||
message: "Pipeline is paused"
|
||||
inputPath: testdata/paused.yaml
|
||||
inputPath: testdata/paused.yaml
|
||||
- healthStatus:
|
||||
status: Degraded
|
||||
message: "Failed progressive upgrade"
|
||||
inputPath: testdata/degraded-progressive.yaml
|
||||
100
resource_customizations/numaflow.numaproj.io/Pipeline/testdata/degraded-progressive.yaml
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
apiVersion: numaflow.numaproj.io/v1alpha1
|
||||
kind: Pipeline
|
||||
metadata:
|
||||
creationTimestamp: "2024-10-08T18:22:18Z"
|
||||
finalizers:
|
||||
- pipeline-controller
|
||||
generation: 1
|
||||
name: simple-pipeline
|
||||
namespace: numaflow-system
|
||||
resourceVersion: "358080"
|
||||
uid: bb6cc91c-eb05-4fe7-9380-63b9532a85db
|
||||
labels:
|
||||
numaplane.numaproj.io/progressive-result-state: "failed"
|
||||
spec:
|
||||
edges:
|
||||
- from: in
|
||||
to: cat
|
||||
- from: cat
|
||||
to: out
|
||||
lifecycle:
|
||||
deleteGracePeriodSeconds: 30
|
||||
desiredPhase: Running
|
||||
pauseGracePeriodSeconds: 30
|
||||
limits:
|
||||
bufferMaxLength: 30000
|
||||
bufferUsageLimit: 80
|
||||
readBatchSize: 500
|
||||
readTimeout: 1s
|
||||
vertices:
|
||||
- name: in
|
||||
scale:
|
||||
min: 1
|
||||
source:
|
||||
generator:
|
||||
duration: 1s
|
||||
jitter: 0s
|
||||
msgSize: 8
|
||||
rpu: 5
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 25%
|
||||
type: RollingUpdate
|
||||
- name: cat
|
||||
scale:
|
||||
min: 1
|
||||
udf:
|
||||
builtin:
|
||||
name: cat
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 25%
|
||||
type: RollingUpdate
|
||||
- name: out
|
||||
scale:
|
||||
min: 1
|
||||
sink:
|
||||
log: {}
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 25%
|
||||
type: RollingUpdate
|
||||
watermark:
|
||||
disabled: false
|
||||
maxDelay: 0s
|
||||
status:
|
||||
conditions:
|
||||
- lastTransitionTime: "2024-10-08T18:22:49Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: Configured
|
||||
- lastTransitionTime: "2024-10-08T18:22:49Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: DaemonServiceHealthy
|
||||
- lastTransitionTime: "2024-10-08T18:22:49Z"
|
||||
message: Successful
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: Deployed
|
||||
- lastTransitionTime: "2024-10-08T18:22:49Z"
|
||||
message: No Side Inputs attached to the pipeline
|
||||
reason: NoSideInputs
|
||||
status: "True"
|
||||
type: SideInputsManagersHealthy
|
||||
- lastTransitionTime: "2024-10-08T18:22:49Z"
|
||||
message: All vertices are healthy
|
||||
reason: Successful
|
||||
status: "True"
|
||||
type: VerticesHealthy
|
||||
lastUpdated: "2024-10-08T18:22:49Z"
|
||||
mapUDFCount: 1
|
||||
observedGeneration: 1
|
||||
phase: Running
|
||||
reduceUDFCount: 0
|
||||
sinkCount: 1
|
||||
sourceCount: 1
|
||||
udfCount: 1
|
||||
vertexCount: 3
|
||||
Reference in New Issue
Block a user