feat(health): add crossplane and upbound health checks (#21479) (#22919)

Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com>
Signed-off-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com>
Co-authored-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com>
This commit is contained in:
Michael Crenshaw
2025-05-22 08:38:40 -04:00
committed by GitHub
parent 722da4e70f
commit 9149021b2c
10 changed files with 316 additions and 21 deletions

View File

@@ -168,7 +168,33 @@ To test the implemented custom health checks, run `go test -v ./util/lua/`.
The [PR#1139](https://github.com/argoproj/argo-cd/pull/1139) is an example of Cert Manager CRDs custom health check.
Please note that bundled health checks with wildcards are not supported.
#### Wildcard Support for Built-in Health Checks
You can use a single health check for multiple resources by using a wildcard in the group or kind directory names.
The `_` character behaves like a `*` wildcard. For example, consider the following directory structure:
```
argo-cd
|-- resource_customizations
| |-- _.group.io # CRD group
| | |-- _ # Resource kind
| | | |-- health.lua # Health check
```
Any resource with a group that ends with `.group.io` will use the health check in `health.lua`.
Wildcard checks are only evaluated if there is no specific check for the resource.
If multiple wildcard checks match, the one whose directory path sorts first (in lexicographic order) is used.
We use the [doublestar](https://github.com/bmatcuk/doublestar) glob library to match the wildcard checks. We currently
only treat a path as a wildcard if it contains a `_` character, but this may change in the future.
!!!important "Avoid Massive Scripts"
Avoid writing massive scripts to handle multiple resources. They'll get hard to read and maintain. Instead, just
duplicate the relevant parts in resource-specific scripts.
## Overriding Go-Based Health Checks

View File

@@ -0,0 +1,66 @@
-- Health check for Crossplane (*.crossplane.io) resources.
-- Adapted from: https://github.com/crossplane/docs/blob/bd701357e9d5eecf529a0b42f23a78850a6d1d87/content/master/guides/crossplane-with-argo-cd.md
--
-- Reads the global `obj` (the resource being evaluated; expected to be provided
-- by the environment that runs this script) and returns a table with `status`
-- and `message` fields.

-- Default result: a resource is considered to be provisioning until one of the
-- checks below proves otherwise.
local health_status = {
  status = "Progressing",
  message = "Provisioning ..."
}

-- Returns true if `val` is one of the elements of the sequence `list`.
local function contains(list, val)
  for _, v in ipairs(list) do
    if v == val then
      return true
    end
  end
  return false
end

-- Kinds listed here are treated as Healthy when they carry no status at all.
local has_no_status = {
  "Composition",
  "CompositionRevision",
  "DeploymentRuntimeConfig",
  "ControllerConfig",
  "ProviderConfig",
  "ProviderConfigUsage"
}

-- True when the resource has no status, or an empty one.
local status_is_empty = obj.status == nil or next(obj.status) == nil

-- `and` binds tighter than `or` in Lua, so the emptiness test is evaluated
-- first and only then combined with the kind check. Without this grouping,
-- every resource with a nil status would be reported Healthy regardless of
-- its kind.
if status_is_empty and contains(has_no_status, obj.kind) then
  health_status.status = "Healthy"
  health_status.message = "Resource is up-to-date."
  return health_status
end

-- No conditions to inspect: only a ProviderConfig that reports users is
-- considered Healthy; everything else keeps the default Progressing result.
if status_is_empty or obj.status.conditions == nil then
  -- Guard against a nil status before reading `users`.
  if obj.kind == "ProviderConfig" and obj.status ~= nil and obj.status.users ~= nil then
    health_status.status = "Healthy"
    health_status.message = "Resource is in use."
    return health_status
  end
  return health_status
end

-- Conditions are evaluated in order; the first decisive one wins.
for _, condition in ipairs(obj.status.conditions) do
  -- A failed async operation or a failed sync marks the resource Degraded.
  if condition.type == "LastAsyncOperation" or condition.type == "Synced" then
    if condition.status == "False" then
      health_status.status = "Degraded"
      health_status.message = condition.message
      return health_status
    end
  end

  -- Any of these condition types being "True" marks the resource Healthy.
  if contains({"Ready", "Healthy", "Offered", "Established"}, condition.type) then
    if condition.status == "True" then
      health_status.status = "Healthy"
      health_status.message = "Resource is up-to-date."
      return health_status
    end
  end
end

return health_status

View File

@@ -0,0 +1,5 @@
tests:
- healthStatus:
status: Healthy
message: "Resource is up-to-date."
inputPath: testdata/composition_healthy.yaml

View File

@@ -0,0 +1,25 @@
# Taken from here May 9, 2025: https://docs.crossplane.io/latest/concepts/compositions/
apiVersion: apiextensions.crossplane.io/v1
kind: Composition
metadata:
name: example
spec:
compositeTypeRef:
apiVersion: custom-api.example.org/v1alpha1
kind: AcmeBucket
mode: Pipeline
pipeline:
- step: patch-and-transform
functionRef:
name: function-patch-and-transform
input:
apiVersion: pt.fn.crossplane.io/v1beta1
kind: Resources
resources:
- name: storage-bucket
base:
apiVersion: s3.aws.upbound.io/v1beta1
kind: Bucket
spec:
forProvider:
region: "us-east-2"

View File

@@ -0,0 +1,63 @@
-- Health check for Upbound provider (*.upbound.io) resources.
-- Adapted from: https://github.com/crossplane/docs/blob/bd701357e9d5eecf529a0b42f23a78850a6d1d87/content/master/guides/crossplane-with-argo-cd.md
--
-- Reads the global `obj` (the resource being evaluated; expected to be provided
-- by the environment that runs this script) and returns a table with `status`
-- and `message` fields.

-- Default result: a resource is considered to be provisioning until one of the
-- checks below proves otherwise.
local health_status = {
  status = "Progressing",
  message = "Provisioning ..."
}

-- Returns true if `val` is one of the elements of the sequence `list`.
local function contains(list, val)
  for _, v in ipairs(list) do
    if v == val then
      return true
    end
  end
  return false
end

-- Kinds listed here are treated as Healthy when they carry no status at all.
local has_no_status = {
  "ProviderConfig",
  "ProviderConfigUsage"
}

-- True when the resource has no status, or an empty one.
local status_is_empty = obj.status == nil or next(obj.status) == nil

-- `and` binds tighter than `or` in Lua, so the emptiness test is evaluated
-- first and only then combined with the kind check. Without this grouping,
-- every resource with a nil status would be reported Healthy regardless of
-- its kind.
if status_is_empty and contains(has_no_status, obj.kind) then
  health_status.status = "Healthy"
  health_status.message = "Resource is up-to-date."
  return health_status
end

-- No conditions to inspect: only a ProviderConfig that reports users is
-- considered Healthy; everything else keeps the default Progressing result.
if status_is_empty or obj.status.conditions == nil then
  -- Guard against a nil status before reading `users`.
  if obj.kind == "ProviderConfig" and obj.status ~= nil and obj.status.users ~= nil then
    health_status.status = "Healthy"
    health_status.message = "Resource is in use."
    return health_status
  end
  return health_status
end

-- Conditions are evaluated in order; the first decisive one wins.
for _, condition in ipairs(obj.status.conditions) do
  -- A failed async operation or a failed sync marks the resource Degraded.
  if condition.type == "LastAsyncOperation" or condition.type == "Synced" then
    if condition.status == "False" then
      health_status.status = "Degraded"
      health_status.message = condition.message
      return health_status
    end
  end

  -- A "True" Ready condition marks the resource Healthy.
  if condition.type == "Ready" then
    if condition.status == "True" then
      health_status.status = "Healthy"
      health_status.message = "Resource is up-to-date."
      return health_status
    end
  end
end

return health_status

View File

@@ -0,0 +1,5 @@
tests:
- healthStatus:
status: Healthy
message: "Resource is up-to-date."
inputPath: testdata/providerconfig_healthy.yaml

View File

@@ -0,0 +1,10 @@
apiVersion: aws.upbound.io/v1beta1
kind: ProviderConfig
metadata:
name: irsa-with-role-chaining
spec:
credentials:
source: IRSA
assumeRoleChain:
- roleARN: <roleARN-1>
- roleARN: <roleARN-2>

View File

@@ -6,5 +6,5 @@ import (
// Embedded contains embedded resource customization
//
//go:embed *
//go:embed all:*
var Embedded embed.FS

View File

@@ -6,12 +6,17 @@ import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"reflect"
"slices"
"strings"
"sync"
"time"
"github.com/argoproj/gitops-engine/pkg/health"
glob "github.com/bmatcuk/doublestar/v4"
lua "github.com/yuin/gopher-lua"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -20,7 +25,7 @@ import (
applicationpkg "github.com/argoproj/argo-cd/v3/pkg/apiclient/application"
appv1 "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1"
"github.com/argoproj/argo-cd/v3/resource_customizations"
"github.com/argoproj/argo-cd/v3/util/glob"
argoglob "github.com/argoproj/argo-cd/v3/util/glob"
)
const (
@@ -31,15 +36,8 @@ const (
actionDiscoveryScriptFile = "discovery.lua"
)
// ScriptDoesNotExistError is an error type for when a built-in script does not exist.
type ScriptDoesNotExistError struct {
// ScriptName is the name of the script that does not exist.
ScriptName string
}
func (e ScriptDoesNotExistError) Error() string {
return fmt.Sprintf("built-in script %q does not exist", e.ScriptName)
}
// errScriptDoesNotExist is a sentinel error returned when a built-in script does not exist.
var errScriptDoesNotExist = errors.New("built-in script does not exist")
type ResourceHealthOverrides map[string]appv1.ResourceOverride
@@ -187,8 +185,16 @@ func (vm VM) GetHealthScript(obj *unstructured.Unstructured) (script string, use
// (as built-in scripts are files in folders, named after the GVK, currently there is no wildcard support for them)
builtInScript, err := vm.getPredefinedLuaScripts(key, healthScriptFile)
if err != nil {
var doesNotExist *ScriptDoesNotExistError
if errors.As(err, &doesNotExist) {
if errors.Is(err, errScriptDoesNotExist) {
// Try to find a wildcard built-in health script
builtInScript, err = getWildcardBuiltInHealthOverrideLua(key)
if err != nil {
return "", false, fmt.Errorf("error while fetching built-in health script: %w", err)
}
if builtInScript != "" {
return builtInScript, true, nil
}
// It's okay if no built-in health script exists. Just return an empty string and let the caller handle it.
return "", false, nil
}
@@ -422,8 +428,7 @@ func (vm VM) GetResourceActionDiscovery(obj *unstructured.Unstructured) ([]strin
discoveryKey := key + "/actions/"
discoveryScript, err := vm.getPredefinedLuaScripts(discoveryKey, actionDiscoveryScriptFile)
if err != nil {
var doesNotExistErr *ScriptDoesNotExistError
if errors.As(err, &doesNotExistErr) {
if errors.Is(err, errScriptDoesNotExist) {
// No worries, just return what we have.
return discoveryScripts, nil
}
@@ -477,7 +482,7 @@ func getWildcardHealthOverrideLua(overrides map[string]appv1.ResourceOverride, g
gvkKeyToMatch := GetConfigMapKey(gvk)
for key, override := range overrides {
if glob.Match(key, gvkKeyToMatch) && override.HealthLua != "" {
if argoglob.Match(key, gvkKeyToMatch) && override.HealthLua != "" {
return override.HealthLua, override.UseOpenLibs
}
}
@@ -488,13 +493,95 @@ func (vm VM) getPredefinedLuaScripts(objKey string, scriptFile string) (string,
data, err := resource_customizations.Embedded.ReadFile(filepath.Join(objKey, scriptFile))
if err != nil {
if os.IsNotExist(err) {
return "", &ScriptDoesNotExistError{ScriptName: objKey}
return "", errScriptDoesNotExist
}
return "", err
}
return string(data), nil
}
// globHealthScriptPathsOnce is a sync.Once instance to ensure that the globHealthScriptPaths are only initialized once.
// The globs come from an embedded filesystem, so it won't change at runtime.
var globHealthScriptPathsOnce sync.Once

// globHealthScriptPaths is a cache for the glob patterns of directories containing health.lua files. Don't use this
// directly, use getGlobHealthScriptPaths() instead.
var globHealthScriptPaths []string

// globHealthScriptPathsErr caches the error (if any) produced by the one-time initialization, so that every caller of
// getGlobHealthScriptPaths() observes the failure and not just the first one. Don't use this directly.
var globHealthScriptPathsErr error

// getGlobHealthScriptPaths returns the sorted paths of the directories containing health.lua files where the path
// contains a wildcard marker ("_"). The returned paths are the raw directory names (with "_", not "*"). It uses a
// sync.Once to ensure that the embedded filesystem is only walked once; the outcome, including any error, is cached
// and returned on every subsequent call.
func getGlobHealthScriptPaths() ([]string, error) {
	globHealthScriptPathsOnce.Do(func() {
		// Walk through the embedded filesystem and get the directory names of all directories containing a health.lua.
		var patterns []string
		walkErr := fs.WalkDir(resource_customizations.Embedded, ".", func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return fmt.Errorf("error walking path %q: %w", path, err)
			}

			// Top-level directories (and the root itself) can never be a health script; return early and keep
			// descending into them.
			if d.IsDir() && filepath.Dir(path) == "." {
				return nil
			}

			// Only entries named like the health script file are of interest.
			if filepath.Base(path) != healthScriptFile {
				return nil
			}

			groupKindPath := filepath.Dir(path)
			// Check if the path contains a wildcard. If it doesn't, skip it.
			if !strings.Contains(groupKindPath, "_") {
				return nil
			}

			pattern := strings.ReplaceAll(groupKindPath, "_", "*")
			// Check that the pattern is valid. There is no underlying error to wrap here: err is known to be nil at
			// this point.
			if !glob.ValidatePattern(pattern) {
				return fmt.Errorf("invalid glob pattern %q", pattern)
			}

			// Store the raw directory path; callers convert "_" to "*" when matching and use the raw path to read
			// the script.
			patterns = append(patterns, groupKindPath)
			return nil
		})
		if walkErr != nil {
			globHealthScriptPathsErr = walkErr
			return
		}

		// Sort the patterns to ensure deterministic choice of wildcard directory for a given GK.
		slices.Sort(patterns)
		globHealthScriptPaths = patterns
	})
	if globHealthScriptPathsErr != nil {
		return nil, fmt.Errorf("error getting health script glob directories: %w", globHealthScriptPathsErr)
	}
	return globHealthScriptPaths, nil
}
// getWildcardBuiltInHealthOverrideLua returns the contents of the built-in health script from the first wildcard
// directory whose pattern matches objKey. It returns an empty string and a nil error when no wildcard directory
// matches, and an error when the wildcard directories cannot be listed or the matching script cannot be read.
func getWildcardBuiltInHealthOverrideLua(objKey string) (string, error) {
	// Check if the GVK matches any of the wildcard directories
	globs, err := getGlobHealthScriptPaths()
	if err != nil {
		return "", fmt.Errorf("error getting health script globs: %w", err)
	}

	for _, g := range globs {
		// Directory names use "_" as the wildcard marker; translate it to a glob "*" for matching.
		pattern := strings.ReplaceAll(g, "_", "*")
		if !glob.PathMatchUnvalidated(pattern, objKey) {
			continue
		}

		// The script lives in the wildcard directory itself (g), not under objKey, so report that path on failure.
		scriptPath := filepath.Join(g, healthScriptFile)
		script, readErr := resource_customizations.Embedded.ReadFile(scriptPath)
		if readErr != nil {
			return "", fmt.Errorf("error reading %q file in embedded filesystem: %w", scriptPath, readErr)
		}
		return string(script), nil
	}
	return "", nil
}
func isValidHealthStatusCode(statusCode health.HealthStatusCode) bool {
switch statusCode {
case health.HealthStatusUnknown, health.HealthStatusProgressing, health.HealthStatusSuspended, health.HealthStatusHealthy, health.HealthStatusDegraded, health.HealthStatusMissing:

View File

@@ -284,8 +284,7 @@ func TestGetResourceActionNoPredefined(t *testing.T) {
testObj := StrToUnstructured(objWithNoScriptJSON)
vm := VM{}
action, err := vm.GetResourceAction(testObj, "test")
var expectedErr *ScriptDoesNotExistError
require.ErrorAs(t, err, &expectedErr)
require.ErrorIs(t, err, errScriptDoesNotExist)
assert.Empty(t, action.ActionLua)
}
@@ -868,7 +867,7 @@ return hs`
})
t.Run("Get resource health for */* override with empty health.lua", func(t *testing.T) {
testObj := StrToUnstructured(ec2AWSCrossplaneObjJSON)
testObj := StrToUnstructured(objWithNoScriptJSON)
overrides := getBaseWildcardHealthOverrides
status, err := overrides.GetResourceHealth(testObj)
require.NoError(t, err)
@@ -954,3 +953,12 @@ func createMockResource(kind string, name string, replicas int) *unstructured.Un
image: nginx
`, kind, name, replicas))
}
// Test_getHealthScriptPaths pins the exact list of wildcard health script directories returned by
// getGlobHealthScriptPaths.
func Test_getHealthScriptPaths(t *testing.T) {
	// The expected list is hard-coded on purpose: this test will fail any time a glob pattern is added to the health
	// script paths. We don't expect that to happen often.
	expected := []string{"_.crossplane.io/_", "_.upbound.io/_"}

	actual, err := getGlobHealthScriptPaths()
	require.NoError(t, err)
	assert.Equal(t, expected, actual)
}