diff --git a/applicationset/status/resource_status.go b/applicationset/status/resource_status.go index 7a2b5c7bb6..9ac1e180e2 100644 --- a/applicationset/status/resource_status.go +++ b/applicationset/status/resource_status.go @@ -7,31 +7,17 @@ import ( func BuildResourceStatus(statusMap map[string]argov1alpha1.ResourceStatus, apps []argov1alpha1.Application) map[string]argov1alpha1.ResourceStatus { appMap := map[string]argov1alpha1.Application{} for _, app := range apps { - appCopy := app appMap[app.Name] = app gvk := app.GroupVersionKind() - // Create status if it does not exist - status, ok := statusMap[app.Name] - if !ok { - status = argov1alpha1.ResourceStatus{ - Group: gvk.Group, - Version: gvk.Version, - Kind: gvk.Kind, - Name: app.Name, - Namespace: app.Namespace, - Status: app.Status.Sync.Status, - Health: &appCopy.Status.Health, - } - } - + var status argov1alpha1.ResourceStatus status.Group = gvk.Group status.Version = gvk.Version status.Kind = gvk.Kind status.Name = app.Name status.Namespace = app.Namespace status.Status = app.Status.Sync.Status - status.Health = &appCopy.Status.Health + status.Health = app.Status.Health.DeepCopy() statusMap[app.Name] = status } diff --git a/cmd/argocd-application-controller/commands/argocd_application_controller.go b/cmd/argocd-application-controller/commands/argocd_application_controller.go index 6b49a00553..c8bf21c446 100644 --- a/cmd/argocd-application-controller/commands/argocd_application_controller.go +++ b/cmd/argocd-application-controller/commands/argocd_application_controller.go @@ -280,7 +280,7 @@ func NewCommand() *cobra.Command { command.Flags().StringToStringVar(&otlpHeaders, "otlp-headers", env.ParseStringToStringFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_HEADERS", map[string]string{}, ","), "List of OpenTelemetry collector extra headers sent with traces, headers are comma-separated key-value pairs(e.g. key1=value1,key2=value2)") command.Flags().StringSliceVar(&otlpAttrs, "otlp-attrs", env.StringsFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_ATTRS", []string{}, ","), "List of OpenTelemetry collector extra attrs when send traces, each attribute is separated by a colon(e.g. key:value)") command.Flags().StringSliceVar(&applicationNamespaces, "application-namespaces", env.StringsFromEnv("ARGOCD_APPLICATION_NAMESPACES", []string{}, ","), "List of additional namespaces that applications are allowed to be reconciled from") - command.Flags().BoolVar(&persistResourceHealth, "persist-resource-health", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_PERSIST_RESOURCE_HEALTH", true), "Enables storing the managed resources health in the Application CRD") + command.Flags().BoolVar(&persistResourceHealth, "persist-resource-health", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_PERSIST_RESOURCE_HEALTH", false), "Enables storing the managed resources health in the Application CRD") command.Flags().StringVar(&shardingAlgorithm, "sharding-method", env.StringFromEnv(common.EnvControllerShardingAlgorithm, common.DefaultShardingAlgorithm), "Enables choice of sharding method. Supported sharding methods are : [legacy, round-robin, consistent-hashing] ") // global queue rate limit config command.Flags().Int64Var(&workqueueRateLimit.BucketSize, "wq-bucket-size", env.ParseInt64FromEnv("WORKQUEUE_BUCKET_SIZE", 500, 1, math.MaxInt64), "Set Workqueue Rate Limiter Bucket Size, default 500") diff --git a/docs/operator-manual/argocd-cmd-params-cm.yaml b/docs/operator-manual/argocd-cmd-params-cm.yaml index 4bb347f1ae..7ac3de4864 100644 --- a/docs/operator-manual/argocd-cmd-params-cm.yaml +++ b/docs/operator-manual/argocd-cmd-params-cm.yaml @@ -69,11 +69,10 @@ data: # Cache expiration for app state (default 1h0m0s) controller.app.state.cache.expiration: "1h0m0s" - # Specifies if resource health should be persisted in app CRD (default true) - # Changing this to `false` significantly reduce number of Application CRD updates and improves controller performance. - # However, disabling resource health by default might affect applications that communicate with Applications CRD directly - # so we have to defer switching this to `false` by default till v3.0 release. - controller.resource.health.persist: "true" + # Specifies if resource health should be persisted in the application CR (default false). + # Setting this to true will store the health status of each resource in the application CR, + # increasing the number of updates to the CR and putting more load on the application controller + controller.resource.health.persist: "false" # Cache expiration default (default 24h0m0s) controller.default.cache.expiration: "24h0m0s" # Sharding algorithm used to balance clusters across application controller shards (default "legacy") diff --git a/docs/operator-manual/server-commands/argocd-application-controller.md b/docs/operator-manual/server-commands/argocd-application-controller.md index 29e0f18dde..bb98a2038c 100644 --- a/docs/operator-manual/server-commands/argocd-application-controller.md +++ b/docs/operator-manual/server-commands/argocd-application-controller.md @@ -54,7 +54,7 @@ argocd-application-controller [flags] --otlp-headers stringToString List of OpenTelemetry collector extra headers sent with traces, headers are comma-separated key-value pairs(e.g. key1=value1,key2=value2) (default []) --otlp-insecure OpenTelemetry collector insecure mode (default true) --password string Password for basic authentication to the API server - --persist-resource-health Enables storing the managed resources health in the Application CRD (default true) + --persist-resource-health Enables storing the managed resources health in the Application CRD --proxy-url string If provided, this URL will be used to connect via proxy --redis string Redis server hostname and port (e.g. argocd-redis:6379). --redis-ca-certificate string Path to Redis server CA certificate (e.g. /etc/certs/redis/ca.crt). If not specified, system trusted CAs will be used for server certificate validation. diff --git a/docs/operator-manual/upgrading/2.14-3.0.md b/docs/operator-manual/upgrading/2.14-3.0.md index f4464e47c7..a020bd02bf 100644 --- a/docs/operator-manual/upgrading/2.14-3.0.md +++ b/docs/operator-manual/upgrading/2.14-3.0.md @@ -21,7 +21,8 @@ to allow the `update/*` or `delete/*` actions on the application to give permiss The v2 behavior can be preserved by setting the config value `server.rbac.disableApplicationFineGrainedRBACInheritance` to `false` in the Argo CD ConfigMap `argocd-cm`. -Read the [RBAC documentation](../rbac.md#fine-grained-permissions-for-updatedelete-action) for more detailed inforamtion. +Read the [RBAC documentation](../rbac.md#fine-grained-permissions-for-updatedelete-action) for more detailed +information. ### Logs RBAC enforcement as a first-class RBAC citizen @@ -55,14 +56,17 @@ labels before upgrading. ### Changes to RBAC with Dex SSO Authentication When using Dex, the `sub` claim returned in the authentication was used as the subject for RBAC. That value depends on -the Dex internal implementation and should not be considered an immutable value that represent the subject. +the Dex internal implementation and should not be considered an immutable value that represents the subject. -The new behavior will request the `federated:id` [scope](https://dexidp.io/docs/configuration/custom-scopes-claims-clients/) from Dex, and the new value used as the RBAC subject will be based +The new behavior will request the +`federated:id` [scope](https://dexidp.io/docs/configuration/custom-scopes-claims-clients/) from Dex, and the new value +used as the RBAC subject will be based on the `federated_claims.user_id` claim instead of the `sub` claim. If you were using the Dex sub claim in RBAC policies, you will need to update them to maintain the same access. -You can know the correct `user_id` to use by decoding the current `sub` claims defined in your policies. You can also configure which +You can know the correct `user_id` to use by decoding the current `sub` claims defined in your policies. You can also +configure which value is used as `user_id` for some [connectors](https://dexidp.io/docs/connectors/). ```sh @@ -190,6 +194,77 @@ Application, it will not delete the previously managed resources. It is recommended to perform any cleanup or migration to existing in-cluster Application before upgrading when in-cluster is disabled. To perform cleanup post-migration, the in-cluster will need to be enabled temporarily. +### Health status in the Application CR + +The health status of each object used to be persisted under `/status` in the Application CR by default. +Any health churn in the resources deployed by the Application put load on the application controller. +Now, the health status is stored externally. + +You can revert this behavior by setting the `controller.resource.health.persist` to `true` in the Argo CD +`argocd-cmd-params-cm.yaml` Config Map. + +Example of a status field in the Application CR persisting health: + +```yaml +status: + health: + status: Healthy + resources: + - group: apps + health: + status: Healthy + kind: Deployment + name: my-app + namespace: foo + status: OutOfSync + version: v1 + sync: + status: OutOfSync +``` + +Example of a status field in the Application CR _not_ persisting health: + +```yaml +status: + health: + status: Healthy + resourceHealthSource: appTree + resources: + - group: apps + kind: Deployment + name: my-app + namespace: foo + status: OutOfSync + version: v1 + sync: + status: OutOfSync +``` + +#### Detection + +1. Check the `argocd-cmd-params-cm.yaml` ConfigMap for `controller.resource.health.persist`. + + If the value is empty or true, the health status is persisted in the Application CR. + +```sh +kubectl get cm argocd-cmd-params-cm -n argocd -o jsonpath='{.data.controller\.resource\.health\.persist}' +``` + +2. Check any Application CR for the `resourceHealthSource` field. + If you see a blank value, the health status is persisted in the Application CR. + +```sh +kubectl get applications.argoproj.io -n argocd -o jsonpath='{.status.resourceHealthSource}' +``` + +#### Migration + +Any tools or CLI commands parsing the `.status.resources[].health` need to be updated to use the argocd cli/API to get the health status. + +```sh +argocd app get -o json +``` + ### Empty Environment Variables in Plugins In Argo CD 3.0, empty environment variables are now passed to config management plugins. diff --git a/server/application/application.go b/server/application/application.go index 761e0a770b..fe3c91cde5 100644 --- a/server/application/application.go +++ b/server/application/application.go @@ -12,6 +12,8 @@ import ( "strings" "time" + "github.com/argoproj/gitops-engine/pkg/health" + cacheutil "github.com/argoproj/argo-cd/v3/util/cache" kubecache "github.com/argoproj/gitops-engine/pkg/cache" @@ -46,6 +48,7 @@ import ( "github.com/argoproj/argo-cd/v3/reposerver/apiclient" servercache "github.com/argoproj/argo-cd/v3/server/cache" "github.com/argoproj/argo-cd/v3/server/deeplinks" + "github.com/argoproj/argo-cd/v3/util" "github.com/argoproj/argo-cd/v3/util/argo" "github.com/argoproj/argo-cd/v3/util/collections" "github.com/argoproj/argo-cd/v3/util/db" @@ -252,9 +255,15 @@ func (s *Server) getApplicationEnforceRBACClient(ctx context.Context, action, pr if !s.isNamespaceEnabled(namespaceOrDefault) { return nil, security.NamespaceNotPermittedError(namespaceOrDefault) } - return s.appclientset.ArgoprojV1alpha1().Applications(namespaceOrDefault).Get(ctx, name, metav1.GetOptions{ + app, err := s.appclientset.ArgoprojV1alpha1().Applications(namespaceOrDefault).Get(ctx, name, metav1.GetOptions{ ResourceVersion: resourceVersion, }) + if err != nil { + return nil, err + } + // Objects returned by the lister must be treated as read-only. + // To allow us to modify the app later, make a copy + return app.DeepCopy(), nil }) } @@ -371,6 +380,9 @@ func (s *Server) Create(ctx context.Context, q *application.ApplicationCreateReq if err != nil { return nil, status.Errorf(codes.Internal, "unable to check existing application details (%s): %v", appNs, err) } + // Objects returned from listers have to be treated as read-only + // Take a deep copy so we can edit it below + existing = existing.DeepCopy() if _, err := argo.GetDestinationCluster(ctx, existing.Spec.Destination, s.db); err != nil { return nil, status.Errorf(codes.InvalidArgument, "application destination spec for %s is invalid: %s", existing.Name, err.Error()) @@ -808,7 +820,7 @@ func (s *Server) Get(ctx context.Context, q *application.ApplicationQuery) (*v1a annotations = make(map[string]string) } if _, ok := annotations[v1alpha1.AnnotationKeyRefresh]; !ok { - return &event.Application, nil + return event.Application.DeepCopy(), nil } } } @@ -1214,6 +1226,7 @@ func (s *Server) Watch(q *application.ApplicationQuery, ws application.Applicati if err != nil { return fmt.Errorf("error listing apps with selector: %w", err) } + apps = util.SliceCopy(apps) sort.Slice(apps, func(i, j int) bool { return apps[i].QualifiedName() < apps[j].QualifiedName() }) @@ -2663,7 +2676,14 @@ func (s *Server) inferResourcesStatusHealth(app *v1alpha1.Application) { if err := s.cache.GetAppResourcesTree(app.Name, tree); err == nil { healthByKey := map[kube.ResourceKey]*v1alpha1.HealthStatus{} for _, node := range tree.Nodes { - healthByKey[kube.NewResourceKey(node.Group, node.Kind, node.Namespace, node.Name)] = node.Health + if node.Health != nil { + healthByKey[kube.NewResourceKey(node.Group, node.Kind, node.Namespace, node.Name)] = node.Health + } else if node.ResourceVersion == "" && node.ResourceRef.UID == "" && node.CreatedAt == nil { + healthByKey[kube.NewResourceKey(node.Group, node.Kind, node.Namespace, node.Name)] = &v1alpha1.HealthStatus{ + Status: health.HealthStatusMissing, + Message: "Resource has not been created", + } + } } for i, res := range app.Status.Resources { res.Health = healthByKey[kube.NewResourceKey(res.Group, res.Kind, res.Namespace, res.Name)] diff --git a/test/e2e/app_deletion_test.go b/test/e2e/app_deletion_test.go index caea0da09f..2f15cb850b 100644 --- a/test/e2e/app_deletion_test.go +++ b/test/e2e/app_deletion_test.go @@ -51,7 +51,7 @@ func TestDeletingAppByLabel(t *testing.T) { CreateApp("--label=foo=bar"). Sync(). Then(). - Expect(SyncStatusIs(SyncStatusCode(SyncStatusCodeSynced))). + Expect(SyncStatusIs(SyncStatusCodeSynced)). When(). IgnoreErrors(). DeleteBySelector("foo=baz"). diff --git a/test/e2e/fixture/app/consequences.go b/test/e2e/fixture/app/consequences.go index f7b1f9c988..f69f575497 100644 --- a/test/e2e/fixture/app/consequences.go +++ b/test/e2e/fixture/app/consequences.go @@ -7,10 +7,13 @@ import ( "github.com/argoproj/gitops-engine/pkg/health" log "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + applicationpkg "github.com/argoproj/argo-cd/v3/pkg/apiclient/application" . "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1" "github.com/argoproj/argo-cd/v3/test/e2e/fixture" "github.com/argoproj/argo-cd/v3/util/errors" + util "github.com/argoproj/argo-cd/v3/util/io" ) // this implements the "then" part of given/when/then @@ -112,7 +115,16 @@ func (c *Consequences) get() (*Application, error) { } func (c *Consequences) resource(kind, name, namespace string) ResourceStatus { - for _, r := range c.app().Status.Resources { + closer, client, err := fixture.ArgoCDClientset.NewApplicationClient() + errors.CheckError(err) + defer util.Close(closer) + app, err := client.Get(context.Background(), &applicationpkg.ApplicationQuery{ + Name: ptr.To(c.context.AppName()), + Projects: []string{c.context.project}, + AppNamespace: ptr.To(c.context.appNamespace), + }) + errors.CheckError(err) + for _, r := range app.Status.Resources { if r.Kind == kind && r.Name == name && (namespace == "" || namespace == r.Namespace) { return r } diff --git a/test/e2e/hook_test.go b/test/e2e/hook_test.go index a1e3da0e37..2e60b04e8c 100644 --- a/test/e2e/hook_test.go +++ b/test/e2e/hook_test.go @@ -99,9 +99,9 @@ func TestPreSyncHookFailure(t *testing.T) { IgnoreErrors(). Sync(). Then(). - Expect(Error("hook Failed PreSync", "")). + Expect(Error("hook Failed Synced PreSync container \"main\" failed", "")). // make sure resource are also printed - Expect(Error("pod OutOfSync Missing", "")). + Expect(Error("pod OutOfSync Missing", "")). Expect(OperationPhaseIs(OperationFailed)). // if a pre-sync hook fails, we should not start the main sync Expect(SyncStatusIs(SyncStatusCodeOutOfSync)). diff --git a/util/argo/argo.go b/util/argo/argo.go index 6751c582bf..a456263ae1 100644 --- a/util/argo/argo.go +++ b/util/argo/argo.go @@ -248,7 +248,7 @@ func RefreshApp(appIf v1alpha1.ApplicationInterface, name string, refreshType ar app, err := appIf.Patch(context.Background(), name, types.MergePatchType, patch, metav1.PatchOptions{}) if err == nil { log.Infof("Requested app '%s' refresh", name) - return app, nil + return app.DeepCopy(), nil } if !apierrors.IsConflict(err) { return nil, fmt.Errorf("error patching annotations in application %q: %w", name, err) @@ -814,6 +814,7 @@ func SetAppOperation(appIf v1alpha1.ApplicationInterface, appName string, op *ar if err != nil { return nil, fmt.Errorf("error getting application %q: %w", appName, err) } + a = a.DeepCopy() if a.Operation != nil { return nil, ErrAnotherOperationInProgress } diff --git a/util/db/secrets.go b/util/db/secrets.go index 6d8100c510..56c075c151 100644 --- a/util/db/secrets.go +++ b/util/db/secrets.go @@ -42,7 +42,7 @@ func (db *db) listSecretsByType(types ...string) ([]*corev1.Secret, error) { // SecretNamespaceLister lists all Secrets in the indexer for a given namespace. // Objects returned by the lister must be treated as read-only. // To allow us to modify the secrets, make a copy - secrets = util.SecretCopy(secrets) + secrets = util.SliceCopy(secrets) return secrets, nil } diff --git a/util/settings/settings.go b/util/settings/settings.go index 7584fee0bc..8d83f12ff1 100644 --- a/util/settings/settings.go +++ b/util/settings/settings.go @@ -780,7 +780,7 @@ func (mgr *SettingsManager) getSecrets() ([]*corev1.Secret, error) { // SecretNamespaceLister lists all Secrets in the indexer for a given namespace. // Objects returned by the lister must be treated as read-only. // To allow us to modify the secrets, make a copy - secrets = util.SecretCopy(secrets) + secrets = util.SliceCopy(secrets) return secrets, nil } diff --git a/util/util.go b/util/util.go index 7046d9480b..23943708ab 100644 --- a/util/util.go +++ b/util/util.go @@ -7,7 +7,7 @@ import ( "encoding/hex" "fmt" - corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" ) // MakeSignature generates a cryptographically-secure pseudo-random token, based on a given number of random bytes, for signing purposes. @@ -22,16 +22,13 @@ func MakeSignature(size int) ([]byte, error) { return b, err } -// SecretCopy generates a deep copy of a slice containing secrets -// -// This function takes a slice of pointers to Secrets and returns a new slice -// containing deep copies of the original secrets. -func SecretCopy(secrets []*corev1.Secret) []*corev1.Secret { - secretsCopy := make([]*corev1.Secret, len(secrets)) - for i, secret := range secrets { - secretsCopy[i] = secret.DeepCopy() +// SliceCopy generates a deep copy of a slice containing any type that implements the runtime.Object interface. +func SliceCopy[T runtime.Object](items []T) []T { + itemsCopy := make([]T, len(items)) + for i, item := range items { + itemsCopy[i] = item.DeepCopyObject().(T) } - return secretsCopy + return itemsCopy } // GenerateCacheKey generates a cache key based on a format string and arguments diff --git a/util/util_test.go b/util/util_test.go index 59d23a6a3e..18d382a4d2 100644 --- a/util/util_test.go +++ b/util/util_test.go @@ -42,7 +42,7 @@ func TestParseRevision(t *testing.T) { } } -func TestSecretCopy(t *testing.T) { +func TestSliceCopy(t *testing.T) { type args struct { secrets []*corev1.Secret } @@ -72,8 +72,8 @@ func TestSecretCopy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - secretsCopy := util.SecretCopy(tt.args.secrets) - assert.Equalf(t, tt.want, secretsCopy, "SecretCopy(%v)", tt.args.secrets) + secretsCopy := util.SliceCopy(tt.args.secrets) + assert.Equalf(t, tt.want, secretsCopy, "SliceCopy(%v)", tt.args.secrets) for i := range tt.args.secrets { assert.NotSame(t, secretsCopy[i], tt.args.secrets[i]) }