Skip to content

Commit b63151d

Browse files
ingvagabundbertinatto
authored andcommitted
UPSTREAM: <carry>: disable etcd readiness checks by default
Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177) and have etcd operator take responsibility for properly reporting etcd readiness. Justification: kube-apiserver instances get removed from a load balancer when etcd starts to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness longer than the readiness timeout is. Thus, it is not necessary to drop connections in case etcd resumes its readiness before a client connection times out naturally. This is a downstream patch only as OpenShift's way of using etcd is unique.
1 parent abf21c7 commit b63151d

File tree

4 files changed

+34
-6
lines changed

4 files changed

+34
-6
lines changed

staging/src/k8s.io/apiserver/pkg/server/config.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,13 +586,27 @@ type CompletedConfig struct {
586586
func (c *Config) AddHealthChecks(healthChecks ...healthz.HealthChecker) {
587587
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
588588
c.LivezChecks = append(c.LivezChecks, healthChecks...)
589-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
589+
c.AddReadyzChecks(healthChecks...)
590590
}
591591

592592
// AddReadyzChecks adds a health check to our config to be exposed by the readyz endpoint
593593
// of our configured apiserver.
594594
func (c *Config) AddReadyzChecks(healthChecks ...healthz.HealthChecker) {
595-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
595+
// Info(ingvagabund): Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177)
596+
// and have etcd operator take responsibility for properly reporting etcd readiness.
597+
// Justification: kube-apiserver instances get removed from a load balancer when etcd starts
598+
// to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness
599+
// longer than the readiness timeout is. Thus, it is not necessary to drop connections
600+
// in case etcd resumes its readiness before a client connection times out naturally.
601+
// This is a downstream patch only as OpenShift's way of using etcd is unique.
602+
readyzChecks := []healthz.HealthChecker{}
603+
for _, check := range healthChecks {
604+
if check.Name() == "etcd" || check.Name() == "etcd-readiness" {
605+
continue
606+
}
607+
readyzChecks = append(readyzChecks, check)
608+
}
609+
c.ReadyzChecks = append(c.ReadyzChecks, readyzChecks...)
596610
}
597611

598612
// AddPostStartHook allows you to add a PostStartHook that will later be added to the server itself in a New call.

staging/src/k8s.io/apiserver/pkg/server/options/etcd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ func (s *EtcdOptions) maybeApplyResourceTransformers(c *server.Config) (err erro
355355

356356
func addHealthChecksWithoutLivez(c *server.Config, healthChecks ...healthz.HealthChecker) {
357357
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
358-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
358+
c.AddReadyzChecks(healthChecks...)
359359
}
360360

361361
func (s *EtcdOptions) addEtcdHealthEndpoint(c *server.Config) error {

staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,17 @@ func TestParseWatchCacheSizes(t *testing.T) {
262262
}
263263
}
264264

265+
func excludeEtcdReadyzChecks(readyzChecks []string) []string {
266+
includedReadyzChecks := []string{}
267+
for _, checkName := range readyzChecks {
268+
if checkName == "etcd" || checkName == "etcd-readiness" {
269+
continue
270+
}
271+
includedReadyzChecks = append(includedReadyzChecks, checkName)
272+
}
273+
return includedReadyzChecks
274+
}
275+
265276
func TestKMSHealthzEndpoint(t *testing.T) {
266277
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KMSv1, true)
267278

@@ -367,7 +378,9 @@ func TestKMSHealthzEndpoint(t *testing.T) {
367378
}
368379

369380
healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
370-
healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
381+
// Remove the excluded checks here to reduce the carry patch changes in case
382+
// the changes drifts too much during rebases and similar scope-like changes.
383+
healthChecksAreEqual(t, excludeEtcdReadyzChecks(tc.wantReadyzChecks), serverConfig.ReadyzChecks, "readyz")
371384
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
372385
})
373386
}
@@ -407,7 +420,9 @@ func TestReadinessCheck(t *testing.T) {
407420
t.Fatalf("Failed to add healthz error: %v", err)
408421
}
409422

410-
healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
423+
// Remove the excluded checks here to reduce the carry patch changes in case
424+
// the changes drifts too much during rebases and similar scope-like changes.
425+
healthChecksAreEqual(t, excludeEtcdReadyzChecks(tc.wantReadyzChecks), serverConfig.ReadyzChecks, "readyz")
411426
healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
412427
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
413428
})

test/e2e/apimachinery/health_handlers.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ var (
8282
requiredReadyzChecks = sets.NewString(
8383
"[+]ping ok",
8484
"[+]log ok",
85-
"[+]etcd ok",
8685
"[+]informer-sync ok",
8786
"[+]poststarthook/start-apiserver-admission-initializer ok",
8887
"[+]poststarthook/generic-apiserver-start-informers ok",

0 commit comments

Comments
 (0)