Skip to content

Commit 020235f

Browse files
committed
Add asynchronous deployer pod invariant checker for every test
1 parent 705e69b commit 020235f

File tree

3 files changed

+207
-9
lines changed

3 files changed

+207
-9
lines changed

pkg/apps/controller/deployer/deployer_controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,8 @@ func (c *DeploymentController) makeDeployerPod(deployment *v1.ReplicationControl
379379
ObjectMeta: metav1.ObjectMeta{
380380
Name: deployutil.DeployerPodNameForDeployment(deployment.Name),
381381
Annotations: map[string]string{
382-
deployapi.DeploymentAnnotation: deployment.Name,
382+
deployapi.DeploymentAnnotation: deployment.Name,
383+
deployapi.DeploymentConfigAnnotation: deployutil.DeploymentConfigNameFor(deployment),
383384
},
384385
Labels: map[string]string{
385386
deployapi.DeployerPodForDeploymentLabel: deployment.Name,

test/extended/deployments/deployments.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package deployments
22

33
import (
4+
"context"
45
"errors"
56
"fmt"
67
"math/rand"
@@ -27,10 +28,55 @@ import (
2728
const deploymentRunTimeout = 5 * time.Minute
2829
const deploymentChangeTimeout = 30 * time.Second
2930

31+
type dicEntry struct {
32+
dic *deployerPodInvariantChecker
33+
ctx context.Context
34+
cancel func()
35+
}
36+
3037
var _ = g.Describe("[Feature:DeploymentConfig] deploymentconfigs", func() {
3138
defer g.GinkgoRecover()
39+
40+
dicMap := make(map[string]dicEntry)
41+
var oc *exutil.CLI
42+
43+
g.JustBeforeEach(func() {
44+
namespace := oc.Namespace()
45+
o.Expect(namespace).NotTo(o.BeEmpty())
46+
o.Expect(dicMap).NotTo(o.HaveKey(namespace))
47+
48+
dic := NewDeployerPodInvariantChecker(namespace, oc.AdminKubeClient())
49+
ctx, cancel := context.WithCancel(context.Background())
50+
dic.Start(ctx)
51+
52+
dicMap[namespace] = dicEntry{
53+
dic: dic,
54+
ctx: ctx,
55+
cancel: cancel,
56+
}
57+
})
58+
59+
// This has to be registered before we create the kube framework (NewCLI).
60+
// It is probably a bug in Ginkgo, because the AfterEach description says the innermost will be run first
61+
// but it runs outermost first.
62+
g.AfterEach(func() {
63+
namespace := oc.Namespace()
64+
o.Expect(namespace).NotTo(o.BeEmpty(), "There is something wrong with testing framework or the AfterEach functions have been registered in wrong order")
65+
o.Expect(dicMap).To(o.HaveKey(namespace))
66+
67+
// Give some time to the checker to catch up
68+
time.Sleep(2 * time.Second)
69+
70+
entry := dicMap[namespace]
71+
delete(dicMap, namespace)
72+
73+
entry.cancel()
74+
entry.dic.Wait()
75+
})
76+
77+
oc = exutil.NewCLI("cli-deployment", exutil.KubeConfigPath())
78+
3279
var (
33-
oc = exutil.NewCLI("cli-deployment", exutil.KubeConfigPath())
3480
deploymentFixture = exutil.FixturePath("testdata", "deployments", "test-deployment-test.yaml")
3581
simpleDeploymentFixture = exutil.FixturePath("testdata", "deployments", "deployment-simple.yaml")
3682
customDeploymentFixture = exutil.FixturePath("testdata", "deployments", "custom-deployment.yaml")

test/extended/deployments/util.go

Lines changed: 158 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,28 @@
11
package deployments
22

33
import (
4+
"context"
45
"fmt"
56
"io/ioutil"
67
"reflect"
78
"sort"
89
"strings"
10+
"sync"
911
"time"
1012

13+
"github.com/davecgh/go-spew/spew"
1114
"github.com/ghodss/yaml"
1215

16+
g "github.com/onsi/ginkgo"
17+
o "github.com/onsi/gomega"
18+
1319
corev1 "k8s.io/api/core/v1"
1420
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1521
"k8s.io/apimachinery/pkg/fields"
1622
"k8s.io/apimachinery/pkg/util/sets"
1723
"k8s.io/apimachinery/pkg/util/wait"
1824
"k8s.io/apimachinery/pkg/watch"
25+
"k8s.io/client-go/kubernetes"
1926
"k8s.io/client-go/util/retry"
2027
kapi "k8s.io/kubernetes/pkg/api"
2128
kapiv1 "k8s.io/kubernetes/pkg/api/v1"
@@ -426,15 +433,33 @@ func rCConditionFromMeta(condition func(metav1.Object) (bool, error)) func(rc *c
426433
}
427434
}
428435

436+
func waitForPodModification(oc *exutil.CLI, namespace string, name string, timeout time.Duration, resourceVersion string, condition func(pod *corev1.Pod) (bool, error)) (*corev1.Pod, error) {
437+
watcher, err := oc.KubeClient().CoreV1().Pods(namespace).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: name, ResourceVersion: resourceVersion}))
438+
if err != nil {
439+
return nil, err
440+
}
441+
442+
event, err := watch.Until(timeout, watcher, func(event watch.Event) (bool, error) {
443+
if event.Type != watch.Modified && (resourceVersion == "" && event.Type != watch.Added) {
444+
return true, fmt.Errorf("different kind of event appeared while waiting for Pod modification: event: %#v", event)
445+
}
446+
return condition(event.Object.(*corev1.Pod))
447+
})
448+
if err != nil {
449+
return nil, err
450+
}
451+
return event.Object.(*corev1.Pod), nil
452+
}
453+
429454
func waitForRCModification(oc *exutil.CLI, namespace string, name string, timeout time.Duration, resourceVersion string, condition func(rc *corev1.ReplicationController) (bool, error)) (*corev1.ReplicationController, error) {
430455
watcher, err := oc.KubeClient().CoreV1().ReplicationControllers(namespace).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: name, ResourceVersion: resourceVersion}))
431456
if err != nil {
432457
return nil, err
433458
}
434459

435460
event, err := watch.Until(timeout, watcher, func(event watch.Event) (bool, error) {
436-
if event.Type != watch.Modified {
437-
return false, fmt.Errorf("different kind of event appeared while waiting for modification: event: %#v", event)
461+
if event.Type != watch.Modified && (resourceVersion == "" && event.Type != watch.Added) {
462+
return true, fmt.Errorf("different kind of event appeared while waiting for RC modification: event: %#v", event)
438463
}
439464
return condition(event.Object.(*corev1.ReplicationController))
440465
})
@@ -454,17 +479,14 @@ func waitForDCModification(oc *exutil.CLI, namespace string, name string, timeou
454479
}
455480

456481
event, err := watch.Until(timeout, watcher, func(event watch.Event) (bool, error) {
457-
if event.Type != watch.Modified {
458-
return false, fmt.Errorf("different kind of event appeared while waiting for modification: event: %#v", event)
482+
if event.Type != watch.Modified && (resourceVersion == "" && event.Type != watch.Added) {
483+
return true, fmt.Errorf("different kind of event appeared while waiting for DC modification: event: %#v", event)
459484
}
460485
return condition(event.Object.(*deployapi.DeploymentConfig))
461486
})
462487
if err != nil {
463488
return nil, err
464489
}
465-
if event.Type != watch.Modified {
466-
return nil, fmt.Errorf("waiting for DC modification failed: event: %v", event)
467-
}
468490
return event.Object.(*deployapi.DeploymentConfig), nil
469491
}
470492

@@ -623,3 +645,132 @@ func readDCFixtureOrDie(path string) *deployapi.DeploymentConfig {
623645
}
624646
return data
625647
}
648+
649+
type deployerPodInvariantChecker struct {
650+
ctx context.Context
651+
wg sync.WaitGroup
652+
namespace string
653+
client kubernetes.Interface
654+
cache map[string][]*corev1.Pod
655+
}
656+
657+
func NewDeployerPodInvariantChecker(namespace string, client kubernetes.Interface) *deployerPodInvariantChecker {
658+
return &deployerPodInvariantChecker{
659+
namespace: namespace,
660+
client: client,
661+
cache: make(map[string][]*corev1.Pod),
662+
}
663+
}
664+
665+
func (d *deployerPodInvariantChecker) getCacheKey(pod *corev1.Pod) string {
666+
dcName, found := pod.Annotations[deployapi.DeploymentConfigAnnotation]
667+
o.Expect(found).To(o.BeTrue(), fmt.Sprintf("internal error - deployment is missing %q annotation\npod: %#v", deployapi.DeploymentConfigAnnotation, pod))
668+
o.Expect(dcName).NotTo(o.BeEmpty())
669+
670+
return fmt.Sprintf("%s/%s", pod.Namespace, dcName)
671+
}
672+
func (d *deployerPodInvariantChecker) getPodIndex(list []*corev1.Pod, pod *corev1.Pod) int {
673+
for i, p := range list {
674+
if p.Name == pod.Name && p.Namespace == pod.Namespace {
675+
// Internal check
676+
o.Expect(p.UID).To(o.Equal(pod.UID))
677+
return i
678+
}
679+
}
680+
681+
// Internal check
682+
o.Expect(fmt.Errorf("couldn't find pod %#v \n\n in list %#v", pod, list)).NotTo(o.HaveOccurred())
683+
return -1
684+
}
685+
686+
func (d *deployerPodInvariantChecker) checkInvariants(dc string, pods []*corev1.Pod) {
687+
var unterminatedPods []*corev1.Pod
688+
for _, pod := range pods {
689+
if pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed {
690+
unterminatedPods = append(unterminatedPods, pod)
691+
}
692+
}
693+
694+
// INVARIANT: There can be no more than one unterminated deployer pod present
695+
message := fmt.Sprintf("Deployer pod invariant broken! More than one unterminated deployer pod exists for DC %s!", dc)
696+
o.Expect(len(unterminatedPods)).To(o.BeNumerically("<=", 1), spew.Sprintf(`%v: %s
697+
List of unterminated pods: %#+v
698+
`, time.Now(), message, unterminatedPods))
699+
}
700+
701+
func (d *deployerPodInvariantChecker) AddPod(pod *corev1.Pod) {
702+
key := d.getCacheKey(pod)
703+
d.cache[key] = append(d.cache[key], pod)
704+
705+
d.checkInvariants(key, d.cache[key])
706+
}
707+
708+
func (d *deployerPodInvariantChecker) RemovePod(pod *corev1.Pod) {
709+
key := d.getCacheKey(pod)
710+
index := d.getPodIndex(d.cache[key], pod)
711+
712+
d.cache[key] = append(d.cache[key][:index], d.cache[key][index+1:]...)
713+
714+
d.checkInvariants(key, d.cache[key])
715+
}
716+
717+
func (d *deployerPodInvariantChecker) UpdatePod(pod *corev1.Pod) {
718+
key := d.getCacheKey(pod)
719+
index := d.getPodIndex(d.cache[key], pod)
720+
721+
// Check for sanity.
722+
// This is not paranoid; kubelet has already been broken this way:
723+
// https://github.com/openshift/origin/issues/17011
724+
oldPhase := d.cache[key][index].Status.Phase
725+
oldPhaseIsTerminated := oldPhase == corev1.PodSucceeded || oldPhase == corev1.PodFailed
726+
o.Expect(oldPhaseIsTerminated && pod.Status.Phase != oldPhase).To(o.BeFalse(),
727+
fmt.Sprintf("%v: detected deployer pod transition from terminated phase: %q -> %q", time.Now(), oldPhase, pod.Status.Phase))
728+
729+
d.cache[key][index] = pod
730+
731+
d.checkInvariants(key, d.cache[key])
732+
}
733+
734+
func (d *deployerPodInvariantChecker) doChecking() {
735+
defer g.GinkgoRecover()
736+
737+
watcher, err := d.client.CoreV1().Pods(d.namespace).Watch(metav1.ListOptions{})
738+
o.Expect(err).NotTo(o.HaveOccurred())
739+
defer d.wg.Done()
740+
defer watcher.Stop()
741+
742+
for {
743+
select {
744+
case <-d.ctx.Done():
745+
return
746+
case event := <-watcher.ResultChan():
747+
t := event.Type
748+
if t != watch.Added && t != watch.Modified && t != watch.Deleted {
749+
o.Expect(fmt.Errorf("unexpected event: %#v", event)).NotTo(o.HaveOccurred())
750+
}
751+
pod := event.Object.(*corev1.Pod)
752+
if !strings.HasSuffix(pod.Name, "-deploy") {
753+
continue
754+
}
755+
756+
switch t {
757+
case watch.Added:
758+
d.AddPod(pod)
759+
case watch.Modified:
760+
d.UpdatePod(pod)
761+
case watch.Deleted:
762+
d.RemovePod(pod)
763+
}
764+
}
765+
}
766+
}
767+
768+
func (d *deployerPodInvariantChecker) Start(ctx context.Context) {
769+
d.ctx = ctx
770+
go d.doChecking()
771+
d.wg.Add(1)
772+
}
773+
774+
func (d *deployerPodInvariantChecker) Wait() {
775+
d.wg.Wait()
776+
}

0 commit comments

Comments
 (0)