Skip to content
This repository has been archived by the owner on May 3, 2022. It is now read-only.

Commit

Permalink
traffic controller: report better errors
Browse files Browse the repository at this point in the history
* It is more useful when we actually report the number we mean, instead
of the exact opposite: we were reporting number of pods ready and told
the users that was the number of pods not ready. Now we report it
correctly.

* It's weird to report 0 pods not ready, but still not get any traffic.
We add some more clarity on that by reporting that there are pods that
haven't made it into the endpoints yet. This can be k8s taking its time
to do its thing, or the service selector might be incorrect. We should
be able to catch an incorrect selector in the future, see #257 [1].

* To remain consistent with the errors we report in the Release, we now
suggest the specific command to run to get more information about the
CapacityTarget.

[1] #257
  • Loading branch information
juliogreff committed Jan 14, 2020
1 parent c764092 commit d972b71
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 24 deletions.
51 changes: 36 additions & 15 deletions pkg/controller/traffic/traffic_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ import (
const (
AgentName = "traffic-controller"

ClustersNotReady = "ClustersNotReady"
InProgress = "InProgress"
InternalError = "InternalError"
PodsNotReady = "PodsNotReady"
ClustersNotReady = "ClustersNotReady"
InProgress = "InProgress"
InternalError = "InternalError"
PodsNotInEndpoints = "PodsNotInEndpoints"
PodsNotReady = "PodsNotReady"

TrafficTargetConditionChanged = "TrafficTargetConditionChanged"
ClusterTrafficConditionChanged = "ClusterTrafficConditionChanged"
Expand Down Expand Up @@ -388,6 +389,9 @@ func (c *Controller) processTrafficTargetOnCluster(
}

if trafficStatus.podsToShift != nil {
// If we have pods to shift, our job can only be done after the
// change is made and observed, so we definitely still in
// progress.
err := shiftPodLabels(clientset, trafficStatus.podsToShift)
if err != nil {
readyCond = trafficutil.NewClusterTrafficCondition(
Expand All @@ -398,22 +402,39 @@ func (c *Controller) processTrafficTargetOnCluster(
)

return err
} else {
readyCond = trafficutil.NewClusterTrafficCondition(
shipper.ClusterConditionTypeReady,
corev1.ConditionFalse,
InProgress,
"",
)
}
} else {

readyCond = trafficutil.NewClusterTrafficCondition(
shipper.ClusterConditionTypeReady,
corev1.ConditionFalse,
InProgress,
"",
)
} else if trafficStatus.podsNotReady > 0 {
// All the pods have been shifted, made it to endpoints, but
// some aren't ready.
msg := fmt.Sprintf(
"%d out of %d pods designated to receive traffic are not ready. this might require intervention, try `kubectl describe ct %s` for more information",
trafficStatus.podsNotReady, trafficStatus.podsLabeled, releaseName)
readyCond = trafficutil.NewClusterTrafficCondition(
shipper.ClusterConditionTypeReady,
corev1.ConditionFalse,
PodsNotReady,
fmt.Sprintf(
"%d out of %d pods are not ready. this might require intervention, check the CapacityTarget for more information",
trafficStatus.podsReady, trafficStatus.podsLabeled),
msg,
)
} else {
// All the pods have been shifted, but not enough of them are
// ready, and there are none not ready in endpoints, which
// means that they haven't made it there yet, or that the
// service selector is invalid.
msg := fmt.Sprintf(
"%d out of %d pods designated to receive traffic are not yet in endpoints",
trafficStatus.podsLabeled-trafficStatus.podsReady, trafficStatus.podsLabeled)
readyCond = trafficutil.NewClusterTrafficCondition(
shipper.ClusterConditionTypeReady,
corev1.ConditionFalse,
PodsNotInEndpoints,
msg,
)
}

Expand Down
25 changes: 19 additions & 6 deletions pkg/controller/traffic/traffic_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,16 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) {
},
},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-also-ready",
Namespace: shippertesting.TestNamespace,
Labels: map[string]string{
shipper.AppLabel: shippertesting.TestApp,
shipper.ReleaseLabel: ttName,
},
},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-not-ready",
Expand All @@ -194,17 +204,20 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) {
Clusters: []*shipper.ClusterTrafficStatus{
{
Name: clusterA,
AchievedTraffic: 5,
AchievedTraffic: 7,
Conditions: []shipper.ClusterTrafficCondition{
{
Type: shipper.ClusterConditionTypeOperational,
Status: corev1.ConditionTrue,
},
{
Type: shipper.ClusterConditionTypeReady,
Status: corev1.ConditionFalse,
Reason: PodsNotReady,
Message: "1 out of 2 pods are not ready. this might require intervention, check the CapacityTarget for more information",
Type: shipper.ClusterConditionTypeReady,
Status: corev1.ConditionFalse,
Reason: PodsNotReady,
Message: fmt.Sprintf(
"1 out of 3 pods designated to receive traffic are not ready. this might require intervention, try `kubectl describe ct %s` for more information",
ttName,
),
},
},
},
Expand Down Expand Up @@ -232,7 +245,7 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) {
trafficTarget: tt,
status: status,
podsByCluster: map[string]podStatus{
clusterA: {withTraffic: 2},
clusterA: {withTraffic: len(pods)},
},
},
},
Expand Down
11 changes: 8 additions & 3 deletions pkg/controller/traffic/traffic_shifting_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type trafficShiftingStatus struct {
ready bool
achievedTrafficWeight uint32
podsReady int
podsNotReady int
podsLabeled int
podsToShift map[string][]*corev1.Pod
}
Expand Down Expand Up @@ -44,7 +45,7 @@ func buildTrafficShiftingStatus(
shipper.ReleaseLabel: releaseName,
}).AsSelector()

podsByTrafficStatus, podsInRelease, podsReady := summarizePods(
podsByTrafficStatus, podsInRelease, podsReady, podsNotReady := summarizePods(
appPods, endpoints, releaseSelector)

releaseTargetWeight := releaseTargetWeights[releaseName]
Expand Down Expand Up @@ -80,6 +81,7 @@ func buildTrafficShiftingStatus(
return trafficShiftingStatus{
achievedTrafficWeight: achievedWeight,
podsReady: podsReady,
podsNotReady: podsNotReady,
podsLabeled: podsLabeledForTraffic,
ready: ready,
podsToShift: podsToShift,
Expand Down Expand Up @@ -132,7 +134,7 @@ func summarizePods(
pods []*corev1.Pod,
endpoints *corev1.Endpoints,
releaseSelector labels.Selector,
) (map[string][]*corev1.Pod, int, int) {
) (map[string][]*corev1.Pod, int, int, int) {
podsInRelease := make(map[string]struct{})
podsByTrafficStatus := make(map[string][]*corev1.Pod)

Expand All @@ -158,6 +160,7 @@ func summarizePods(
}

podsReady := 0
podsNotReady := 0
for podName, podReady := range podReadiness {
_, belongsToRelease := podsInRelease[podName]

Expand All @@ -167,10 +170,12 @@ func summarizePods(

if podReady {
podsReady++
} else {
podsNotReady++
}
}

return podsByTrafficStatus, len(podsInRelease), podsReady
return podsByTrafficStatus, len(podsInRelease), podsReady, podsNotReady
}

// markAddressReadiness updates podReadiness by marking
Expand Down

0 comments on commit d972b71

Please sign in to comment.