diff --git a/pkg/controller/traffic/traffic_controller.go b/pkg/controller/traffic/traffic_controller.go index 1d7669e35..327fe5200 100644 --- a/pkg/controller/traffic/traffic_controller.go +++ b/pkg/controller/traffic/traffic_controller.go @@ -36,10 +36,11 @@ import ( const ( AgentName = "traffic-controller" - ClustersNotReady = "ClustersNotReady" - InProgress = "InProgress" - InternalError = "InternalError" - PodsNotReady = "PodsNotReady" + ClustersNotReady = "ClustersNotReady" + InProgress = "InProgress" + InternalError = "InternalError" + PodsNotInEndpoints = "PodsNotInEndpoints" + PodsNotReady = "PodsNotReady" TrafficTargetConditionChanged = "TrafficTargetConditionChanged" ClusterTrafficConditionChanged = "ClusterTrafficConditionChanged" @@ -388,6 +389,9 @@ func (c *Controller) processTrafficTargetOnCluster( } if trafficStatus.podsToShift != nil { + // If we have pods to shift, our job can only be done after the + // change is made and observed, so we definitely still in + // progress. err := shiftPodLabels(clientset, trafficStatus.podsToShift) if err != nil { readyCond = trafficutil.NewClusterTrafficCondition( @@ -398,22 +402,39 @@ func (c *Controller) processTrafficTargetOnCluster( ) return err - } else { - readyCond = trafficutil.NewClusterTrafficCondition( - shipper.ClusterConditionTypeReady, - corev1.ConditionFalse, - InProgress, - "", - ) } - } else { + + readyCond = trafficutil.NewClusterTrafficCondition( + shipper.ClusterConditionTypeReady, + corev1.ConditionFalse, + InProgress, + "", + ) + } else if trafficStatus.podsNotReady > 0 { + // All the pods have been shifted, made it to endpoints, but + // some aren't ready. + msg := fmt.Sprintf( + "%d out of %d pods designated to receive traffic are not ready. this might require intervention, try `kubectl describe ct %s` for more information", + trafficStatus.podsNotReady, trafficStatus.podsLabeled, releaseName) readyCond = trafficutil.NewClusterTrafficCondition( shipper.ClusterConditionTypeReady, corev1.ConditionFalse, PodsNotReady, - fmt.Sprintf( - "%d out of %d pods are not ready. this might require intervention, check the CapacityTarget for more information", - trafficStatus.podsReady, trafficStatus.podsLabeled), + msg, + ) + } else { + // All the pods have been shifted, but not enough of them are + // ready, and there are none not ready in endpoints, which + // means that they haven't made it there yet, or that the + // service selector is invalid. + msg := fmt.Sprintf( + "%d out of %d pods designated to receive traffic are not yet in endpoints", + trafficStatus.podsLabeled-trafficStatus.podsReady, trafficStatus.podsLabeled) + readyCond = trafficutil.NewClusterTrafficCondition( + shipper.ClusterConditionTypeReady, + corev1.ConditionFalse, + PodsNotInEndpoints, + msg, ) } diff --git a/pkg/controller/traffic/traffic_controller_test.go b/pkg/controller/traffic/traffic_controller_test.go index 1c2aaba94..8fe35c4dd 100644 --- a/pkg/controller/traffic/traffic_controller_test.go +++ b/pkg/controller/traffic/traffic_controller_test.go @@ -171,6 +171,16 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) { }, }, }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-also-ready", + Namespace: shippertesting.TestNamespace, + Labels: map[string]string{ + shipper.AppLabel: shippertesting.TestApp, + shipper.ReleaseLabel: ttName, + }, + }, + }, &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod-not-ready", @@ -194,17 +204,20 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) { Clusters: []*shipper.ClusterTrafficStatus{ { Name: clusterA, - AchievedTraffic: 5, + AchievedTraffic: 7, Conditions: []shipper.ClusterTrafficCondition{ { Type: shipper.ClusterConditionTypeOperational, Status: corev1.ConditionTrue, }, { - Type: shipper.ClusterConditionTypeReady, - Status: corev1.ConditionFalse, - Reason: PodsNotReady, - Message: "1 out of 2 pods are not ready. this might require intervention, check the CapacityTarget for more information", + Type: shipper.ClusterConditionTypeReady, + Status: corev1.ConditionFalse, + Reason: PodsNotReady, + Message: fmt.Sprintf( + "1 out of 3 pods designated to receive traffic are not ready. this might require intervention, try `kubectl describe ct %s` for more information", + ttName, + ), }, }, }, @@ -232,7 +245,7 @@ func TestTrafficShiftingWithPodsNotReady(t *testing.T) { trafficTarget: tt, status: status, podsByCluster: map[string]podStatus{ - clusterA: {withTraffic: 2}, + clusterA: {withTraffic: len(pods)}, }, }, }, diff --git a/pkg/controller/traffic/traffic_shifting_status.go b/pkg/controller/traffic/traffic_shifting_status.go index 141149b22..3c28b5e4a 100644 --- a/pkg/controller/traffic/traffic_shifting_status.go +++ b/pkg/controller/traffic/traffic_shifting_status.go @@ -17,6 +17,7 @@ type trafficShiftingStatus struct { ready bool achievedTrafficWeight uint32 podsReady int + podsNotReady int podsLabeled int podsToShift map[string][]*corev1.Pod } @@ -44,7 +45,7 @@ func buildTrafficShiftingStatus( shipper.ReleaseLabel: releaseName, }).AsSelector() - podsByTrafficStatus, podsInRelease, podsReady := summarizePods( + podsByTrafficStatus, podsInRelease, podsReady, podsNotReady := summarizePods( appPods, endpoints, releaseSelector) releaseTargetWeight := releaseTargetWeights[releaseName] @@ -80,6 +81,7 @@ func buildTrafficShiftingStatus( return trafficShiftingStatus{ achievedTrafficWeight: achievedWeight, podsReady: podsReady, + podsNotReady: podsNotReady, podsLabeled: podsLabeledForTraffic, ready: ready, podsToShift: podsToShift, @@ -132,7 +134,7 @@ func summarizePods( pods []*corev1.Pod, endpoints *corev1.Endpoints, releaseSelector labels.Selector, -) (map[string][]*corev1.Pod, int, int) { +) (map[string][]*corev1.Pod, int, int, int) { podsInRelease := make(map[string]struct{}) podsByTrafficStatus := make(map[string][]*corev1.Pod) @@ -158,6 +160,7 @@ func summarizePods( } podsReady := 0 + podsNotReady := 0 for podName, podReady := range podReadiness { _, belongsToRelease := podsInRelease[podName] @@ -167,10 +170,12 @@ func summarizePods( if podReady { podsReady++ + } else { + podsNotReady++ } } - return podsByTrafficStatus, len(podsInRelease), podsReady + return podsByTrafficStatus, len(podsInRelease), podsReady, podsNotReady } // markAddressReadiness updates podReadiness by marking