Skip to content

Commit d75c424

Browse files
author
rohandvora
committed
Fix controller when NEG status is empty
This commit addresses an issue where the autoneg controller would fail to reconcile backend services when the Network Endpoint Group (NEG) status annotation was empty. This scenario typically occurs if the GKE NEG controller has not yet provisioned the NEGs for a service. The root cause was that an empty backend service name was being used in the request to the Google Cloud Compute API. This caused the API to treat the 'get' request as a 'list' request, leading to a JSON unmarshaling error due to the different response format. This error prevented the controller from removing finalizers on services marked for deletion, causing namespaces to become stuck. The following changes have been made: - Updated `ReconcileBackends` in `controllers/autoneg.go` to handle cases where the upsert backend service name is empty, preventing calls to `getBackendService` with an empty name. - Added `context.Context` propagation through `ReconcileBackends`. - Added a new unit test, `TestReconcileBackendsDeletionWithEmptyNEGStatus`, in `controllers/autoneg_test.go` to specifically cover the error scenario. - Updated function signatures and calls in `controllers/service_controller.go` and `controllers/suite_test.go` to align with the changes in the `BackendController` interface.
1 parent 84c4ccc commit d75c424

File tree

4 files changed

+71
-13
lines changed

4 files changed

+71
-13
lines changed

controllers/autoneg.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -201,15 +201,21 @@ func checkOperation(op *compute.Operation) error {
201201

202202
// ReconcileBackends takes the actual and intended AutonegStatus
203203
// and attempts to apply the intended status or return an error
204-
func (b *ProdBackendController) ReconcileBackends(actual, intended AutonegStatus) (err error) {
204+
func (b *ProdBackendController) ReconcileBackends(ctx context.Context, actual, intended AutonegStatus) (err error) {
205+
logger := log.FromContext(ctx)
206+
// Determine which backends to remove and which to insert/update.
205207
removes, upserts := ReconcileStatus(b.project, actual, intended)
208+
logger.Info("Reconciling backends", "removes", removes, "upserts", upserts)
206209

207210
var forceCapacity map[int]bool = make(map[int]bool, 0)
211+
// Iterate over each port that has backends to be removed.
208212
for port, _removes := range removes {
213+
// Iterate over each backend service to be removed.
209214
for idx, remove := range _removes {
210215
var oldSvc *compute.BackendService
211-
oldSvc, err = b.getBackendService(remove.name, remove.region)
212216
var svcUpdated = false
217+
// Get the current state of the backend service.
218+
oldSvc, err = b.getBackendService(remove.name, remove.region)
213219
var e *errNotFound
214220
if errors.As(err, &e) {
215221
// If the backend service is gone, we construct a BackendService with the same name
@@ -226,16 +232,18 @@ func (b *ProdBackendController) ReconcileBackends(actual, intended AutonegStatus
226232
var newSvc *compute.BackendService
227233
upsert := upserts[port][idx]
228234

229-
if upsert.name != remove.name {
235+
// Check if the same port is in the upsert map and if upsert needs to happen on a different backend service.
236+
if upsert.name != "" && upsert.name != remove.name {
230237
if newSvc, err = b.getBackendService(upsert.name, upsert.region); err != nil {
231238
return
232239
}
233240
} else {
234241
newSvc = oldSvc
235242
}
236243

237-
// Remove backends in the list to be deleted
244+
// Remove backends that are in the list to be deleted for this port.
238245
for _, d := range remove.backends {
246+
// Remove only the requested backends and keep the rest.
239247
for i, be := range oldSvc.Backends {
240248
if d.Group == be.Group {
241249
svcUpdated = true
@@ -246,8 +254,9 @@ func (b *ProdBackendController) ReconcileBackends(actual, intended AutonegStatus
246254
}
247255
}
248256

249-
// If we are changing backend services, save the old service
250-
if upsert.name != remove.name && svcUpdated {
257+
// If a different service needs to be updated based on the upsert map entry for this port,
258+
// then save the existing backend service and update the new service.
259+
if svcUpdated && (upsert.name == "" || upsert.name != remove.name) {
251260
if err = b.updateBackends(remove.name, remove.region, oldSvc, forceCapacity); err != nil {
252261
return
253262
}
@@ -263,7 +272,7 @@ func (b *ProdBackendController) ReconcileBackends(actual, intended AutonegStatus
263272
be.MaxConnectionsPerEndpoint = u.MaxConnectionsPerEndpoint
264273
if intended.AutonegSyncConfig != nil {
265274
var syncConfig AutonegSyncConfig = *intended.AutonegSyncConfig
266-
if syncConfig.CapacityScaler != nil && *syncConfig.CapacityScaler == true {
275+
if syncConfig.CapacityScaler != nil && *syncConfig.CapacityScaler {
267276
be.CapacityScaler = u.CapacityScaler
268277
}
269278
} else {
@@ -477,6 +486,7 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
477486
if ok {
478487
// Found a status, decode
479488
if err = json.Unmarshal([]byte(tmp), &s.negConfig); err != nil {
489+
err = fmt.Errorf("failed to decode neg annotation %s: %w", negAnnotation, err)
480490
return
481491
}
482492
}
@@ -489,12 +499,14 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
489499

490500
var tempConfig AutonegConfigTemp
491501
if err = json.Unmarshal([]byte(tmp), &tempConfig); err != nil {
502+
err = fmt.Errorf("failed to decode autoneg annotation %s: %w", autonegAnnotation, err)
492503
return
493504
}
494505

495506
tmpSync, syncOk := annotations[autonegSyncAnnotation]
496507
if syncOk {
497508
if err = json.Unmarshal([]byte(tmpSync), &s.syncConfig); err != nil {
509+
err = fmt.Errorf("failed to decode autoneg-sync annotation %s: %w", autonegSyncAnnotation, err)
498510
return
499511
}
500512
}
@@ -540,6 +552,7 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
540552
}
541553
// Found a status, decode
542554
if err = json.Unmarshal([]byte(tmp), &s.status); err != nil {
555+
err = fmt.Errorf("failed to decode autoneg-status annotation %s: %w", autonegStatusAnnotation, err)
543556
return
544557
}
545558
}
@@ -551,6 +564,7 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
551564
valid = true
552565

553566
if err = json.Unmarshal([]byte(tmp), &s.oldConfig); err != nil {
567+
err = fmt.Errorf("failed to decode %s annotation %s: %w", oldAutonegAnnotation, tmp, err)
554568
return
555569
}
556570

@@ -579,7 +593,7 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
579593
Connections: 0,
580594
}
581595
} else {
582-
err = errors.New(fmt.Sprintf("more than one port in %s, but autoneg configuration is for one or no ports", negAnnotation))
596+
err = fmt.Errorf("more than one port in %s, but autoneg configuration is for one or no ports", negAnnotation)
583597
return
584598
}
585599
}
@@ -593,6 +607,7 @@ func getStatuses(ctx context.Context, namespace string, name string, annotations
593607
}
594608
// Found a status, decode
595609
if err = json.Unmarshal([]byte(tmp), &s.oldStatus); err != nil {
610+
err = fmt.Errorf("failed to decode %s annotation %s: %w", oldAutonegStatusAnnotation, tmp, err)
596611
return
597612
}
598613
}

controllers/autoneg_test.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,10 @@ var (
720720
AutonegConfig: configBasic,
721721
NEGStatus: negStatus,
722722
}
723+
statusBasicWithEmptyNEGs = AutonegStatus{
724+
AutonegConfig: configBasic,
725+
NEGStatus: NEGStatus{},
726+
}
723727
backendsBasicWithNEGs = map[string]map[string]Backends{"80": map[string]Backends{"test": Backends{name: "test", backends: []compute.Backend{
724728
statusBasicWithNEGs.Backend("test", "80", getGroup(fakeProject, "zone1", fakeNeg)),
725729
statusBasicWithNEGs.Backend("test", "80", getGroup(fakeProject, "zone2", fakeNeg)),
@@ -991,6 +995,7 @@ func Test_checkOperation(t *testing.T) {
991995

992996
func TestReconcileBackendsDeletionWithMissingBackend(t *testing.T) {
993997
s := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) {
998+
t.Logf("Got request: %s", req.URL.String())
994999
// Return not found on backend service get.
9951000
res.WriteHeader(http.StatusNotFound)
9961001
}))
@@ -1002,7 +1007,7 @@ func TestReconcileBackendsDeletionWithMissingBackend(t *testing.T) {
10021007
project: "test-project",
10031008
s: cs,
10041009
}
1005-
err = bc.ReconcileBackends(statusBasicWithNEGs, AutonegStatus{
1010+
err = bc.ReconcileBackends(context.Background(), statusBasicWithNEGs, AutonegStatus{
10061011
// On deletion, the intended state is set to empty.
10071012
AutonegConfig: AutonegConfig{},
10081013
NEGStatus: negStatus,
@@ -1012,6 +1017,43 @@ func TestReconcileBackendsDeletionWithMissingBackend(t *testing.T) {
10121017
}
10131018
}
10141019

1020+
func TestReconcileBackendsDeletionWithEmptyNEGStatus(t *testing.T) {
1021+
s := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) {
1022+
t.Logf("Got request: %s", req.URL.String())
1023+
// Return not found on backend service get.
1024+
res.WriteHeader(http.StatusNotFound)
1025+
}))
1026+
cs, err := compute.NewService(context.Background(), option.WithEndpoint(s.URL), option.WithoutAuthentication())
1027+
if err != nil {
1028+
t.Fatalf("Failed to instantiate compute service: %v", err)
1029+
}
1030+
bc := ProdBackendController{
1031+
project: "test-project",
1032+
s: cs,
1033+
}
1034+
err = bc.ReconcileBackends(context.Background(), AutonegStatus{
1035+
AutonegConfig: AutonegConfig{
1036+
BackendServices: map[string]map[string]AutonegNEGConfig{
1037+
"80": {
1038+
"test": AutonegNEGConfig{
1039+
Name: "test",
1040+
Rate: 100,
1041+
},
1042+
},
1043+
},
1044+
},
1045+
NEGStatus: NEGStatus{}, // NEG status not populated by GKE NEG controller.
1046+
}, AutonegStatus{
1047+
AutonegConfig: AutonegConfig{
1048+
BackendServices: map[string]map[string]AutonegNEGConfig{},
1049+
},
1050+
NEGStatus: NEGStatus{},
1051+
})
1052+
if err != nil {
1053+
t.Errorf("ReconcileBackends() got err: %v, want none", err)
1054+
}
1055+
}
1056+
10151057
type fakeReader struct {
10161058
client.Reader
10171059
svcNeg *v1beta1.ServiceNetworkEndpointGroup

controllers/service_controller.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"encoding/json"
2222
"errors"
23+
"fmt"
2324
"reflect"
2425
"time"
2526

@@ -35,7 +36,7 @@ import (
3536
)
3637

3738
type BackendController interface {
38-
ReconcileBackends(AutonegStatus, AutonegStatus) error
39+
ReconcileBackends(context.Context, AutonegStatus, AutonegStatus) error
3940
}
4041

4142
// ServiceReconciler reconciles a Service object
@@ -104,7 +105,7 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
104105
AutonegConfig: status.config,
105106
NEGStatus: status.negStatus,
106107
}
107-
logger.Info("Existing status", "status", status)
108+
logger.Info("Existing status", "status", fmt.Sprintf("%+v", status))
108109
if status.syncConfig != nil {
109110
intendedStatus.AutonegSyncConfig = status.syncConfig
110111
}
@@ -124,7 +125,7 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
124125
// Reconcile differences
125126
logger.Info("Applying intended status", "status", intendedStatus)
126127

127-
if err = r.ReconcileBackends(status.status, intendedStatus); err != nil {
128+
if err = r.ReconcileBackends(ctx, status.status, intendedStatus); err != nil {
128129
var e *errNotFound
129130
if !(deleting && errors.As(err, &e)) {
130131
r.Recorder.Event(svc, "Warning", "BackendError", err.Error())

controllers/suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ type TestBackendController struct {
105105
Counter int
106106
}
107107

108-
func (t *TestBackendController) ReconcileBackends(AutonegStatus, AutonegStatus) error {
108+
func (t *TestBackendController) ReconcileBackends(context.Context, AutonegStatus, AutonegStatus) error {
109109
t.Counter++
110110
fmt.Print(t.Counter)
111111
return nil

0 commit comments

Comments
 (0)