From 4920dfedb38d312e966d8cfab2fdaa6e19c57b71 Mon Sep 17 00:00:00 2001 From: p4u1 Date: Fri, 19 Jan 2024 15:09:00 +0000 Subject: [PATCH] fix: retry docker volume remove (!399) Closes https://git.coopcloud.tech/coop-cloud/organising/issues/509 Reviewed-on: https://git.coopcloud.tech/coop-cloud/abra/pulls/399 Reviewed-by: decentral1se Co-authored-by: p4u1 Co-committed-by: p4u1 --- cli/app/remove.go | 26 ++++++++++++++++++++++++-- cli/app/remove_test.go | 26 ++++++++++++++++++++++++++ tests/integration/app_remove.bats | 8 +------- 3 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 cli/app/remove_test.go diff --git a/cli/app/remove.go b/cli/app/remove.go index ea4efedf..82499fd1 100644 --- a/cli/app/remove.go +++ b/cli/app/remove.go @@ -3,7 +3,9 @@ package app import ( "context" "fmt" + "log" "os" + "time" "coopcloud.tech/abra/cli/internal" "coopcloud.tech/abra/pkg/autocomplete" @@ -124,9 +126,11 @@ flag. if len(vols) > 0 { for _, vol := range vols { - err := cl.VolumeRemove(context.Background(), vol, internal.Force) // last argument is for force removing + err = retryFunc(5, func() error { + return cl.VolumeRemove(context.Background(), vol, internal.Force) // last argument is for force removing + }) if err != nil { - logrus.Fatal(err) + log.Fatalf("removing volumes failed: %s", err) } logrus.Info(fmt.Sprintf("volume %s removed", vol)) } @@ -143,3 +147,21 @@ flag. return nil }, } + +// retryFunc retries the given function for the given retries. After the nth +// retry it waits (n + 1)^2 seconds before the next retry (starting with n=0). +// It returns an error if the function still failed after the last retry. +func retryFunc(retries int, fn func() error) error { + for i := 0; i < retries; i++ { + err := fn() + if err == nil { + return nil + } + if i+1 < retries { + sleep := time.Duration(i+1) * time.Duration(i+1) + logrus.Infof("%s: waiting %d seconds before next retry", err, sleep) + time.Sleep(sleep * time.Second) + } + } + return fmt.Errorf("%d retries failed", retries) +} diff --git a/cli/app/remove_test.go b/cli/app/remove_test.go new file mode 100644 index 00000000..c3c9f8a0 --- /dev/null +++ b/cli/app/remove_test.go @@ -0,0 +1,26 @@ +package app + +import ( + "fmt" + "testing" +) + +func TestRetryFunc(t *testing.T) { + err := retryFunc(1, func() error { return nil }) + if err != nil { + t.Errorf("should not return an error: %s", err) + } + + i := 0 + fn := func() error { + i++ + return fmt.Errorf("oh no, something went wrong!") + } + err = retryFunc(2, fn) + if err == nil { + t.Error("should return an error") + } + if i != 2 { + t.Errorf("The function should have been called 1 times, got %d", i) + } +} diff --git a/tests/integration/app_remove.bats b/tests/integration/app_remove.bats index 8b53984c..8800e70a 100644 --- a/tests/integration/app_remove.bats +++ b/tests/integration/app_remove.bats @@ -104,10 +104,7 @@ teardown(){ _undeploy_app - # NOTE(d1): to let the stack come down before nuking volumes - sleep 5 - - run $ABRA app volume rm "$TEST_APP_DOMAIN" --force + run $ABRA app volume rm "$TEST_APP_DOMAIN" assert_success run $ABRA app volume ls "$TEST_APP_DOMAIN" @@ -132,9 +129,6 @@ teardown(){ _undeploy_app - # NOTE(d1): to let the stack come down before nuking volumes - sleep 5 - run $ABRA app rm "$TEST_APP_DOMAIN" --no-input assert_success assert_output --partial 'test-volume'