From db4201534248ecda7b43f9bb763bbbf82048a807 Mon Sep 17 00:00:00 2001 From: James Bardin Date: Wed, 9 Aug 2017 15:05:51 -0400 Subject: [PATCH] backoff retries in remote-exec provisioner Add a simple backoff to the remote-exec retryFunc. Backoff between tries, up to a 10s max. --- .../remote-exec/resource_provisioner.go | 22 +++++++++++-- .../remote-exec/resource_provisioner_test.go | 33 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/builtin/provisioners/remote-exec/resource_provisioner.go b/builtin/provisioners/remote-exec/resource_provisioner.go index 7dd86daf0..ba811dafe 100644 --- a/builtin/provisioners/remote-exec/resource_provisioner.go +++ b/builtin/provisioners/remote-exec/resource_provisioner.go @@ -19,6 +19,10 @@ import ( "github.com/mitchellh/go-linereader" ) +// maxBackoffDelay is the maximum delay between retry attempts +var maxBackoffDelay = 10 * time.Second +var initialBackoffDelay = time.Second + func Provisioner() terraform.ResourceProvisioner { return &schema.Provisioner{ Schema: map[string]*schema.Schema{ @@ -246,7 +250,6 @@ func copyOutput( } // retryFunc is used to retry a function for a given duration -// TODO: this should probably backoff too func retryFunc(ctx context.Context, timeout time.Duration, f func() error) error { // Build a new context with the timeout ctx, done := context.WithTimeout(ctx, timeout) @@ -263,12 +266,13 @@ func retryFunc(ctx context.Context, timeout time.Duration, f func() error) error go func() { defer close(doneCh) + delay := time.Duration(0) for { // If our context ended, we want to exit right away. 
select { case <-ctx.Done(): return - default: + case <-time.After(delay): } // Try the function call @@ -279,7 +283,19 @@ func retryFunc(ctx context.Context, timeout time.Duration, f func() error) error return } - log.Printf("Retryable error: %v", err) + log.Printf("[WARN] retryable error: %v", err) + + delay *= 2 + + if delay == 0 { + delay = initialBackoffDelay + } + + if delay > maxBackoffDelay { + delay = maxBackoffDelay + } + + log.Printf("[INFO] sleeping for %s", delay) } }() diff --git a/builtin/provisioners/remote-exec/resource_provisioner_test.go b/builtin/provisioners/remote-exec/resource_provisioner_test.go index 67faf1fe4..8c447788d 100644 --- a/builtin/provisioners/remote-exec/resource_provisioner_test.go +++ b/builtin/provisioners/remote-exec/resource_provisioner_test.go @@ -211,6 +211,16 @@ func TestResourceProvider_CollectScripts_scriptsEmpty(t *testing.T) { } func TestRetryFunc(t *testing.T) { + origMax := maxBackoffDelay + maxBackoffDelay = time.Second + origStart := initialBackoffDelay + initialBackoffDelay = 10 * time.Millisecond + + defer func() { + maxBackoffDelay = origMax + initialBackoffDelay = origStart + }() + // succeed on the third try errs := []error{io.EOF, &net.OpError{Err: errors.New("ERROR")}, nil} count := 0 @@ -235,6 +245,29 @@ func TestRetryFunc(t *testing.T) { } } +func TestRetryFuncBackoff(t *testing.T) { + origMax := maxBackoffDelay + maxBackoffDelay = time.Second + origStart := initialBackoffDelay + initialBackoffDelay = 100 * time.Millisecond + + defer func() { + maxBackoffDelay = origMax + initialBackoffDelay = origStart + }() + + count := 0 + + retryFunc(context.Background(), time.Second, func() error { + count++ + return io.EOF + }) + + if count > 4 { + t.Fatalf("retry func failed to backoff. called %d times", count) + } +} + func testConfig(t *testing.T, c map[string]interface{}) *terraform.ResourceConfig { r, err := config.NewRawConfig(c) if err != nil {