From fba02f0bea55a7b846a98a73bdae3177ff109dee Mon Sep 17 00:00:00 2001 From: bclodius Date: Sun, 1 Oct 2017 08:22:04 -0400 Subject: [PATCH] retry on s3 backend internal errors Internal errors from S3 are usually transient, and can be immediately retried. Make 2 attempts at retrieving the state object before returning an error. --- backend/remote-state/s3/client.go | 39 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/backend/remote-state/s3/client.go b/backend/remote-state/s3/client.go index 87ddf0ff5..678519c8b 100644 --- a/backend/remote-state/s3/client.go +++ b/backend/remote-state/s3/client.go @@ -22,7 +22,10 @@ import ( ) // Store the last saved serial in dynamo with this suffix for consistency checks. -const stateIDSuffix = "-md5" +const ( + stateIDSuffix = "-md5" + s3ErrCodeInternalError = "InternalError" +) type RemoteClient struct { s3Client *s3.S3 @@ -92,21 +95,33 @@ func (c *RemoteClient) Get() (payload *remote.Payload, err error) { } func (c *RemoteClient) get() (*remote.Payload, error) { - output, err := c.s3Client.GetObject(&s3.GetObjectInput{ - Bucket: &c.bucketName, - Key: &c.path, - }) + var output *s3.GetObjectOutput + var err error - if err != nil { - if awserr := err.(awserr.Error); awserr != nil { - if awserr.Code() == "NoSuchKey" { - return nil, nil - } else { - return nil, err + // we immediately retry on an internal error, as those are usually transient + maxRetries := 2 + for retryCount := 0; ; retryCount++ { + output, err = c.s3Client.GetObject(&s3.GetObjectInput{ + Bucket: &c.bucketName, + Key: &c.path, + }) + + if err != nil { + if awserr, ok := err.(awserr.Error); ok { + switch awserr.Code() { + case s3.ErrCodeNoSuchKey: + return nil, nil + case s3ErrCodeInternalError: + if retryCount > maxRetries { + return nil, err + } + log.Println("[WARN] s3 internal error, retrying...") + continue + } } - } else { return nil, err } + break } defer output.Body.Close()