diff --git a/builtin/providers/aws/resource_aws_spot_fleet_request.go b/builtin/providers/aws/resource_aws_spot_fleet_request.go index dfb0056ef..f1f157ae1 100644 --- a/builtin/providers/aws/resource_aws_spot_fleet_request.go +++ b/builtin/providers/aws/resource_aws_spot_fleet_request.go @@ -936,7 +936,7 @@ func resourceAwsSpotFleetRequestDelete(d *schema.ResourceData, meta interface{}) conn := meta.(*AWSClient).ec2conn log.Printf("[INFO] Cancelling spot fleet request: %s", d.Id()) - _, err := conn.CancelSpotFleetRequests(&ec2.CancelSpotFleetRequestsInput{ + resp, err := conn.CancelSpotFleetRequests(&ec2.CancelSpotFleetRequestsInput{ SpotFleetRequestIds: []*string{aws.String(d.Id())}, TerminateInstances: aws.Bool(d.Get("terminate_instances_with_expiration").(bool)), }) @@ -945,7 +945,36 @@ func resourceAwsSpotFleetRequestDelete(d *schema.ResourceData, meta interface{}) return fmt.Errorf("Error cancelling spot request (%s): %s", d.Id(), err) } - return nil + // check response successfulFleetRequestSet to make sure our request was canceled + var found bool + for _, s := range resp.SuccessfulFleetRequests { + if *s.SpotFleetRequestId == d.Id() { + found = true + } + } + + if !found { + return fmt.Errorf("[ERR] Spot Fleet request (%s) was not found to be successfully canceled, dangling resources may exist", d.Id()) + } + + return resource.Retry(5*time.Minute, func() *resource.RetryError { + resp, err := conn.DescribeSpotFleetInstances(&ec2.DescribeSpotFleetInstancesInput{ + SpotFleetRequestId: aws.String(d.Id()), + }) + if err != nil { + return resource.NonRetryableError(err) + } + + if len(resp.ActiveInstances) == 0 { + log.Printf("[DEBUG] Active instance count is 0 for Spot Fleet Request (%s), removing", d.Id()) + return nil + } + + log.Printf("[DEBUG] Active instance count in Spot Fleet Request (%s): %d", d.Id(), len(resp.ActiveInstances)) + + return resource.RetryableError( + fmt.Errorf("fleet still has (%d) running instances", len(resp.ActiveInstances))) + }) } 
func hashEphemeralBlockDevice(v interface{}) int { diff --git a/builtin/providers/aws/resource_aws_spot_fleet_request_test.go b/builtin/providers/aws/resource_aws_spot_fleet_request_test.go index 0dbae30ac..1bddbf0e2 100644 --- a/builtin/providers/aws/resource_aws_spot_fleet_request_test.go +++ b/builtin/providers/aws/resource_aws_spot_fleet_request_test.go @@ -3,7 +3,9 @@ package aws import ( "encoding/base64" "fmt" + "log" "testing" + "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" @@ -248,6 +250,20 @@ func TestAccAWSSpotFleetRequest_withWeightedCapacity(t *testing.T) { var sfr ec2.SpotFleetRequestConfig rName := acctest.RandString(10) + fulfillSleep := func() resource.TestCheckFunc { + // sleep so that EC2 can fulfill the request. We do this to guard against a + // regression and possible leak where we'll destroy the request and the + // associated IAM role before anything is actually provisioned and running, + // thus leaking when those newly started instances are attempted to be + // destroyed + // See https://github.com/hashicorp/terraform/pull/8938 + return func(s *terraform.State) error { + log.Printf("[DEBUG] Test: Sleep to allow EC2 to actually begin fulfilling TestAccAWSSpotFleetRequest_withWeightedCapacity request") + time.Sleep(1 * time.Minute) + return nil + } + } + resource.Test(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, Providers: testAccProviders, @@ -256,6 +272,7 @@ func TestAccAWSSpotFleetRequest_withWeightedCapacity(t *testing.T) { resource.TestStep{ Config: testAccAWSSpotFleetRequestConfigWithWeightedCapacity(rName), Check: resource.ComposeAggregateTestCheckFunc( + fulfillSleep(), testAccCheckAWSSpotFleetRequestExists( "aws_spot_fleet_request.foo", &sfr), resource.TestCheckResourceAttr(