provider/aws: ASGs can wait for ELB InService

This enhances the waiting strategy introduced in #1839 to wait for each ASG instance to become InService in attached ELBs before continuing.
2015-05-14 12:45:21 -05:00 · 2015-05-14 12:45:21 -05:00 · b56a42627f
parent 8a3b75d361
commit b56a42627f
3 changed files with 135 additions and 24 deletions
--- a/builtin/providers/aws/resource_aws_autoscaling_group.go
+++ b/builtin/providers/aws/resource_aws_autoscaling_group.go
@ -11,6 +11,7 @@ import (

 	"github.com/awslabs/aws-sdk-go/aws"
 	"github.com/awslabs/aws-sdk-go/service/autoscaling"
+	"github.com/awslabs/aws-sdk-go/service/elb"
 )

 func resourceAwsAutoscalingGroup() *schema.Resource {
@ -38,6 +39,11 @@ func resourceAwsAutoscalingGroup() *schema.Resource {
 				Computed: true,
 			},

+			"min_elb_capacity": &schema.Schema{
+				Type:     schema.TypeInt,
+				Optional: true,
+			},
+
 			"min_size": &schema.Schema{
 				Type:     schema.TypeInt,
 				Required: true,
@ -386,13 +392,19 @@ var waitForASGCapacityTimeout = 10 * time.Minute
 // Waits for a minimum number of healthy instances to show up as healthy in the
 // ASG before continuing. Waits up to `waitForASGCapacityTimeout` for
 // "desired_capacity", or "min_size" if desired capacity is not specified.
+//
+// If "min_elb_capacity" is specified, will also wait for that number of
+// instances to show up InService in all attached ELBs. See "Waiting for
+// Capacity" in docs for more discussion of the feature.
 func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
-	waitFor := d.Get("min_size").(int)
+	wantASG := d.Get("min_size").(int)
 	if v := d.Get("desired_capacity").(int); v > 0 {
-		waitFor = v
+		wantASG = v
 	}
+	wantELB := d.Get("min_elb_capacity").(int)
+
+	log.Printf("[DEBUG] Wanting for capacity: %d ASG, %d ELB", wantASG, wantELB)

-	log.Printf("[DEBUG] Waiting for group to have %d healthy instances", waitFor)
 	return resource.Retry(waitForASGCapacityTimeout, func() error {
 		g, err := getAwsAutoscalingGroup(d, meta)
 		if err != nil {
@ -401,24 +413,76 @@ func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
 		if g == nil {
 			return nil
 		}
+		lbis, err := getLBInstanceStates(g, meta)
+		if err != nil {
+			return resource.RetryError{Err: err}
+		}
+
+		haveASG := 0
+		haveELB := 0

-		healthy := 0
 		for _, i := range g.Instances {
-			if i.HealthStatus == nil {
+			if i.HealthStatus == nil || i.InstanceID == nil || i.LifecycleState == nil {
 				continue
 			}
-			if strings.EqualFold(*i.HealthStatus, "Healthy") {
-				healthy++
+
+			if !strings.EqualFold(*i.HealthStatus, "Healthy") {
+				continue
+			}
+
+			if !strings.EqualFold(*i.LifecycleState, "InService") {
+				continue
+			}
+
+			haveASG++
+
+			if wantELB > 0 {
+				inAllLbs := true
+				for _, states := range lbis {
+					state, ok := states[*i.InstanceID]
+					if !ok || !strings.EqualFold(state, "InService") {
+						inAllLbs = false
+					}
+				}
+				if inAllLbs {
+					haveELB++
+				}
 			}
 		}

-		log.Printf(
-			"[DEBUG] %q has %d/%d healthy instances", d.Id(), healthy, waitFor)
+		log.Printf("[DEBUG] %q Capacity: %d/%d ASG, %d/%d ELB",
+			d.Id(), haveASG, wantASG, haveELB, wantELB)

-		if healthy >= waitFor {
+		if haveASG >= wantASG && haveELB >= wantELB {
 			return nil
 		}

-		return fmt.Errorf("Waiting for healthy instances: %d/%d", healthy, waitFor)
+		return fmt.Errorf("Still need to wait for more healthy instances.")
 	})
 }
+
+// getLBInstanceStates returns the instance states that DescribeInstanceHealth
+// reports for each ELB attached to the provided ASG, or the first API error.
+//
+// Nested like: lbName -> instanceId -> instanceState
+func getLBInstanceStates(g *autoscaling.AutoScalingGroup, meta interface{}) (map[string]map[string]string, error) {
+	lbInstanceStates := make(map[string]map[string]string)
+	elbconn := meta.(*AWSClient).elbconn
+
+	for _, lbName := range g.LoadBalancerNames {
+		lbInstanceStates[*lbName] = make(map[string]string) // every attached ELB gets an entry, even if it reports no instances
+		opts := &elb.DescribeInstanceHealthInput{LoadBalancerName: lbName}
+		r, err := elbconn.DescribeInstanceHealth(opts)
+		if err != nil {
+			return nil, err // abort on the first failed lookup; no partial map is returned
+		}
+		for _, is := range r.InstanceStates {
+			if is.InstanceID == nil || is.State == nil {
+				continue // skip entries missing an ID or a state rather than dereferencing nil
+			}
+			lbInstanceStates[*lbName][*is.InstanceID] = *is.State
+		}
+	}
+
+	return lbInstanceStates, nil
+}
--- a/builtin/providers/aws/resource_aws_autoscaling_group_test.go
+++ b/builtin/providers/aws/resource_aws_autoscaling_group_test.go
@ -347,26 +347,36 @@ resource "aws_elb" "bar" {
  availability_zones = ["us-west-2a"]

  listener {
-    instance_port = 8000
+    instance_port = 80
    instance_protocol = "http"
    lb_port = 80
    lb_protocol = "http"
  }
+
+  health_check {
+    healthy_threshold = 2
+    unhealthy_threshold = 2
+    target = "HTTP:80/"
+    interval = 5
+    timeout = 2
+  }
 }

 resource "aws_launch_configuration" "foobar" {
-  image_id = "ami-21f78e11"
-  instance_type = "t1.micro"
+  // need an AMI that listens on :80 at boot, this is:
+  // bitnami-nginxstack-1.6.1-0-linux-ubuntu-14.04.1-x86_64-hvm-ebs-ami-99f5b1a9-3
+  image_id = "ami-b5b3fc85"
+  instance_type = "t2.micro"
 }

 resource "aws_autoscaling_group" "bar" {
  availability_zones = ["us-west-2a"]
  name = "foobar3-terraform-test"
-  max_size = 5
+  max_size = 2
  min_size = 2
  health_check_grace_period = 300
  health_check_type = "ELB"
-  desired_capacity = 4
+  min_elb_capacity = 1
  force_delete = true

  launch_configuration = "${aws_launch_configuration.foobar.name}"
--- a/website/source/docs/providers/aws/r/autoscale.html.markdown
+++ b/website/source/docs/providers/aws/r/autoscale.html.markdown
@ -43,19 +43,18 @@ The following arguments are supported:

 * `name` - (Required) The name of the auto scale group.
 * `max_size` - (Required) The maximum size of the auto scale group.
-* `min_size` - (Required) The minimum size of the auto scale group. Terraform
-  waits after ASG creation for this number of healthy instances to show up in
-  the ASG before continuing. Currently, it will wait for a maxiumum of 10m, if
-  ASG creation is taking more than a few minutes, it's worth investigating for
-  scaling actvity errors caused by problems with the selected Launch
-  Configuration.
+* `min_size` - (Required) The minimum size of the auto scale group.
+    (See also [Waiting for Capacity](#waiting-for-capacity) below.)
 * `availability_zones` - (Required) A list of AZs to launch resources in.
 * `launch_configuration` - (Required) The ID of the launch configuration to use.
 * `health_check_grace_period` - (Optional) Time after instance comes into service before checking health.
 * `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done.
 * `desired_capacity` - (Optional) The number of Amazon EC2 instances that
-  should be running in the group. (If this is specified, Terraform will wait for
-  this number of healthy instances after ASG creation instead of `min_size`.)
+    should be running in the group. (See also [Waiting for
+    Capacity](#waiting-for-capacity) below.)
+* `min_elb_capacity` - (Optional) Setting this will cause Terraform to wait
+    for this number of healthy instances in all attached load balancers.
+    (See also [Waiting for Capacity](#waiting-for-capacity) below.)
 * `force_delete` - (Optional) Allows deleting the autoscaling group without waiting
   for all instances in the pool to terminate.
 * `load_balancers` (Optional) A list of load balancer names to add to the autoscaling
@ -88,3 +87,41 @@ The following attributes are exported:
 * `vpc_zone_identifier` - The VPC zone identifier
 * `load_balancers` (Optional) The load balancer names associated with the
   autoscaling group.
+
+<a id="waiting-for-capacity"></a>
+## Waiting for Capacity
+
+A newly-created ASG is initially empty and begins to scale to `min_size` (or
+`desired_capacity`, if specified) by launching instances using the provided
+Launch Configuration. These instances take time to launch and boot.
+
+Terraform provides two mechanisms to help consistently manage ASG scale up
+time across dependent resources.
+
+#### Waiting for ASG Capacity
+
+The first is default behavior. Terraform waits after ASG creation for
+`min_size` (or `desired_capacity`, if specified) healthy instances to show up
+in the ASG before continuing.
+
+Terraform considers an instance "healthy" when the ASG reports `HealthStatus:
+"Healthy"` and `LifecycleState: "InService"`. See the [AWS AutoScaling
+Docs](https://docs.aws.amazon.com/AutoScaling/latest/DeveloperGuide/AutoScalingGroupLifecycle.html)
+for more information on an ASG's lifecycle.
+
+Terraform will wait for healthy instances for up to 10 minutes. If ASG creation
+is taking more than a few minutes, it's worth investigating for scaling activity
+errors, which can be caused by problems with the selected Launch Configuration.
+
+#### Waiting for ELB Capacity
+
+The second mechanism is optional, and affects ASGs with attached Load
+Balancers. If `min_elb_capacity` is set, Terraform will wait for that number of
+Instances to be `"InService"` in all attached `load_balancers`. This can be
+used to ensure that service is being provided before Terraform moves on.
+
+As with ASG Capacity, Terraform will wait for up to 10 minutes for
+`"InService"` instances. If ASG creation takes more than a few minutes, this
+could indicate one of a number of configuration problems. See the [AWS Docs on
+Load Balancer Troubleshooting](https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-troubleshooting.html)
+for more information.