From b56a42627ff0a03689ef6c27c504af0f3e2a25b2 Mon Sep 17 00:00:00 2001 From: Paul Hinze Date: Thu, 14 May 2015 12:45:21 -0500 Subject: [PATCH] provider/aws: ASGs can wait for ELB InService This enhances the waiting strategy introduced in #1839 to wait for each ASG instance to become InService in attached ELBs before continuing. --- .../aws/resource_aws_autoscaling_group.go | 86 ++++++++++++++++--- .../resource_aws_autoscaling_group_test.go | 20 +++-- .../providers/aws/r/autoscale.html.markdown | 53 ++++++++++-- 3 files changed, 135 insertions(+), 24 deletions(-) diff --git a/builtin/providers/aws/resource_aws_autoscaling_group.go b/builtin/providers/aws/resource_aws_autoscaling_group.go index bd78ee747..a53a4b636 100644 --- a/builtin/providers/aws/resource_aws_autoscaling_group.go +++ b/builtin/providers/aws/resource_aws_autoscaling_group.go @@ -11,6 +11,7 @@ import ( "github.com/awslabs/aws-sdk-go/aws" "github.com/awslabs/aws-sdk-go/service/autoscaling" + "github.com/awslabs/aws-sdk-go/service/elb" ) func resourceAwsAutoscalingGroup() *schema.Resource { @@ -38,6 +39,11 @@ func resourceAwsAutoscalingGroup() *schema.Resource { Computed: true, }, + "min_elb_capacity": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + }, + "min_size": &schema.Schema{ Type: schema.TypeInt, Required: true, @@ -386,13 +392,19 @@ var waitForASGCapacityTimeout = 10 * time.Minute // Waits for a minimum number of healthy instances to show up as healthy in the // ASG before continuing. Waits up to `waitForASGCapacityTimeout` for // "desired_capacity", or "min_size" if desired capacity is not specified. +// +// If "min_elb_capacity" is specified, will also wait for that number of +// instances to show up InService in all attached ELBs. See "Waiting for +// Capacity" in docs for more discussion of the feature. 
func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error { - waitFor := d.Get("min_size").(int) + wantASG := d.Get("min_size").(int) if v := d.Get("desired_capacity").(int); v > 0 { - waitFor = v + wantASG = v } + wantELB := d.Get("min_elb_capacity").(int) + + log.Printf("[DEBUG] Wanting for capacity: %d ASG, %d ELB", wantASG, wantELB) - log.Printf("[DEBUG] Waiting for group to have %d healthy instances", waitFor) return resource.Retry(waitForASGCapacityTimeout, func() error { g, err := getAwsAutoscalingGroup(d, meta) if err != nil { @@ -401,24 +413,76 @@ func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error { if g == nil { return nil } + lbis, err := getLBInstanceStates(g, meta) + if err != nil { + return resource.RetryError{Err: err} + } + + haveASG := 0 + haveELB := 0 - healthy := 0 for _, i := range g.Instances { - if i.HealthStatus == nil { + if i.HealthStatus == nil || i.InstanceID == nil || i.LifecycleState == nil { continue } - if strings.EqualFold(*i.HealthStatus, "Healthy") { - healthy++ + + if !strings.EqualFold(*i.HealthStatus, "Healthy") { + continue + } + + if !strings.EqualFold(*i.LifecycleState, "InService") { + continue + } + + haveASG++ + + if wantELB > 0 { + inAllLbs := true + for _, states := range lbis { + state, ok := states[*i.InstanceID] + if !ok || !strings.EqualFold(state, "InService") { + inAllLbs = false + } + } + if inAllLbs { + haveELB++ + } } } - log.Printf( - "[DEBUG] %q has %d/%d healthy instances", d.Id(), healthy, waitFor) + log.Printf("[DEBUG] %q Capacity: %d/%d ASG, %d/%d ELB", + d.Id(), haveASG, wantASG, haveELB, wantELB) - if healthy >= waitFor { + if haveASG >= wantASG && haveELB >= wantELB { return nil } - return fmt.Errorf("Waiting for healthy instances: %d/%d", healthy, waitFor) + return fmt.Errorf("Still need to wait for more healthy instances.") }) } + +// Returns a mapping of the instance states of all the ELBs attached to the +// provided ASG. 
+// +// Nested like: lbName -> instanceId -> instanceState +func getLBInstanceStates(g *autoscaling.AutoScalingGroup, meta interface{}) (map[string]map[string]string, error) { + lbInstanceStates := make(map[string]map[string]string) + elbconn := meta.(*AWSClient).elbconn + + for _, lbName := range g.LoadBalancerNames { + lbInstanceStates[*lbName] = make(map[string]string) + opts := &elb.DescribeInstanceHealthInput{LoadBalancerName: lbName} + r, err := elbconn.DescribeInstanceHealth(opts) + if err != nil { + return nil, err + } + for _, is := range r.InstanceStates { + if is.InstanceID == nil || is.State == nil { + continue + } + lbInstanceStates[*lbName][*is.InstanceID] = *is.State + } + } + + return lbInstanceStates, nil +} diff --git a/builtin/providers/aws/resource_aws_autoscaling_group_test.go b/builtin/providers/aws/resource_aws_autoscaling_group_test.go index 9e3600438..5097d5833 100644 --- a/builtin/providers/aws/resource_aws_autoscaling_group_test.go +++ b/builtin/providers/aws/resource_aws_autoscaling_group_test.go @@ -347,26 +347,36 @@ resource "aws_elb" "bar" { availability_zones = ["us-west-2a"] listener { - instance_port = 8000 + instance_port = 80 instance_protocol = "http" lb_port = 80 lb_protocol = "http" } + + health_check { + healthy_threshold = 2 + unhealthy_threshold = 2 + target = "HTTP:80/" + interval = 5 + timeout = 2 + } } resource "aws_launch_configuration" "foobar" { - image_id = "ami-21f78e11" - instance_type = "t1.micro" + // need an AMI that listens on :80 at boot, this is: + // bitnami-nginxstack-1.6.1-0-linux-ubuntu-14.04.1-x86_64-hvm-ebs-ami-99f5b1a9-3 + image_id = "ami-b5b3fc85" + instance_type = "t2.micro" } resource "aws_autoscaling_group" "bar" { availability_zones = ["us-west-2a"] name = "foobar3-terraform-test" - max_size = 5 + max_size = 2 min_size = 2 health_check_grace_period = 300 health_check_type = "ELB" - desired_capacity = 4 + min_elb_capacity = 1 force_delete = true launch_configuration = 
"${aws_launch_configuration.foobar.name}" diff --git a/website/source/docs/providers/aws/r/autoscale.html.markdown b/website/source/docs/providers/aws/r/autoscale.html.markdown index 7f3a4be9d..2e4021e94 100644 --- a/website/source/docs/providers/aws/r/autoscale.html.markdown +++ b/website/source/docs/providers/aws/r/autoscale.html.markdown @@ -43,19 +43,18 @@ The following arguments are supported: * `name` - (Required) The name of the auto scale group. * `max_size` - (Required) The maximum size of the auto scale group. -* `min_size` - (Required) The minimum size of the auto scale group. Terraform - waits after ASG creation for this number of healthy instances to show up in - the ASG before continuing. Currently, it will wait for a maxiumum of 10m, if - ASG creation is taking more than a few minutes, it's worth investigating for - scaling actvity errors caused by problems with the selected Launch - Configuration. +* `min_size` - (Required) The minimum size of the auto scale group. + (See also [Waiting for Capacity](#waiting-for-capacity) below.) * `availability_zones` - (Required) A list of AZs to launch resources in. * `launch_configuration` - (Required) The ID of the launch configuration to use. * `health_check_grace_period` - (Optional) Time after instance comes into service before checking health. * `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done. * `desired_capacity` - (Optional) The number of Amazon EC2 instances that - should be running in the group. (If this is specified, Terraform will wait for - this number of healthy instances after ASG creation instead of `min_size`.) + should be running in the group. (See also [Waiting for + Capacity](#waiting-for-capacity) below.) +* `min_elb_capacity` - (Optional) Setting this will cause Terraform to wait + for this number of healthy instances in all attached load balancers. + (See also [Waiting for Capacity](#waiting-for-capacity) below.) 
* `force_delete` - (Optional) Allows deleting the autoscaling group without waiting for all instances in the pool to terminate. * `load_balancers` (Optional) A list of load balancer names to add to the autoscaling @@ -88,3 +87,41 @@ The following attributes are exported: * `vpc_zone_identifier` - The VPC zone identifier * `load_balancers` (Optional) The load balancer names associated with the autoscaling group. + + +## Waiting for Capacity + +A newly-created ASG is initially empty and begins to scale to `min_size` (or +`desired_capacity`, if specified) by launching instances using the provided +Launch Configuration. These instances take time to launch and boot. + +Terraform provides two mechanisms to help consistently manage ASG scale up +time across dependent resources. + +#### Waiting for ASG Capacity + +The first is default behavior. Terraform waits after ASG creation for +`min_size` (or `desired_capacity`, if specified) healthy instances to show up +in the ASG before continuing. + +Terraform considers an instance "healthy" when the ASG reports `HealthStatus: +"Healthy"` and `LifecycleState: "InService"`. See the [AWS AutoScaling +Docs](https://docs.aws.amazon.com/AutoScaling/latest/DeveloperGuide/AutoScalingGroupLifecycle.html) +for more information on an ASG's lifecycle. + +Terraform will wait for healthy instances for up to 10 minutes. If ASG creation +is taking more than a few minutes, it's worth investigating for scaling activity +errors, which can be caused by problems with the selected Launch Configuration. + +#### Waiting for ELB Capacity + +The second mechanism is optional, and affects ASGs with attached Load +Balancers. If `min_elb_capacity` is set, Terraform will wait for that number of +Instances to be `"InService"` in all attached `load_balancers`. This can be +used to ensure that service is being provided before Terraform moves on. + +As with ASG Capacity, Terraform will wait for up to 10 minutes for +`"InService"` instances. 
If ASG creation takes more than a few minutes, this +could indicate one of a number of configuration problems. See the [AWS Docs on +Load Balancer Troubleshooting](https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-troubleshooting.html) +for more information.