provider/aws: ASGs can wait for ELB InService

This enhances the waiting strategy introduced in #1839 to wait for each
ASG instance to become InService in attached ELBs before continuing.
This commit is contained in:
Paul Hinze 2015-05-14 12:45:21 -05:00
parent 8a3b75d361
commit b56a42627f
3 changed files with 135 additions and 24 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/awslabs/aws-sdk-go/aws" "github.com/awslabs/aws-sdk-go/aws"
"github.com/awslabs/aws-sdk-go/service/autoscaling" "github.com/awslabs/aws-sdk-go/service/autoscaling"
"github.com/awslabs/aws-sdk-go/service/elb"
) )
func resourceAwsAutoscalingGroup() *schema.Resource { func resourceAwsAutoscalingGroup() *schema.Resource {
@ -38,6 +39,11 @@ func resourceAwsAutoscalingGroup() *schema.Resource {
Computed: true, Computed: true,
}, },
"min_elb_capacity": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
},
"min_size": &schema.Schema{ "min_size": &schema.Schema{
Type: schema.TypeInt, Type: schema.TypeInt,
Required: true, Required: true,
@ -386,13 +392,19 @@ var waitForASGCapacityTimeout = 10 * time.Minute
// Waits for a minimum number of healthy instances to show up as healthy in the // Waits for a minimum number of healthy instances to show up as healthy in the
// ASG before continuing. Waits up to `waitForASGCapacityTimeout` for // ASG before continuing. Waits up to `waitForASGCapacityTimeout` for
// "desired_capacity", or "min_size" if desired capacity is not specified. // "desired_capacity", or "min_size" if desired capacity is not specified.
//
// If "min_elb_capacity" is specified, will also wait for that number of
// instances to show up InService in all attached ELBs. See "Waiting for
// Capacity" in docs for more discussion of the feature.
func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error { func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
waitFor := d.Get("min_size").(int) wantASG := d.Get("min_size").(int)
if v := d.Get("desired_capacity").(int); v > 0 { if v := d.Get("desired_capacity").(int); v > 0 {
waitFor = v wantASG = v
} }
wantELB := d.Get("min_elb_capacity").(int)
log.Printf("[DEBUG] Wanting for capacity: %d ASG, %d ELB", wantASG, wantELB)
log.Printf("[DEBUG] Waiting for group to have %d healthy instances", waitFor)
return resource.Retry(waitForASGCapacityTimeout, func() error { return resource.Retry(waitForASGCapacityTimeout, func() error {
g, err := getAwsAutoscalingGroup(d, meta) g, err := getAwsAutoscalingGroup(d, meta)
if err != nil { if err != nil {
@ -401,24 +413,76 @@ func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
if g == nil { if g == nil {
return nil return nil
} }
lbis, err := getLBInstanceStates(g, meta)
if err != nil {
return resource.RetryError{Err: err}
}
haveASG := 0
haveELB := 0
healthy := 0
for _, i := range g.Instances { for _, i := range g.Instances {
if i.HealthStatus == nil { if i.HealthStatus == nil || i.InstanceID == nil || i.LifecycleState == nil {
continue continue
} }
if strings.EqualFold(*i.HealthStatus, "Healthy") {
healthy++ if !strings.EqualFold(*i.HealthStatus, "Healthy") {
continue
}
if !strings.EqualFold(*i.LifecycleState, "InService") {
continue
}
haveASG++
if wantELB > 0 {
inAllLbs := true
for _, states := range lbis {
state, ok := states[*i.InstanceID]
if !ok || !strings.EqualFold(state, "InService") {
inAllLbs = false
}
}
if inAllLbs {
haveELB++
}
} }
} }
log.Printf( log.Printf("[DEBUG] %q Capacity: %d/%d ASG, %d/%d ELB",
"[DEBUG] %q has %d/%d healthy instances", d.Id(), healthy, waitFor) d.Id(), haveASG, wantASG, haveELB, wantELB)
if healthy >= waitFor { if haveASG >= wantASG && haveELB >= wantELB {
return nil return nil
} }
return fmt.Errorf("Waiting for healthy instances: %d/%d", healthy, waitFor) return fmt.Errorf("Still need to wait for more healthy instances.")
}) })
} }
// getLBInstanceStates returns the instance states reported by every ELB
// attached to the provided ASG.
//
// The result is nested like: lbName -> instanceId -> instanceState. Each
// non-nil load balancer name gets an entry, even when that ELB reports no
// instances. If any DescribeInstanceHealth call fails, a nil map and that
// error are returned.
func getLBInstanceStates(g *autoscaling.AutoScalingGroup, meta interface{}) (map[string]map[string]string, error) {
	lbInstanceStates := make(map[string]map[string]string)
	elbconn := meta.(*AWSClient).elbconn

	for _, lbName := range g.LoadBalancerNames {
		// Skip nil names for the same reason nil instance fields are skipped
		// below: dereferencing them would panic.
		if lbName == nil {
			continue
		}

		opts := &elb.DescribeInstanceHealthInput{LoadBalancerName: lbName}
		r, err := elbconn.DescribeInstanceHealth(opts)
		if err != nil {
			return nil, err
		}

		// Build the per-ELB map only once the call has succeeded.
		states := make(map[string]string, len(r.InstanceStates))
		for _, is := range r.InstanceStates {
			if is.InstanceID == nil || is.State == nil {
				continue
			}
			states[*is.InstanceID] = *is.State
		}
		lbInstanceStates[*lbName] = states
	}

	return lbInstanceStates, nil
}

View File

@ -347,26 +347,36 @@ resource "aws_elb" "bar" {
availability_zones = ["us-west-2a"] availability_zones = ["us-west-2a"]
listener { listener {
instance_port = 8000 instance_port = 80
instance_protocol = "http" instance_protocol = "http"
lb_port = 80 lb_port = 80
lb_protocol = "http" lb_protocol = "http"
} }
health_check {
healthy_threshold = 2
unhealthy_threshold = 2
target = "HTTP:80/"
interval = 5
timeout = 2
}
} }
resource "aws_launch_configuration" "foobar" { resource "aws_launch_configuration" "foobar" {
image_id = "ami-21f78e11" // need an AMI that listens on :80 at boot, this is:
instance_type = "t1.micro" // bitnami-nginxstack-1.6.1-0-linux-ubuntu-14.04.1-x86_64-hvm-ebs-ami-99f5b1a9-3
image_id = "ami-b5b3fc85"
instance_type = "t2.micro"
} }
resource "aws_autoscaling_group" "bar" { resource "aws_autoscaling_group" "bar" {
availability_zones = ["us-west-2a"] availability_zones = ["us-west-2a"]
name = "foobar3-terraform-test" name = "foobar3-terraform-test"
max_size = 5 max_size = 2
min_size = 2 min_size = 2
health_check_grace_period = 300 health_check_grace_period = 300
health_check_type = "ELB" health_check_type = "ELB"
desired_capacity = 4 min_elb_capacity = 1
force_delete = true force_delete = true
launch_configuration = "${aws_launch_configuration.foobar.name}" launch_configuration = "${aws_launch_configuration.foobar.name}"

View File

@ -43,19 +43,18 @@ The following arguments are supported:
* `name` - (Required) The name of the auto scale group. * `name` - (Required) The name of the auto scale group.
* `max_size` - (Required) The maximum size of the auto scale group. * `max_size` - (Required) The maximum size of the auto scale group.
* `min_size` - (Required) The minimum size of the auto scale group. Terraform * `min_size` - (Required) The minimum size of the auto scale group.
waits after ASG creation for this number of healthy instances to show up in (See also [Waiting for Capacity](#waiting-for-capacity) below.)
the ASG before continuing. Currently, it will wait for a maxiumum of 10m, if
ASG creation is taking more than a few minutes, it's worth investigating for
scaling actvity errors caused by problems with the selected Launch
Configuration.
* `availability_zones` - (Required) A list of AZs to launch resources in. * `availability_zones` - (Required) A list of AZs to launch resources in.
* `launch_configuration` - (Required) The ID of the launch configuration to use. * `launch_configuration` - (Required) The ID of the launch configuration to use.
* `health_check_grace_period` - (Optional) Time after instance comes into service before checking health. * `health_check_grace_period` - (Optional) Time after instance comes into service before checking health.
* `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done. * `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done.
* `desired_capacity` - (Optional) The number of Amazon EC2 instances that * `desired_capacity` - (Optional) The number of Amazon EC2 instances that
should be running in the group. (If this is specified, Terraform will wait for should be running in the group. (See also [Waiting for
this number of healthy instances after ASG creation instead of `min_size`.) Capacity](#waiting-for-capacity) below.)
* `min_elb_capacity` - (Optional) Setting this will cause Terraform to wait
for this number of healthy instances in all attached load balancers.
(See also [Waiting for Capacity](#waiting-for-capacity) below.)
* `force_delete` - (Optional) Allows deleting the autoscaling group without waiting * `force_delete` - (Optional) Allows deleting the autoscaling group without waiting
for all instances in the pool to terminate. for all instances in the pool to terminate.
* `load_balancers` (Optional) A list of load balancer names to add to the autoscaling * `load_balancers` (Optional) A list of load balancer names to add to the autoscaling
@ -88,3 +87,41 @@ The following attributes are exported:
* `vpc_zone_identifier` - The VPC zone identifier * `vpc_zone_identifier` - The VPC zone identifier
* `load_balancers` (Optional) The load balancer names associated with the * `load_balancers` (Optional) The load balancer names associated with the
autoscaling group. autoscaling group.
<a id="waiting-for-capacity"></a>
## Waiting for Capacity
A newly-created ASG is initially empty and begins to scale to `min_size` (or
`desired_capacity`, if specified) by launching instances using the provided
Launch Configuration. These instances take time to launch and boot.
Terraform provides two mechanisms to help consistently manage ASG scale up
time across dependent resources.
#### Waiting for ASG Capacity
The first is default behavior. Terraform waits after ASG creation for
`min_size` (or `desired_capacity`, if specified) healthy instances to show up
in the ASG before continuing.
Terraform considers an instance "healthy" when the ASG reports `HealthStatus:
"Healthy"` and `LifecycleState: "InService"`. See the [AWS AutoScaling
Docs](https://docs.aws.amazon.com/AutoScaling/latest/DeveloperGuide/AutoScalingGroupLifecycle.html)
for more information on an ASG's lifecycle.
Terraform will wait for healthy instances for up to 10 minutes. If ASG creation
is taking more than a few minutes, it's worth investigating for scaling activity
errors, which can be caused by problems with the selected Launch Configuration.
#### Waiting for ELB Capacity
The second mechanism is optional, and affects ASGs with attached Load
Balancers. If `min_elb_capacity` is set, Terraform will wait for that number of
Instances to be `"InService"` in all attached `load_balancers`. This can be
used to ensure that service is being provided before Terraform moves on.
As with ASG Capacity, Terraform will wait for up to 10 minutes for
`"InService"` instances. If ASG creation takes more than a few minutes, this
could indicate one of a number of configuration problems. See the [AWS Docs on
Load Balancer Troubleshooting](https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-troubleshooting.html)
for more information.