provider/aws: ASGs can wait for ELB InService

This enhances the waiting strategy introduced in #1839 to wait for each
ASG instance to become InService in attached ELBs before continuing.
This commit is contained in:
Paul Hinze 2015-05-14 12:45:21 -05:00
parent 8a3b75d361
commit b56a42627f
3 changed files with 135 additions and 24 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/awslabs/aws-sdk-go/aws"
"github.com/awslabs/aws-sdk-go/service/autoscaling"
"github.com/awslabs/aws-sdk-go/service/elb"
)
func resourceAwsAutoscalingGroup() *schema.Resource {
@ -38,6 +39,11 @@ func resourceAwsAutoscalingGroup() *schema.Resource {
Computed: true,
},
"min_elb_capacity": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
},
"min_size": &schema.Schema{
Type: schema.TypeInt,
Required: true,
@ -386,13 +392,19 @@ var waitForASGCapacityTimeout = 10 * time.Minute
// Waits for a minimum number of healthy instances to show up as healthy in the
// ASG before continuing. Waits up to `waitForASGCapacityTimeout` for
// "desired_capacity", or "min_size" if desired capacity is not specified.
//
// If "min_elb_capacity" is specified, will also wait for that number of
// instances to show up InService in all attached ELBs. See "Waiting for
// Capacity" in docs for more discussion of the feature.
func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
waitFor := d.Get("min_size").(int)
wantASG := d.Get("min_size").(int)
if v := d.Get("desired_capacity").(int); v > 0 {
waitFor = v
wantASG = v
}
wantELB := d.Get("min_elb_capacity").(int)
log.Printf("[DEBUG] Wanting for capacity: %d ASG, %d ELB", wantASG, wantELB)
log.Printf("[DEBUG] Waiting for group to have %d healthy instances", waitFor)
return resource.Retry(waitForASGCapacityTimeout, func() error {
g, err := getAwsAutoscalingGroup(d, meta)
if err != nil {
@ -401,24 +413,76 @@ func waitForASGCapacity(d *schema.ResourceData, meta interface{}) error {
if g == nil {
return nil
}
lbis, err := getLBInstanceStates(g, meta)
if err != nil {
return resource.RetryError{Err: err}
}
haveASG := 0
haveELB := 0
healthy := 0
for _, i := range g.Instances {
if i.HealthStatus == nil {
if i.HealthStatus == nil || i.InstanceID == nil || i.LifecycleState == nil {
continue
}
if strings.EqualFold(*i.HealthStatus, "Healthy") {
healthy++
if !strings.EqualFold(*i.HealthStatus, "Healthy") {
continue
}
if !strings.EqualFold(*i.LifecycleState, "InService") {
continue
}
haveASG++
if wantELB > 0 {
inAllLbs := true
for _, states := range lbis {
state, ok := states[*i.InstanceID]
if !ok || !strings.EqualFold(state, "InService") {
inAllLbs = false
}
}
if inAllLbs {
haveELB++
}
}
}
log.Printf(
"[DEBUG] %q has %d/%d healthy instances", d.Id(), healthy, waitFor)
log.Printf("[DEBUG] %q Capacity: %d/%d ASG, %d/%d ELB",
d.Id(), haveASG, wantASG, haveELB, wantELB)
if healthy >= waitFor {
if haveASG >= wantASG && haveELB >= wantELB {
return nil
}
return fmt.Errorf("Waiting for healthy instances: %d/%d", healthy, waitFor)
return fmt.Errorf("Still need to wait for more healthy instances.")
})
}
// getLBInstanceStates returns the instance states reported by every ELB
// attached to the provided ASG.
//
// The result is nested like: lbName -> instanceId -> instanceState
//
// Each load balancer named in g.LoadBalancerNames gets an entry in the
// result, even if it currently reports no instances, so callers can tell
// "attached but empty" apart from "not attached". Load balancer names that
// are nil, and instance states missing an ID or a state, are skipped rather
// than dereferenced. An ASG with no load balancers yields an empty, non-nil
// map.
//
// If any DescribeInstanceHealth call fails, a nil map and that error are
// returned; no partial result is exposed.
func getLBInstanceStates(g *autoscaling.AutoScalingGroup, meta interface{}) (map[string]map[string]string, error) {
	lbInstanceStates := make(map[string]map[string]string, len(g.LoadBalancerNames))
	elbconn := meta.(*AWSClient).elbconn

	for _, lbName := range g.LoadBalancerNames {
		// Guard the pointer the same way instance fields are guarded below;
		// a nil name cannot be queried or used as a map key.
		if lbName == nil {
			continue
		}

		opts := &elb.DescribeInstanceHealthInput{LoadBalancerName: lbName}
		r, err := elbconn.DescribeInstanceHealth(opts)
		if err != nil {
			return nil, err
		}

		// Build the per-LB map locally to avoid re-indexing the outer map
		// for every instance.
		states := make(map[string]string, len(r.InstanceStates))
		for _, is := range r.InstanceStates {
			if is.InstanceID == nil || is.State == nil {
				continue
			}
			states[*is.InstanceID] = *is.State
		}
		lbInstanceStates[*lbName] = states
	}

	return lbInstanceStates, nil
}

View File

@ -347,26 +347,36 @@ resource "aws_elb" "bar" {
availability_zones = ["us-west-2a"]
listener {
instance_port = 8000
instance_port = 80
instance_protocol = "http"
lb_port = 80
lb_protocol = "http"
}
health_check {
healthy_threshold = 2
unhealthy_threshold = 2
target = "HTTP:80/"
interval = 5
timeout = 2
}
}
resource "aws_launch_configuration" "foobar" {
image_id = "ami-21f78e11"
instance_type = "t1.micro"
// need an AMI that listens on :80 at boot, this is:
// bitnami-nginxstack-1.6.1-0-linux-ubuntu-14.04.1-x86_64-hvm-ebs-ami-99f5b1a9-3
image_id = "ami-b5b3fc85"
instance_type = "t2.micro"
}
resource "aws_autoscaling_group" "bar" {
availability_zones = ["us-west-2a"]
name = "foobar3-terraform-test"
max_size = 5
max_size = 2
min_size = 2
health_check_grace_period = 300
health_check_type = "ELB"
desired_capacity = 4
min_elb_capacity = 1
force_delete = true
launch_configuration = "${aws_launch_configuration.foobar.name}"

View File

@ -43,19 +43,18 @@ The following arguments are supported:
* `name` - (Required) The name of the auto scale group.
* `max_size` - (Required) The maximum size of the auto scale group.
* `min_size` - (Required) The minimum size of the auto scale group. Terraform
waits after ASG creation for this number of healthy instances to show up in
the ASG before continuing. Currently, it will wait for a maximum of 10m. If
ASG creation is taking more than a few minutes, it's worth investigating for
scaling activity errors caused by problems with the selected Launch
Configuration.
* `min_size` - (Required) The minimum size of the auto scale group.
(See also [Waiting for Capacity](#waiting-for-capacity) below.)
* `availability_zones` - (Required) A list of AZs to launch resources in.
* `launch_configuration` - (Required) The ID of the launch configuration to use.
* `health_check_grace_period` - (Optional) Time after instance comes into service before checking health.
* `health_check_type` - (Optional) "EC2" or "ELB". Controls how health checking is done.
* `desired_capacity` - (Optional) The number of Amazon EC2 instances that
should be running in the group. (If this is specified, Terraform will wait for
this number of healthy instances after ASG creation instead of `min_size`.)
should be running in the group. (See also [Waiting for
Capacity](#waiting-for-capacity) below.)
* `min_elb_capacity` - (Optional) Setting this will cause Terraform to wait
for this number of healthy instances in all attached load balancers.
(See also [Waiting for Capacity](#waiting-for-capacity) below.)
* `force_delete` - (Optional) Allows deleting the autoscaling group without waiting
for all instances in the pool to terminate.
* `load_balancers` (Optional) A list of load balancer names to add to the autoscaling
@ -88,3 +87,41 @@ The following attributes are exported:
* `vpc_zone_identifier` - The VPC zone identifier
* `load_balancers` (Optional) The load balancer names associated with the
autoscaling group.
<a id="waiting-for-capacity"></a>
## Waiting for Capacity
A newly-created ASG is initially empty and begins to scale to `min_size` (or
`desired_capacity`, if specified) by launching instances using the provided
Launch Configuration. These instances take time to launch and boot.
Terraform provides two mechanisms to help consistently manage ASG scale up
time across dependent resources.
#### Waiting for ASG Capacity
The first is default behavior. Terraform waits after ASG creation for
`min_size` (or `desired_capacity`, if specified) healthy instances to show up
in the ASG before continuing.
Terraform considers an instance "healthy" when the ASG reports `HealthStatus:
"Healthy"` and `LifecycleState: "InService"`. See the [AWS AutoScaling
Docs](https://docs.aws.amazon.com/AutoScaling/latest/DeveloperGuide/AutoScalingGroupLifecycle.html)
for more information on an ASG's lifecycle.
Terraform will wait for healthy instances for up to 10 minutes. If ASG creation
is taking more than a few minutes, it's worth investigating for scaling activity
errors, which can be caused by problems with the selected Launch Configuration.
#### Waiting for ELB Capacity
The second mechanism is optional, and affects ASGs with attached Load
Balancers. If `min_elb_capacity` is set, Terraform will wait for that number of
Instances to be `"InService"` in all attached `load_balancers`. This can be
used to ensure that service is being provided before Terraform moves on.
As with ASG Capacity, Terraform will wait for up to 10 minutes for
`"InService"` instances. If ASG creation takes more than a few minutes, this
could indicate one of a number of configuration problems. See the [AWS Docs on
Load Balancer Troubleshooting](https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-troubleshooting.html)
for more information.