Merge pull request #19155 from bdwyertech/chef-exit-codes
Chef: Gracefully Handle RFC062 Exit Codes
This commit is contained in:
commit
e912dc8551
|
@ -15,6 +15,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"text/template"
|
"text/template"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/terraform/communicator"
|
"github.com/hashicorp/terraform/communicator"
|
||||||
"github.com/hashicorp/terraform/communicator/remote"
|
"github.com/hashicorp/terraform/communicator/remote"
|
||||||
|
@ -97,6 +98,7 @@ type provisioner struct {
|
||||||
PolicyName string
|
PolicyName string
|
||||||
HTTPProxy string
|
HTTPProxy string
|
||||||
HTTPSProxy string
|
HTTPSProxy string
|
||||||
|
MaxRetries int
|
||||||
NamedRunList string
|
NamedRunList string
|
||||||
NOProxy []string
|
NOProxy []string
|
||||||
NodeName string
|
NodeName string
|
||||||
|
@ -104,6 +106,7 @@ type provisioner struct {
|
||||||
OSType string
|
OSType string
|
||||||
RecreateClient bool
|
RecreateClient bool
|
||||||
PreventSudo bool
|
PreventSudo bool
|
||||||
|
RetryOnExitCode map[int]bool
|
||||||
RunList []string
|
RunList []string
|
||||||
SecretKey string
|
SecretKey string
|
||||||
ServerURL string
|
ServerURL string
|
||||||
|
@ -114,6 +117,7 @@ type provisioner struct {
|
||||||
UserKey string
|
UserKey string
|
||||||
Vaults map[string][]string
|
Vaults map[string][]string
|
||||||
Version string
|
Version string
|
||||||
|
WaitForRetry time.Duration
|
||||||
|
|
||||||
cleanupUserKeyCmd string
|
cleanupUserKeyCmd string
|
||||||
createConfigFiles provisionFn
|
createConfigFiles provisionFn
|
||||||
|
@ -197,6 +201,11 @@ func Provisioner() terraform.ResourceProvisioner {
|
||||||
Type: schema.TypeString,
|
Type: schema.TypeString,
|
||||||
Optional: true,
|
Optional: true,
|
||||||
},
|
},
|
||||||
|
"max_retries": &schema.Schema{
|
||||||
|
Type: schema.TypeInt,
|
||||||
|
Optional: true,
|
||||||
|
Default: 0,
|
||||||
|
},
|
||||||
"no_proxy": &schema.Schema{
|
"no_proxy": &schema.Schema{
|
||||||
Type: schema.TypeList,
|
Type: schema.TypeList,
|
||||||
Elem: &schema.Schema{Type: schema.TypeString},
|
Elem: &schema.Schema{Type: schema.TypeString},
|
||||||
|
@ -215,12 +224,17 @@ func Provisioner() terraform.ResourceProvisioner {
|
||||||
Type: schema.TypeString,
|
Type: schema.TypeString,
|
||||||
Optional: true,
|
Optional: true,
|
||||||
},
|
},
|
||||||
|
"prevent_sudo": &schema.Schema{
|
||||||
|
Type: schema.TypeBool,
|
||||||
|
Optional: true,
|
||||||
|
},
|
||||||
"recreate_client": &schema.Schema{
|
"recreate_client": &schema.Schema{
|
||||||
Type: schema.TypeBool,
|
Type: schema.TypeBool,
|
||||||
Optional: true,
|
Optional: true,
|
||||||
},
|
},
|
||||||
"prevent_sudo": &schema.Schema{
|
"retry_on_exit_code": &schema.Schema{
|
||||||
Type: schema.TypeBool,
|
Type: schema.TypeList,
|
||||||
|
Elem: &schema.Schema{Type: schema.TypeInt},
|
||||||
Optional: true,
|
Optional: true,
|
||||||
},
|
},
|
||||||
"run_list": &schema.Schema{
|
"run_list": &schema.Schema{
|
||||||
|
@ -252,6 +266,11 @@ func Provisioner() terraform.ResourceProvisioner {
|
||||||
Type: schema.TypeString,
|
Type: schema.TypeString,
|
||||||
Optional: true,
|
Optional: true,
|
||||||
},
|
},
|
||||||
|
"wait_for_retry": &schema.Schema{
|
||||||
|
Type: schema.TypeInt,
|
||||||
|
Optional: true,
|
||||||
|
Default: 30,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
ApplyFunc: applyFn,
|
ApplyFunc: applyFn,
|
||||||
|
@ -371,11 +390,55 @@ func applyFn(ctx context.Context) error {
|
||||||
once.Do(cleanupUserKey)
|
once.Do(cleanupUserKey)
|
||||||
|
|
||||||
o.Output("Starting initial Chef-Client run...")
|
o.Output("Starting initial Chef-Client run...")
|
||||||
if err := p.runChefClient(o, comm); err != nil {
|
|
||||||
return err
|
for attempt := 0; attempt <= p.MaxRetries; attempt++ {
|
||||||
|
// We need a new retry context for each attempt, to make sure
|
||||||
|
// they all get the correct timeout.
|
||||||
|
retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Make sure to (re)connect before trying to run Chef-Client.
|
||||||
|
if err := communicator.Retry(retryCtx, func() error {
|
||||||
|
return comm.Connect(o)
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = p.runChefClient(o, comm)
|
||||||
|
if err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow RFC062 Exit Codes:
|
||||||
|
// https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md
|
||||||
|
exitError, ok := err.(*remote.ExitError)
|
||||||
|
if !ok {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch exitError.ExitStatus {
|
||||||
|
case 35:
|
||||||
|
o.Output("Reboot has been scheduled in the run state")
|
||||||
|
err = nil
|
||||||
|
case 37:
|
||||||
|
o.Output("Reboot needs to be completed")
|
||||||
|
err = nil
|
||||||
|
case 213:
|
||||||
|
o.Output("Chef has exited during a client upgrade")
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !p.RetryOnExitCode[exitError.ExitStatus] {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if attempt < p.MaxRetries {
|
||||||
|
o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry))
|
||||||
|
time.Sleep(p.WaitForRetry)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) {
|
func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) {
|
||||||
|
@ -730,12 +793,14 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
|
||||||
HTTPProxy: d.Get("http_proxy").(string),
|
HTTPProxy: d.Get("http_proxy").(string),
|
||||||
HTTPSProxy: d.Get("https_proxy").(string),
|
HTTPSProxy: d.Get("https_proxy").(string),
|
||||||
NOProxy: getStringList(d.Get("no_proxy")),
|
NOProxy: getStringList(d.Get("no_proxy")),
|
||||||
|
MaxRetries: d.Get("max_retries").(int),
|
||||||
NamedRunList: d.Get("named_run_list").(string),
|
NamedRunList: d.Get("named_run_list").(string),
|
||||||
NodeName: d.Get("node_name").(string),
|
NodeName: d.Get("node_name").(string),
|
||||||
OhaiHints: getStringList(d.Get("ohai_hints")),
|
OhaiHints: getStringList(d.Get("ohai_hints")),
|
||||||
OSType: d.Get("os_type").(string),
|
OSType: d.Get("os_type").(string),
|
||||||
RecreateClient: d.Get("recreate_client").(bool),
|
RecreateClient: d.Get("recreate_client").(bool),
|
||||||
PreventSudo: d.Get("prevent_sudo").(bool),
|
PreventSudo: d.Get("prevent_sudo").(bool),
|
||||||
|
RetryOnExitCode: getRetryOnExitCodes(d),
|
||||||
RunList: getStringList(d.Get("run_list")),
|
RunList: getStringList(d.Get("run_list")),
|
||||||
SecretKey: d.Get("secret_key").(string),
|
SecretKey: d.Get("secret_key").(string),
|
||||||
ServerURL: d.Get("server_url").(string),
|
ServerURL: d.Get("server_url").(string),
|
||||||
|
@ -745,6 +810,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
|
||||||
UserName: d.Get("user_name").(string),
|
UserName: d.Get("user_name").(string),
|
||||||
UserKey: d.Get("user_key").(string),
|
UserKey: d.Get("user_key").(string),
|
||||||
Version: d.Get("version").(string),
|
Version: d.Get("version").(string),
|
||||||
|
WaitForRetry: time.Duration(d.Get("wait_for_retry").(int)) * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure the supplied URL has a trailing slash
|
// Make sure the supplied URL has a trailing slash
|
||||||
|
@ -794,6 +860,31 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
|
||||||
return p, nil
|
return p, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getRetryOnExitCodes(d *schema.ResourceData) map[int]bool {
|
||||||
|
result := make(map[int]bool)
|
||||||
|
|
||||||
|
v, ok := d.GetOk("retry_on_exit_code")
|
||||||
|
if !ok || v == nil {
|
||||||
|
// Use default exit codes
|
||||||
|
result[35] = true
|
||||||
|
result[37] = true
|
||||||
|
result[213] = true
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
switch v := v.(type) {
|
||||||
|
case []interface{}:
|
||||||
|
for _, vv := range v {
|
||||||
|
if vv, ok := vv.(int); ok {
|
||||||
|
result[vv] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("Unsupported type: %T", v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func getStringList(v interface{}) []string {
|
func getStringList(v interface{}) []string {
|
||||||
var result []string
|
var result []string
|
||||||
|
|
||||||
|
|
|
@ -52,13 +52,12 @@ func New(s *terraform.InstanceState) (*Communicator, error) {
|
||||||
|
|
||||||
// Connect implementation of communicator.Communicator interface
|
// Connect implementation of communicator.Communicator interface
|
||||||
func (c *Communicator) Connect(o terraform.UIOutput) error {
|
func (c *Communicator) Connect(o terraform.UIOutput) error {
|
||||||
if c.client != nil {
|
// Set the client to nil since we'll (re)create it
|
||||||
return nil
|
c.client = nil
|
||||||
}
|
|
||||||
|
|
||||||
params := winrm.DefaultParameters
|
params := winrm.DefaultParameters
|
||||||
params.Timeout = formatDuration(c.Timeout())
|
params.Timeout = formatDuration(c.Timeout())
|
||||||
if c.connInfo.NTLM == true {
|
if c.connInfo.NTLM {
|
||||||
params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
|
params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,7 +188,7 @@ func (c *Communicator) newCopyClient() (*winrmcp.Winrmcp, error) {
|
||||||
MaxOperationsPerShell: 15, // lowest common denominator
|
MaxOperationsPerShell: 15, // lowest common denominator
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.connInfo.NTLM == true {
|
if c.connInfo.NTLM {
|
||||||
config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
|
config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ resource "aws_instance" "web" {
|
||||||
recreate_client = true
|
recreate_client = true
|
||||||
user_name = "bork"
|
user_name = "bork"
|
||||||
user_key = "${file("../bork.pem")}"
|
user_key = "${file("../bork.pem")}"
|
||||||
version = "12.4.1"
|
version = "15.10.13"
|
||||||
# If you have a self signed cert on your chef server change this to :verify_none
|
# If you have a self signed cert on your chef server change this to :verify_none
|
||||||
ssl_verify_mode = ":verify_peer"
|
ssl_verify_mode = ":verify_peer"
|
||||||
}
|
}
|
||||||
|
@ -109,6 +109,9 @@ The following arguments are supported:
|
||||||
|
|
||||||
* `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections.
|
* `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections.
|
||||||
|
|
||||||
|
* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process
|
||||||
|
after receiving an exit code in the `retry_on_exit_code` list. Defaults to `0`.
|
||||||
|
|
||||||
* `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the
|
* `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the
|
||||||
initial Chef Client run. The run-list must already exist in the Policyfile that defines
|
initial Chef Client run. The run-list must already exist in the Policyfile that defines
|
||||||
`policy_name`. Only applies when `use_policyfile` is `true`.
|
`policy_name`. Only applies when `use_policyfile` is `true`.
|
||||||
|
@ -131,6 +134,11 @@ The following arguments are supported:
|
||||||
* `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and
|
* `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and
|
||||||
Client before registering the new Chef Client.
|
Client before registering the new Chef Client.
|
||||||
|
|
||||||
|
* `retry_on_exit_code (array)` - (Optional) The exit codes upon which Terraform should
|
||||||
|
gracefully retry the provisioning process. Intended for use with
|
||||||
|
[Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md).
|
||||||
|
(Defaults to `[35, 37, 213]`)
|
||||||
|
|
||||||
* `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial
|
* `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial
|
||||||
Chef Client run. The run-list will also be saved to the Chef Server after a successful
|
Chef Client run. The run-list will also be saved to the Chef Server after a successful
|
||||||
initial run. Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true`
|
initial run. Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true`
|
||||||
|
@ -169,3 +177,7 @@ The following arguments are supported:
|
||||||
|
|
||||||
* `version (string)` - (Optional) The Chef Client version to install on the remote machine.
|
* `version (string)` - (Optional) The Chef Client version to install on the remote machine.
|
||||||
If not set, the latest available version will be installed.
|
If not set, the latest available version will be installed.
|
||||||
|
|
||||||
|
* `wait_for_retry (integer)` - (Optional) Amount of time in seconds to wait before
|
||||||
|
retrying the provisioning process after receiving an exit code in the `retry_on_exit_code`
|
||||||
|
list. Defaults to `30`.
|
||||||
|
|
Loading…
Reference in New Issue