diff --git a/backend/atlas/backend.go b/backend/atlas/backend.go new file mode 100644 index 000000000..0002a3b21 --- /dev/null +++ b/backend/atlas/backend.go @@ -0,0 +1,196 @@ +package atlas + +import ( + "context" + "fmt" + "net/url" + "os" + "strings" + "sync" + + "github.com/hashicorp/terraform/backend" + "github.com/hashicorp/terraform/helper/schema" + "github.com/hashicorp/terraform/state" + "github.com/hashicorp/terraform/state/remote" + "github.com/hashicorp/terraform/terraform" + "github.com/mitchellh/cli" + "github.com/mitchellh/colorstring" +) + +// Backend is an implementation of EnhancedBackend that performs all operations +// in Atlas. State must currently also be stored in Atlas, although it is worth +// investigating in the future if state storage can be external as well. +type Backend struct { + // CLI and Colorize control the CLI output. If CLI is nil then no CLI + // output will be done. If CLIColor is nil then no coloring will be done. + CLI cli.Ui + CLIColor *colorstring.Colorize + + // ContextOpts are the base context options to set when initializing a + // Terraform context. Many of these will be overridden or merged by + // Operation. See Operation for more details. + ContextOpts *terraform.ContextOpts + + //--------------------------------------------------------------- + // Internal fields, do not set + //--------------------------------------------------------------- + // stateClient is the legacy state client, setup in Configure + stateClient *stateClient + + // schema is the schema for configuration, set by init + schema *schema.Backend + once sync.Once + + // opLock locks operations + opLock sync.Mutex +} + +func (b *Backend) Input( + ui terraform.UIInput, c *terraform.ResourceConfig) (*terraform.ResourceConfig, error) { + b.once.Do(b.init) + return b.schema.Input(ui, c) +} + +func (b *Backend) Validate(c *terraform.ResourceConfig) ([]string, []error) { + b.once.Do(b.init) + return b.schema.Validate(c) +} + +func (b *Backend) Configure(c *terraform.ResourceConfig) error { + b.once.Do(b.init) + return b.schema.Configure(c) +} + +func (b *Backend) State() (state.State, error) { + return &remote.State{Client: b.stateClient}, nil +} + +// Operation implements backend.Enhanced +// +// This will initialize an in-memory terraform.Context to perform the +// operation within this process. +// +// The given operation parameter will be merged with the ContextOpts on +// the structure with the following rules. If a rule isn't specified and the +// name conflicts, assume that the field is overwritten if set. +func (b *Backend) Operation(ctx context.Context, op *backend.Operation) (*backend.RunningOperation, error) { + // Determine the function to call for our operation + var f func(context.Context, *backend.Operation, *backend.RunningOperation) + switch op.Type { + /* + case backend.OperationTypeRefresh: + f = b.opRefresh + case backend.OperationTypePlan: + f = b.opPlan + case backend.OperationTypeApply: + f = b.opApply + */ + default: + return nil, fmt.Errorf( + "Unsupported operation type: %s\n\n"+ + "This is a bug in Terraform and should be reported.", + op.Type) + } + + // Lock + b.opLock.Lock() + + // Build our running operation + runningCtx, runningCtxCancel := context.WithCancel(context.Background()) + runningOp := &backend.RunningOperation{Context: runningCtx} + + // Do it + go func() { + defer b.opLock.Unlock() + defer runningCtxCancel() + f(ctx, op, runningOp) + }() + + // Return + return runningOp, nil +} + +// Colorize returns the Colorize structure that can be used for colorizing +// output. This is gauranteed to always return a non-nil value and so is useful +// as a helper to wrap any potentially colored strings. +func (b *Backend) Colorize() *colorstring.Colorize { + if b.CLIColor != nil { + return b.CLIColor + } + + return &colorstring.Colorize{ + Colors: colorstring.DefaultColors, + Disable: true, + } +} + +func (b *Backend) init() { + b.schema = &schema.Backend{ + Schema: map[string]*schema.Schema{ + "name": &schema.Schema{ + Type: schema.TypeString, + Required: true, + Description: schemaDescriptions["name"], + }, + + "access_token": &schema.Schema{ + Type: schema.TypeString, + Required: true, + Description: schemaDescriptions["access_token"], + DefaultFunc: schema.EnvDefaultFunc("ATLAS_TOKEN", nil), + }, + + "address": &schema.Schema{ + Type: schema.TypeString, + Optional: true, + Default: defaultAtlasServer, + Description: schemaDescriptions["address"], + }, + }, + + ConfigureFunc: b.schemaConfigure, + } +} + +func (b *Backend) schemaConfigure(ctx context.Context) error { + d := schema.FromContextBackendConfig(ctx) + + // Parse the address + addr := d.Get("address").(string) + addrUrl, err := url.Parse(addr) + if err != nil { + return fmt.Errorf("Error parsing 'address': %s", err) + } + + // Parse the org/env + name := d.Get("name").(string) + parts := strings.Split(name, "/") + if len(parts) != 2 { + return fmt.Errorf("malformed name '%s', expected format '/'", name) + } + org := parts[0] + env := parts[1] + + // Setup the client + b.stateClient = &stateClient{ + Server: addr, + ServerURL: addrUrl, + AccessToken: d.Get("access_token").(string), + User: org, + Name: env, + + // This is optionally set during Atlas Terraform runs. + RunId: os.Getenv("ATLAS_RUN_ID"), + } + + return nil +} + +var schemaDescriptions = map[string]string{ + "name": "Full name of the environment in Atlas, such as 'hashicorp/myenv'", + "access_token": "Access token to use to access Atlas. If ATLAS_TOKEN is set then\n" + + "this will override any saved value for this.", + "address": "Address to your Atlas installation. This defaults to the publicly\n" + + "hosted version at 'https://atlas.hashicorp.com/'. This address\n" + + "should contain the full HTTP scheme to use.", +} diff --git a/backend/atlas/backend_test.go b/backend/atlas/backend_test.go new file mode 100644 index 000000000..511df6e28 --- /dev/null +++ b/backend/atlas/backend_test.go @@ -0,0 +1,11 @@ +package atlas + +import ( + "testing" + + "github.com/hashicorp/terraform/backend" +) + +func TestImpl(t *testing.T) { + var _ backend.Backend = new(Backend) +} diff --git a/backend/atlas/state_client.go b/backend/atlas/state_client.go new file mode 100644 index 000000000..e49cb7192 --- /dev/null +++ b/backend/atlas/state_client.go @@ -0,0 +1,319 @@ +package atlas + +import ( + "bytes" + "crypto/md5" + "crypto/tls" + "crypto/x509" + "encoding/base64" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "path" + + "github.com/hashicorp/go-cleanhttp" + "github.com/hashicorp/go-retryablehttp" + "github.com/hashicorp/go-rootcerts" + "github.com/hashicorp/terraform/state/remote" + "github.com/hashicorp/terraform/terraform" +) + +const ( + // defaultAtlasServer is used when no address is given + defaultAtlasServer = "https://atlas.hashicorp.com/" + atlasTokenHeader = "X-Atlas-Token" +) + +// AtlasClient implements the Client interface for an Atlas compatible server. +type stateClient struct { + Server string + ServerURL *url.URL + User string + Name string + AccessToken string + RunId string + HTTPClient *retryablehttp.Client + + conflictHandlingAttempted bool +} + +func (c *stateClient) Get() (*remote.Payload, error) { + // Make the HTTP request + req, err := retryablehttp.NewRequest("GET", c.url().String(), nil) + if err != nil { + return nil, fmt.Errorf("Failed to make HTTP request: %v", err) + } + + req.Header.Set(atlasTokenHeader, c.AccessToken) + + // Request the url + client, err := c.http() + if err != nil { + return nil, err + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + // Handle the common status codes + switch resp.StatusCode { + case http.StatusOK: + // Handled after + case http.StatusNoContent: + return nil, nil + case http.StatusNotFound: + return nil, nil + case http.StatusUnauthorized: + return nil, fmt.Errorf("HTTP remote state endpoint requires auth") + case http.StatusForbidden: + return nil, fmt.Errorf("HTTP remote state endpoint invalid auth") + case http.StatusInternalServerError: + return nil, fmt.Errorf("HTTP remote state internal server error") + default: + return nil, fmt.Errorf( + "Unexpected HTTP response code: %d\n\nBody: %s", + resp.StatusCode, c.readBody(resp.Body)) + } + + // Read in the body + buf := bytes.NewBuffer(nil) + if _, err := io.Copy(buf, resp.Body); err != nil { + return nil, fmt.Errorf("Failed to read remote state: %v", err) + } + + // Create the payload + payload := &remote.Payload{ + Data: buf.Bytes(), + } + + if len(payload.Data) == 0 { + return nil, nil + } + + // Check for the MD5 + if raw := resp.Header.Get("Content-MD5"); raw != "" { + md5, err := base64.StdEncoding.DecodeString(raw) + if err != nil { + return nil, fmt.Errorf("Failed to decode Content-MD5 '%s': %v", raw, err) + } + + payload.MD5 = md5 + } else { + // Generate the MD5 + hash := md5.Sum(payload.Data) + payload.MD5 = hash[:] + } + + return payload, nil +} + +func (c *stateClient) Put(state []byte) error { + // Get the target URL + base := c.url() + + // Generate the MD5 + hash := md5.Sum(state) + b64 := base64.StdEncoding.EncodeToString(hash[:]) + + // Make the HTTP client and request + req, err := retryablehttp.NewRequest("PUT", base.String(), bytes.NewReader(state)) + if err != nil { + return fmt.Errorf("Failed to make HTTP request: %v", err) + } + + // Prepare the request + req.Header.Set(atlasTokenHeader, c.AccessToken) + req.Header.Set("Content-MD5", b64) + req.Header.Set("Content-Type", "application/json") + req.ContentLength = int64(len(state)) + + // Make the request + client, err := c.http() + if err != nil { + return err + } + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("Failed to upload state: %v", err) + } + defer resp.Body.Close() + + // Handle the error codes + switch resp.StatusCode { + case http.StatusOK: + return nil + case http.StatusConflict: + return c.handleConflict(c.readBody(resp.Body), state) + default: + return fmt.Errorf( + "HTTP error: %d\n\nBody: %s", + resp.StatusCode, c.readBody(resp.Body)) + } +} + +func (c *stateClient) Delete() error { + // Make the HTTP request + req, err := retryablehttp.NewRequest("DELETE", c.url().String(), nil) + if err != nil { + return fmt.Errorf("Failed to make HTTP request: %v", err) + } + req.Header.Set(atlasTokenHeader, c.AccessToken) + + // Make the request + client, err := c.http() + if err != nil { + return err + } + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("Failed to delete state: %v", err) + } + defer resp.Body.Close() + + // Handle the error codes + switch resp.StatusCode { + case http.StatusOK: + return nil + case http.StatusNoContent: + return nil + case http.StatusNotFound: + return nil + default: + return fmt.Errorf( + "HTTP error: %d\n\nBody: %s", + resp.StatusCode, c.readBody(resp.Body)) + } +} + +func (c *stateClient) readBody(b io.Reader) string { + var buf bytes.Buffer + if _, err := io.Copy(&buf, b); err != nil { + return fmt.Sprintf("Error reading body: %s", err) + } + + result := buf.String() + if result == "" { + result = "" + } + + return result +} + +func (c *stateClient) url() *url.URL { + values := url.Values{} + + values.Add("atlas_run_id", c.RunId) + + return &url.URL{ + Scheme: c.ServerURL.Scheme, + Host: c.ServerURL.Host, + Path: path.Join("api/v1/terraform/state", c.User, c.Name), + RawQuery: values.Encode(), + } +} + +func (c *stateClient) http() (*retryablehttp.Client, error) { + if c.HTTPClient != nil { + return c.HTTPClient, nil + } + tlsConfig := &tls.Config{} + err := rootcerts.ConfigureTLS(tlsConfig, &rootcerts.Config{ + CAFile: os.Getenv("ATLAS_CAFILE"), + CAPath: os.Getenv("ATLAS_CAPATH"), + }) + if err != nil { + return nil, err + } + rc := retryablehttp.NewClient() + + rc.CheckRetry = func(resp *http.Response, err error) (bool, error) { + if err != nil { + // don't bother retrying if the certs don't match + if err, ok := err.(*url.Error); ok { + if _, ok := err.Err.(x509.UnknownAuthorityError); ok { + return false, nil + } + } + // continue retrying + return true, nil + } + return retryablehttp.DefaultRetryPolicy(resp, err) + } + + t := cleanhttp.DefaultTransport() + t.TLSClientConfig = tlsConfig + rc.HTTPClient.Transport = t + + c.HTTPClient = rc + return rc, nil +} + +// Atlas returns an HTTP 409 - Conflict if the pushed state reports the same +// Serial number but the checksum of the raw content differs. This can +// sometimes happen when Terraform changes state representation internally +// between versions in a way that's semantically neutral but affects the JSON +// output and therefore the checksum. +// +// Here we detect and handle this situation by ticking the serial and retrying +// iff for the previous state and the proposed state: +// +// * the serials match +// * the parsed states are Equal (semantically equivalent) +// +// In other words, in this situation Terraform can override Atlas's detected +// conflict by asserting that the state it is pushing is indeed correct. +func (c *stateClient) handleConflict(msg string, state []byte) error { + log.Printf("[DEBUG] Handling Atlas conflict response: %s", msg) + + if c.conflictHandlingAttempted { + log.Printf("[DEBUG] Already attempted conflict resolution; returning conflict.") + } else { + c.conflictHandlingAttempted = true + log.Printf("[DEBUG] Atlas reported conflict, checking for equivalent states.") + + payload, err := c.Get() + if err != nil { + return conflictHandlingError(err) + } + + currentState, err := terraform.ReadState(bytes.NewReader(payload.Data)) + if err != nil { + return conflictHandlingError(err) + } + + proposedState, err := terraform.ReadState(bytes.NewReader(state)) + if err != nil { + return conflictHandlingError(err) + } + + if statesAreEquivalent(currentState, proposedState) { + log.Printf("[DEBUG] States are equivalent, incrementing serial and retrying.") + proposedState.Serial++ + var buf bytes.Buffer + if err := terraform.WriteState(proposedState, &buf); err != nil { + return conflictHandlingError(err) + + } + return c.Put(buf.Bytes()) + } else { + log.Printf("[DEBUG] States are not equivalent, returning conflict.") + } + } + + return fmt.Errorf( + "Atlas detected a remote state conflict.\n\nMessage: %s", msg) +} + +func conflictHandlingError(err error) error { + return fmt.Errorf( + "Error while handling a conflict response from Atlas: %s", err) +} + +func statesAreEquivalent(current, proposed *terraform.State) bool { + return current.Serial == proposed.Serial && current.Equal(proposed) +} diff --git a/backend/atlas/state_client_test.go b/backend/atlas/state_client_test.go new file mode 100644 index 000000000..035502fd6 --- /dev/null +++ b/backend/atlas/state_client_test.go @@ -0,0 +1,384 @@ +package atlas + +import ( + "bytes" + "crypto/md5" + "crypto/tls" + "crypto/x509" + "encoding/json" + "net/http" + "net/http/httptest" + "net/url" + "os" + "testing" + "time" + + "github.com/hashicorp/terraform/backend" + "github.com/hashicorp/terraform/helper/acctest" + "github.com/hashicorp/terraform/state/remote" + "github.com/hashicorp/terraform/terraform" +) + +func testStateClient(t *testing.T, c map[string]interface{}) remote.Client { + b := backend.TestBackendConfig(t, &Backend{}, c) + raw, err := b.State() + if err != nil { + t.Fatalf("err: %s", err) + } + + s := raw.(*remote.State) + return s.Client +} + +func TestStateClient_impl(t *testing.T) { + var _ remote.Client = new(stateClient) +} + +func TestStateClient(t *testing.T) { + acctest.RemoteTestPrecheck(t) + + token := os.Getenv("ATLAS_TOKEN") + if token == "" { + t.Skipf("skipping, ATLAS_TOKEN must be set") + } + + client := testStateClient(t, map[string]interface{}{ + "access_token": token, + "name": "hashicorp/test-remote-state", + }) + + remote.TestClient(t, client) +} + +func TestStateClient_noRetryOnBadCerts(t *testing.T) { + acctest.RemoteTestPrecheck(t) + + client := testStateClient(t, map[string]interface{}{ + "access_token": "NOT_REQUIRED", + "name": "hashicorp/test-remote-state", + }) + + ac := client.(*stateClient) + // trigger the StateClient to build the http client and assign HTTPClient + httpClient, err := ac.http() + if err != nil { + t.Fatal(err) + } + + // remove the CA certs from the client + brokenCfg := &tls.Config{ + RootCAs: new(x509.CertPool), + } + httpClient.HTTPClient.Transport.(*http.Transport).TLSClientConfig = brokenCfg + + // Instrument CheckRetry to make sure we didn't retry + retries := 0 + oldCheck := httpClient.CheckRetry + httpClient.CheckRetry = func(resp *http.Response, err error) (bool, error) { + if retries > 0 { + t.Fatal("retried after certificate error") + } + retries++ + return oldCheck(resp, err) + } + + _, err = client.Get() + if err != nil { + if err, ok := err.(*url.Error); ok { + if _, ok := err.Err.(x509.UnknownAuthorityError); ok { + return + } + } + } + + t.Fatalf("expected x509.UnknownAuthorityError, got %v", err) +} + +func TestStateClient_ReportedConflictEqualStates(t *testing.T) { + fakeAtlas := newFakeAtlas(t, testStateModuleOrderChange) + srv := fakeAtlas.Server() + defer srv.Close() + + client := testStateClient(t, map[string]interface{}{ + "access_token": "sometoken", + "name": "someuser/some-test-remote-state", + "address": srv.URL, + }) + + state, err := terraform.ReadState(bytes.NewReader(testStateModuleOrderChange)) + if err != nil { + t.Fatalf("err: %s", err) + } + + var stateJson bytes.Buffer + if err := terraform.WriteState(state, &stateJson); err != nil { + t.Fatalf("err: %s", err) + } + if err := client.Put(stateJson.Bytes()); err != nil { + t.Fatalf("err: %s", err) + } +} + +func TestStateClient_NoConflict(t *testing.T) { + fakeAtlas := newFakeAtlas(t, testStateSimple) + srv := fakeAtlas.Server() + defer srv.Close() + + client := testStateClient(t, map[string]interface{}{ + "access_token": "sometoken", + "name": "someuser/some-test-remote-state", + "address": srv.URL, + }) + + state, err := terraform.ReadState(bytes.NewReader(testStateSimple)) + if err != nil { + t.Fatalf("err: %s", err) + } + + fakeAtlas.NoConflictAllowed(true) + + var stateJson bytes.Buffer + if err := terraform.WriteState(state, &stateJson); err != nil { + t.Fatalf("err: %s", err) + } + + if err := client.Put(stateJson.Bytes()); err != nil { + t.Fatalf("err: %s", err) + } +} + +func TestStateClient_LegitimateConflict(t *testing.T) { + fakeAtlas := newFakeAtlas(t, testStateSimple) + srv := fakeAtlas.Server() + defer srv.Close() + + client := testStateClient(t, map[string]interface{}{ + "access_token": "sometoken", + "name": "someuser/some-test-remote-state", + "address": srv.URL, + }) + + state, err := terraform.ReadState(bytes.NewReader(testStateSimple)) + if err != nil { + t.Fatalf("err: %s", err) + } + + var buf bytes.Buffer + terraform.WriteState(state, &buf) + + // Changing the state but not the serial. Should generate a conflict. + state.RootModule().Outputs["drift"] = &terraform.OutputState{ + Type: "string", + Sensitive: false, + Value: "happens", + } + + var stateJson bytes.Buffer + if err := terraform.WriteState(state, &stateJson); err != nil { + t.Fatalf("err: %s", err) + } + if err := client.Put(stateJson.Bytes()); err == nil { + t.Fatal("Expected error from state conflict, got none.") + } +} + +func TestStateClient_UnresolvableConflict(t *testing.T) { + fakeAtlas := newFakeAtlas(t, testStateSimple) + + // Something unexpected causes Atlas to conflict in a way that we can't fix. + fakeAtlas.AlwaysConflict(true) + + srv := fakeAtlas.Server() + defer srv.Close() + + client := testStateClient(t, map[string]interface{}{ + "access_token": "sometoken", + "name": "someuser/some-test-remote-state", + "address": srv.URL, + }) + + state, err := terraform.ReadState(bytes.NewReader(testStateSimple)) + if err != nil { + t.Fatalf("err: %s", err) + } + + var stateJson bytes.Buffer + if err := terraform.WriteState(state, &stateJson); err != nil { + t.Fatalf("err: %s", err) + } + doneCh := make(chan struct{}) + go func() { + defer close(doneCh) + if err := client.Put(stateJson.Bytes()); err == nil { + t.Fatal("Expected error from state conflict, got none.") + } + }() + + select { + case <-doneCh: + // OK + case <-time.After(500 * time.Millisecond): + t.Fatalf("Timed out after 500ms, probably because retrying infinitely.") + } +} + +// Stub Atlas HTTP API for a given state JSON string; does checksum-based +// conflict detection equivalent to Atlas's. +type fakeAtlas struct { + state []byte + t *testing.T + + // Used to test that we only do the special conflict handling retry once. + alwaysConflict bool + + // Used to fail the test immediately if a conflict happens. + noConflictAllowed bool +} + +func newFakeAtlas(t *testing.T, state []byte) *fakeAtlas { + return &fakeAtlas{ + state: state, + t: t, + } +} + +func (f *fakeAtlas) Server() *httptest.Server { + return httptest.NewServer(http.HandlerFunc(f.handler)) +} + +func (f *fakeAtlas) CurrentState() *terraform.State { + // we read the state manually here, because terraform may alter state + // during read + currentState := &terraform.State{} + err := json.Unmarshal(f.state, currentState) + if err != nil { + f.t.Fatalf("err: %s", err) + } + return currentState +} + +func (f *fakeAtlas) CurrentSerial() int64 { + return f.CurrentState().Serial +} + +func (f *fakeAtlas) CurrentSum() [md5.Size]byte { + return md5.Sum(f.state) +} + +func (f *fakeAtlas) AlwaysConflict(b bool) { + f.alwaysConflict = b +} + +func (f *fakeAtlas) NoConflictAllowed(b bool) { + f.noConflictAllowed = b +} + +func (f *fakeAtlas) handler(resp http.ResponseWriter, req *http.Request) { + // access tokens should only be sent as a header + if req.FormValue("access_token") != "" { + http.Error(resp, "access_token in request params", http.StatusBadRequest) + return + } + + if req.Header.Get(atlasTokenHeader) == "" { + http.Error(resp, "missing access token", http.StatusBadRequest) + return + } + + switch req.Method { + case "GET": + // Respond with the current stored state. + resp.Header().Set("Content-Type", "application/json") + resp.Write(f.state) + case "PUT": + var buf bytes.Buffer + buf.ReadFrom(req.Body) + sum := md5.Sum(buf.Bytes()) + + // we read the state manually here, because terraform may alter state + // during read + state := &terraform.State{} + err := json.Unmarshal(buf.Bytes(), state) + if err != nil { + f.t.Fatalf("err: %s", err) + } + + conflict := f.CurrentSerial() == state.Serial && f.CurrentSum() != sum + conflict = conflict || f.alwaysConflict + if conflict { + if f.noConflictAllowed { + f.t.Fatal("Got conflict when NoConflictAllowed was set.") + } + http.Error(resp, "Conflict", 409) + } else { + f.state = buf.Bytes() + resp.WriteHeader(200) + } + } +} + +// This is a tfstate file with the module order changed, which is a structural +// but not a semantic difference. Terraform will sort these modules as it +// loads the state. +var testStateModuleOrderChange = []byte( + `{ + "version": 3, + "serial": 1, + "modules": [ + { + "path": [ + "root", + "child2", + "grandchild" + ], + "outputs": { + "foo": { + "sensitive": false, + "type": "string", + "value": "bar" + } + }, + "resources": null + }, + { + "path": [ + "root", + "child1", + "grandchild" + ], + "outputs": { + "foo": { + "sensitive": false, + "type": "string", + "value": "bar" + } + }, + "resources": null + } + ] +} +`) + +var testStateSimple = []byte( + `{ + "version": 3, + "serial": 2, + "lineage": "c00ad9ac-9b35-42fe-846e-b06f0ef877e9", + "modules": [ + { + "path": [ + "root" + ], + "outputs": { + "foo": { + "sensitive": false, + "type": "string", + "value": "bar" + } + }, + "resources": {}, + "depends_on": [] + } + ] +} +`)