Merge pull request #24149 from mlafeldt/fix-oss-state-locking

Fix & improve state locking of OSS backend
This commit is contained in:
James Bardin 2020-03-11 10:28:12 -04:00 committed by GitHub
commit f6221100ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 89 additions and 87 deletions

View File

@ -4,6 +4,11 @@ import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"runtime"
"strings"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
"github.com/aliyun/alibaba-cloud-sdk-go/services/sts"
@ -12,10 +17,11 @@ import (
"github.com/hashicorp/terraform/helper/schema"
"github.com/hashicorp/terraform/helper/validation"
"github.com/jmespath/go-jmespath"
"io/ioutil"
"os"
"runtime"
"strings"
"log"
"net/http"
"strconv"
"time"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
@ -24,10 +30,6 @@ import (
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/terraform/version"
"github.com/mitchellh/go-homedir"
"log"
"net/http"
"strconv"
"time"
)
// New creates a new backend for OSS remote state.

View File

@ -12,9 +12,10 @@ import (
"github.com/hashicorp/terraform/state/remote"
"github.com/hashicorp/terraform/states"
"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
"log"
"path"
"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
)
const (
@ -38,28 +39,12 @@ func (b *Backend) remoteClient(name string) (*RemoteClient, error) {
otsClient: b.otsClient,
}
if b.otsEndpoint != "" && b.otsTable != "" {
table, err := b.otsClient.DescribeTable(&tablestore.DescribeTableRequest{
_, err := b.otsClient.DescribeTable(&tablestore.DescribeTableRequest{
TableName: b.otsTable,
})
if err != nil {
return client, fmt.Errorf("Error describing table store %s: %#v", b.otsTable, err)
}
for _, t := range table.TableMeta.SchemaEntry {
pkMeta := TableStorePrimaryKeyMeta{
PKName: *t.Name,
}
if *t.Type == tablestore.PrimaryKeyType_INTEGER {
pkMeta.PKType = "Integer"
} else if *t.Type == tablestore.PrimaryKeyType_STRING {
pkMeta.PKType = "String"
} else if *t.Type == tablestore.PrimaryKeyType_BINARY {
pkMeta.PKType = "Binary"
} else {
return client, fmt.Errorf("Unsupported PrimaryKey type: %d.", *t.Type)
}
client.otsTabkePK = pkMeta
break
}
}
return client, nil

View File

@ -177,11 +177,11 @@ func deleteOSSBucket(t *testing.T, ossClient *oss.Client, bucketName string) {
}
}
// create the dynamoDB table, and wait until we can query it.
// create the tablestore table, and wait until we can query it.
func createTablestoreTable(t *testing.T, otsClient *tablestore.TableStoreClient, tableName string) {
tableMeta := new(tablestore.TableMeta)
tableMeta.TableName = tableName
tableMeta.AddPrimaryKeyColumn("testbackend", tablestore.PrimaryKeyType_STRING)
tableMeta.AddPrimaryKeyColumn(pkName, tablestore.PrimaryKeyType_STRING)
tableOption := new(tablestore.TableOption)
tableOption.TimeToAlive = -1

View File

@ -8,23 +8,24 @@ import (
"io"
"encoding/hex"
"log"
"sync"
"time"
"github.com/aliyun/aliyun-oss-go-sdk/oss"
"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
"github.com/hashicorp/go-multierror"
uuid "github.com/hashicorp/go-uuid"
"github.com/hashicorp/terraform/helper/hashcode"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/state/remote"
"github.com/pkg/errors"
"log"
"sync"
"time"
)
// Store the last saved serial in tablestore with this suffix for consistency checks.
const (
// Store the last saved serial in tablestore with this suffix for consistency checks.
stateIDSuffix = "-md5"
statePKValue = "terraform-remote-state-lock"
pkName = "LockID"
)
var (
@ -39,11 +40,6 @@ var (
// test hook called when checksums don't match
var testChecksumHook func()
type TableStorePrimaryKeyMeta struct {
PKName string
PKType string
}
type RemoteClient struct {
ossClient *oss.Client
otsClient *tablestore.TableStoreClient
@ -55,7 +51,6 @@ type RemoteClient struct {
info *state.LockInfo
mu sync.Mutex
otsTable string
otsTabkePK TableStorePrimaryKeyMeta
}
func (c *RemoteClient) Get() (payload *remote.Payload, err error) {
@ -157,6 +152,8 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
return "", nil
}
info.Path = c.lockPath()
if info.ID == "" {
lockID, err := uuid.GenerateUUID()
if err != nil {
@ -170,16 +167,12 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
Columns: []tablestore.AttributeColumn{
{
ColumnName: "LockID",
Value: c.lockFile,
},
{
ColumnName: "Info",
Value: string(info.Marshal()),
@ -190,7 +183,7 @@ func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
},
}
log.Printf("[DEBUG] Recoring state lock in tablestore: %#v", putParams)
log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams)
_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
PutRowChange: putParams,
@ -223,12 +216,12 @@ func (c *RemoteClient) getMD5() ([]byte, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
ColumnsToGet: []string{"LockID", "Digest"},
ColumnsToGet: []string{pkName, "Digest"},
MaxVersion: 1,
}
@ -270,23 +263,19 @@ func (c *RemoteClient) putMD5(sum []byte) error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
Columns: []tablestore.AttributeColumn{
{
ColumnName: "LockID",
Value: c.lockPath() + stateIDSuffix,
},
{
ColumnName: "Digest",
Value: hex.EncodeToString(sum),
},
},
Condition: &tablestore.RowCondition{
RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST,
RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE,
},
}
@ -315,8 +304,8 @@ func (c *RemoteClient) deleteMD5() error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath() + stateIDSuffix,
},
},
},
@ -341,12 +330,12 @@ func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
ColumnsToGet: []string{"LockID", "Info"},
ColumnsToGet: []string{pkName, "Info"},
MaxVersion: 1,
}
@ -394,8 +383,8 @@ func (c *RemoteClient) Unlock(id string) error {
PrimaryKey: &tablestore.PrimaryKey{
PrimaryKeys: []*tablestore.PrimaryKeyColumn{
{
ColumnName: c.otsTabkePK.PKName,
Value: c.getPKValue(),
ColumnName: pkName,
Value: c.lockPath(),
},
},
},
@ -457,23 +446,6 @@ func (c *RemoteClient) getObj() (*remote.Payload, error) {
return payload, nil
}
func (c *RemoteClient) getPKValue() (value interface{}) {
value = statePKValue
if c.otsTabkePK.PKType == "Integer" {
value = hashcode.String(statePKValue)
} else if c.otsTabkePK.PKType == "Binary" {
value = stringToBin(statePKValue)
}
return
}
func stringToBin(s string) (binString string) {
for _, c := range s {
binString = fmt.Sprintf("%s%b", binString, c)
}
return
}
const errBadChecksumFmt = `state data in OSS does not have the expected content.
This may be caused by unusually long delays in OSS processing a previous state

View File

@ -8,6 +8,7 @@ import (
"bytes"
"crypto/md5"
"github.com/hashicorp/terraform/backend"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/state/remote"
@ -84,6 +85,52 @@ func TestRemoteClientLocks(t *testing.T) {
remote.TestRemoteLocks(t, s1.(*remote.State).Client, s2.(*remote.State).Client)
}
// verify that the backend can handle more than one state in the same table
func TestRemoteClientLocks_multipleStates(t *testing.T) {
testACC(t)
bucketName := fmt.Sprintf("tf-remote-oss-test-force-%x", time.Now().Unix())
tableName := fmt.Sprintf("tfRemoteTestForce%x", time.Now().Unix())
path := "testState"
b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{
"bucket": bucketName,
"prefix": path,
"encrypt": true,
"tablestore_table": tableName,
"tablestore_endpoint": RemoteTestUsedOTSEndpoint,
})).(*Backend)
b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{
"bucket": bucketName,
"prefix": path,
"encrypt": true,
"tablestore_table": tableName,
"tablestore_endpoint": RemoteTestUsedOTSEndpoint,
})).(*Backend)
createOSSBucket(t, b1.ossClient, bucketName)
defer deleteOSSBucket(t, b1.ossClient, bucketName)
createTablestoreTable(t, b1.otsClient, tableName)
defer deleteTablestoreTable(t, b1.otsClient, tableName)
s1, err := b1.StateMgr("s1")
if err != nil {
t.Fatal(err)
}
if _, err := s1.Lock(state.NewLockInfo()); err != nil {
t.Fatal("failed to get lock for s1:", err)
}
// s1 is now locked, s2 should not be locked as it's a different state file
s2, err := b2.StateMgr("s2")
if err != nil {
t.Fatal(err)
}
if _, err := s2.Lock(state.NewLockInfo()); err != nil {
t.Fatal("failed to get lock for s2:", err)
}
}
// verify that we can unlock a state with an existing lock
func TestRemoteForceUnlock(t *testing.T) {
testACC(t)

View File

@ -18,9 +18,6 @@ the `tablestore_table` field to an existing TableStore table name.
-> **Note:** The OSS backend is available from terraform version 0.12.2.
!> **Warning:** If you set `tablestore_table`, please ensure the table does not contain primary key named
`LockID`, `Info` and `Digest`. Otherwise, there will throw an error `OTSParameterInvalid Duplicated attribute column ...`.
## Example Configuration
```hcl
@ -39,7 +36,7 @@ terraform {
This assumes we have a [OSS Bucket](https://www.terraform.io/docs/providers/alicloud/r/oss_bucket.html) created called `bucket-for-terraform-state`,
a [OTS Instance](https://www.terraform.io/docs/providers/alicloud/r/ots_instance.html) called `terraform-remote` and
a [OTS TableStore](https://www.terraform.io/docs/providers/alicloud/r/ots_table.html) called `statelock`. The
Terraform state will be written into the file `path/mystate/version-1.tfstate`. The `TableStore` must have a primary key of type `string`.
Terraform state will be written into the file `path/mystate/version-1.tfstate`. The `TableStore` must have a primary key named `LockID` of type `String`.
## Data Source Configuration
@ -90,7 +87,7 @@ The following configuration options or environment variables are supported:
* `prefix` - (Opeional) The path directory of the state file will be stored. Default to "env:".
* `key` - (Optional) The name of the state file. Defaults to `terraform.tfstate`.
* `tablestore_endpoint` / `ALICLOUD_TABLESTORE_ENDPOINT` - (Optional) A custom endpoint for the TableStore API.
* `tablestore_table` - (Optional) A TableStore table for state locking and consistency.
* `tablestore_table` - (Optional) A TableStore table for state locking and consistency. The table must have a primary key named `LockID` of type `String`.
* `encrypt` - (Optional) Whether to enable server side
encryption of the state file. If it is true, OSS will use 'AES256' encryption algorithm to encrypt state file.
* `acl` - (Optional) [Object
@ -113,4 +110,3 @@ The nested `assume_role` block supports the following:
* `session_expiration` - (Optional) The time after which the established session for assuming role expires. Valid value range: [900-3600] seconds. Default to 3600 (in this case Alibaba Cloud use own default value). It supports environment variable `ALICLOUD_ASSUME_ROLE_SESSION_EXPIRATION`.
-> **Note:** If you want to store state in the custom OSS endpoint, you can specify a environment variable `OSS_ENDPOINT`, like "oss-cn-beijing-internal.aliyuncs.com"