From 3e5bfa736418e61bd882b2aafe08fb0ba4b80574 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Mon, 12 Jul 2021 14:26:35 -0700 Subject: [PATCH] refactoring: Stubbing of the logic for handling moves This is a whole lot of nothing right now, just stubbing out some control flow that ultimately just leads to TODOs that cause it to do nothing at all. My intent here is to get this cross-cutting skeleton in place and thus make it easier for us to collaborate on adding the meat to it, so that it's more likely we can work on different parts separately and still get a result that tessellates. --- internal/addrs/move_endpoint_module.go | 114 ++++++++++-- internal/refactoring/move_execute.go | 150 ++++++++++++--- internal/refactoring/move_execute_test.go | 213 ++++++++++++++++++++++ internal/refactoring/move_statement.go | 15 +- internal/refactoring/move_validate.go | 40 ++++ internal/terraform/context.go | 32 ++++ 6 files changed, 528 insertions(+), 36 deletions(-) create mode 100644 internal/refactoring/move_execute_test.go create mode 100644 internal/refactoring/move_validate.go diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go index db07af2cd..0b6461bd9 100644 --- a/internal/addrs/move_endpoint_module.go +++ b/internal/addrs/move_endpoint_module.go @@ -1,6 +1,7 @@ package addrs import ( + "fmt" "strings" "github.com/hashicorp/terraform/internal/tfdiags" @@ -63,22 +64,66 @@ func (e *MoveEndpointInModule) String() string { return buf.String() } -// SelectsMoveable returns true if the reciever directly selects the object -// represented by the given address, without any consideration of nesting. +// SelectsModule returns true if the reciever directly selects either +// the given module or a resource nested directly inside that module. // -// This is a good function to use for deciding whether a specific object -// found in the state should be acted on by a particular move statement. -func (e *MoveEndpointInModule) SelectsMoveable(addr AbsMoveable) bool { - // Only addresses of the same kind can possibly match. This guarantees - // that our logic below only needs to deal with combinations of resources - // and resource instances or with combinations of module calls and - // module instances. - if e.ObjectKind() != absMoveableEndpointKind(addr) { +// This is a good function to use to decide which modules in a state +// to consider when processing a particular move statement. For a +// module move the given module itself is what will move, while a +// resource move indicates that we should search each of the resources in +// the given module to see if they match. +func (e *MoveEndpointInModule) SelectsModule(addr ModuleInstance) bool { + // In order to match the given module path should be at least as + // long as the path to the module where the move endpoint was defined. + if len(addr) < len(e.module) { return false } - // TODO: implement - return false + containerPart := addr[:len(e.module)] + relPart := addr[len(e.module):] + + // The names of all of the steps that align with e.module must match, + // though the instance keys are wildcards for this part. + for i := range e.module { + if containerPart[i].Name != e.module[i] { + return false + } + } + + // The remaining module address steps must match both name and key. + // The logic for all of these is similar but we will retrieve the + // module address differently for each type. + var relMatch ModuleInstance + switch relAddr := e.relSubject.(type) { + case ModuleInstance: + relMatch = relAddr + case AbsModuleCall: + // This one requires a little more fuss because the call effectively + // slices in two the final step of the module address. + if len(relPart) != len(relAddr.Module)+1 { + return false + } + callPart := relPart[len(relPart)-1] + if callPart.Name != relAddr.Call.Name { + return false + } + case AbsResource: + relMatch = relAddr.Module + case AbsResourceInstance: + relMatch = relAddr.Module + default: + panic(fmt.Sprintf("unhandled relative address type %T", relAddr)) + } + + if len(relPart) != len(relMatch) { + return false + } + for i := range relMatch { + if relPart[i] != relMatch[i] { + return false + } + } + return true } // CanChainFrom returns true if the reciever describes an address that could @@ -99,3 +144,48 @@ func (e *MoveEndpointInModule) NestedWithin(other *MoveEndpointInModule) bool { // TODO: implement return false } + +// MoveDestination considers a an address representing a module +// instance in the context of source and destination move endpoints and then, +// if the module address matches the from endpoint, returns the corresponding +// new module address that the object should move to. +// +// MoveDestination will return false in its second return value if the receiver +// doesn't match fromMatch, indicating that the given move statement doesn't +// apply to this object. +// +// Both of the given endpoints must be from the same move statement and thus +// must have matching object types. If not, MoveDestination will panic. +func (m ModuleInstance) MoveDestination(fromMatch, toMatch *MoveEndpointInModule) (ModuleInstance, bool) { + return nil, false +} + +// MoveDestination considers a an address representing a resource +// in the context of source and destination move endpoints and then, +// if the resource address matches the from endpoint, returns the corresponding +// new resource address that the object should move to. +// +// MoveDestination will return false in its second return value if the receiver +// doesn't match fromMatch, indicating that the given move statement doesn't +// apply to this object. +// +// Both of the given endpoints must be from the same move statement and thus +// must have matching object types. If not, MoveDestination will panic. +func (r AbsResource) MoveDestination(fromMatch, toMatch *MoveEndpointInModule) (AbsResource, bool) { + return AbsResource{}, false +} + +// MoveDestination considers a an address representing a resource +// instance in the context of source and destination move endpoints and then, +// if the instance address matches the from endpoint, returns the corresponding +// new instance address that the object should move to. +// +// MoveDestination will return false in its second return value if the receiver +// doesn't match fromMatch, indicating that the given move statement doesn't +// apply to this object. +// +// Both of the given endpoints must be from the same move statement and thus +// must have matching object types. If not, MoveDestination will panic. +func (r AbsResourceInstance) MoveDestination(fromMatch, toMatch *MoveEndpointInModule) (AbsResourceInstance, bool) { + return AbsResourceInstance{}, false +} diff --git a/internal/refactoring/move_execute.go b/internal/refactoring/move_execute.go index 3581793df..1fc60f85f 100644 --- a/internal/refactoring/move_execute.go +++ b/internal/refactoring/move_execute.go @@ -1,6 +1,8 @@ package refactoring import ( + "fmt" + "github.com/hashicorp/terraform/internal/addrs" "github.com/hashicorp/terraform/internal/dag" "github.com/hashicorp/terraform/internal/states" @@ -22,6 +24,9 @@ type MoveResult struct { // ignore any unresolvable move statements. Validation of a set of moves is // a separate concern applied to the configuration, because validity of // moves is always dependent only on the configuration, not on the state. +// +// ApplyMoves expects exclusive access to the given state while it's running. +// Don't read or write any part of the state structure until ApplyMoves returns. func ApplyMoves(stmts []MoveStatement, state *states.State) map[addrs.UniqueKey]MoveResult { // The methodology here is to construct a small graph of all of the move // statements where the edges represent where a particular statement @@ -29,24 +34,7 @@ func ApplyMoves(stmts []MoveStatement, state *states.State) map[addrs.UniqueKey] // That then means we can traverse the graph in topological sort order // to gradually move objects through potentially multiple moves each. - g := &dag.AcyclicGraph{} - for _, stmt := range stmts { - // The graph nodes are pointers to the actual statements directly. - g.Add(&stmt) - } - - // Now we'll add the edges representing chaining and nesting relationships. - // We assume that a reasonable configuration will have at most tens of - // move statements and thus this N*M algorithm is acceptable. - for _, depender := range stmts { - for _, dependee := range stmts { - dependeeTo := dependee.To - dependerFrom := depender.From - if dependerFrom.CanChainFrom(dependeeTo) || dependerFrom.NestedWithin(dependeeTo) { - g.Connect(dag.BasicEdge(depender, dependee)) - } - } - } + g := buildMoveStatementGraph(stmts) // If there are any cycles in the graph then we'll not take any action // at all. The separate validation step should detect this and return @@ -56,8 +44,128 @@ func ApplyMoves(stmts []MoveStatement, state *states.State) map[addrs.UniqueKey] } // The starting nodes are the ones that don't depend on any other nodes. - //startNodes := make(dag.Set, len(stmts)) - //g.DepthFirstWalk() + startNodes := make(dag.Set, len(stmts)) + for _, v := range g.Vertices() { + if len(g.UpEdges(v)) == 0 { + startNodes.Add(v) + } + } - return nil + results := make(map[addrs.UniqueKey]MoveResult) + g.DepthFirstWalk(startNodes, func(v dag.Vertex, depth int) error { + stmt := v.(*MoveStatement) + + for _, ms := range state.Modules { + modAddr := ms.Addr + if !stmt.From.SelectsModule(modAddr) { + continue + } + + // We now know that the current module is relevant but what + // we'll do with it depends on the object kind. + switch kind := stmt.ObjectKind(); kind { + case addrs.MoveEndpointModule: + // For a module endpoint we just try the module address + // directly. + if newAddr, matches := modAddr.MoveDestination(stmt.From, stmt.To); matches { + // We need to visit all of the resource instances in the + // module and record them individually as results. + for _, rs := range ms.Resources { + relAddr := rs.Addr.Resource + for key := range rs.Instances { + oldInst := relAddr.Instance(key).Absolute(modAddr) + newInst := relAddr.Instance(key).Absolute(newAddr) + result := MoveResult{ + From: oldInst, + To: newInst, + } + results[oldInst.UniqueKey()] = result + results[newInst.UniqueKey()] = result + } + } + + state.MoveModuleInstance(modAddr, newAddr) + continue + } + case addrs.MoveEndpointResource: + // For a resource endpoint we need to search each of the + // resources and resource instances in the module. + for _, rs := range ms.Resources { + rAddr := rs.Addr + if newAddr, matches := rAddr.MoveDestination(stmt.From, stmt.To); matches { + for key := range rs.Instances { + oldInst := rAddr.Instance(key) + newInst := newAddr.Instance(key) + result := MoveResult{ + From: oldInst, + To: newInst, + } + results[oldInst.UniqueKey()] = result + results[newInst.UniqueKey()] = result + } + state.MoveAbsResource(rAddr, newAddr) + continue + } + for key := range rs.Instances { + iAddr := rAddr.Instance(key) + if newAddr, matches := iAddr.MoveDestination(stmt.From, stmt.To); matches { + result := MoveResult{From: iAddr, To: newAddr} + results[iAddr.UniqueKey()] = result + results[newAddr.UniqueKey()] = result + + state.MoveAbsResourceInstance(iAddr, newAddr) + continue + } + } + } + default: + panic(fmt.Sprintf("unhandled move object kind %s", kind)) + } + } + + return nil + }) + + // FIXME: In the case of either chained or nested moves, "results" will + // be left in a pretty interesting shape where the "old" address will + // refer to a result that describes only the first step, while the "new" + // address will refer to a result that describes only the last step. + // To make that actually useful we'll need a different strategy where + // the result describes the _effective_ source and destination, skipping + // over any intermediate steps we took to get there, so that ultimately + // we'll have enough information to annotate items in the plan with the + // addresses the originally moved from. + + return results +} + +// buildMoveStatementGraph constructs a dependency graph of the given move +// statements, where the nodes are all pointers to statements in the given +// slice and the edges represent either chaining or nesting relationships. +// +// buildMoveStatementGraph doesn't do any validation of the graph, so it +// may contain cycles and other sorts of invalidity. +func buildMoveStatementGraph(stmts []MoveStatement) *dag.AcyclicGraph { + g := &dag.AcyclicGraph{} + for _, stmt := range stmts { + // The graph nodes are pointers to the actual statements directly. + g.Add(&stmt) + } + + // Now we'll add the edges representing chaining and nesting relationships. + // We assume that a reasonable configuration will have at most tens of + // move statements and thus this N*M algorithm is acceptable. + for dependerI := range stmts { + depender := &stmts[dependerI] + for dependeeI := range stmts { + dependee := &stmts[dependeeI] + dependeeTo := dependee.To + dependerFrom := depender.From + if dependerFrom.CanChainFrom(dependeeTo) || dependerFrom.NestedWithin(dependeeTo) { + g.Connect(dag.BasicEdge(depender, dependee)) + } + } + } + + return g } diff --git a/internal/refactoring/move_execute_test.go b/internal/refactoring/move_execute_test.go new file mode 100644 index 000000000..eed56e911 --- /dev/null +++ b/internal/refactoring/move_execute_test.go @@ -0,0 +1,213 @@ +package refactoring + +import ( + "fmt" + "sort" + "strings" + "testing" + + "github.com/davecgh/go-spew/spew" + "github.com/google/go-cmp/cmp" + "github.com/hashicorp/hcl/v2" + "github.com/hashicorp/hcl/v2/hclsyntax" + "github.com/hashicorp/terraform/internal/addrs" + "github.com/hashicorp/terraform/internal/states" +) + +func TestApplyMoves(t *testing.T) { + // TODO: Renable this once we're ready to implement the intended behaviors + // it is describing. + t.Skip("ApplyMoves is not yet fully implemented") + + providerAddr := addrs.AbsProviderConfig{ + Module: addrs.RootModule, + Provider: addrs.MustParseProviderSourceString("example.com/foo/bar"), + } + rootNoKeyResourceAddr := [...]addrs.AbsResourceInstance{ + addrs.Resource{ + Mode: addrs.ManagedResourceMode, + Type: "foo", + Name: "from", + }.Instance(addrs.NoKey).Absolute(addrs.RootModuleInstance), + addrs.Resource{ + Mode: addrs.ManagedResourceMode, + Type: "foo", + Name: "to", + }.Instance(addrs.NoKey).Absolute(addrs.RootModuleInstance), + } + rootIntKeyResourceAddr := [...]addrs.AbsResourceInstance{ + addrs.Resource{ + Mode: addrs.ManagedResourceMode, + Type: "foo", + Name: "from", + }.Instance(addrs.IntKey(0)).Absolute(addrs.RootModuleInstance), + addrs.Resource{ + Mode: addrs.ManagedResourceMode, + Type: "foo", + Name: "to", + }.Instance(addrs.IntKey(0)).Absolute(addrs.RootModuleInstance), + } + + tests := map[string]struct { + Stmts []MoveStatement + State *states.State + + WantResults map[addrs.UniqueKey]MoveResult + WantInstanceAddrs []string + }{ + "no moves and empty state": { + []MoveStatement{}, + states.NewState(), + nil, + nil, + }, + "no moves": { + []MoveStatement{}, + states.BuildState(func(s *states.SyncState) { + s.SetResourceInstanceCurrent( + rootNoKeyResourceAddr[0], + &states.ResourceInstanceObjectSrc{ + Status: states.ObjectReady, + AttrsJSON: []byte(`{}`), + }, + providerAddr, + ) + }), + nil, + []string{ + `foo.from`, + }, + }, + "single move of whole singleton resource": { + []MoveStatement{ + testMoveStatement(t, "", "foo.from", "foo.to"), + }, + states.BuildState(func(s *states.SyncState) { + s.SetResourceInstanceCurrent( + rootNoKeyResourceAddr[0], + &states.ResourceInstanceObjectSrc{ + Status: states.ObjectReady, + AttrsJSON: []byte(`{}`), + }, + providerAddr, + ) + }), + map[addrs.UniqueKey]MoveResult{ + rootNoKeyResourceAddr[0].UniqueKey(): { + From: rootNoKeyResourceAddr[0], + To: rootNoKeyResourceAddr[1], + }, + rootNoKeyResourceAddr[1].UniqueKey(): { + From: rootNoKeyResourceAddr[1], + To: rootNoKeyResourceAddr[1], + }, + }, + []string{ + `foo.to`, + }, + }, + "single move of whole 'count' resource": { + []MoveStatement{ + testMoveStatement(t, "", "foo.from", "foo.to"), + }, + states.BuildState(func(s *states.SyncState) { + s.SetResourceInstanceCurrent( + rootIntKeyResourceAddr[0], + &states.ResourceInstanceObjectSrc{ + Status: states.ObjectReady, + AttrsJSON: []byte(`{}`), + }, + providerAddr, + ) + }), + map[addrs.UniqueKey]MoveResult{ + rootNoKeyResourceAddr[0].UniqueKey(): { + From: rootIntKeyResourceAddr[0], + To: rootIntKeyResourceAddr[1], + }, + rootNoKeyResourceAddr[1].UniqueKey(): { + From: rootIntKeyResourceAddr[0], + To: rootIntKeyResourceAddr[1], + }, + }, + []string{ + `foo.to[0]`, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + var stmtsBuf strings.Builder + for _, stmt := range test.Stmts { + fmt.Fprintf(&stmtsBuf, "- from: %s\n to: %s", stmt.From, stmt.To) + } + t.Logf("move statements:\n%s", stmtsBuf.String()) + + t.Logf("resource instances in prior state:\n%s", spew.Sdump(allResourceInstanceAddrsInState(test.State))) + + state := test.State.DeepCopy() // don't modify the test case in-place + gotResults := ApplyMoves(test.Stmts, state) + + if diff := cmp.Diff(test.WantResults, gotResults); diff != "" { + t.Errorf("wrong results\n%s", diff) + } + + gotInstAddrs := allResourceInstanceAddrsInState(state) + if diff := cmp.Diff(test.WantInstanceAddrs, gotInstAddrs); diff != "" { + t.Errorf("wrong resource instances in final state\n%s", diff) + } + }) + } +} + +func testMoveStatement(t *testing.T, module string, from string, to string) MoveStatement { + t.Helper() + + moduleAddr := addrs.RootModule + if len(module) != 0 { + moduleAddr = addrs.Module(strings.Split(module, ".")) + } + + fromTraversal, hclDiags := hclsyntax.ParseTraversalAbs([]byte(from), "from", hcl.InitialPos) + if hclDiags.HasErrors() { + t.Fatalf("invalid 'from' argument: %s", hclDiags.Error()) + } + fromAddr, diags := addrs.ParseMoveEndpoint(fromTraversal) + if diags.HasErrors() { + t.Fatalf("invalid 'from' argument: %s", diags.Err().Error()) + } + toTraversal, hclDiags := hclsyntax.ParseTraversalAbs([]byte(to), "to", hcl.InitialPos) + if diags.HasErrors() { + t.Fatalf("invalid 'to' argument: %s", hclDiags.Error()) + } + toAddr, diags := addrs.ParseMoveEndpoint(toTraversal) + if diags.HasErrors() { + t.Fatalf("invalid 'from' argument: %s", diags.Err().Error()) + } + + fromInModule, toInModule := addrs.UnifyMoveEndpoints(moduleAddr, fromAddr, toAddr) + if fromInModule == nil || toInModule == nil { + t.Fatalf("incompatible endpoints") + } + + return MoveStatement{ + From: fromInModule, + To: toInModule, + + // DeclRange not populated because it's unimportant for our tests + } +} + +func allResourceInstanceAddrsInState(state *states.State) []string { + var ret []string + for _, ms := range state.Modules { + for _, rs := range ms.Resources { + for key := range rs.Instances { + ret = append(ret, rs.Addr.Instance(key).String()) + } + } + } + sort.Strings(ret) + return ret +} diff --git a/internal/refactoring/move_statement.go b/internal/refactoring/move_statement.go index cb2cff40c..59f7f55de 100644 --- a/internal/refactoring/move_statement.go +++ b/internal/refactoring/move_statement.go @@ -1,6 +1,8 @@ package refactoring import ( + "fmt" + "github.com/hashicorp/terraform/internal/addrs" "github.com/hashicorp/terraform/internal/configs" "github.com/hashicorp/terraform/internal/tfdiags" @@ -23,9 +25,9 @@ func findMoveStatements(cfg *configs.Config, into []MoveStatement) []MoveStateme for _, mc := range cfg.Module.Moved { fromAddr, toAddr := addrs.UnifyMoveEndpoints(modAddr, mc.From, mc.To) if fromAddr == nil || toAddr == nil { - // Invalid combination should get caught by our separate - // validation rules elsewhere. - continue + // Invalid combination should've been caught during original + // configuration decoding, in the configs package. + panic(fmt.Sprintf("incompatible move endpoints in %s", mc.DeclRange)) } into = append(into, MoveStatement{ @@ -41,3 +43,10 @@ func findMoveStatements(cfg *configs.Config, into []MoveStatement) []MoveStateme return into } + +func (s *MoveStatement) ObjectKind() addrs.MoveEndpointKind { + // addrs.UnifyMoveEndpoints guarantees that both of our addresses have + // the same kind, so we can just arbitrary use From and assume To will + // match it. + return s.From.ObjectKind() +} diff --git a/internal/refactoring/move_validate.go b/internal/refactoring/move_validate.go new file mode 100644 index 000000000..40ee5f654 --- /dev/null +++ b/internal/refactoring/move_validate.go @@ -0,0 +1,40 @@ +package refactoring + +import ( + "fmt" + + "github.com/hashicorp/terraform/internal/configs" + "github.com/hashicorp/terraform/internal/instances" + "github.com/hashicorp/terraform/internal/tfdiags" +) + +// ValidateMoves tests whether all of the given move statements comply with +// both the single-statement validation rules and the "big picture" rules +// that constrain statements in relation to one another. +// +// The validation rules are primarily in terms of the configuration, but +// ValidateMoves also takes the expander that resulted from creating a plan +// so that it can see which instances are defined for each module and resource, +// to precisely validate move statements involving specific-instance addresses. +// +// Because validation depends on the planning result but move execution must +// happen _before_ planning, we have the unusual situation where sibling +// function ApplyMoves must run before ValidateMoves and must therefore +// tolerate and ignore any invalid statements. The plan walk will then +// construct in incorrect plan (because it'll be starting from the wrong +// prior state) but ValidateMoves will block actually showing that invalid +// plan to the user. +func ValidateMoves(stmts []MoveStatement, rootCfg *configs.Config, expander *instances.Expander) tfdiags.Diagnostics { + var diags tfdiags.Diagnostics + + g := buildMoveStatementGraph(stmts) + + if len(g.Cycles()) != 0 { + // TODO: proper error messages for this + diags = diags.Append(fmt.Errorf("move statement cycles")) + } + + // TODO: Various other validation rules + + return diags +} diff --git a/internal/terraform/context.go b/internal/terraform/context.go index c0934d778..3ae479eed 100644 --- a/internal/terraform/context.go +++ b/internal/terraform/context.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/terraform/internal/plans" "github.com/hashicorp/terraform/internal/providers" "github.com/hashicorp/terraform/internal/provisioners" + "github.com/hashicorp/terraform/internal/refactoring" "github.com/hashicorp/terraform/internal/states" "github.com/hashicorp/terraform/internal/tfdiags" "github.com/zclconf/go-cty/cty" @@ -608,6 +609,33 @@ The -target option is not for routine use, and is provided only for exceptional )) } + moveStmts := refactoring.FindMoveStatements(c.config) + if len(moveStmts) != 0 { + // TEMP: we haven't fully implemented moving yet, so we'll just + // reject it outright for now to reduce confusion. + diags = diags.Append(tfdiags.Sourceless( + tfdiags.Error, + "Moves are not yet supported", + "There is currently no handling of \"moved\" blocks in the configuration.", + )) + } + moveResults := refactoring.ApplyMoves(moveStmts, c.prevRunState) + if len(c.targets) > 0 { + for _, result := range moveResults { + matchesTarget := false + for _, targetAddr := range c.targets { + if targetAddr.TargetContains(result.From) { + matchesTarget = true + break + } + } + if !matchesTarget { + // TODO: Return an error stating that a targeted plan is + // only valid if it includes this address that was moved. + } + } + } + var plan *plans.Plan var planDiags tfdiags.Diagnostics switch c.planMode { @@ -625,6 +653,10 @@ The -target option is not for routine use, and is provided only for exceptional return nil, diags } + // TODO: Call refactoring.ValidateMoves, but need to figure out how to + // get hold of the plan's expander here, or somehow otherwise export + // the necessary subset of its data for ValidateMoves to do its work. + // convert the variables into the format expected for the plan varVals := make(map[string]plans.DynamicValue, len(c.variables)) for k, iv := range c.variables {