From 9cd88810f4683489da843736d344cfa1d49a1e44 Mon Sep 17 00:00:00 2001 From: Paul Hinze Date: Fri, 7 Aug 2015 14:48:13 -0500 Subject: [PATCH 1/3] core: log every 5s while waiting for dependencies Helps to flush out deadlocks in the dependency graph --- dag/dag.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/dag/dag.go b/dag/dag.go index 602cacd99..b609ac707 100644 --- a/dag/dag.go +++ b/dag/dag.go @@ -2,9 +2,11 @@ package dag import ( "fmt" + "log" "sort" "strings" "sync" + "time" "github.com/hashicorp/go-multierror" ) @@ -197,8 +199,19 @@ func (g *AcyclicGraph) Walk(cb WalkFunc) error { readyCh := make(chan bool) go func(v Vertex, deps []Vertex, chs []<-chan struct{}, readyCh chan<- bool) { // First wait for all the dependencies - for _, ch := range chs { - <-ch + for i, ch := range chs { + DepSatisfied: + for { + select { + case <-ch: + break DepSatisfied + case <-time.After(time.Second * 5): + log.Printf("[DEBUG] vertex %s, waiting for: %s", + VertexName(v), VertexName(deps[i])) + } + } + log.Printf("[DEBUG] vertex %s, got dep: %s", + VertexName(v), VertexName(deps[i])) } // Then, check the map to see if any of our dependencies failed From 7a464b1156d53e5e3bbacad3543e18d492671215 Mon Sep 17 00:00:00 2001 From: Paul Hinze Date: Mon, 10 Aug 2015 15:03:02 -0500 Subject: [PATCH 2/3] tests: extract deadlock checking test helper --- terraform/context_plan_test.go | 26 ++++++-------------------- terraform/context_test.go | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/terraform/context_plan_test.go b/terraform/context_plan_test.go index 289779511..61864fb5d 100644 --- a/terraform/context_plan_test.go +++ b/terraform/context_plan_test.go @@ -9,7 +9,6 @@ import ( "strings" "sync" "testing" - "time" ) func TestContext2Plan(t *testing.T) { @@ -176,16 +175,11 @@ func TestContext2Plan_moduleCycle(t *testing.T) { } func TestContext2Plan_moduleDeadlock(t *testing.T) { - m := testModule(t, "plan-module-deadlock") - p := testProvider("aws") - p.DiffFn = testDiffFn - timeout := make(chan bool, 1) - done := make(chan bool, 1) - go func() { - time.Sleep(3 * time.Second) - timeout <- true - }() - go func() { + testCheckDeadlock(t, func() { + m := testModule(t, "plan-module-deadlock") + p := testProvider("aws") + p.DiffFn = testDiffFn + ctx := testContext2(t, &ContextOpts{ Module: m, Providers: map[string]ResourceProviderFactory{ @@ -194,7 +188,6 @@ func TestContext2Plan_moduleDeadlock(t *testing.T) { }) plan, err := ctx.Plan() - done <- true if err != nil { t.Fatalf("err: %s", err) } @@ -215,14 +208,7 @@ STATE: if actual != expected { t.Fatalf("expected:\n%sgot:\n%s", expected, actual) } - }() - - select { - case <-timeout: - t.Fatalf("timed out! probably deadlock") - case <-done: - // ok - } + }) } func TestContext2Plan_moduleInput(t *testing.T) { diff --git a/terraform/context_test.go b/terraform/context_test.go index 8bcbf1b2b..65f840505 100644 --- a/terraform/context_test.go +++ b/terraform/context_test.go @@ -4,6 +4,7 @@ import ( "fmt" "strings" "testing" + "time" ) func testContext2(t *testing.T, opts *ContextOpts) *Context { @@ -170,6 +171,27 @@ func resourceState(resourceType, resourceID string) *ResourceState { } } +// Test helper that gives a function 3 seconds to finish, assumes deadlock and +// fails test if it does not. +func testCheckDeadlock(t *testing.T, f func()) { + timeout := make(chan bool, 1) + done := make(chan bool, 1) + go func() { + time.Sleep(3 * time.Second) + timeout <- true + }() + go func(f func(), done chan bool) { + defer func() { done <- true }() + f() + }(f, done) + select { + case <-timeout: + t.Fatalf("timed out! probably deadlock") + case <-done: + // ok + } +} + const testContextGraph = ` root: root aws_instance.bar From 52c4bfbe980258422bed1bbd64a39cc8ddeabef9 Mon Sep 17 00:00:00 2001 From: Paul Hinze Date: Mon, 10 Aug 2015 15:04:48 -0500 Subject: [PATCH 3/3] core: fix deadlock when dependable node replaced with non-dependable one In #2884, Terraform would hang on graphs with an orphaned resource depended on an orphaned module. This is because orphan module nodes (which are dependable) were getting expanded (replaced) with GraphNodeBasicSubgraph nodes (which are _not_ dependable). The old `graph.Replace()` code resulted in GraphNodeBasicSubgraph being entered into the lookaside table, even though it is not dependable. This resulted in an untraversable edge in the graph, so the graph would hang and wait forever. Now, we remove entries from the lookaside table when a dependable node is being replaced with a non-dependable node. This means we lose an edge, but we can move forward. It's ~probably~ never correct to be replacing depenable nodes with non-dependable ones, but this tweak seemed preferable to tossing a panic in there. --- terraform/context_refresh_test.go | 95 +++++++++++++++++++ terraform/graph.go | 6 +- terraform/graph_test.go | 37 +++++++- .../child/grandchild/main.tf | 3 + .../refresh-module-orphan/child/main.tf | 10 ++ .../refresh-module-orphan/main.tf | 10 ++ 6 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 terraform/test-fixtures/refresh-module-orphan/child/grandchild/main.tf create mode 100644 terraform/test-fixtures/refresh-module-orphan/child/main.tf create mode 100644 terraform/test-fixtures/refresh-module-orphan/main.tf diff --git a/terraform/context_refresh_test.go b/terraform/context_refresh_test.go index a657e767f..dbab70255 100644 --- a/terraform/context_refresh_test.go +++ b/terraform/context_refresh_test.go @@ -4,6 +4,7 @@ import ( "reflect" "sort" "strings" + "sync" "testing" ) @@ -685,6 +686,100 @@ func TestContext2Refresh_vars(t *testing.T) { } } +func TestContext2Refresh_orphanModule(t *testing.T) { + p := testProvider("aws") + m := testModule(t, "refresh-module-orphan") + + // Create a custom refresh function to track the order they were visited + var order []string + var orderLock sync.Mutex + p.RefreshFn = func( + info *InstanceInfo, + is *InstanceState) (*InstanceState, error) { + orderLock.Lock() + defer orderLock.Unlock() + + order = append(order, is.ID) + return is, nil + } + + state := &State{ + Modules: []*ModuleState{ + &ModuleState{ + Path: rootModulePath, + Resources: map[string]*ResourceState{ + "aws_instance.foo": &ResourceState{ + Primary: &InstanceState{ + ID: "i-abc123", + Attributes: map[string]string{ + "childid": "i-bcd234", + "grandchildid": "i-cde345", + }, + }, + Dependencies: []string{ + "module.child", + "module.child", + }, + }, + }, + }, + &ModuleState{ + Path: append(rootModulePath, "child"), + Resources: map[string]*ResourceState{ + "aws_instance.bar": &ResourceState{ + Primary: &InstanceState{ + ID: "i-bcd234", + Attributes: map[string]string{ + "grandchildid": "i-cde345", + }, + }, + Dependencies: []string{ + "module.grandchild", + }, + }, + }, + Outputs: map[string]string{ + "id": "i-bcd234", + "grandchild_id": "i-cde345", + }, + }, + &ModuleState{ + Path: append(rootModulePath, "child", "grandchild"), + Resources: map[string]*ResourceState{ + "aws_instance.baz": &ResourceState{ + Primary: &InstanceState{ + ID: "i-cde345", + }, + }, + }, + Outputs: map[string]string{ + "id": "i-cde345", + }, + }, + }, + } + ctx := testContext2(t, &ContextOpts{ + Module: m, + Providers: map[string]ResourceProviderFactory{ + "aws": testProviderFuncFixed(p), + }, + State: state, + }) + + testCheckDeadlock(t, func() { + _, err := ctx.Refresh() + if err != nil { + t.Fatalf("err: %s", err) + } + + // TODO: handle order properly for orphaned modules / resources + // expected := []string{"i-abc123", "i-bcd234", "i-cde345"} + // if !reflect.DeepEqual(order, expected) { + // t.Fatalf("expected: %#v, got: %#v", expected, order) + // } + }) +} + func TestContext2Validate(t *testing.T) { p := testProvider("aws") m := testModule(t, "validate-good") diff --git a/terraform/graph.go b/terraform/graph.go index 6744a931e..e75d93663 100644 --- a/terraform/graph.go +++ b/terraform/graph.go @@ -74,7 +74,11 @@ func (g *Graph) Replace(o, n dag.Vertex) bool { // Go through and update our lookaside to point to the new vertex for k, v := range g.dependableMap { if v == o { - g.dependableMap[k] = n + if _, ok := n.(GraphNodeDependable); ok { + g.dependableMap[k] = n + } else { + delete(g.dependableMap, k) + } } } diff --git a/terraform/graph_test.go b/terraform/graph_test.go index c56f5bfad..ef91fec4a 100644 --- a/terraform/graph_test.go +++ b/terraform/graph_test.go @@ -1,6 +1,7 @@ package terraform import ( + "reflect" "strings" "testing" ) @@ -20,7 +21,7 @@ func TestGraphAdd(t *testing.T) { func TestGraphConnectDependent(t *testing.T) { var g Graph - g.Add(&testGraphDependable{VertexName: "a", Mock: []string{"a"}}) + g.Add(&testGraphDependable{VertexName: "a"}) b := g.Add(&testGraphDependable{ VertexName: "b", DependentOnMock: []string{"a"}, @@ -37,10 +38,40 @@ func TestGraphConnectDependent(t *testing.T) { } } +func TestGraphReplace_DependableWithNonDependable(t *testing.T) { + var g Graph + a := g.Add(&testGraphDependable{VertexName: "a"}) + b := g.Add(&testGraphDependable{ + VertexName: "b", + DependentOnMock: []string{"a"}, + }) + newA := "non-dependable-a" + + if missing := g.ConnectDependent(b); len(missing) > 0 { + t.Fatalf("bad: %#v", missing) + } + + if !g.Replace(a, newA) { + t.Fatalf("failed to replace") + } + + c := g.Add(&testGraphDependable{ + VertexName: "c", + DependentOnMock: []string{"a"}, + }) + + // This should fail by reporting missing, since a node with dependable + // name "a" is no longer in the graph. + missing := g.ConnectDependent(c) + expected := []string{"a"} + if !reflect.DeepEqual(expected, missing) { + t.Fatalf("expected: %#v, got: %#v", expected, missing) + } +} + type testGraphDependable struct { VertexName string DependentOnMock []string - Mock []string } func (v *testGraphDependable) Name() string { @@ -48,7 +79,7 @@ func (v *testGraphDependable) Name() string { } func (v *testGraphDependable) DependableName() []string { - return v.Mock + return []string{v.VertexName} } func (v *testGraphDependable) DependentOn() []string { diff --git a/terraform/test-fixtures/refresh-module-orphan/child/grandchild/main.tf b/terraform/test-fixtures/refresh-module-orphan/child/grandchild/main.tf new file mode 100644 index 000000000..942e93dbc --- /dev/null +++ b/terraform/test-fixtures/refresh-module-orphan/child/grandchild/main.tf @@ -0,0 +1,3 @@ +resource "aws_instance" "baz" {} + +output "id" { value = "${aws_instance.baz.id}" } diff --git a/terraform/test-fixtures/refresh-module-orphan/child/main.tf b/terraform/test-fixtures/refresh-module-orphan/child/main.tf new file mode 100644 index 000000000..7c3fc842f --- /dev/null +++ b/terraform/test-fixtures/refresh-module-orphan/child/main.tf @@ -0,0 +1,10 @@ +module "grandchild" { + source = "./grandchild" +} + +resource "aws_instance" "bar" { + grandchildid = "${module.grandchild.id}" +} + +output "id" { value = "${aws_instance.bar.id}" } +output "grandchild_id" { value = "${module.grandchild.id}" } diff --git a/terraform/test-fixtures/refresh-module-orphan/main.tf b/terraform/test-fixtures/refresh-module-orphan/main.tf new file mode 100644 index 000000000..244374d9d --- /dev/null +++ b/terraform/test-fixtures/refresh-module-orphan/main.tf @@ -0,0 +1,10 @@ +/* +module "child" { + source = "./child" +} + +resource "aws_instance" "bar" { + childid = "${module.child.id}" + grandchildid = "${module.child.grandchild_id}" +} +*/