diff --git a/internal/lang/globalref/analyzer.go b/internal/lang/globalref/analyzer.go new file mode 100644 index 000000000..7a24d781e --- /dev/null +++ b/internal/lang/globalref/analyzer.go @@ -0,0 +1,68 @@ +package globalref + +import ( + "fmt" + + "github.com/hashicorp/terraform/internal/addrs" + "github.com/hashicorp/terraform/internal/configs" + "github.com/hashicorp/terraform/internal/providers" +) + +// Analyzer is the main component of this package, serving as a container for +// various state that the analysis algorithms depend on either for their core +// functionality or for producing results more quickly. +// +// Global reference analysis is currently intended only for "best effort" +// use-cases related to giving hints to the user or tailoring UI output. +// Avoid using it for anything that would cause changes to the analyzer being +// considered a breaking change under the v1 compatibility promises, because +// we expect to continue to refine and evolve these rules over time in ways +// that may cause us to detect either more or fewer references than today. +// Typically we will conservatively return more references than would be +// necessary dynamically, but that isn't guaranteed for all situations. +// +// In particular, we currently typically don't distinguish between multiple +// instances of the same module, and so we overgeneralize references from +// one instance of a module as references from the same location in all +// instances of that module. We may make this more precise in future, which +// would then remove various detected references from the analysis results. +// +// Each Analyzer works with a particular configs.Config object which it assumes +// represents the root module of a configuration. 
Config objects are typically +// immutable by convention anyway, but it's particularly important not to +// modify a configuration while it's attached to a live Analyzer, because +// the Analyzer contains caches derived from data in the configuration tree. +type Analyzer struct { + cfg *configs.Config + providerSchemas map[addrs.Provider]*providers.Schemas +} + +// NewAnalyzer constructs a new analyzer bound to the given configuration and +// provider schemas. +// +// The given object must represent a root module, or this function will panic. +// +// The given provider schemas must cover at least all of the providers used +// in the given configuration. If not then analysis results will be silently +// incomplete for any decision that requires checking schema. +func NewAnalyzer(cfg *configs.Config, providerSchemas map[addrs.Provider]*providers.Schemas) *Analyzer { + if !cfg.Path.IsRoot() { + panic(fmt.Sprintf("constructing an Analyzer with non-root module %s", cfg.Path)) + } + + ret := &Analyzer{ + cfg: cfg, + providerSchemas: providerSchemas, + } + return ret +} + +// ModuleConfig retrieves a module configuration from the configuration the +// analyzer belongs to, or nil if there is no module with the given address. 
+func (a *Analyzer) ModuleConfig(addr addrs.ModuleInstance) *configs.Module { + modCfg := a.cfg.DescendentForInstance(addr) + if modCfg == nil { + return nil + } + return modCfg.Module +} diff --git a/internal/lang/globalref/analyzer_contributing_resources.go b/internal/lang/globalref/analyzer_contributing_resources.go new file mode 100644 index 000000000..4024bafd0 --- /dev/null +++ b/internal/lang/globalref/analyzer_contributing_resources.go @@ -0,0 +1,130 @@ +package globalref + +import ( + "sort" + + "github.com/hashicorp/terraform/internal/addrs" +) + +// ContributingResources analyzes all of the given references and +// for each one tries to walk backwards through any named values to find all +// resources whose values contributed either directly or indirectly to any of +// them. +// +// This is a wrapper around ContributingResourceReferences which simplifies +// the result to only include distinct resource addresses, not full references. +// If the configuration includes several different references to different +// parts of a resource, ContributingResources will not preserve that detail. +func (a *Analyzer) ContributingResources(refs ...Reference) []addrs.AbsResource { + retRefs := a.ContributingResourceReferences(refs...) + if len(retRefs) == 0 { + return nil + } + + uniq := make(map[string]addrs.AbsResource, len(refs)) + for _, ref := range retRefs { + if addr, ok := resourceForAddr(ref.LocalRef.Subject); ok { + moduleAddr := ref.ModuleAddr() + absAddr := addr.Absolute(moduleAddr) + uniq[absAddr.String()] = absAddr + } + } + ret := make([]addrs.AbsResource, 0, len(uniq)) + for _, addr := range uniq { + ret = append(ret, addr) + } + sort.Slice(ret, func(i, j int) bool { + // We only have a sorting function for resource _instances_, but + // it'll do well enough if we just pretend we have no-key instances. 
+ return ret[i].Instance(addrs.NoKey).Less(ret[j].Instance(addrs.NoKey)) + }) + return ret +} + +// ContributingResourceReferences analyzes all of the given references and +// for each one tries to walk backwards through any named values to find all +// references to resource attributes that contributed either directly or +// indirectly to any of them. +// +// This is a global operation that can be potentially quite expensive for +// complex configurations. +func (a *Analyzer) ContributingResourceReferences(refs ...Reference) []Reference { + // Our methodology here is to keep digging through MetaReferences + // until we've visited everything we encounter directly or indirectly, + // and keep track of any resources we find along the way. + + // We'll aggregate our result here, using the string representations of + // the resources as keys to avoid returning the same one more than once. + found := make(map[referenceAddrKey]Reference) + + // We might encounter the same object multiple times as we walk, + // but we won't learn anything more by traversing them again and so we'll + // just skip them instead. + visitedObjects := make(map[referenceAddrKey]struct{}) + + // A queue of objects we still need to visit. + // Note that if we find multiple references to the same object then we'll + // just arbitrary choose any one of them, because for our purposes here + // it's immaterial which reference we actually followed. + pendingObjects := make(map[referenceAddrKey]Reference) + + // Initial state: identify any directly-mentioned resources and + // queue up any named values we refer to. 
+ for _, ref := range refs { + if _, ok := resourceForAddr(ref.LocalRef.Subject); ok { + found[ref.addrKey()] = ref + } + pendingObjects[ref.addrKey()] = ref + } + + for len(pendingObjects) > 0 { + // Note: we modify this map while we're iterating over it, which means + // that anything we add might be either visited within a later + // iteration of the inner loop or in a later iteration of the outer + // loop, but we get the correct result either way because we keep + // working until we've fully depleted the queue. + for key, ref := range pendingObjects { + delete(pendingObjects, key) + + // We do this _before_ the visit below just in case this is an + // invalid config with a self-referential local value, in which + // case we'll just silently ignore the self reference for our + // purposes here, and thus still eventually converge (albeit + // with an incomplete answer). + visitedObjects[key] = struct{}{} + + moreRefs := a.MetaReferences(ref) + for _, newRef := range moreRefs { + if _, ok := resourceForAddr(newRef.LocalRef.Subject); ok { + found[newRef.addrKey()] = newRef + } + + newKey := newRef.addrKey() + if _, visited := visitedObjects[newKey]; !visited { + pendingObjects[newKey] = newRef + } + } + } + } + + if len(found) == 0 { + return nil + } + + ret := make([]Reference, 0, len(found)) + for _, ref := range found { + ret = append(ret, ref) + } + return ret +} + +func resourceForAddr(addr addrs.Referenceable) (addrs.Resource, bool) { + switch addr := addr.(type) { + case addrs.Resource: + return addr, true + case addrs.ResourceInstance: + return addr.Resource, true + default: + return addrs.Resource{}, false + } +} diff --git a/internal/lang/globalref/analyzer_contributing_resources_test.go b/internal/lang/globalref/analyzer_contributing_resources_test.go new file mode 100644 index 000000000..038b3ed54 --- /dev/null +++ b/internal/lang/globalref/analyzer_contributing_resources_test.go @@ -0,0 +1,96 @@ +package globalref + +import ( + "testing" + + 
"github.com/google/go-cmp/cmp" + "github.com/hashicorp/terraform/internal/addrs" +) + +func TestAnalyzerContributingResources(t *testing.T) { + azr := testAnalyzer(t, "contributing-resources") + + tests := map[string]struct { + StartRefs func() []Reference + WantAddrs []string + }{ + "root output 'network'": { + func() []Reference { + return azr.ReferencesFromOutputValue( + addrs.OutputValue{Name: "network"}.Absolute(addrs.RootModuleInstance), + ) + }, + []string{ + `data.test_thing.environment`, + `module.network.test_thing.subnet`, + `module.network.test_thing.vpc`, + }, + }, + "root output 'c10s_url'": { + func() []Reference { + return azr.ReferencesFromOutputValue( + addrs.OutputValue{Name: "c10s_url"}.Absolute(addrs.RootModuleInstance), + ) + }, + []string{ + `data.test_thing.environment`, + `module.compute.test_thing.load_balancer`, + `module.network.test_thing.subnet`, + `module.network.test_thing.vpc`, + + // NOTE: module.compute.test_thing.controller isn't here + // because we can see statically that the output value refers + // only to the "string" attribute of + // module.compute.test_thing.load_balancer , and so we + // don't consider references inside the "list" blocks. + }, + }, + "module.compute.test_thing.load_balancer": { + func() []Reference { + return azr.ReferencesFromResourceInstance( + addrs.Resource{ + Mode: addrs.ManagedResourceMode, + Type: "test_thing", + Name: "load_balancer", + }.Instance(addrs.NoKey).Absolute(addrs.RootModuleInstance.Child("compute", addrs.NoKey)), + ) + }, + []string{ + `data.test_thing.environment`, + `module.compute.test_thing.controller`, + `module.network.test_thing.subnet`, + `module.network.test_thing.vpc`, + }, + }, + "data.test_thing.environment": { + func() []Reference { + return azr.ReferencesFromResourceInstance( + addrs.Resource{ + Mode: addrs.DataResourceMode, + Type: "test_thing", + Name: "environment", + }.Instance(addrs.NoKey).Absolute(addrs.RootModuleInstance), + ) + }, + []string{ + // Nothing! 
This one only refers to an input variable. + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + startRefs := test.StartRefs() + addrs := azr.ContributingResources(startRefs...) + + want := test.WantAddrs + got := make([]string, len(addrs)) + for i, addr := range addrs { + got[i] = addr.String() + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("wrong addresses\n%s", diff) + } + }) + } +} diff --git a/internal/lang/globalref/analyzer_meta_references.go b/internal/lang/globalref/analyzer_meta_references.go new file mode 100644 index 000000000..b7c6db22e --- /dev/null +++ b/internal/lang/globalref/analyzer_meta_references.go @@ -0,0 +1,586 @@ +package globalref + +import ( + "github.com/hashicorp/hcl/v2" + "github.com/hashicorp/terraform/internal/addrs" + "github.com/hashicorp/terraform/internal/configs/configschema" + "github.com/hashicorp/terraform/internal/lang" + "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-cty/cty/convert" + "github.com/zclconf/go-cty/cty/gocty" +) + +// MetaReferences inspects the configuration to find the references contained +// within the most specific object that the given address refers to. +// +// This finds only the direct references in that object, not any indirect +// references from those. This is a building block for some other Analyzer +// functions that can walk through multiple levels of reference. +// +// If the given reference refers to something that doesn't exist in the +// configuration we're analyzing then MetaReferences will return no +// meta-references at all, which is indistinguishable from an existing +// object that doesn't refer to anything. 
+func (a *Analyzer) MetaReferences(ref Reference) []Reference { + // This function is aiming to encapsulate the fact that a reference + // is actually quite a complex notion which includes both a specific + // object the reference is to, where each distinct object type has + // a very different representation in the configuration, and then + // also potentially an attribute or block within the definition of that + // object. Our goal is to make all of these different situations appear + // mostly the same to the caller, in that all of them can be reduced to + // a set of references regardless of which expression or expressions we + // derive those from. + + moduleAddr := ref.ModuleAddr() + remaining := ref.LocalRef.Remaining + + // Our first task then is to select an appropriate implementation based + // on which address type the reference refers to. + switch targetAddr := ref.LocalRef.Subject.(type) { + case addrs.InputVariable: + return a.metaReferencesInputVariable(moduleAddr, targetAddr, remaining) + case addrs.ModuleCallInstanceOutput: + return a.metaReferencesOutputValue(moduleAddr, targetAddr, remaining) + case addrs.ModuleCallInstance: + return a.metaReferencesModuleCall(moduleAddr, targetAddr, remaining) + case addrs.ModuleCall: + // TODO: It isn't really correct to say that a reference to a module + // call is a reference to its no-key instance. Really what we want to + // say here is that it's a reference to _all_ instances, or to an + // instance with an unknown key, but we don't have any representation + // of that. For the moment it's pretty immaterial since most of our + // other analysis ignores instance keys anyway, but maybe we'll revisit + // this latter to distingish these two cases better. 
+ return a.metaReferencesModuleCall(moduleAddr, targetAddr.Instance(addrs.NoKey), remaining) + case addrs.CountAttr, addrs.ForEachAttr: + if resourceAddr, ok := ref.ResourceAddr(); ok { + return a.metaReferencesCountOrEach(resourceAddr) + } + return nil + case addrs.ResourceInstance: + return a.metaReferencesResourceInstance(moduleAddr, targetAddr, remaining) + case addrs.Resource: + // TODO: It isn't really correct to say that a reference to a resource + // is a reference to its no-key instance. Really what we want to say + // here is that it's a reference to _all_ instances, or to an instance + // with an unknown key, but we don't have any representation of that. + // For the moment it's pretty immaterial since most of our other + // analysis ignores instance keys anyway, but maybe we'll revisit this + // latter to distingish these two cases better. + return a.metaReferencesResourceInstance(moduleAddr, targetAddr.Instance(addrs.NoKey), remaining) + default: + // For anything we don't explicitly support we'll just return no + // references. This includes the reference types that don't really + // refer to configuration objects at all, like "path.module", + // and so which cannot possibly generate any references. + return nil + } +} + +func (a *Analyzer) metaReferencesInputVariable(calleeAddr addrs.ModuleInstance, addr addrs.InputVariable, remain hcl.Traversal) []Reference { + if calleeAddr.IsRoot() { + // A root module variable definition can never refer to anything, + // because it conceptually exists outside of any module. + return nil + } + + callerAddr, callAddr := calleeAddr.Call() + + // We need to find the module call inside the caller module. + callerCfg := a.ModuleConfig(callerAddr) + if callerCfg == nil { + return nil + } + call := callerCfg.ModuleCalls[callAddr.Name] + if call == nil { + return nil + } + + // Now we need to look for an attribute matching the variable name inside + // the module block body. 
+ body := call.Config + schema := &hcl.BodySchema{ + Attributes: []hcl.AttributeSchema{ + {Name: addr.Name}, + }, + } + // We don't check for errors here because we'll make a best effort to + // analyze whatever partial result HCL is able to extract. + content, _, _ := body.PartialContent(schema) + attr := content.Attributes[addr.Name] + if attr == nil { + return nil + } + refs, _ := lang.ReferencesInExpr(attr.Expr) + return absoluteRefs(callerAddr, refs) +} + +func (a *Analyzer) metaReferencesOutputValue(callerAddr addrs.ModuleInstance, addr addrs.ModuleCallInstanceOutput, remain hcl.Traversal) []Reference { + calleeAddr := callerAddr.Child(addr.Call.Call.Name, addr.Call.Key) + + // We need to find the output value declaration inside the callee module. + calleeCfg := a.ModuleConfig(calleeAddr) + if calleeCfg == nil { + return nil + } + + oc := calleeCfg.Outputs[addr.Name] + if oc == nil { + return nil + } + + // We don't check for errors here because we'll make a best effort to + // analyze whatever partial result HCL is able to extract. + refs, _ := lang.ReferencesInExpr(oc.Expr) + return absoluteRefs(calleeAddr, refs) +} + +func (a *Analyzer) metaReferencesModuleCall(callerAddr addrs.ModuleInstance, addr addrs.ModuleCallInstance, remain hcl.Traversal) []Reference { + calleeAddr := callerAddr.Child(addr.Call.Name, addr.Key) + + // What we're really doing here is just rolling up all of the references + // from all of this module's output values. + calleeCfg := a.ModuleConfig(calleeAddr) + if calleeCfg == nil { + return nil + } + + var ret []Reference + for name := range calleeCfg.Outputs { + outputAddr := addrs.ModuleCallInstanceOutput{ + Call: addr, + Name: name, + } + moreRefs := a.metaReferencesOutputValue(callerAddr, outputAddr, nil) + ret = append(ret, moreRefs...) 
+ } + return ret +} + +func (a *Analyzer) metaReferencesCountOrEach(resourceAddr addrs.AbsResource) []Reference { + return a.ReferencesFromResourceRepetition(resourceAddr) +} + +func (a *Analyzer) metaReferencesResourceInstance(moduleAddr addrs.ModuleInstance, addr addrs.ResourceInstance, remain hcl.Traversal) []Reference { + modCfg := a.ModuleConfig(moduleAddr) + if modCfg == nil { + return nil + } + + rc := modCfg.ResourceByAddr(addr.Resource) + if rc == nil { + return nil + } + + // In valid cases we should have the schema for this resource type + // available. In invalid cases we might be dealing with partial information, + // and so the schema might be nil so we won't be able to return reference + // information for this particular situation. + providerSchema := a.providerSchemas[rc.Provider] + if providerSchema == nil { + return nil + } + resourceTypeSchema, _ := providerSchema.SchemaForResourceAddr(addr.Resource) + if resourceTypeSchema == nil { + return nil + } + + // When analyzing the resource configuration to look for references, we'll + // make a best effort to narrow down to only a particular sub-portion of + // the configuration by following the remaining traversal steps. In the + // ideal case this will lead us to a specific expression, but as a + // compromise it might lead us to some nested blocks where at least we + // can limit our searching only to those. + bodies := []hcl.Body{rc.Config} + var exprs []hcl.Expression + schema := resourceTypeSchema + var steppingThrough *configschema.NestedBlock + var steppingThroughType string + nextStep := func(newBodies []hcl.Body, newExprs []hcl.Expression) { + // We append exprs but replace bodies because exprs represent extra + // expressions we collected on the path, such as dynamic block for_each, + // which can potentially contribute to the final evalcontext, but + // bodies never contribute any values themselves, and instead just + // narrow down where we're searching. 
+ bodies = newBodies + exprs = append(exprs, newExprs...) + steppingThrough = nil + steppingThroughType = "" + // Caller must also update "schema" if necessary. + } + traverseInBlock := func(name string) ([]hcl.Body, []hcl.Expression) { + if attr := schema.Attributes[name]; attr != nil { + // When we reach a specific attribute we can't traverse any deeper, because attributes are the leaves of the schema. + schema = nil + return traverseAttr(bodies, name) + } else if blockType := schema.BlockTypes[name]; blockType != nil { + // We need to take a different action here depending on + // the nesting mode of the block type. Some require us + // to traverse in two steps in order to select a specific + // child block, while others we can just step through + // directly. + switch blockType.Nesting { + case configschema.NestingSingle, configschema.NestingGroup: + // There should be only zero or one blocks of this + // type, so we can traverse in only one step. + schema = &blockType.Block + return traverseNestedBlockSingle(bodies, name) + case configschema.NestingMap, configschema.NestingList, configschema.NestingSet: + steppingThrough = blockType + return bodies, exprs // Preserve current selections for the second step + default: + // The above should be exhaustive, but just in case + // we add something new in future we'll bail out + // here and conservatively return everything under + // the current traversal point. + schema = nil + return nil, nil + } + } + + // We'll get here if the given name isn't in the schema at all. If so, + // there's nothing else to be done here. + schema = nil + return nil, nil + } +Steps: + for _, step := range remain { + // If we filter out all of our bodies before we finish traversing then + // we know we won't find anything else, because all of our subsequent + // traversal steps won't have any bodies to search. 
+ if len(bodies) == 0 { + return nil + } + // If we no longer have a schema then that suggests we've + // traversed as deep as what the schema covers (e.g. we reached + // a specific attribute) and so we'll stop early, assuming that + // any remaining steps are traversals into an attribute expression + // result. + if schema == nil { + break + } + + switch step := step.(type) { + + case hcl.TraverseAttr: + switch { + case steppingThrough != nil: + // If we're stepping through a NestingMap block then + // it's valid to use attribute syntax to select one of + // the blocks by its label. Other nesting types require + // TraverseIndex, so can never be valid. + if steppingThrough.Nesting != configschema.NestingMap { + nextStep(nil, nil) // bail out + continue + } + nextStep(traverseNestedBlockMap(bodies, steppingThroughType, step.Name)) + schema = &steppingThrough.Block + default: + nextStep(traverseInBlock(step.Name)) + if schema == nil { + // traverseInBlock determined that we've traversed as + // deep as we can with reference to schema, so we'll + // stop here and just process whatever's selected. 
+ break Steps + } + } + case hcl.TraverseIndex: + switch { + case steppingThrough != nil: + switch steppingThrough.Nesting { + case configschema.NestingMap: + keyVal, err := convert.Convert(step.Key, cty.String) + if err != nil { // Invalid traversal, so can't have any refs + nextStep(nil, nil) // bail out + continue + } + nextStep(traverseNestedBlockMap(bodies, steppingThroughType, keyVal.AsString())) + schema = &steppingThrough.Block + case configschema.NestingList: + idxVal, err := convert.Convert(step.Key, cty.Number) + if err != nil { // Invalid traversal, so can't have any refs + nextStep(nil, nil) // bail out + continue + } + var idx int + err = gocty.FromCtyValue(idxVal, &idx) + if err != nil { // Invalid traversal, so can't have any refs + nextStep(nil, nil) // bail out + continue + } + nextStep(traverseNestedBlockList(bodies, steppingThroughType, idx)) + schema = &steppingThrough.Block + default: + // Note that NestingSet ends up in here because we don't + // actually allow traversing into set-backed block types, + // and so such a reference would be invalid. + nextStep(nil, nil) // bail out + continue + } + default: + // When indexing the contents of a block directly we always + // interpret the key as a string representing an attribute + // name. + nameVal, err := convert.Convert(step.Key, cty.String) + if err != nil { // Invalid traversal, so can't have any refs + nextStep(nil, nil) // bail out + continue + } + nextStep(traverseInBlock(nameVal.AsString())) + if schema == nil { + // traverseInBlock determined that we've traversed as + // deep as we can with reference to schema, so we'll + // stop here and just process whatever's selected. + break Steps + } + } + default: + // We shouldn't get here, because the above cases are exhaustive + // for all of the relative traversal types, but we'll be robust in + // case HCL adds more in future and just pretend the traversal + // ended a bit early if so. 
+ break Steps + } + } + + if steppingThrough != nil { + // If we ended in the middle of "stepping through" then we'll conservatively + // use the bodies of _all_ nested blocks of the type we were stepping + // through, because the recipient of this value could refer to any + // of them dynamically. + var labelNames []string + if steppingThrough.Nesting == configschema.NestingMap { + labelNames = []string{"key"} + } + blocks := findBlocksInBodies(bodies, steppingThroughType, labelNames) + for _, block := range blocks { + bodies, exprs = blockParts(block) + } + } + + if len(bodies) == 0 && len(exprs) == 0 { + return nil + } + + var refs []*addrs.Reference + for _, expr := range exprs { + moreRefs, _ := lang.ReferencesInExpr(expr) + refs = append(refs, moreRefs...) + } + if schema != nil { + for _, body := range bodies { + moreRefs, _ := lang.ReferencesInBlock(body, schema) + refs = append(refs, moreRefs...) + } + } + return absoluteRefs(addr.Absolute(moduleAddr), refs) +} + +func traverseAttr(bodies []hcl.Body, name string) ([]hcl.Body, []hcl.Expression) { + if len(bodies) == 0 { + return nil, nil + } + schema := &hcl.BodySchema{ + Attributes: []hcl.AttributeSchema{ + {Name: name}, + }, + } + // We can find at most one expression per body, because attribute names + // are always unique within a body. + retExprs := make([]hcl.Expression, 0, len(bodies)) + for _, body := range bodies { + content, _, _ := body.PartialContent(schema) + if attr := content.Attributes[name]; attr != nil && attr.Expr != nil { + retExprs = append(retExprs, attr.Expr) + } + } + return nil, retExprs +} + +func traverseNestedBlockSingle(bodies []hcl.Body, typeName string) ([]hcl.Body, []hcl.Expression) { + if len(bodies) == 0 { + return nil, nil + } + + blocks := findBlocksInBodies(bodies, typeName, nil) + var retBodies []hcl.Body + var retExprs []hcl.Expression + for _, block := range blocks { + moreBodies, moreExprs := blockParts(block) + retBodies = append(retBodies, moreBodies...) 
+ retExprs = append(retExprs, moreExprs...) + } + return retBodies, retExprs +} + +func traverseNestedBlockMap(bodies []hcl.Body, typeName string, key string) ([]hcl.Body, []hcl.Expression) { + if len(bodies) == 0 { + return nil, nil + } + + blocks := findBlocksInBodies(bodies, typeName, []string{"key"}) + var retBodies []hcl.Body + var retExprs []hcl.Expression + for _, block := range blocks { + switch block.Type { + case "dynamic": + // For dynamic blocks we allow the key to be chosen dynamically + // and so we'll just conservatively include all dynamic block + // bodies. However, we need to also look for references in some + // arguments of the dynamic block itself. + argExprs, contentBody := dynamicBlockParts(block.Body) + retExprs = append(retExprs, argExprs...) + if contentBody != nil { + retBodies = append(retBodies, contentBody) + } + case typeName: + if len(block.Labels) == 1 && block.Labels[0] == key && block.Body != nil { + retBodies = append(retBodies, block.Body) + } + } + } + return retBodies, retExprs +} + +func traverseNestedBlockList(bodies []hcl.Body, typeName string, idx int) ([]hcl.Body, []hcl.Expression) { + if len(bodies) == 0 { + return nil, nil + } + + schema := &hcl.BodySchema{ + Blocks: []hcl.BlockHeaderSchema{ + {Type: typeName, LabelNames: nil}, + {Type: "dynamic", LabelNames: []string{"type"}}, + }, + } + var retBodies []hcl.Body + var retExprs []hcl.Expression + for _, body := range bodies { + content, _, _ := body.PartialContent(schema) + blocks := content.Blocks + + // A tricky aspect of this scenario is that if there are any "dynamic" + // blocks then we can't statically predict how many concrete blocks they + // will generate, and so consequently we can't predict the indices of + // any statically-defined blocks that might appear after them. 
+ firstDynamic := -1 // -1 means "no dynamic blocks" + for i, block := range blocks { + if block.Type == "dynamic" { + firstDynamic = i + break + } + } + + switch { + case firstDynamic >= 0 && idx >= firstDynamic: + // This is the unfortunate case where the selection could be + // any of the blocks from firstDynamic onwards, and so we + // need to conservatively include all of them in our result. + for _, block := range blocks[firstDynamic:] { + moreBodies, moreExprs := blockParts(block) + retBodies = append(retBodies, moreBodies...) + retExprs = append(retExprs, moreExprs...) + } + default: + // This is the happier case where we can select just a single + // static block based on idx. Note that this one is guaranteed + // to never be dynamic but we're using blockParts here just + // for consistency. + moreBodies, moreExprs := blockParts(blocks[idx]) + retBodies = append(retBodies, moreBodies...) + retExprs = append(retExprs, moreExprs...) + } + } + + return retBodies, retExprs +} + +func findBlocksInBodies(bodies []hcl.Body, typeName string, labelNames []string) []*hcl.Block { + // We need to look for both static blocks of the given type, and any + // dynamic blocks whose label gives the expected type name. + schema := &hcl.BodySchema{ + Blocks: []hcl.BlockHeaderSchema{ + {Type: typeName, LabelNames: labelNames}, + {Type: "dynamic", LabelNames: []string{"type"}}, + }, + } + var blocks []*hcl.Block + for _, body := range bodies { + // We ignore errors here because we'll just make a best effort to analyze + // whatever partial result HCL returns in that case. + content, _, _ := body.PartialContent(schema) + + for _, block := range content.Blocks { + switch block.Type { + case "dynamic": + if len(block.Labels) != 1 { // Invalid + continue + } + if block.Labels[0] == typeName { + blocks = append(blocks, block) + } + case typeName: + blocks = append(blocks, block) + } + } + } + + // NOTE: The caller still needs to check for dynamic vs. 
static in order + // to do further processing. The callers above all aim to encapsulate + // that. + return blocks +} + +// blockParts returns the nested bodies and argument expressions to analyze +// for a single block. For a "dynamic" block those are its content body (if +// any) and the expressions found by dynamicBlockParts; for any other block +// it is just the block's own body, or nothing if that body is nil. +func blockParts(block *hcl.Block) ([]hcl.Body, []hcl.Expression) { + switch block.Type { + case "dynamic": + exprs, contentBody := dynamicBlockParts(block.Body) + var bodies []hcl.Body + if contentBody != nil { + bodies = []hcl.Body{contentBody} + } + return bodies, exprs + default: + if block.Body == nil { + return nil, nil + } + return []hcl.Body{block.Body}, nil + } +} + +// dynamicBlockParts splits the body of a "dynamic" block into the expressions +// of its for_each and labels arguments (whichever are present, in that order) +// and the body of its nested "content" block, which is nil if there is none. +// If several "content" blocks are present the last one wins. Decoding errors +// are ignored, and a nil body yields nil results. +func dynamicBlockParts(body hcl.Body) ([]hcl.Expression, hcl.Body) { + if body == nil { + return nil, nil + } + + // This is a subset of the "dynamic" block schema defined by the HCL + // dynblock extension, covering only the two arguments that are allowed + // to be arbitrary expressions possibly referring elsewhere. + schema := &hcl.BodySchema{ + Attributes: []hcl.AttributeSchema{ + {Name: "for_each"}, + {Name: "labels"}, + }, + Blocks: []hcl.BlockHeaderSchema{ + {Type: "content"}, + }, + } + content, _, _ := body.PartialContent(schema) + var exprs []hcl.Expression + if len(content.Attributes) != 0 { + exprs = make([]hcl.Expression, 0, len(content.Attributes)) + } + // Look the arguments up by name rather than ranging over the map, so + // that the expressions always come back in the same order. + for _, name := range []string{"for_each", "labels"} { + if attr, ok := content.Attributes[name]; ok && attr != nil && attr.Expr != nil { + exprs = append(exprs, attr.Expr) + } + } + var contentBody hcl.Body + for _, block := range content.Blocks { + if block != nil && block.Type == "content" && block.Body != nil { + contentBody = block.Body + } + } + return exprs, contentBody +} diff --git a/internal/lang/globalref/analyzer_meta_references_shortcuts.go b/internal/lang/globalref/analyzer_meta_references_shortcuts.go new file mode 100644 index 000000000..acfaa904c --- /dev/null +++ b/internal/lang/globalref/analyzer_meta_references_shortcuts.go @@ -0,0 +1,87 @@ +package globalref + +import ( + "fmt" + + "github.com/hashicorp/terraform/internal/addrs" + "github.com/hashicorp/terraform/internal/lang" +) + +// ReferencesFromOutputValue returns all of the direct references from the +//
value expression of the given output value. It doesn't include any indirect +// references. +// +// The result is nil if the configuration has no such module or output value. +func (a *Analyzer) ReferencesFromOutputValue(addr addrs.AbsOutputValue) []Reference { + mc := a.ModuleConfig(addr.Module) + if mc == nil { + return nil + } + oc := mc.Outputs[addr.OutputValue.Name] + if oc == nil { + return nil + } + // Any diagnostics from reference extraction are discarded. + refs, _ := lang.ReferencesInExpr(oc.Expr) + return absoluteRefs(addr.Module, refs) +} + +// ReferencesFromResourceInstance returns all of the direct references from the +// definition of the resource instance at the given address. It doesn't +// include any indirect references. +// +// The result doesn't directly include references from a "count" or "for_each" +// expression belonging to the associated resource, but it will include any +// references to count.index, each.key, or each.value that appear in the +// expressions which you can then, if you wish, resolve indirectly using +// Analyzer.MetaReferences. Alternatively, you can use +// Analyzer.ReferencesFromResourceRepetition to get that same result directly. +func (a *Analyzer) ReferencesFromResourceInstance(addr addrs.AbsResourceInstance) []Reference { + // Using MetaReferences for this is kinda overkill, since + // lang.ReferencesInBlock would be sufficient really, but + // this ensures we keep consistent in how we build the + // resulting absolute references and otherwise aside from + // some extra overhead this call boils down to a call to + // lang.ReferencesInBlock anyway. + fakeRef := Reference{ + ContainerAddr: addr.Module, + LocalRef: &addrs.Reference{ + Subject: addr.Resource, + }, + } + return a.MetaReferences(fakeRef) +} + +// ReferencesFromResourceRepetition returns the references from the given +// resource's for_each or count expression, or an empty set if the resource +// doesn't use repetition.
+// +// This is a special-case sort of helper for use in situations where an +// expression might refer to count.index, each.key, or each.value, and thus +// we say that it depends indirectly on the repetition expression. +func (a *Analyzer) ReferencesFromResourceRepetition(addr addrs.AbsResource) []Reference { + modCfg := a.ModuleConfig(addr.Module) + if modCfg == nil { + return nil + } + rc := modCfg.ResourceByAddr(addr.Resource) + if rc == nil { + return nil + } + + // We're assuming here that resources can either have count or for_each, + // but never both, because that's a requirement enforced by the language + // decoder. But we'll assert it just to make sure we catch it if that + // changes for some reason. + if rc.ForEach != nil && rc.Count != nil { + panic(fmt.Sprintf("%s has both for_each and count", addr)) + } + + switch { + case rc.ForEach != nil: + refs, _ := lang.ReferencesInExpr(rc.ForEach) + return absoluteRefs(addr.Module, refs) + case rc.Count != nil: + refs, _ := lang.ReferencesInExpr(rc.Count) + return absoluteRefs(addr.Module, refs) + default: + return nil + } +} diff --git a/internal/lang/globalref/analyzer_meta_references_test.go b/internal/lang/globalref/analyzer_meta_references_test.go new file mode 100644 index 000000000..340e8760f --- /dev/null +++ b/internal/lang/globalref/analyzer_meta_references_test.go @@ -0,0 +1,163 @@ +package globalref + +import ( + "sort" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/hashicorp/terraform/internal/addrs" +) + +// TestAnalyzerMetaReferences checks Analyzer.MetaReferences against the +// "assorted" fixture, comparing the sorted DebugString form of each result +// with the expected strings. +func TestAnalyzerMetaReferences(t *testing.T) { + tests := []struct { + // InputContainer is the module instance to resolve InputRef in, + // or empty for the root module. + InputContainer string + // InputRef is the module-local reference to analyze. + InputRef string + // WantRefs lists the expected results as Reference.DebugString + // values, in sorted order. + WantRefs []string + }{ + { + ``, + `local.a`, + nil, + }, + { + ``, + `test_thing.single`, + []string{ + "::local.a", + "::local.b", + }, + }, + { + ``, + `test_thing.single.string`, + []string{ + "::local.a", + }, + }, + { + ``, + `test_thing.for_each`, + []string{ + "::local.a", + "::test_thing.single.string", + }, + }, + { + ``, +
`test_thing.for_each["whatever"]`, + []string{ + "::local.a", + "::test_thing.single.string", + }, + }, + { + ``, + `test_thing.for_each["whatever"].single`, + []string{ + "::test_thing.single.string", + }, + }, + { + ``, + `test_thing.for_each["whatever"].single.z`, + []string{ + "::test_thing.single.string", + }, + }, + { + ``, + `test_thing.count`, + []string{ + "::local.a", + }, + }, + { + ``, + `test_thing.count[0]`, + []string{ + "::local.a", + }, + }, + { + ``, + `module.single.a`, + []string{ + "module.single::test_thing.foo", + "module.single::var.a", + }, + }, + { + ``, + `module.for_each["whatever"].a`, + []string{ + `module.for_each["whatever"]::test_thing.foo`, + `module.for_each["whatever"]::var.a`, + }, + }, + { + ``, + `module.count[0].a`, + []string{ + `module.count[0]::test_thing.foo`, + `module.count[0]::var.a`, + }, + }, + { + `module.single`, + `var.a`, + []string{ + "::test_thing.single", + }, + }, + { + `module.single`, + `test_thing.foo`, + []string{ + "module.single::var.a", + }, + }, + } + + azr := testAnalyzer(t, "assorted") + + for _, test := range tests { + name := test.InputRef + if test.InputContainer != "" { + name = test.InputContainer + " " + test.InputRef + } + t.Run(name, func(t *testing.T) { + t.Logf("testing %s", name) + var containerAddr addrs.Targetable + containerAddr = addrs.RootModuleInstance + if test.InputContainer != "" { + moduleAddrTarget, diags := addrs.ParseTargetStr(test.InputContainer) + if diags.HasErrors() { + t.Fatalf("input module address is invalid: %s", diags.Err()) + } + containerAddr = moduleAddrTarget.Subject + } + + localRef, diags := addrs.ParseRefStr(test.InputRef) + if diags.HasErrors() { + t.Fatalf("input reference is invalid: %s", diags.Err()) + } + + ref := Reference{ + ContainerAddr: containerAddr, + LocalRef: localRef, + } + + refs := azr.MetaReferences(ref) + + want := test.WantRefs + var got []string + for _, ref := range refs { + got = append(got, ref.DebugString()) + } + sort.Strings(got) + 
if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("wrong references\n%s", diff) + } + }) + } +} diff --git a/internal/lang/globalref/analyzer_test.go b/internal/lang/globalref/analyzer_test.go new file mode 100644 index 000000000..0a66217e7 --- /dev/null +++ b/internal/lang/globalref/analyzer_test.go @@ -0,0 +1,98 @@ +package globalref + +import ( + "context" + "path/filepath" + "testing" + + "github.com/hashicorp/terraform/internal/addrs" + "github.com/hashicorp/terraform/internal/configs/configload" + "github.com/hashicorp/terraform/internal/configs/configschema" + "github.com/hashicorp/terraform/internal/initwd" + "github.com/hashicorp/terraform/internal/providers" + "github.com/hashicorp/terraform/internal/registry" + "github.com/zclconf/go-cty/cty" +) + +// testAnalyzer builds an Analyzer for the configuration fixture in +// testdata/<fixtureName>, installing its modules first, and with a fixed +// schema for the "test_thing" resource type and data source of the +// hashicorp/test provider. Any installation or loading error fails the test. +func testAnalyzer(t *testing.T, fixtureName string) *Analyzer { + // Attribute failures below to the calling test rather than to this helper. + t.Helper() + + configDir := filepath.Join("testdata", fixtureName) + + loader, cleanup := configload.NewLoaderForTests(t) + defer cleanup() + + inst := initwd.NewModuleInstaller(loader.ModulesDir(), registry.NewClient(nil, nil)) + _, instDiags := inst.InstallModules(context.Background(), configDir, true, initwd.ModuleInstallHooksImpl{}) + if instDiags.HasErrors() { + t.Fatalf("unexpected module installation errors: %s", instDiags.Err().Error()) + } + if err := loader.RefreshModules(); err != nil { + t.Fatalf("failed to refresh modules after install: %s", err) + } + + cfg, loadDiags := loader.LoadConfig(configDir) + if loadDiags.HasErrors() { + t.Fatalf("unexpected configuration errors: %s", loadDiags.Error()) + } + + resourceTypeSchema := &configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "string": {Type: cty.String, Optional: true}, + "number": {Type: cty.Number, Optional: true}, + "any": {Type: cty.DynamicPseudoType, Optional: true}, + }, + BlockTypes: map[string]*configschema.NestedBlock{ + "single": { + Nesting: configschema.NestingSingle, + Block: configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "z":
{Type: cty.String, Optional: true}, + }, + }, + }, + "group": { + Nesting: configschema.NestingGroup, + Block: configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "z": {Type: cty.String, Optional: true}, + }, + }, + }, + "list": { + Nesting: configschema.NestingList, + Block: configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "z": {Type: cty.String, Optional: true}, + }, + }, + }, + "map": { + Nesting: configschema.NestingMap, + Block: configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "z": {Type: cty.String, Optional: true}, + }, + }, + }, + "set": { + Nesting: configschema.NestingSet, + Block: configschema.Block{ + Attributes: map[string]*configschema.Attribute{ + "z": {Type: cty.String, Optional: true}, + }, + }, + }, + }, + } + schemas := map[addrs.Provider]*providers.Schemas{ + addrs.MustParseProviderSourceString("hashicorp/test"): { + ResourceTypes: map[string]*configschema.Block{ + "test_thing": resourceTypeSchema, + }, + DataSources: map[string]*configschema.Block{ + "test_thing": resourceTypeSchema, + }, + }, + } + + return NewAnalyzer(cfg, schemas) +} diff --git a/internal/lang/globalref/doc.go b/internal/lang/globalref/doc.go new file mode 100644 index 000000000..133a9e7f2 --- /dev/null +++ b/internal/lang/globalref/doc.go @@ -0,0 +1,9 @@ +// Package globalref is home to some analysis algorithms that aim to answer +// questions about references between objects and object attributes across +// an entire configuration. +// +// This is a different problem than references within a single module, which +// we handle using some relatively simpler functions in the "lang" package +// in the parent directory. The globalref algorithms are often implemented +// in terms of those module-local reference-checking functions. 
+package globalref diff --git a/internal/lang/globalref/reference.go b/internal/lang/globalref/reference.go new file mode 100644 index 000000000..71920c304 --- /dev/null +++ b/internal/lang/globalref/reference.go @@ -0,0 +1,136 @@ +package globalref + +import ( + "fmt" + + "github.com/hashicorp/terraform/internal/addrs" +) + +// Reference combines an addrs.Reference with the address of the module +// instance or resource instance where it was found. +// +// Because of the design of the Terraform language, our main model of +// references only captures the module-local part of the reference and assumes +// that it's always clear from context which module a reference belongs to. +// That's not true for globalref because our whole purpose is to work across +// module boundaries, and so this package in particular has its own +// representation of references. +type Reference struct { + // ContainerAddr is always either addrs.ModuleInstance or + // addrs.AbsResourceInstance. The latter is required if LocalRef's + // subject is either an addrs.CountAttr or addrs.ForEachAttr, so + // we can know which resource's repetition expression it's + // referring to. + ContainerAddr addrs.Targetable + + // LocalRef is a reference that would be resolved in the context + // of the module instance or resource instance given in ContainerAddr. + LocalRef *addrs.Reference +} + +// absoluteRef pairs localRef with containerAddr to form a Reference. Unless +// localRef's subject is an addrs.CountAttr or addrs.ForEachAttr, the +// container address is reduced to just its module instance using +// Reference.ModuleAddr, which panics for unsupported container address types. +func absoluteRef(containerAddr addrs.Targetable, localRef *addrs.Reference) Reference { + ret := Reference{ + ContainerAddr: containerAddr, + LocalRef: localRef, + } + // For simplicity's sake, we always reduce the ContainerAddr to be + // just the module address unless it's a count.index, each.key, or + // each.value reference, because for anything else it's immaterial + // which resource it belongs to.
+ switch localRef.Subject.(type) { + case addrs.CountAttr, addrs.ForEachAttr: + // Nothing to do: keep the container address exactly as given. + default: + ret.ContainerAddr = ret.ModuleAddr() + } + return ret +} + +// absoluteRefs applies absoluteRef to each of the given references, using the +// same container address for all of them. The result has one element per +// input reference, in the same order, or is nil if refs is empty. +func absoluteRefs(containerAddr addrs.Targetable, refs []*addrs.Reference) []Reference { + if len(refs) == 0 { + return nil + } + + ret := make([]Reference, len(refs)) + for i, ref := range refs { + ret[i] = absoluteRef(containerAddr, ref) + } + return ret +} + +// ModuleAddr returns the address of the module where the reference would +// be resolved. +// +// This is either ContainerAddr directly if it's already just a module +// instance, or the module instance part of it if it's a resource instance. +// It panics if ContainerAddr has any other type. +func (r Reference) ModuleAddr() addrs.ModuleInstance { + switch addr := r.ContainerAddr.(type) { + case addrs.ModuleInstance: + return addr + case addrs.AbsResourceInstance: + return addr.Module + default: + // NOTE: We're intentionally using only a subset of possible + // addrs.Targetable implementations here, so anything else + // is invalid. + panic(fmt.Sprintf("reference has invalid container address type %T", addr)) + } +} + +// ResourceAddr returns the address of the resource where the reference +// would be resolved, if there is one. +// +// Because not all references belong to resources, the extra boolean return +// value indicates whether the returned address is valid. It panics if +// ContainerAddr is neither a module instance nor a resource instance. +func (r Reference) ResourceAddr() (addrs.AbsResource, bool) { + switch addr := r.ContainerAddr.(type) { + case addrs.ModuleInstance: + return addrs.AbsResource{}, false + case addrs.AbsResourceInstance: + return addr.ContainingResource(), true + default: + // NOTE: We're intentionally using only a subset of possible + // addrs.Targetable implementations here, so anything else + // is invalid.
+ panic(fmt.Sprintf("reference has invalid container address type %T", addr)) + } +} + +// DebugString returns an internal (but still somewhat Terraform-language-like) +// compact string representation of the receiver, which isn't an address that +// any of our usual address parsers could accept but still captures the +// essence of what the reference represents. +// +// The DebugString result is not suitable for end-user-oriented messages. +// +// DebugString is also not suitable for use as a unique key for a reference, +// because it's ambiguous (between a no-key resource instance and a resource) +// and because it discards the source location information in the LocalRef. +func (r Reference) DebugString() string { + // As the doc comment insinuates, we don't have any real syntax for + // "absolute references": references are always local, and targets are + // always absolute but only include modules and resources. + return r.ContainerAddr.String() + "::" + r.LocalRef.DisplayString() +} + +// addrKey returns the referenceAddrKey value for the item that +// this reference refers to, discarding any source location information. +// +// See the referenceAddrKey doc comment for more information on what this +// is suitable for. +func (r Reference) addrKey() referenceAddrKey { + // This is a pretty arbitrary bunch of stuff. We include the type here + // just to differentiate between no-key resource instances and resources. + return referenceAddrKey(fmt.Sprintf("%s(%T)%s", r.ContainerAddr.String(), r.LocalRef.Subject, r.LocalRef.DisplayString())) +} + +// referenceAddrKey is a special string type which conventionally contains +// a unique string representation of the object that a reference refers to, +// although not of the reference itself because it ignores the information +// that would differentiate two different references to the same object. +// +// The actual content of a referenceAddrKey is arbitrary, for internal use +// only, and subject to change in future.
We use a named type here only to +// make it easier to see when we're intentionally using strings to uniquely +// identify absolute reference addresses. +type referenceAddrKey string diff --git a/internal/lang/globalref/testdata/assorted/assorted-root.tf b/internal/lang/globalref/testdata/assorted/assorted-root.tf new file mode 100644 index 000000000..d61297232 --- /dev/null +++ b/internal/lang/globalref/testdata/assorted/assorted-root.tf @@ -0,0 +1,47 @@ + +locals { + a = "hello world" + b = 2 +} + +resource "test_thing" "single" { + string = local.a + number = local.b + +} + +resource "test_thing" "for_each" { + for_each = {"q": local.a} + + string = local.a + + single { + z = test_thing.single.string + } +} + +resource "test_thing" "count" { + # This resource exercises "count" repetition (instances are addressed as + # test_thing.count[0]), and length() yields a number, so the argument + # must be count rather than for_each. + count = length(local.a) + + string = local.a +} + +module "single" { + source = "./child" + + a = test_thing.single +} + +module "for_each" { + source = "./child" + for_each = {"q": test_thing.single} + + a = test_thing.single +} + +module "count" { + source = "./child" + count = length(test_thing.single.string) + + a = test_thing.single +} diff --git a/internal/lang/globalref/testdata/assorted/child/assorted-child.tf b/internal/lang/globalref/testdata/assorted/child/assorted-child.tf new file mode 100644 index 000000000..e722fe8e1 --- /dev/null +++ b/internal/lang/globalref/testdata/assorted/child/assorted-child.tf @@ -0,0 +1,13 @@ +variable "a" { +} + +resource "test_thing" "foo" { + string = var.a +} + +output "a" { + value = { + a = var.a + foo = test_thing.foo + } +} diff --git a/internal/lang/globalref/testdata/contributing-resources/compute/contributing-resources-compute.tf b/internal/lang/globalref/testdata/contributing-resources/compute/contributing-resources-compute.tf new file mode 100644 index 000000000..c83daffe8 --- /dev/null +++ b/internal/lang/globalref/testdata/contributing-resources/compute/contributing-resources-compute.tf @@ -0,0 +1,51 @@ +variable "network" { + type = object({ + vpc_id = string +
subnet_ids = map(string) + }) +} + +resource "test_thing" "controller" { + for_each = var.network.subnet_ids + + string = each.value +} + +# NOTE(review): var.network_subnet_ids below is not declared in this module +# (only var.network is), so this was presumably meant to be +# var.network.subnet_ids. Confirm against the expected analysis results for +# this fixture before changing it, because doing so alters the reference graph. +locals { + workers = flatten([ + for k, id in var.network_subnet_ids : [ + for n in range(3) : { + unique_key = "${k}:${n}" + subnet_id = n + } + ] + ]) +} + +resource "test_thing" "worker" { + for_each = { for o in local.workers : o.unique_key => o.subnet_id } + + string = each.value + + dynamic "list" { + for_each = test_thing.controller + content { + z = list.value.string + } + } +} + +resource "test_thing" "load_balancer" { + string = var.network.vpc_id + + dynamic "list" { + for_each = test_thing.controller + content { + z = list.value.string + } + } +} + +output "compuneetees_api_url" { + value = test_thing.load_balancer.string +} diff --git a/internal/lang/globalref/testdata/contributing-resources/contributing-resources-root.tf b/internal/lang/globalref/testdata/contributing-resources/contributing-resources-root.tf new file mode 100644 index 000000000..d6ec5c481 --- /dev/null +++ b/internal/lang/globalref/testdata/contributing-resources/contributing-resources-root.tf @@ -0,0 +1,28 @@ +variable "environment" { + type = string +} + +data "test_thing" "environment" { + string = var.environment +} + +module "network" { + source = "./network" + + base_cidr_block = data.test_thing.environment.any.base_cidr_block + subnet_count = data.test_thing.environment.any.subnet_count +} + +module "compute" { + source = "./compute" + + network = module.network +} + +output "network" { + value = module.network +} + +output "c10s_url" { + value = module.compute.compuneetees_api_url +} diff --git a/internal/lang/globalref/testdata/contributing-resources/network/contributing-resources-network.tf
b/internal/lang/globalref/testdata/contributing-resources/network/contributing-resources-network.tf @@ -0,0 +1,41 @@ +variable "base_cidr_block" { + type = string +} + +variable "subnet_count" { + type = number +} + +locals { + subnet_newbits = log(var.subnet_count, 2) + subnet_cidr_blocks = toset([ + for n in range(var.subnet_count) : cidrsubnet(var.base_cidr_block, local.subnet_newbits, n) + ]) +} + +resource "test_thing" "vpc" { + string = var.base_cidr_block +} + +resource "test_thing" "subnet" { + for_each = local.subnet_cidr_blocks + + string = test_thing.vpc.string + single { + z = each.value + } +} + +resource "test_thing" "route_table" { + for_each = local.subnet_cidr_blocks + + string = each.value +} + +output "vpc_id" { + value = test_thing.vpc.string +} + +output "subnet_ids" { + value = { for k, sn in test_thing.subnet : k => sn.string } +}