From 86f0b5191c9083569e75f572eefd0920f497dab1 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Fri, 14 Feb 2020 18:10:03 -0800 Subject: [PATCH] addrs: Stronger validation and normalization of provider namespace/type The provider FQN is becoming our primary identifier for a provider, so it's important that we are clear about the equality rules for these addresses and what characters are valid within them. We previously had a basic regex permitting ASCII letters and digits for validation and no normalization at all. We need to do at least case folding and UTF-8 normalization because these names will appear in file and directory names in case-insensitive filesystems and in repository names such as on GitHub. Since we're already using DNS-style normalization and validation rules for the hostname part, rather than defining an entirely new set of rules here we'll just treat the provider namespace and type as if they were single labels in a DNS name. Aside from some internal consistency, that also works out nicely because systems like GitHub use organization and repository names as part of hostnames (e.g. with GitHub Pages) and so tend to apply comparable constraints themselves. This introduces the possibility of names containing letters from alphabets other than the latin alphabet, and for latin letters with diacritics. That's consistent with our introduction of similar support for identifiers in the language in Terraform 0.12, and is intended to be more friendly to Terraform users throughout the world that might prefer to name their products using a different alphabet. This is also a further justification for using the DNS normalization rules: modern companies tend to choose product names that make good domain names, and now such names will be usable as Terraform provider names too. --- addrs/provider.go | 188 +++++++++++++++--- addrs/provider_test.go | 178 +++++++++++++++++ configs/config_test.go | 4 +- configs/module_test.go | 8 +- .../testdata/providers-explicit-fqn/root.tf | 2 +- internal/earlyconfig/config.go | 22 +- terraform/module_dependencies.go | 9 +- 7 files changed, 369 insertions(+), 42 deletions(-) diff --git a/addrs/provider.go b/addrs/provider.go index 5f1960c5f..617596436 100644 --- a/addrs/provider.go +++ b/addrs/provider.go @@ -2,9 +2,10 @@ package addrs import ( "fmt" - "regexp" "strings" + "golang.org/x/net/idna" + "github.com/hashicorp/hcl/v2" svchost "github.com/hashicorp/terraform-svchost" "github.com/hashicorp/terraform/tfdiags" @@ -18,22 +19,56 @@ type Provider struct { Hostname svchost.Hostname } -const DefaultRegistryHost = "registry.terraform.io" +// DefaultRegistryHost is the hostname used for provider addresses that do +// not have an explicit hostname. +const DefaultRegistryHost = svchost.Hostname("registry.terraform.io") -var ( - ValidProviderName = regexp.MustCompile("^[a-zA-Z0-9_-]+$") -) +// LegacyProviderNamespace is the special string used in the Namespace field +// of type Provider to mark a legacy provider address. This special namespace +// value would normally be invalid, and can be used only when the hostname is +// DefaultRegistryHost because that host owns the mapping from legacy name to +// FQN. +const LegacyProviderNamespace = "-" // String returns an FQN string, indended for use in output. func (pt Provider) String() string { + if pt.IsZero() { + panic("called String on zero-value addrs.Provider") + } return pt.Hostname.ForDisplay() + "/" + pt.Namespace + "/" + pt.Type } +// NewProvider constructs a provider address from its parts, and normalizes +// the namespace and type parts to lowercase using unicode case folding rules +// so that resulting addrs.Provider values can be compared using standard +// Go equality rules (==). +// +// The hostname is given as a svchost.Hostname, which is required by the +// contract of that type to have already been normalized for equality testing. +// +// This function will panic if the given namespace or type name are not valid. +// When accepting namespace or type values from outside the program, use +// ParseProviderPart first to check that the given value is valid. +func NewProvider(hostname svchost.Hostname, namespace, typeName string) Provider { + if namespace == LegacyProviderNamespace { + // Legacy provider addresses must always be created via + // NewLegacyProvider so that we can use static analysis to find + // codepaths still working with those. + panic("attempt to create legacy provider address using NewProvider; use NewLegacyProvider instead") + } + + return Provider{ + Type: MustParseProviderPart(typeName), + Namespace: MustParseProviderPart(namespace), + Hostname: hostname, + } +} + // NewDefaultProvider returns the default address of a HashiCorp-maintained, // Registry-hosted provider. func NewDefaultProvider(name string) Provider { return Provider{ - Type: name, + Type: MustParseProviderPart(name), Namespace: "hashicorp", Hostname: DefaultRegistryHost, } @@ -42,14 +77,12 @@ func NewDefaultProvider(name string) Provider { // NewLegacyProvider returns a mock address for a provider. // This will be removed when ProviderType is fully integrated. func NewLegacyProvider(name string) Provider { - // This is intended to catch provider names with aliases, such as "null.foo" - if !ValidProviderName.MatchString(name) { - panic("invalid provider name") - } - return Provider{ + // We intentionally don't normalize and validate the legacy names, + // because existing code expects legacy provider names to pass through + // verbatim, even if not compliant with our new naming rules. Type: name, - Namespace: "-", + Namespace: LegacyProviderNamespace, Hostname: DefaultRegistryHost, } } @@ -59,12 +92,23 @@ func NewLegacyProvider(name string) Provider { // when provider type is fully integrated. As a safeguard for future // refactoring, this function panics if the Provider is not a legacy provider. func (pt Provider) LegacyString() string { - if pt.Namespace != "-" { - panic("not a legacy Provider") + if pt.IsZero() { + panic("called LegacyString on zero-value addrs.Provider") + } + if pt.Namespace != LegacyProviderNamespace { + panic(pt.String() + " is not a legacy addrs.Provider") } return pt.Type } +// IsZero returns true if the receiver is the zero value of addrs.Provider. +// +// The zero value is not a valid addrs.Provider and calling other methods on +// such a value is likely to either panic or otherwise misbehave. +func (pt Provider) IsZero() bool { + return pt == Provider{} +} + // ParseProviderSourceString parses the source attribute and returns a provider. // This is intended primarily to parse the FQN-like strings returned by // terraform-config-inspect. @@ -101,12 +145,13 @@ func ParseProviderSourceString(str string) (Provider, tfdiags.Diagnostics) { } // check the 'name' portion, which is always the last part - name := parts[len(parts)-1] - if !ValidProviderName.MatchString(name) { + givenName := parts[len(parts)-1] + name, err := ParseProviderPart(givenName) + if err != nil { diags = diags.Append(&hcl.Diagnostic{ Severity: hcl.DiagError, Summary: "Invalid provider type", - Detail: fmt.Sprintf(`Invalid provider type %q in source %q: must be a provider type name"`, name, str), + Detail: fmt.Sprintf(`Invalid provider type %q in source %q: %s"`, name, str, err), }) return ret, diags } @@ -120,16 +165,24 @@ func ParseProviderSourceString(str string) (Provider, tfdiags.Diagnostics) { if len(parts) >= 2 { // the namespace is always the second-to-last part - namespace := parts[len(parts)-2] - if !ValidProviderName.MatchString(namespace) { - diags = diags.Append(&hcl.Diagnostic{ - Severity: hcl.DiagError, - Summary: "Invalid provider namespace", - Detail: fmt.Sprintf(`Invalid provider namespace %q in source %q: must be a valid Registry Namespace"`, namespace, str), - }) - return Provider{}, diags + givenNamespace := parts[len(parts)-2] + if givenNamespace == LegacyProviderNamespace { + // For now we're tolerating legacy provider addresses until we've + // finished updating the rest of the codebase to no longer use them, + // or else we'd get errors round-tripping through legacy subsystems. + ret.Namespace = LegacyProviderNamespace + } else { + namespace, err := ParseProviderPart(givenNamespace) + if err != nil { + diags = diags.Append(&hcl.Diagnostic{ + Severity: hcl.DiagError, + Summary: "Invalid provider namespace", + Detail: fmt.Sprintf(`Invalid provider namespace %q in source %q: %s"`, namespace, str, err), + }) + return Provider{}, diags + } + ret.Namespace = namespace } - ret.Namespace = namespace } // Final Case: 3 parts @@ -140,12 +193,93 @@ func ParseProviderSourceString(str string) (Provider, tfdiags.Diagnostics) { diags = diags.Append(&hcl.Diagnostic{ Severity: hcl.DiagError, Summary: "Invalid provider source hostname", - Detail: fmt.Sprintf(`Invalid provider source hostname namespace %q in source %q: must be a valid Registry Namespace"`, hn, str), + Detail: fmt.Sprintf(`Invalid provider source hostname namespace %q in source %q: %s"`, hn, str, err), }) return Provider{}, diags } ret.Hostname = hn } + if ret.Namespace == LegacyProviderNamespace && ret.Hostname != DefaultRegistryHost { + // Legacy provider addresses must always be on the default registry + // host, because the default registry host decides what actual FQN + // each one maps to. + diags = diags.Append(&hcl.Diagnostic{ + Severity: hcl.DiagError, + Summary: "Invalid provider namespace", + Detail: "The legacy provider namespace \"-\" can be used only with hostname " + DefaultRegistryHost.ForDisplay() + ".", + }) + return Provider{}, diags + } + return ret, diags } + +// ParseProviderPart processes an addrs.Provider namespace or type string +// provided by an end-user, producing a normalized version if possible or +// an error if the string contains invalid characters. +// +// A provider part is processed in the same way as an individual label in a DNS +// domain name: it is transformed to lowercase per the usual DNS case mapping +// and normalization rules and may contain only letters, digits, and dashes. +// Additionally, dashes may not appear at the start or end of the string. +// +// These restrictions are intended to allow these names to appear in fussy +// contexts such as directory/file names on case-insensitive filesystems, +// repository names on GitHub, etc. We're using the DNS rules in particular, +// rather than some similar rules defined locally, because the hostname part +// of an addrs.Provider is already a hostname and it's ideal to use exactly +// the same case folding and normalization rules for all of the parts. +// +// In practice a provider type string conventionally does not contain dashes +// either. Such names are permitted, but providers with such type names will be +// hard to use because their resource type names will not be able to contain +// the provider type name and thus each resource will need an explicit provider +// address specified. (A real-world example of such a provider is the +// "google-beta" variant of the GCP provider, which has resource types that +// start with the "google_" prefix instead.) +// +// It's valid to pass the result of this function as the argument to a +// subsequent call, in which case the result will be identical. +func ParseProviderPart(given string) (string, error) { + if len(given) == 0 { + return "", fmt.Errorf("must have at least one character") + } + + // We're going to process the given name using the same "IDNA" library we + // use for the hostname portion, since it already implements the case + // folding rules we want. + // + // The idna library doesn't expose individual label parsing directly, but + // once we've verified it doesn't contain any dots we can just treat it + // like a top-level domain for this library's purposes. + if strings.ContainsRune(given, '.') { + return "", fmt.Errorf("dots are not allowed") + } + + // We don't allow names containing multiple consecutive dashes, just as + // a matter of preference: they look weird, confusing, or incorrect. + // This also, as a side-effect, prevents the use of the "punycode" + // indicator prefix "xn--" that would cause the IDNA library to interpret + // the given name as punycode, because that would be weird and unexpected. + if strings.Contains(given, "--") { + return "", fmt.Errorf("cannot use multiple consecutive dashes") + } + + result, err := idna.Lookup.ToUnicode(given) + if err != nil { + return "", fmt.Errorf("must contain only letters, digits, and dashes, and may not use leading or trailing dashes") + } + + return result, nil +} + +// MustParseProviderPart is a wrapper around ParseProviderPart that panics if +// it returns an error. +func MustParseProviderPart(given string) string { + result, err := ParseProviderPart(given) + if err != nil { + panic(err.Error()) + } + return result +} diff --git a/addrs/provider_test.go b/addrs/provider_test.go index 50a4b9cd1..ab4b0864d 100644 --- a/addrs/provider_test.go +++ b/addrs/provider_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/go-test/deep" + svchost "github.com/hashicorp/terraform-svchost" ) func TestParseProviderSourceStr(t *testing.T) { @@ -19,6 +20,14 @@ func TestParseProviderSourceStr(t *testing.T) { }, false, }, + "registry.Terraform.io/HashiCorp/AWS": { + Provider{ + Type: "aws", + Namespace: "hashicorp", + Hostname: DefaultRegistryHost, + }, + false, + }, "hashicorp/aws": { Provider{ Type: "aws", @@ -27,6 +36,14 @@ func TestParseProviderSourceStr(t *testing.T) { }, false, }, + "HashiCorp/AWS": { + Provider{ + Type: "aws", + Namespace: "hashicorp", + Hostname: DefaultRegistryHost, + }, + false, + }, "aws": { Provider{ Type: "aws", @@ -35,6 +52,43 @@ func TestParseProviderSourceStr(t *testing.T) { }, false, }, + "AWS": { + Provider{ + // No case folding here because we're currently handling this + // as a legacy one. When this changes to be a _default_ + // address in future (registry.terraform.io/hashicorp/aws) + // then we should start applying case folding to it, making + // Type appear as "aws" here instead. + Type: "AWS", + Namespace: "-", + Hostname: DefaultRegistryHost, + }, + false, + }, + "example.com/foo-bar/baz-boop": { + Provider{ + Type: "baz-boop", + Namespace: "foo-bar", + Hostname: svchost.Hostname("example.com"), + }, + false, + }, + "foo-bar/baz-boop": { + Provider{ + Type: "baz-boop", + Namespace: "foo-bar", + Hostname: DefaultRegistryHost, + }, + false, + }, + "localhost:8080/foo/bar": { + Provider{ + Type: "bar", + Namespace: "foo", + Hostname: svchost.Hostname("localhost:8080"), + }, + false, + }, "example.com/too/many/parts/here": { Provider{}, true, @@ -47,6 +101,50 @@ func TestParseProviderSourceStr(t *testing.T) { Provider{}, true, }, + "badhost!/hashicorp/aws": { + Provider{}, + true, + }, + "example.com/badnamespace!/aws": { + Provider{}, + true, + }, + "example.com/bad--namespace/aws": { + Provider{}, + true, + }, + "example.com/-badnamespace/aws": { + Provider{}, + true, + }, + "example.com/badnamespace-/aws": { + Provider{}, + true, + }, + "example.com/bad.namespace/aws": { + Provider{}, + true, + }, + "example.com/hashicorp/badtype!": { + Provider{}, + true, + }, + "example.com/hashicorp/bad--type": { + Provider{}, + true, + }, + "example.com/hashicorp/-badtype": { + Provider{}, + true, + }, + "example.com/hashicorp/badtype-": { + Provider{}, + true, + }, + "example.com/hashicorp/bad.type": { + Provider{}, + true, + }, } for name, test := range tests { @@ -65,3 +163,83 @@ func TestParseProviderSourceStr(t *testing.T) { } } } + +func TestParseProviderPart(t *testing.T) { + tests := map[string]struct { + Want string + Error string + }{ + `foo`: { + `foo`, + ``, + }, + `FOO`: { + `foo`, + ``, + }, + `Foo`: { + `foo`, + ``, + }, + `abc-123`: { + `abc-123`, + ``, + }, + `Испытание`: { + `испытание`, + ``, + }, + `münchen`: { // this is a precomposed u with diaeresis + `münchen`, // this is a precomposed u with diaeresis + ``, + }, + `münchen`: { // this is a separate u and combining diaeresis + `münchen`, // this is a precomposed u with diaeresis + ``, + }, + `abc--123`: { + ``, + `cannot use multiple consecutive dashes`, + }, + `xn--80akhbyknj4f`: { // this is the punycode form of "испытание", but we don't accept punycode here + ``, + `cannot use multiple consecutive dashes`, + }, + `abc.123`: { + ``, + `dots are not allowed`, + }, + `-abc123`: { + ``, + `must contain only letters, digits, and dashes, and may not use leading or trailing dashes`, + }, + `abc123-`: { + ``, + `must contain only letters, digits, and dashes, and may not use leading or trailing dashes`, + }, + ``: { + ``, + `must have at least one character`, + }, + } + + for given, test := range tests { + t.Run(given, func(t *testing.T) { + got, err := ParseProviderPart(given) + if test.Error != "" { + if err == nil { + t.Errorf("unexpected success\ngot: %s\nwant: %s", err, test.Error) + } else if got := err.Error(); got != test.Error { + t.Errorf("wrong error\ngot: %s\nwant: %s", got, test.Error) + } + } else { + if err != nil { + t.Errorf("unexpected error\ngot: %s\nwant: ", err) + } else if got != test.Want { + t.Errorf("wrong result\ngot: %s\nwant: %s", got, test.Want) + } + } + }) + } + +} diff --git a/configs/config_test.go b/configs/config_test.go index 0fb204a03..012e44fc6 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -75,7 +75,7 @@ func TestConfigResolveAbsProviderAddr(t *testing.T) { }) t.Run("local, explicit mapping", func(t *testing.T) { addr := addrs.LocalProviderConfig{ - LocalName: "foo_test", // this is explicitly set in the config + LocalName: "foo-test", // this is explicitly set in the config Alias: "boop", } got := cfg.ResolveAbsProviderAddr(addr, addrs.RootModuleInstance) @@ -87,7 +87,7 @@ func TestConfigResolveAbsProviderAddr(t *testing.T) { // once we are fully supporting this we should expect to see // the "registry.terraform.io/foo/test" FQN here, while still // preserving the "boop" alias. - Provider: addrs.NewLegacyProvider("foo_test"), + Provider: addrs.NewLegacyProvider("foo-test"), Alias: "boop", } if got, want := got.String(), want.String(); got != want { diff --git a/configs/module_test.go b/configs/module_test.go index 4e58db4f5..1898bdfe2 100644 --- a/configs/module_test.go +++ b/configs/module_test.go @@ -17,18 +17,18 @@ func TestNewModule_provider_local_name(t *testing.T) { // currently assumes everything is a legacy provider and the localname and // type match. This test will be updated when provider source is fully // implemented. - p := addrs.NewLegacyProvider("foo_test") + p := addrs.NewLegacyProvider("foo-test") if name, exists := mod.ProviderLocalNames[p]; !exists { t.Fatal("provider FQN foo/test not found") } else { - if name != "foo_test" { - t.Fatalf("provider localname mismatch: got %s, want foo_test", name) + if name != "foo-test" { + t.Fatalf("provider localname mismatch: got %s, want foo-test", name) } } // ensure the reverse lookup (fqn to local name) works as well localName := mod.LocalNameForProvider(p) - if localName != "foo_test" { + if localName != "foo-test" { t.Fatal("provider local name not found") } } diff --git a/configs/testdata/providers-explicit-fqn/root.tf b/configs/testdata/providers-explicit-fqn/root.tf index e4ec41161..153222ce3 100644 --- a/configs/testdata/providers-explicit-fqn/root.tf +++ b/configs/testdata/providers-explicit-fqn/root.tf @@ -1,7 +1,7 @@ terraform { required_providers { - foo_test = { + foo-test = { source = "foo/test" } } diff --git a/internal/earlyconfig/config.go b/internal/earlyconfig/config.go index 48932b5cd..c0aa58fe2 100644 --- a/internal/earlyconfig/config.go +++ b/internal/earlyconfig/config.go @@ -84,13 +84,21 @@ func (c *Config) ProviderDependencies() (*moduledeps.Module, tfdiags.Diagnostics providers := make(moduledeps.Providers) for name, reqs := range c.Module.RequiredProviders { - fqn, diags := addrs.ParseProviderSourceString(name) - if diags.HasErrors() { - diags = diags.Append(wrapDiagnostic(tfconfig.Diagnostic{ - Severity: tfconfig.DiagError, - Summary: "Invalid provider source", - Detail: fmt.Sprintf("Invalid source %q for provider", name), - })) + var fqn addrs.Provider + if source := reqs.Source; source != "" { + addr, diags := addrs.ParseProviderSourceString(source) + if diags.HasErrors() { + diags = diags.Append(wrapDiagnostic(tfconfig.Diagnostic{ + Severity: tfconfig.DiagError, + Summary: "Invalid provider source", + Detail: fmt.Sprintf("Invalid source %q for provider", name), + })) + continue + } + fqn = addr + } + if fqn.IsZero() { + fqn = addrs.NewLegacyProvider(name) } var constraints version.Constraints for _, reqStr := range reqs.VersionConstraints { diff --git a/terraform/module_dependencies.go b/terraform/module_dependencies.go index 2e590a372..e10a6f1d1 100644 --- a/terraform/module_dependencies.go +++ b/terraform/module_dependencies.go @@ -58,7 +58,7 @@ func configTreeConfigDependencies(root *configs.Config, inheritProviders map[str // The main way to declare a provider dependency is explicitly inside // the "terraform" block, which allows declaring a requirement without // also creating a configuration. - for _, req := range module.ProviderRequirements { + for localName, req := range module.ProviderRequirements { // The handling here is a bit fiddly because the moduledeps package // was designed around the legacy (pre-0.12) configuration model // and hasn't yet been revised to handle the new model. As a result, @@ -71,6 +71,10 @@ func configTreeConfigDependencies(root *configs.Config, inheritProviders map[str rawConstraints = append(rawConstraints, constraint.Required...) } discoConstraints := discovery.NewConstraints(rawConstraints) + fqn := req.Type + if fqn.IsZero() { + fqn = addrs.NewLegacyProvider(localName) + } providers[req.Type] = moduledeps.ProviderDependency{ Constraints: discoConstraints, @@ -83,6 +87,7 @@ func configTreeConfigDependencies(root *configs.Config, inheritProviders map[str // configuration and a constraint are defined in the same module. for _, pCfg := range module.ProviderConfigs { fqn := module.ProviderForLocalConfig(pCfg.Addr()) + discoConstraints := discovery.AllVersions if pCfg.Version.Required != nil { discoConstraints = discovery.NewConstraints(pCfg.Version.Required) @@ -107,6 +112,7 @@ func configTreeConfigDependencies(root *configs.Config, inheritProviders map[str for _, rc := range module.ManagedResources { addr := rc.ProviderConfigAddr() fqn := module.ProviderForLocalConfig(addr) + if _, exists := providers[fqn]; exists { // Explicit dependency already present continue @@ -125,6 +131,7 @@ func configTreeConfigDependencies(root *configs.Config, inheritProviders map[str for _, rc := range module.DataResources { addr := rc.ProviderConfigAddr() fqn := module.ProviderForLocalConfig(addr) + if _, exists := providers[fqn]; exists { // Explicit dependency already present continue