package addrs import ( "fmt" "path" "regexp" "strings" svchost "github.com/hashicorp/terraform-svchost" "github.com/hashicorp/terraform/internal/getmodules" ) // ModuleSource is the general type for all three of the possible module source // address types. The concrete implementations of this are ModuleSourceLocal, // ModuleSourceRegistry, and ModuleSourceRemote. type ModuleSource interface { // String returns a full representation of the address, including any // additional components that are typically implied by omission in // user-written addresses. // // We typically use this longer representation in error message, in case // the inclusion of normally-omitted components is helpful in debugging // unexpected behavior. String() string // ForDisplay is similar to String but instead returns a representation of // the idiomatic way to write the address in configuration, omitting // components that are commonly just implied in addresses written by // users. // // We typically use this shorter representation in informational messages, // such as the note that we're about to start downloading a package. ForDisplay() string moduleSource() } var _ ModuleSource = ModuleSourceLocal("") var _ ModuleSource = ModuleSourceRegistry{} var _ ModuleSource = ModuleSourceRemote{} var moduleSourceLocalPrefixes = []string{ "./", "../", ".\\", "..\\", } // ParseModuleSource parses a module source address as given in the "source" // argument inside a "module" block in the configuration. // // For historical reasons this syntax is a bit overloaded, supporting three // different address types: // - Local paths starting with either ./ or ../, which are special because // Terraform considers them to belong to the same "package" as the caller. // - Module registry addresses, given as either NAMESPACE/NAME/SYSTEM or // HOST/NAMESPACE/NAME/SYSTEM, in which case the remote registry serves // as an indirection over the third address type that follows. // - Various URL-like and other heuristically-recognized strings which // we currently delegate to the external library go-getter. // // There is some ambiguity between the module registry addresses and go-getter's // very liberal heuristics and so this particular function will typically treat // an invalid registry address as some other sort of remote source address // rather than returning an error. If you know that you're expecting a // registry address in particular, use ParseModuleSourceRegistry instead, which // can therefore expose more detailed error messages about registry address // parsing in particular. func ParseModuleSource(raw string) (ModuleSource, error) { if isModuleSourceLocal(raw) { localAddr, err := parseModuleSourceLocal(raw) if err != nil { // This is to make sure we really return a nil ModuleSource in // this case, rather than an interface containing the zero // value of ModuleSourceLocal. return nil, err } return localAddr, nil } // For historical reasons, whether an address is a registry // address is defined only by whether it can be successfully // parsed as one, and anything else must fall through to be // parsed as a direct remote source, where go-getter might // then recognize it as a filesystem path. This is odd // but matches behavior we've had since Terraform v0.10 which // existing modules may be relying on. // (Notice that this means that there's never any path where // the registry source parse error gets returned to the caller, // which is annoying but has been true for many releases // without it posing a serious problem in practice.) if ret, err := ParseModuleSourceRegistry(raw); err == nil { return ret, nil } // If we get down here then we treat everything else as a // remote address. In practice there's very little that // go-getter doesn't consider invalid input, so even invalid // nonsense will probably interpreted as _something_ here // and then fail during installation instead. We can't // really improve this situation for historical reasons. remoteAddr, err := parseModuleSourceRemote(raw) if err != nil { // This is to make sure we really return a nil ModuleSource in // this case, rather than an interface containing the zero // value of ModuleSourceRemote. return nil, err } return remoteAddr, nil } // ModuleSourceLocal is a ModuleSource representing a local path reference // from the caller's directory to the callee's directory within the same // module package. // // A "module package" here means a set of modules distributed together in // the same archive, repository, or similar. That's a significant distinction // because we always download and cache entire module packages at once, // and then create relative references within the same directory in order // to ensure all modules in the package are looking at a consistent filesystem // layout. We also assume that modules within a package are maintained together, // which means that cross-cutting maintenence across all of them would be // possible. // // The actual value of a ModuleSourceLocal is a normalized relative path using // forward slashes, even on operating systems that have other conventions, // because we're representing traversal within the logical filesystem // represented by the containing package, not actually within the physical // filesystem we unpacked the package into. We should typically not construct // ModuleSourceLocal values directly, except in tests where we can ensure // the value meets our assumptions. Use ParseModuleSource instead if the // input string is not hard-coded in the program. type ModuleSourceLocal string func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) { // As long as we have a suitable prefix (detected by ParseModuleSource) // there is no failure case for local paths: we just use the "path" // package's cleaning logic to remove any redundant "./" and "../" // sequences and any duplicate slashes and accept whatever that // produces. // Although using backslashes (Windows-style) is non-idiomatic, we do // allow it and just normalize it away, so the rest of Terraform will // only see the forward-slash form. if strings.Contains(raw, `\`) { // Note: We use string replacement rather than filepath.ToSlash // here because the filepath package behavior varies by current // platform, but we want to interpret configured paths the same // across all platforms: these are virtual paths within a module // package, not physical filesystem paths. raw = strings.ReplaceAll(raw, `\`, "/") } // Note that we could've historically blocked using "//" in a path here // in order to avoid confusion with the subdir syntax in remote addresses, // but we historically just treated that as the same as a single slash // and so we continue to do that now for compatibility. Clean strips those // out and reduces them to just a single slash. clean := path.Clean(raw) // However, we do need to keep a single "./" on the front if it isn't // a "../" path, or else it would be ambigous with the registry address // syntax. if !strings.HasPrefix(clean, "../") { clean = "./" + clean } return ModuleSourceLocal(clean), nil } func isModuleSourceLocal(raw string) bool { for _, prefix := range moduleSourceLocalPrefixes { if strings.HasPrefix(raw, prefix) { return true } } return false } func (s ModuleSourceLocal) moduleSource() {} func (s ModuleSourceLocal) String() string { // We assume that our underlying string was already normalized at // construction, so we just return it verbatim. return string(s) } func (s ModuleSourceLocal) ForDisplay() string { return string(s) } // ModuleSourceRegistry is a ModuleSource representing a module listed in a // Terraform module registry. // // A registry source isn't a direct source location but rather an indirection // over a ModuleSourceRemote. The job of a registry is to translate the // combination of a ModuleSourceRegistry and a module version number into // a concrete ModuleSourceRemote that Terraform will then download and // install. type ModuleSourceRegistry struct { // PackageAddr is the registry package that the target module belongs to. // The module installer must translate this into a ModuleSourceRemote // using the registry API and then take that underlying address's // PackageAddr in order to find the actual package location. PackageAddr ModuleRegistryPackage // If Subdir is non-empty then it represents a sub-directory within the // remote package that the registry address eventually resolves to. // This will ultimately become the suffix of the Subdir of the // ModuleSourceRemote that the registry address translates to. // // Subdir uses a normalized forward-slash-based path syntax within the // virtual filesystem represented by the final package. It will never // include `../` or `./` sequences. Subdir string } // DefaultModuleRegistryHost is the hostname used for registry-based module // source addresses that do not have an explicit hostname. const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io") var moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$") var moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$") // ParseModuleSourceRegistry is a variant of ParseModuleSource which only // accepts module registry addresses, and will reject any other address type. // // Use this instead of ParseModuleSource if you know from some other surrounding // context that an address is intended to be a registry address rather than // some other address type, which will then allow for better error reporting // due to the additional information about user intent. func ParseModuleSourceRegistry(raw string) (ModuleSource, error) { // Before we delegate to the "real" function we'll just make sure this // doesn't look like a local source address, so we can return a better // error message for that situation. if isModuleSourceLocal(raw) { return ModuleSourceRegistry{}, fmt.Errorf("can't use local directory %q as a module registry address", raw) } ret, err := parseModuleSourceRegistry(raw) if err != nil { // This is to make sure we return a nil ModuleSource, rather than // a non-nil ModuleSource containing a zero-value ModuleSourceRegistry. return nil, err } return ret, nil } func parseModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) { var err error var subDir string raw, subDir = getmodules.SplitPackageSubdir(raw) if strings.HasPrefix(subDir, "../") { return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) } parts := strings.Split(raw, "/") // A valid registry address has either three or four parts, because the // leading hostname part is optional. if len(parts) != 3 && len(parts) != 4 { return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components") } host := DefaultModuleRegistryHost if len(parts) == 4 { host, err = svchost.ForComparison(parts[0]) if err != nil { // The svchost library doesn't produce very good error messages to // return to an end-user, so we'll use some custom ones here. switch { case strings.Contains(parts[0], "--"): // Looks like possibly punycode, which we don't allow here // to ensure that source addresses are written readably. return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0]) default: return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0]) } } if !strings.Contains(host.String(), ".") { return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot") } // Discard the hostname prefix now that we've processed it parts = parts[1:] } ret := ModuleSourceRegistry{ PackageAddr: ModuleRegistryPackage{ Host: host, }, Subdir: subDir, } if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") { return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host) } if ret.PackageAddr.Namespace, err = parseModuleRegistryName(parts[0]); err != nil { if strings.Contains(parts[0], ".") { // Seems like the user omitted one of the latter components in // an address with an explicit hostname. return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system") } return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err) } if ret.PackageAddr.Name, err = parseModuleRegistryName(parts[1]); err != nil { return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err) } if ret.PackageAddr.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil { if strings.Contains(parts[2], "?") { // The user was trying to include a query string, probably? return ret, fmt.Errorf("module registry addresses may not include a query string portion") } return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err) } return ret, nil } // parseModuleRegistryName validates and normalizes a string in either the // "namespace" or "name" position of a module registry source address. func parseModuleRegistryName(given string) (string, error) { // Similar to the names in provider source addresses, we defined these // to be compatible with what filesystems and typical remote systems // like GitHub allow in names. Unfortunately we didn't end up defining // these exactly equivalently: provider names can only use dashes as // punctuation, whereas module names can use underscores. So here we're // using some regular expressions from the original module source // implementation, rather than using the IDNA rules as we do in // ParseProviderPart. if !moduleRegistryNamePattern.MatchString(given) { return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix") } // We also skip normalizing the name to lowercase, because we historically // didn't do that and so existing module registries might be doing // case-sensitive matching. return given, nil } // parseModuleRegistryTargetSystem validates and normalizes a string in the // "target system" position of a module registry source address. This is // what we historically called "provider" but never actually enforced as // being a provider address, and now _cannot_ be a provider address because // provider addresses have three slash-separated components of their own. func parseModuleRegistryTargetSystem(given string) (string, error) { // Similar to the names in provider source addresses, we defined these // to be compatible with what filesystems and typical remote systems // like GitHub allow in names. Unfortunately we didn't end up defining // these exactly equivalently: provider names can't use dashes or // underscores. So here we're using some regular expressions from the // original module source implementation, rather than using the IDNA rules // as we do in ParseProviderPart. if !moduleRegistryTargetSystemPattern.MatchString(given) { return "", fmt.Errorf("must be between one and 64 ASCII letters or digits") } // We also skip normalizing the name to lowercase, because we historically // didn't do that and so existing module registries might be doing // case-sensitive matching. return given, nil } func (s ModuleSourceRegistry) moduleSource() {} func (s ModuleSourceRegistry) String() string { if s.Subdir != "" { return s.PackageAddr.String() + "//" + s.Subdir } return s.PackageAddr.String() } func (s ModuleSourceRegistry) ForDisplay() string { if s.Subdir != "" { return s.PackageAddr.ForDisplay() + "//" + s.Subdir } return s.PackageAddr.ForDisplay() } // ModuleSourceRemote is a ModuleSource representing a remote location from // which we can retrieve a module package. // // A ModuleSourceRemote can optionally include a "subdirectory" path, which // means that it's selecting a sub-directory of the given package to use as // the entry point into the package. type ModuleSourceRemote struct { // PackageAddr is the address of the remote package that the requested // module belongs to. PackageAddr ModulePackage // If Subdir is non-empty then it represents a sub-directory within the // remote package which will serve as the entry-point for the package. // // Subdir uses a normalized forward-slash-based path syntax within the // virtual filesystem represented by the final package. It will never // include `../` or `./` sequences. Subdir string } func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) { var subDir string raw, subDir = getmodules.SplitPackageSubdir(raw) if strings.HasPrefix(subDir, "../") { return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) } // A remote source address is really just a go-getter address resulting // from go-getter's "detect" phase, which adds on the prefix specifying // which protocol it should use and possibly also adjusts the // protocol-specific part into different syntax. // // Note that for historical reasons this can potentially do network // requests in order to disambiguate certain address types, although // that's a legacy thing that is only for some specific, less-commonly-used // address types. Most just do local string manipulation. We should // aim to remove the network requests over time, if possible. norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw) if err != nil { // We must pass through the returned error directly here because // the getmodules package has some special error types it uses // for certain cases where the UI layer might want to include a // more helpful error message. return ModuleSourceRemote{}, err } if moreSubDir != "" { switch { case subDir != "": // The detector's own subdir goes first, because the // subdir we were given is conceptually relative to // the subdirectory that we just detected. subDir = path.Join(moreSubDir, subDir) default: subDir = path.Clean(moreSubDir) } if strings.HasPrefix(subDir, "../") { // This would suggest a bug in a go-getter detector, but // we'll catch it anyway to avoid doing something confusing // downstream. return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm) } } return ModuleSourceRemote{ PackageAddr: ModulePackage(norm), Subdir: subDir, }, nil } func (s ModuleSourceRemote) moduleSource() {} func (s ModuleSourceRemote) String() string { if s.Subdir != "" { return s.PackageAddr.String() + "//" + s.Subdir } return s.PackageAddr.String() } func (s ModuleSourceRemote) ForDisplay() string { // The two string representations are identical for this address type. // This isn't really entirely true to the idea of "ForDisplay" since // it'll often include some additional components added in by the // go-getter detectors, but we don't have any function to turn a // "detected" string back into an idiomatic shorthand the user might've // entered. return s.String() } // FromRegistry can be called on a remote source address that was returned // from a module registry, passing in the original registry source address // that the registry was asked about, in order to get the effective final // remote source address. // // Specifically, this method handles the situations where one or both of // the two addresses contain subdirectory paths, combining both when necessary // in order to ensure that both the registry's given path and the user's // given path are both respected. // // This will return nonsense if given a registry address other than the one // that generated the reciever via a registry lookup. func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote { ret := s // not a pointer, so this is a shallow copy switch { case s.Subdir != "" && given.Subdir != "": ret.Subdir = path.Join(s.Subdir, given.Subdir) case given.Subdir != "": ret.Subdir = given.Subdir } return ret }