terraform/internal/addrs/module_source.go

496 lines
20 KiB
Go

package addrs
import (
"fmt"
"path"
"regexp"
"strings"
svchost "github.com/hashicorp/terraform-svchost"
"github.com/hashicorp/terraform/internal/getmodules"
)
// ModuleSource is the general type for all three of the possible module source
// address types. The concrete implementations of this are ModuleSourceLocal,
// ModuleSourceRegistry, and ModuleSourceRemote.
type ModuleSource interface {
// String returns a full representation of the address, including any
// additional components that are typically implied by omission in
// user-written addresses.
//
// We typically use this longer representation in error message, in case
// the inclusion of normally-omitted components is helpful in debugging
// unexpected behavior.
String() string
// ForDisplay is similar to String but instead returns a representation of
// the idiomatic way to write the address in configuration, omitting
// components that are commonly just implied in addresses written by
// users.
//
// We typically use this shorter representation in informational messages,
// such as the note that we're about to start downloading a package.
ForDisplay() string
moduleSource()
}
var _ ModuleSource = ModuleSourceLocal("")
var _ ModuleSource = ModuleSourceRegistry{}
var _ ModuleSource = ModuleSourceRemote{}
var moduleSourceLocalPrefixes = []string{
"./",
"../",
".\\",
"..\\",
}
// ParseModuleSource parses a module source address as given in the "source"
// argument inside a "module" block in the configuration.
//
// For historical reasons this syntax is a bit overloaded, supporting three
// different address types:
// - Local paths starting with either ./ or ../, which are special because
// Terraform considers them to belong to the same "package" as the caller.
// - Module registry addresses, given as either NAMESPACE/NAME/SYSTEM or
// HOST/NAMESPACE/NAME/SYSTEM, in which case the remote registry serves
// as an indirection over the third address type that follows.
// - Various URL-like and other heuristically-recognized strings which
// we currently delegate to the external library go-getter.
//
// There is some ambiguity between the module registry addresses and go-getter's
// very liberal heuristics and so this particular function will typically treat
// an invalid registry address as some other sort of remote source address
// rather than returning an error. If you know that you're expecting a
// registry address in particular, use ParseModuleSourceRegistry instead, which
// can therefore expose more detailed error messages about registry address
// parsing in particular.
func ParseModuleSource(raw string) (ModuleSource, error) {
if isModuleSourceLocal(raw) {
localAddr, err := parseModuleSourceLocal(raw)
if err != nil {
// This is to make sure we really return a nil ModuleSource in
// this case, rather than an interface containing the zero
// value of ModuleSourceLocal.
return nil, err
}
return localAddr, nil
}
// For historical reasons, whether an address is a registry
// address is defined only by whether it can be successfully
// parsed as one, and anything else must fall through to be
// parsed as a direct remote source, where go-getter might
// then recognize it as a filesystem path. This is odd
// but matches behavior we've had since Terraform v0.10 which
// existing modules may be relying on.
// (Notice that this means that there's never any path where
// the registry source parse error gets returned to the caller,
// which is annoying but has been true for many releases
// without it posing a serious problem in practice.)
if ret, err := ParseModuleSourceRegistry(raw); err == nil {
return ret, nil
}
// If we get down here then we treat everything else as a
// remote address. In practice there's very little that
// go-getter doesn't consider invalid input, so even invalid
// nonsense will probably interpreted as _something_ here
// and then fail during installation instead. We can't
// really improve this situation for historical reasons.
remoteAddr, err := parseModuleSourceRemote(raw)
if err != nil {
// This is to make sure we really return a nil ModuleSource in
// this case, rather than an interface containing the zero
// value of ModuleSourceRemote.
return nil, err
}
return remoteAddr, nil
}
// ModuleSourceLocal is a ModuleSource representing a local path reference
// from the caller's directory to the callee's directory within the same
// module package.
//
// A "module package" here means a set of modules distributed together in
// the same archive, repository, or similar. That's a significant distinction
// because we always download and cache entire module packages at once,
// and then create relative references within the same directory in order
// to ensure all modules in the package are looking at a consistent filesystem
// layout. We also assume that modules within a package are maintained together,
// which means that cross-cutting maintenence across all of them would be
// possible.
//
// The actual value of a ModuleSourceLocal is a normalized relative path using
// forward slashes, even on operating systems that have other conventions,
// because we're representing traversal within the logical filesystem
// represented by the containing package, not actually within the physical
// filesystem we unpacked the package into. We should typically not construct
// ModuleSourceLocal values directly, except in tests where we can ensure
// the value meets our assumptions. Use ParseModuleSource instead if the
// input string is not hard-coded in the program.
type ModuleSourceLocal string
func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) {
// As long as we have a suitable prefix (detected by ParseModuleSource)
// there is no failure case for local paths: we just use the "path"
// package's cleaning logic to remove any redundant "./" and "../"
// sequences and any duplicate slashes and accept whatever that
// produces.
// Although using backslashes (Windows-style) is non-idiomatic, we do
// allow it and just normalize it away, so the rest of Terraform will
// only see the forward-slash form.
if strings.Contains(raw, `\`) {
// Note: We use string replacement rather than filepath.ToSlash
// here because the filepath package behavior varies by current
// platform, but we want to interpret configured paths the same
// across all platforms: these are virtual paths within a module
// package, not physical filesystem paths.
raw = strings.ReplaceAll(raw, `\`, "/")
}
// Note that we could've historically blocked using "//" in a path here
// in order to avoid confusion with the subdir syntax in remote addresses,
// but we historically just treated that as the same as a single slash
// and so we continue to do that now for compatibility. Clean strips those
// out and reduces them to just a single slash.
clean := path.Clean(raw)
// However, we do need to keep a single "./" on the front if it isn't
// a "../" path, or else it would be ambigous with the registry address
// syntax.
if !strings.HasPrefix(clean, "../") {
clean = "./" + clean
}
return ModuleSourceLocal(clean), nil
}
func isModuleSourceLocal(raw string) bool {
for _, prefix := range moduleSourceLocalPrefixes {
if strings.HasPrefix(raw, prefix) {
return true
}
}
return false
}
func (s ModuleSourceLocal) moduleSource() {}
func (s ModuleSourceLocal) String() string {
// We assume that our underlying string was already normalized at
// construction, so we just return it verbatim.
return string(s)
}
func (s ModuleSourceLocal) ForDisplay() string {
return string(s)
}
// ModuleSourceRegistry is a ModuleSource representing a module listed in a
// Terraform module registry.
//
// A registry source isn't a direct source location but rather an indirection
// over a ModuleSourceRemote. The job of a registry is to translate the
// combination of a ModuleSourceRegistry and a module version number into
// a concrete ModuleSourceRemote that Terraform will then download and
// install.
type ModuleSourceRegistry struct {
// PackageAddr is the registry package that the target module belongs to.
// The module installer must translate this into a ModuleSourceRemote
// using the registry API and then take that underlying address's
// PackageAddr in order to find the actual package location.
PackageAddr ModuleRegistryPackage
// If Subdir is non-empty then it represents a sub-directory within the
// remote package that the registry address eventually resolves to.
// This will ultimately become the suffix of the Subdir of the
// ModuleSourceRemote that the registry address translates to.
//
// Subdir uses a normalized forward-slash-based path syntax within the
// virtual filesystem represented by the final package. It will never
// include `../` or `./` sequences.
Subdir string
}
// DefaultModuleRegistryHost is the hostname used for registry-based module
// source addresses that do not have an explicit hostname.
const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io")
var moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$")
var moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$")
// ParseModuleSourceRegistry is a variant of ParseModuleSource which only
// accepts module registry addresses, and will reject any other address type.
//
// Use this instead of ParseModuleSource if you know from some other surrounding
// context that an address is intended to be a registry address rather than
// some other address type, which will then allow for better error reporting
// due to the additional information about user intent.
func ParseModuleSourceRegistry(raw string) (ModuleSource, error) {
// Before we delegate to the "real" function we'll just make sure this
// doesn't look like a local source address, so we can return a better
// error message for that situation.
if isModuleSourceLocal(raw) {
return ModuleSourceRegistry{}, fmt.Errorf("can't use local directory %q as a module registry address", raw)
}
ret, err := parseModuleSourceRegistry(raw)
if err != nil {
// This is to make sure we return a nil ModuleSource, rather than
// a non-nil ModuleSource containing a zero-value ModuleSourceRegistry.
return nil, err
}
return ret, nil
}
func parseModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) {
var err error
var subDir string
raw, subDir = getmodules.SplitPackageSubdir(raw)
if strings.HasPrefix(subDir, "../") {
return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
}
parts := strings.Split(raw, "/")
// A valid registry address has either three or four parts, because the
// leading hostname part is optional.
if len(parts) != 3 && len(parts) != 4 {
return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components")
}
host := DefaultModuleRegistryHost
if len(parts) == 4 {
host, err = svchost.ForComparison(parts[0])
if err != nil {
// The svchost library doesn't produce very good error messages to
// return to an end-user, so we'll use some custom ones here.
switch {
case strings.Contains(parts[0], "--"):
// Looks like possibly punycode, which we don't allow here
// to ensure that source addresses are written readably.
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0])
default:
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0])
}
}
if !strings.Contains(host.String(), ".") {
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot")
}
// Discard the hostname prefix now that we've processed it
parts = parts[1:]
}
ret := ModuleSourceRegistry{
PackageAddr: ModuleRegistryPackage{
Host: host,
},
Subdir: subDir,
}
if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") {
return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host)
}
if ret.PackageAddr.Namespace, err = parseModuleRegistryName(parts[0]); err != nil {
if strings.Contains(parts[0], ".") {
// Seems like the user omitted one of the latter components in
// an address with an explicit hostname.
return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system")
}
return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err)
}
if ret.PackageAddr.Name, err = parseModuleRegistryName(parts[1]); err != nil {
return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err)
}
if ret.PackageAddr.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil {
if strings.Contains(parts[2], "?") {
// The user was trying to include a query string, probably?
return ret, fmt.Errorf("module registry addresses may not include a query string portion")
}
return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err)
}
return ret, nil
}
// parseModuleRegistryName validates and normalizes a string in either the
// "namespace" or "name" position of a module registry source address.
func parseModuleRegistryName(given string) (string, error) {
// Similar to the names in provider source addresses, we defined these
// to be compatible with what filesystems and typical remote systems
// like GitHub allow in names. Unfortunately we didn't end up defining
// these exactly equivalently: provider names can only use dashes as
// punctuation, whereas module names can use underscores. So here we're
// using some regular expressions from the original module source
// implementation, rather than using the IDNA rules as we do in
// ParseProviderPart.
if !moduleRegistryNamePattern.MatchString(given) {
return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix")
}
// We also skip normalizing the name to lowercase, because we historically
// didn't do that and so existing module registries might be doing
// case-sensitive matching.
return given, nil
}
// parseModuleRegistryTargetSystem validates and normalizes a string in the
// "target system" position of a module registry source address. This is
// what we historically called "provider" but never actually enforced as
// being a provider address, and now _cannot_ be a provider address because
// provider addresses have three slash-separated components of their own.
func parseModuleRegistryTargetSystem(given string) (string, error) {
// Similar to the names in provider source addresses, we defined these
// to be compatible with what filesystems and typical remote systems
// like GitHub allow in names. Unfortunately we didn't end up defining
// these exactly equivalently: provider names can't use dashes or
// underscores. So here we're using some regular expressions from the
// original module source implementation, rather than using the IDNA rules
// as we do in ParseProviderPart.
if !moduleRegistryTargetSystemPattern.MatchString(given) {
return "", fmt.Errorf("must be between one and 64 ASCII letters or digits")
}
// We also skip normalizing the name to lowercase, because we historically
// didn't do that and so existing module registries might be doing
// case-sensitive matching.
return given, nil
}
func (s ModuleSourceRegistry) moduleSource() {}
func (s ModuleSourceRegistry) String() string {
if s.Subdir != "" {
return s.PackageAddr.String() + "//" + s.Subdir
}
return s.PackageAddr.String()
}
func (s ModuleSourceRegistry) ForDisplay() string {
if s.Subdir != "" {
return s.PackageAddr.ForDisplay() + "//" + s.Subdir
}
return s.PackageAddr.ForDisplay()
}
// ModuleSourceRemote is a ModuleSource representing a remote location from
// which we can retrieve a module package.
//
// A ModuleSourceRemote can optionally include a "subdirectory" path, which
// means that it's selecting a sub-directory of the given package to use as
// the entry point into the package.
type ModuleSourceRemote struct {
// PackageAddr is the address of the remote package that the requested
// module belongs to.
PackageAddr ModulePackage
// If Subdir is non-empty then it represents a sub-directory within the
// remote package which will serve as the entry-point for the package.
//
// Subdir uses a normalized forward-slash-based path syntax within the
// virtual filesystem represented by the final package. It will never
// include `../` or `./` sequences.
Subdir string
}
func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) {
var subDir string
raw, subDir = getmodules.SplitPackageSubdir(raw)
if strings.HasPrefix(subDir, "../") {
return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
}
// A remote source address is really just a go-getter address resulting
// from go-getter's "detect" phase, which adds on the prefix specifying
// which protocol it should use and possibly also adjusts the
// protocol-specific part into different syntax.
//
// Note that for historical reasons this can potentially do network
// requests in order to disambiguate certain address types, although
// that's a legacy thing that is only for some specific, less-commonly-used
// address types. Most just do local string manipulation. We should
// aim to remove the network requests over time, if possible.
norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw)
if err != nil {
// We must pass through the returned error directly here because
// the getmodules package has some special error types it uses
// for certain cases where the UI layer might want to include a
// more helpful error message.
return ModuleSourceRemote{}, err
}
if moreSubDir != "" {
switch {
case subDir != "":
// The detector's own subdir goes first, because the
// subdir we were given is conceptually relative to
// the subdirectory that we just detected.
subDir = path.Join(moreSubDir, subDir)
default:
subDir = path.Clean(moreSubDir)
}
if strings.HasPrefix(subDir, "../") {
// This would suggest a bug in a go-getter detector, but
// we'll catch it anyway to avoid doing something confusing
// downstream.
return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm)
}
}
return ModuleSourceRemote{
PackageAddr: ModulePackage(norm),
Subdir: subDir,
}, nil
}
func (s ModuleSourceRemote) moduleSource() {}
func (s ModuleSourceRemote) String() string {
if s.Subdir != "" {
return s.PackageAddr.String() + "//" + s.Subdir
}
return s.PackageAddr.String()
}
func (s ModuleSourceRemote) ForDisplay() string {
// The two string representations are identical for this address type.
// This isn't really entirely true to the idea of "ForDisplay" since
// it'll often include some additional components added in by the
// go-getter detectors, but we don't have any function to turn a
// "detected" string back into an idiomatic shorthand the user might've
// entered.
return s.String()
}
// FromRegistry can be called on a remote source address that was returned
// from a module registry, passing in the original registry source address
// that the registry was asked about, in order to get the effective final
// remote source address.
//
// Specifically, this method handles the situations where one or both of
// the two addresses contain subdirectory paths, combining both when necessary
// in order to ensure that both the registry's given path and the user's
// given path are both respected.
//
// This will return nonsense if given a registry address other than the one
// that generated the reciever via a registry lookup.
func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote {
ret := s // not a pointer, so this is a shallow copy
switch {
case s.Subdir != "" && given.Subdir != "":
ret.Subdir = path.Join(s.Subdir, given.Subdir)
case given.Subdir != "":
ret.Subdir = given.Subdir
}
return ret
}