diff --git a/internal/getmodules/doc.go b/internal/getmodules/doc.go
new file mode 100644
index 000000000..9c7299842
--- /dev/null
+++ b/internal/getmodules/doc.go
@@ -0,0 +1,8 @@
+// Package getmodules contains the low-level functionality for fetching
+// remote module packages. It's essentially just a thin wrapper around
+// go-getter.
+//
+// This package is only for remote module source addresses, not for local
+// or registry source addresses. The other address types are handled
+// elsewhere.
+package getmodules
diff --git a/internal/getmodules/getter.go b/internal/getmodules/getter.go
new file mode 100644
index 000000000..44dc8254b
--- /dev/null
+++ b/internal/getmodules/getter.go
@@ -0,0 +1,181 @@
+package getmodules
+
+import (
+	"fmt"
+	"log"
+	"os"
+
+	cleanhttp "github.com/hashicorp/go-cleanhttp"
+	getter "github.com/hashicorp/go-getter"
+	"github.com/hashicorp/terraform/internal/copy"
+)
+
+// We configure our own go-getter detector and getter sets here, because
+// the set of sources we support is part of Terraform's documentation and
+// so we don't want any new sources introduced in go-getter to sneak in here
+// and work even though they aren't documented. This also insulates us from
+// any meddling that might be done by other go-getter callers linked into our
+// executable.
+//
+// Note that over time we've found go-getter's design to be not wholly fit
+// for Terraform's purposes in various ways, and so we're continuing to use
+// it here because our backward compatibility with earlier versions depends
+// on it, but we use go-getter very carefully and always only indirectly via
+// the public API of this package so that we can get the subset of the
+// go-getter functionality we need while working around some of the less
+// helpful parts of its design. See the comments in various other functions
+// in this package which call into go-getter for more information on what
+// tradeoffs we're making here.
+
+// goGetterDetectors is the list of detectors that NormalizePackageAddress
+// gives to go-getter in order to normalize a user-supplied package address.
+var goGetterDetectors = []getter.Detector{
+	new(getter.GitHubDetector),
+	new(getter.GitDetector),
+
+	// Because historically BitBucket supported both Git and Mercurial
+	// repositories but used the same repository URL syntax for both,
+	// this detector takes the unusual step of actually reaching out
+	// to the BitBucket API to recognize the repository type. That
+	// means there's the possibility of an outgoing network request
+	// inside what is otherwise normally just a local string manipulation
+	// operation, but we continue to accept this for now.
+	//
+	// Perhaps a future version of go-getter will remove the check now
+	// that BitBucket only supports Git anyway. Aside from this historical
+	// exception, we should avoid adding any new detectors that make network
+	// requests in here, and limit ourselves only to ones that can operate
+	// entirely through local string manipulation.
+	new(getter.BitBucketDetector),
+
+	new(getter.GCSDetector),
+	new(getter.S3Detector),
+	new(getter.FileDetector),
+}
+
+// goGetterNoDetectors is the deliberately empty detector list that
+// getWithGoGetter gives to go-getter, because its caller is expected to have
+// already run detection.
+//
+// NOTE(review): presumably this is a non-nil empty slice on purpose, so that
+// go-getter doesn't substitute its own default detectors; confirm against
+// go-getter's Client before changing this to a nil slice.
+var goGetterNoDetectors = []getter.Detector{}
+
+// goGetterDecompressors is the fixed set of decompressors that
+// getWithGoGetter allows go-getter to use. The keys appear to be archive
+// filename suffixes; see go-getter's documentation for how they are matched.
+var goGetterDecompressors = map[string]getter.Decompressor{
+	"bz2": new(getter.Bzip2Decompressor),
+	"gz":  new(getter.GzipDecompressor),
+	"xz":  new(getter.XzDecompressor),
+	"zip": new(getter.ZipDecompressor),
+
+	"tar.bz2":  new(getter.TarBzip2Decompressor),
+	"tar.tbz2": new(getter.TarBzip2Decompressor),
+
+	"tar.gz": new(getter.TarGzipDecompressor),
+	"tgz":    new(getter.TarGzipDecompressor),
+
+	"tar.xz": new(getter.TarXzDecompressor),
+	"txz":    new(getter.TarXzDecompressor),
+}
+
+var (
+	// getterHTTPClient is the HTTP client used by getterHTTPGetter.
+	getterHTTPClient = cleanhttp.DefaultClient()
+
+	// getterHTTPGetter is the single getter instance registered for both the
+	// "http" and "https" entries in goGetterGetters.
+	getterHTTPGetter = &getter.HttpGetter{
+		Client: getterHTTPClient,
+		Netrc:  true,
+	}
+)
+
+// goGetterGetters is the fixed set of getters that getWithGoGetter allows
+// go-getter to use, keyed by what appear to be the protocol names that
+// go-getter selects a getter by.
+var goGetterGetters = map[string]getter.Getter{
+	"file":  new(getter.FileGetter),
+	"gcs":   new(getter.GCSGetter),
+	"git":   new(getter.GitGetter),
+	"hg":    new(getter.HgGetter),
+	"s3":    new(getter.S3Getter),
+	"http":  getterHTTPGetter,
+	"https": getterHTTPGetter,
+}
+
+// A reusingGetter is a helper for the module installer that remembers
+// the final resolved addresses of all of the sources it has already been
+// asked to install, and will copy from a prior installation directory if
+// it has the same resolved source address.
+//
+// The keys in a reusingGetter are the normalized (post-detection) package
+// addresses, and the values are the paths where each source was previously
+// installed. (Users of this map should treat the keys as addrs.ModulePackage
+// values, but we can't type them that way because the addrs package
+// imports getmodules in order to indirectly access our go-getter
+// configuration.)
+type reusingGetter map[string]string
+
+// getWithGoGetter fetches the package at the given address into the given
+// target directory. The given address must already be in normalized form
+// (using NormalizePackageAddress) or else the behavior is undefined.
+//
+// This function deals only in entire packages, so it's always the caller's
+// responsibility to handle any subdirectory specification and select a
+// suitable subdirectory of the given installation directory after installation
+// has succeeded.
+//
+// This function would ideally accept packageAddr as a value of type
+// addrs.ModulePackage, but we can't do that because the addrs package
+// depends on this package for package address parsing. Therefore we just
+// use a string here but assume that the caller got that value by calling
+// the String method on a valid addrs.ModulePackage value.
+//
+// The errors returned by this function are those surfaced by the underlying
+// go-getter library, which have very inconsistent quality as
+// end-user-actionable error messages. At this time we do not have any
+// reasonable way to improve these error messages at this layer because
+// the underlying errors are not separately recognizable.
+func (g reusingGetter) getWithGoGetter(instPath, packageAddr string) error {
+	if prevDir, exists := g[packageAddr]; exists {
+		// This package was already fetched once, so copy that earlier
+		// installation rather than retrieving it from its origin again.
+		log.Printf("[TRACE] getmodules: copying previous install of %q from %s to %s", packageAddr, prevDir, instPath)
+		if err := os.Mkdir(instPath, os.ModePerm); err != nil {
+			return fmt.Errorf("failed to create directory %s: %w", instPath, err)
+		}
+		if err := copy.CopyDir(instPath, prevDir); err != nil {
+			return fmt.Errorf("failed to copy from %s to %s: %w", prevDir, instPath, err)
+		}
+		return nil
+	}
+
+	log.Printf("[TRACE] getmodules: fetching %q to %q", packageAddr, instPath)
+	client := getter.Client{
+		Src: packageAddr,
+		Dst: instPath,
+		Pwd: instPath,
+
+		Mode: getter.ClientModeDir,
+
+		Detectors:     goGetterNoDetectors, // our caller should've already done detection
+		Decompressors: goGetterDecompressors,
+		Getters:       goGetterGetters,
+	}
+	if err := client.Get(); err != nil {
+		// go-getter's errors are passed through unmodified.
+		return err
+	}
+
+	// Remember where we installed this so we might reuse this directory
+	// on subsequent calls to avoid re-downloading. This write requires g to
+	// be a non-nil map, which PackageFetcher.FetchPackage guarantees.
+	g[packageAddr] = instPath
+
+	// If we get down here then the download succeeded, and so we should have
+	// got the full module package structure written into instPath.
+	return nil
+}
diff --git a/internal/getmodules/installer.go b/internal/getmodules/installer.go
new file mode 100644
index 000000000..a49ee0be8
--- /dev/null
+++ b/internal/getmodules/installer.go
@@ -0,0 +1,44 @@
+package getmodules
+
+// PackageFetcher is a low-level utility for fetching remote module packages
+// into local filesystem directories in preparation for use by higher-level
+// module installer functionality implemented elsewhere.
+//
+// A PackageFetcher works only with entire module packages and never with
+// the individual modules within a package.
+//
+// A particular PackageFetcher instance remembers the target directory of
+// any successfully-installed package so that it can optimize future calls
+// that have the same package address by copying the local directory tree,
+// rather than fetching the package from its origin repeatedly. There is
+// no way to reset this cache, so a particular PackageFetcher instance should
+// live only for the duration of a single initialization process.
+//
+// The zero value of PackageFetcher is ready to use: its cache is allocated
+// by the first call to FetchPackage.
+type PackageFetcher struct {
+	// getter is the cache of packages this fetcher has already installed.
+	// It is nil in a zero-value PackageFetcher until FetchPackage allocates it.
+	getter reusingGetter
+}
+
+// FetchPackage downloads or otherwise retrieves the filesystem inside the
+// package at the given address into the given local installation directory.
+//
+// packageAddr must be formatted as if it were the result of an
+// addrs.ModulePackage.String() call. It's only defined as a raw string here
+// because the getmodules package can't import the addrs package due to
+// that creating a package dependency cycle.
+//
+// PackageFetcher only works with entire packages. If the caller is processing
+// a module source address which includes a subdirectory portion then the
+// caller must resolve that itself, possibly with the help of the
+// getmodules.SplitPackageSubdir and getmodules.ExpandSubdirGlobs functions.
+func (f *PackageFetcher) FetchPackage(instDir string, packageAddr string) error {
+	if f.getter == nil {
+		// A zero-value PackageFetcher has a nil cache map, and recording an
+		// installation in a nil map would panic, so allocate it on first use.
+		f.getter = make(reusingGetter)
+	}
+	return f.getter.getWithGoGetter(instDir, packageAddr)
+}
diff --git a/internal/getmodules/package.go b/internal/getmodules/package.go
new file mode 100644
index 000000000..d0e46dbab
--- /dev/null
+++ b/internal/getmodules/package.go
@@ -0,0 +1,61 @@
+package getmodules
+
+import (
+	getter "github.com/hashicorp/go-getter"
+)
+
+// NormalizePackageAddress uses the go-getter "detector" functionality in
+// order to turn a user-supplied source address into a normalized address
+// which always includes a prefix naming a protocol to fetch with and may
+// also include a transformed/normalized version of the protocol-specific
+// source address included afterward.
+//
+// This is part of the implementation of addrs.ParseModulePackage and of
+// addrs.ParseModuleSource, so for most callers it'd be better to call
+// one of those other functions instead. The addrs package can potentially
+// perform other processing in addition to just the go-getter detection.
+//
+// Note that this function expects to receive only a package address, not
+// a full source address that might also include a subdirectory portion.
+// The caller must trim off any subdirectory portion using
+// getmodules.SplitPackageSubdir before calling this function, passing in
+// just the packageAddr return value, or the result will be incorrect.
+//
+// The detectors in go-getter can potentially introduce their own
+// package subdirectory portions. If that happens then this function will
+// return the subdirectory portion as a non-empty subDir return value,
+// which the caller must then use as a prefix for any subDir it already
+// extracted from the user's given package address.
+//
+// Some of go-getter's detectors make outgoing HTTP requests, and so
+// the behavior of this function may depend on the network connectivity
+// of the system where Terraform is running. However, most of the detectors
+// we use are local-only, and so HTTP requests are only for some ambiguous
+// edge-cases, such as the BitBucket detector which has a mechanism to
+// detect whether to use Git or Mercurial, because earlier versions of
+// BitBucket used to support both.
+func NormalizePackageAddress(given string) (packageAddr, subDir string, err error) {
+	// NOTE: We're passing an empty string to the "current working directory"
+	// here because that's only relevant for relative filesystem paths,
+	// but Terraform handles relative filesystem paths itself outside of
+	// go-getter and so it'd always be an error to pass one into here.
+	// go-getter's "file" detector returns an error if it encounters a
+	// relative path when the pwd argument is empty.
+	//
+	// (Absolute filesystem paths _are_ valid though, for annoying historical
+	// reasons, and we treat them as remote packages even though "downloading"
+	// them just means a recursive copy of the source directory tree.)
+	detected, detectErr := getter.Detect(given, "", goGetterDetectors)
+	if detectErr != nil {
+		// NOTE: go-getter's error messages are of very inconsistent quality
+		// and many are not suitable for an end-user audience, but they are all
+		// just strings and so we can't really do any sort of post-processing
+		// to improve them and thus we just accept some bad error messages for
+		// now.
+		return "", "", detectErr
+	}
+
+	// A detector may have added its own subdirectory portion, so split it off.
+	packageAddr, subDir = SplitPackageSubdir(detected)
+	return packageAddr, subDir, nil
+}
diff --git a/internal/getmodules/subdir.go b/internal/getmodules/subdir.go
new file mode 100644
index 000000000..38d398f7b
--- /dev/null
+++ b/internal/getmodules/subdir.go
@@ -0,0 +1,59 @@
+package getmodules
+
+import (
+	"path"
+
+	getter "github.com/hashicorp/go-getter"
+)
+
+// SplitPackageSubdir detects whether the given address string has a
+// subdirectory portion, and if so returns a non-empty subDir string
+// along with the trimmed package address.
+//
+// If the given string doesn't have a subdirectory portion then it'll
+// just be returned verbatim in packageAddr, with an empty subDir value.
+//
+// Although the rest of this package is focused only on direct remote
+// module packages, this particular function and its companion
+// ExpandSubdirGlobs are both also relevant for registry-based module
+// addresses, because a registry translates such an address into a
+// remote module package address and thus can contribute its own
+// additions to the final subdirectory selection.
+func SplitPackageSubdir(given string) (packageAddr, subDir string) {
+	// We delegate this mostly to go-getter, because older Terraform
+	// versions just used go-getter directly and so we need to preserve
+	// its various quirks for compatibility reasons.
+	//
+	// However, note that in Terraform we _always_ split off the subdirectory
+	// portion and handle it within Terraform-level code, _never_ passing
+	// a subdirectory portion down into go-getter's own Get function, because
+	// Terraform's ability to refer between local paths inside the same
+	// package depends on Terraform itself always being aware of where the
+	// package's root directory ended up on disk, and always needs the
+	// package installed wholesale.
+	packageAddr, subDir = getter.SourceDirSubdir(given)
+	if subDir == "" {
+		// Nothing to normalize, and path.Clean would turn "" into ".".
+		return packageAddr, ""
+	}
+	return packageAddr, path.Clean(subDir)
+}
+
+// ExpandSubdirGlobs handles a subdir string that might contain glob syntax,
+// turning it into a concrete subdirectory path by referring to the actual
+// files on disk in the given directory which we assume contains the content
+// of whichever package this is a subdirectory glob for.
+//
+// Subdir globs are used, for example, when a module registry wants to select
+// the contents of the single directory at the root of a conventional tar
+// archive but it doesn't actually know the exact name of that directory.
+// In that case it might specify a subdir of just "*", which this function
+// will then expand into the single subdirectory found inside instDir, or
+// return an error if the result would be ambiguous.
+func ExpandSubdirGlobs(instDir string, subDir string) (string, error) {
+	// We just delegate this entirely to go-getter, because older Terraform
+	// versions just used go-getter directly and so we need to preserve
+	// its various quirks for compatibility reasons.
+	expanded, err := getter.SubdirGlob(instDir, subDir)
+	return expanded, err
+}