terraform/internal/getproviders/hash.go

452 lines
17 KiB
Go

package getproviders
import (
"crypto/sha256"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"golang.org/x/mod/sumdb/dirhash"
)
// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string
// NilHash is the zero value of Hash. It isn't a valid hash because it has no
// scheme, so its Scheme, HasScheme, and Value methods will panic. Its String
// method returns the empty string and its GoString method handles NilHash
// explicitly, so those two are safe to call.
const NilHash = Hash("")
// ParseHash converts the string representation of a hash into a Hash value.
//
// The only syntax requirement is that the string contains a colon preceded by
// at least one character, which is taken to be the scheme. The scheme is not
// checked against the HashScheme constants in this package: schemes that this
// version of Terraform doesn't recognize are intentionally accepted so that
// schemes introduced in future can be silently ignored. Consequently, the
// Scheme method of the result may return a value that isn't one of those
// constants.
//
// The value portion after the colon isn't validated against the scheme
// either. An invalid value is simply one that never matches any package.
//
// If the returned error is non-nil then the returned Hash is NilHash and
// must not be used.
func ParseHash(s string) (Hash, error) {
	// A colon at index zero would mean a zero-length scheme, which is
	// rejected in the same way as a missing colon.
	schemeEnd := strings.IndexByte(s, ':')
	if schemeEnd <= 0 {
		return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon")
	}
	return Hash(s), nil
}
// MustParseHash behaves like ParseHash except that it panics instead of
// returning an error when the given string is not a valid hash.
func MustParseHash(s string) Hash {
	parsed, parseErr := ParseHash(s)
	if parseErr != nil {
		// The panic value is the error's message string rather than the
		// error value itself.
		panic(parseErr.Error())
	}
	return parsed
}
// Scheme returns the scheme of the receiving hash, which is the portion of
// the hash string up to and including its first colon.
//
// If the receiver contains no colon at all, which includes NilHash, then this
// method will panic.
func (h Hash) Scheme() HashScheme {
	raw := string(h)
	schemeEnd := strings.IndexByte(raw, ':')
	if schemeEnd < 0 {
		panic(fmt.Sprintf("invalid hash string %q", h))
	}
	// The trailing colon is kept so that the result is directly comparable
	// with the HashScheme constants, which include it.
	return HashScheme(raw[:schemeEnd+1])
}
// HasScheme reports whether the receiver's scheme is exactly the given
// scheme.
//
// The receiver's scheme is obtained by calling Scheme, so this method panics
// under the same conditions as Scheme does.
func (h Hash) HasScheme(want HashScheme) bool {
	got := h.Scheme()
	return got == want
}
// Value returns the scheme-specific value from the receiving hash, which is
// everything after the first colon in the hash string. The meaning of this
// value depends on the scheme.
//
// If the receiver contains no colon at all, which includes NilHash, then this
// method will panic.
func (h Hash) Value() string {
	raw := string(h)
	schemeEnd := strings.IndexByte(raw, ':')
	if schemeEnd < 0 {
		panic(fmt.Sprintf("invalid hash string %q", h))
	}
	return raw[schemeEnd+1:]
}
// String returns the receiving hash as a plain string, including its scheme
// prefix. Unlike Scheme and Value, it never panics.
func (h Hash) String() string {
	// Hash is a string type, so this is a plain conversion.
	return string(h)
}
// GoString returns a Go syntax representation of the receiving hash, written
// as an expression that would construct it.
//
// This exists primarily to produce descriptive test failure output; the
// results are not particularly useful at runtime.
//
// NilHash is rendered by name. For any other receiver this method panics if
// the hash string contains no colon, because it relies on Scheme and Value.
func (h Hash) GoString() string {
	if h == NilHash {
		return "getproviders.NilHash"
	}

	scheme, value := h.Scheme(), h.Value()
	switch scheme {
	case HashScheme1:
		return fmt.Sprintf("getproviders.HashScheme1.New(%q)", value)
	case HashSchemeZip:
		return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", value)
	}

	// This fallback is for lock files or API responses that use hash schemes
	// the current version of Terraform isn't familiar with, presumably
	// because they were introduced in a later version.
	return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, value)
}
// HashScheme is an enumeration of schemes that are allowed for values of type
// Hash.
//
// A HashScheme value includes the trailing colon that separates the scheme
// from the rest of a Hash string, as in "h1:".
type HashScheme string
const (
// HashScheme1 is the scheme identifier for the first hash scheme.
//
// Use PackageHashV1 (or one of its wrapper functions) to calculate hashes
// with this scheme.
HashScheme1 HashScheme = HashScheme("h1:")
// HashSchemeZip is the scheme identifier for the legacy hash scheme that
// applies to distribution archives (.zip files) rather than package
// contents, and can therefore only be verified against the original
// distribution .zip file, not an extracted directory.
//
// Use PackageHashLegacyZipSHA to calculate hashes with this scheme.
HashSchemeZip HashScheme = HashScheme("zh:")
)
// New builds a Hash by prefixing the given raw value with the receiving
// scheme.
//
// The value is used exactly as given. It's the caller's responsibility to
// make sure that it makes sense for the selected scheme.
func (hs HashScheme) New(value string) Hash {
	return Hash(hs) + Hash(value)
}
// PackageHash computes a hash of the contents of the package at the given
// location, using whichever hash algorithm is the current default.
//
// Currently, this function returns version 1 hashes as produced by
// PackageHashV1, but it may switch to other versions in later releases. Call
// PackageHashV1 directly if you specifically need a V1 hash.
//
// PackageHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHash(loc PackageLocation) (Hash, error) {
	// Version 1 is the current default algorithm.
	hash, err := PackageHashV1(loc)
	return hash, err
}
// PackageMatchesHash returns true if the package at the given location matches
// the given hash, or false otherwise.
//
// Two hash schemes are supported. An "h1:" hash is compared against the
// result of PackageHashV1. A "zh:" hash is compared against the result of
// PackageHashLegacyZipSHA, which is possible only when the location is a
// PackageLocalArchive; for any other location type a "zh:" hash produces an
// error. A hash of any other scheme also produces an error.
//
// PackageMatchesHash also returns an error if the hash of the package cannot
// be computed, such as when the location cannot be read. That includes any
// non-local location, because only PackageLocalDir and PackageLocalArchive
// give access to the package contents needed to compute a hash.
//
// If the given hash is not using valid syntax then this function will panic.
func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) {
	var (
		got Hash
		err error
	)

	switch want.Scheme() {
	case HashScheme1:
		got, err = PackageHashV1(loc)
	case HashSchemeZip:
		archiveLoc, isArchive := loc.(PackageLocalArchive)
		if !isArchive {
			return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`)
		}
		got, err = PackageHashLegacyZipSHA(archiveLoc)
	default:
		return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
	}

	if err != nil {
		return false, err
	}
	return got == want, nil
}
// PackageMatchesAnyHash returns true if the package at the given location
// matches at least one of the given hashes, or false otherwise.
//
// Unlike the singular PackageMatchesHash, PackageMatchesAnyHash treats hashes
// it cannot check as successful non-matches rather than as errors. That
// covers hashes of unsupported schemes, and "zh:" hashes when the location is
// not a PackageLocalArchive.
//
// An error is returned only if a hash of the package needs to be computed and
// that computation fails, such as when the location cannot be read. Computing
// an "h1:" hash requires one of the two local package location types,
// PackageLocalDir or PackageLocalArchive, so any other location type produces
// an error as soon as an "h1:" hash is encountered in the allowed set.
//
// If any of the given hashes examined before a match is found is not using
// valid syntax then this function will panic.
func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) {
	// Several of the allowed hashes are likely to share a scheme, so the hash
	// of the package under each supported scheme is computed at most once and
	// then reused. Each stays NilHash until the first hash of the
	// corresponding scheme is encountered.
	var cachedV1, cachedZip Hash

	// Only a local archive can be checked against "zh:" hashes.
	archiveLoc, isArchive := loc.(PackageLocalArchive)

	for _, candidate := range allowed {
		switch candidate.Scheme() {
		case HashScheme1:
			if cachedV1 == NilHash {
				computed, err := PackageHashV1(loc)
				if err != nil {
					return false, err
				}
				cachedV1 = computed
			}
			if candidate == cachedV1 {
				return true, nil
			}
		case HashSchemeZip:
			if !isArchive {
				// A zip hash can never match an unpacked directory.
				continue
			}
			if cachedZip == NilHash {
				computed, err := PackageHashLegacyZipSHA(archiveLoc)
				if err != nil {
					return false, err
				}
				cachedZip = computed
			}
			if candidate == cachedZip {
				return true, nil
			}
		}
		// A hash of any other scheme can't match, so it is skipped.
	}
	return false, nil
}
// PreferredHashes examines all of the given hashes and returns the subset
// that the current version of Terraform considers to provide the strongest
// verification, preserving their order from "given".
//
// The result is nil if none of the given hashes are of a supported scheme.
//
// If any of the given hashes is not using valid syntax then this function
// will panic.
func PreferredHashes(given []Hash) []Hash {
	// For now this only filters for the two hash schemes we support, both of
	// which are considered equally "preferred". If a new scheme like "h2:" is
	// introduced in future then, depending on its characteristics, it might
	// make sense to return "h1:" hashes only when the input has no "h2:"
	// hashes, so that h2: is preferred when possible and h1: is only a
	// fallback for interacting with older systems that haven't been updated
	// with the new scheme yet.
	var preferred []Hash
	for _, candidate := range given {
		if scheme := candidate.Scheme(); scheme == HashScheme1 || scheme == HashSchemeZip {
			preferred = append(preferred, candidate)
		}
	}
	return preferred
}
// PackageHashLegacyZipSHA implements the old provider package hashing scheme
// of taking a SHA256 hash of the containing .zip archive itself, rather than
// of the contents of the archive.
//
// The result is a hash string with the "zh:" prefix, which is intended to
// represent "zip hash". After the prefix is a lowercase-hex encoded SHA256
// checksum, intended to exactly match the formatting used in the registry
// API (apart from the prefix) so that checksums can be more conveniently
// compared by humans.
//
// Because this hashing scheme uses the official provider .zip file as its
// input, it accepts only PackageLocalArchive locations.
//
// An error is returned if symlinks in the archive path cannot be resolved or
// if the file cannot be opened or read.
func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) {
	// The location may itself be a symlink, so resolve it before opening.
	resolved, err := filepath.EvalSymlinks(string(loc))
	if err != nil {
		return "", err
	}

	archive, err := os.Open(resolved)
	if err != nil {
		return "", err
	}
	defer archive.Close()

	// Stream the file through the hasher rather than loading it into memory.
	digest := sha256.New()
	if _, err := io.Copy(digest, archive); err != nil {
		return "", err
	}

	return HashSchemeZip.New(fmt.Sprintf("%x", digest.Sum(nil))), nil
}
// HashLegacyZipSHAFromSHA is a convenience function to produce the
// schemed-string hash format from an already-calculated SHA256 checksum of a
// provider .zip archive.
//
// This just encodes the checksum as lowercase hex and adds the "zh:" prefix,
// so that the result is in the same format as PackageHashLegacyZipSHA.
func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash {
	hexDigest := fmt.Sprintf("%x", sum[:])
	return HashSchemeZip.New(hexDigest)
}
// PackageHashV1 computes a hash of the contents of the package at the given
// location using hash algorithm 1. The resulting Hash is guaranteed to have
// the scheme HashScheme1.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// This function is named "PackageHashV1" in anticipation of other hashing
// algorithms being added in a backward-compatible way in future. The result
// from PackageHashV1 always begins with the prefix "h1:" so that callers can
// distinguish the results of potentially multiple different hash algorithms in
// future.
//
// PackageHashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location, or a nil location, this function will always return
// an error. Whenever the error is non-nil the returned Hash is NilHash.
func PackageHashV1(loc PackageLocation) (Hash, error) {
	// Our HashV1 is really just the Go Modules hash version 1, which is
	// sufficient for our needs and already well-used for identity of
	// Go Modules distribution packages. It is also blocked from incompatible
	// changes by being used in a wide array of go.sum files already.
	//
	// In particular, it also supports computing an equivalent hash from
	// an unpacked zip file, which is not important for Terraform workflow
	// today but is likely to become so in future if we adopt a top-level
	// lockfile mechanism that is intended to be checked in to version control,
	// rather than just a transient lock for a particular local cache directory.
	// (In that case we'd need to check hashes of _packed_ packages, too.)
	//
	// Internally, dirhash.Hash1 produces a string containing a sequence of
	// newline-separated path+filehash pairs for all of the files in the
	// directory, and then finally produces a hash of that string to return.
	// In both cases, the hash algorithm is SHA256.
	switch loc := loc.(type) {
	case nil:
		// A nil location has no String method to call, so it must be
		// rejected here rather than falling through to the default case,
		// which would panic with a nil dereference.
		return "", fmt.Errorf("cannot hash package with no location")

	case PackageLocalDir:
		// We'll first dereference a possible symlink at our PackageDir location,
		// as would be created if this package were linked in from another cache.
		packageDir, err := filepath.EvalSymlinks(string(loc))
		if err != nil {
			return "", err
		}

		// The dirhash.HashDir result is already in our expected h1:...
		// format, so we can just convert directly to Hash.
		s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1)
		if err != nil {
			return "", err
		}
		return Hash(s), nil

	case PackageLocalArchive:
		// As with directories, the archive path itself might be a symlink.
		archivePath, err := filepath.EvalSymlinks(string(loc))
		if err != nil {
			return "", err
		}

		// The dirhash.HashZip result is already in our expected h1:...
		// format, so we can just convert directly to Hash.
		s, err := dirhash.HashZip(archivePath, dirhash.Hash1)
		if err != nil {
			return "", err
		}
		return Hash(s), nil

	default:
		return "", fmt.Errorf("cannot hash package at %s", loc.String())
	}
}
// Hash computes a hash of the contents of the package at the location
// associated with the receiver, using whichever hash algorithm is the current
// default.
//
// This method delegates to PackageHash, so it will change to use new hash
// versions as they are introduced in future. If you need a specific hash
// version, call the method for that version directly instead, such as
// HashV1.
//
// Hash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) Hash() (Hash, error) {
	loc := m.Location
	return PackageHash(loc)
}
// MatchesHash returns true if the package at the location associated with
// the receiver matches the given hash, or false otherwise.
//
// This method delegates to PackageMatchesHash. If it cannot read from the
// receiver's location, or if the given hash is in an unsupported format,
// MatchesHash returns an error.
//
// MatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) MatchesHash(want Hash) (bool, error) {
	loc := m.Location
	return PackageMatchesHash(loc, want)
}
// MatchesAnyHash returns true if the package at the location associated with
// the receiver matches at least one of the given hashes, or false otherwise.
//
// This method delegates to PackageMatchesAnyHash. If it cannot read from the
// receiver's location, MatchesAnyHash returns an error. Unlike the singular
// MatchesHash, MatchesAnyHash considers an unsupported hash format to be a
// successful non-match.
func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) {
	loc := m.Location
	return PackageMatchesAnyHash(loc, acceptable)
}
// HashV1 computes a hash of the contents of the package at the location
// associated with the receiver using hash algorithm 1, by delegating to
// PackageHashV1.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// HashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) HashV1() (Hash, error) {
	loc := m.Location
	return PackageHashV1(loc)
}