terraform/internal/getproviders/hash.go

452 lines
17 KiB
Go
Raw Permalink Normal View History

package getproviders
import (
"crypto/sha256"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"golang.org/x/mod/sumdb/dirhash"
)
// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string is always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme contents (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string
// NilHash is the zero value of Hash. It isn't a valid hash, so all of its
// methods will panic.
const NilHash = Hash("")
// ParseHash parses the string representation of a Hash into a Hash value.
//
// A particular version of Terraform only supports a fixed set of hash schemes,
// but this function intentionally allows unrecognized schemes so that we can
// silently ignore other schemes that may be introduced in the future. For
// that reason, the Scheme method of the returned Hash may return a value that
// isn't in one of the HashScheme constants in this package.
//
// This function doesn't verify that the value portion of the given hash makes
// sense for the given scheme. Invalid values are just considered to not match
// any packages.
//
// If this function returns an error then the returned Hash is invalid and
// must not be used.
func ParseHash(s string) (Hash, error) {
colon := strings.Index(s, ":")
if colon < 1 { // 1 because a zero-length scheme is not allowed
return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon")
}
return Hash(s), nil
}
// MustParseHash is a wrapper around ParseHash that panics if it returns an
// error.
func MustParseHash(s string) Hash {
hash, err := ParseHash(s)
if err != nil {
panic(err.Error())
}
return hash
}
// Scheme returns the scheme of the recieving hash. If the receiver is not
// using valid syntax then this method will panic.
func (h Hash) Scheme() HashScheme {
colon := strings.Index(string(h), ":")
if colon < 0 {
panic(fmt.Sprintf("invalid hash string %q", h))
}
return HashScheme(h[:colon+1])
}
// HasScheme returns true if the given scheme matches the receiver's scheme,
// or false otherwise.
//
// If the receiver is not using valid syntax then this method will panic.
func (h Hash) HasScheme(want HashScheme) bool {
return h.Scheme() == want
}
// Value returns the scheme-specific value from the recieving hash. The
// meaning of this value depends on the scheme.
//
// If the receiver is not using valid syntax then this method will panic.
func (h Hash) Value() string {
colon := strings.Index(string(h), ":")
if colon < 0 {
panic(fmt.Sprintf("invalid hash string %q", h))
}
return string(h[colon+1:])
}
// String returns a string representation of the receiving hash.
func (h Hash) String() string {
return string(h)
}
// GoString returns a Go syntax representation of the receiving hash.
//
// This is here primarily to help with producing descriptive test failure
// output; these results are not particularly useful at runtime.
func (h Hash) GoString() string {
if h == NilHash {
return "getproviders.NilHash"
}
switch scheme := h.Scheme(); scheme {
case HashScheme1:
return fmt.Sprintf("getproviders.HashScheme1.New(%q)", h.Value())
case HashSchemeZip:
return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", h.Value())
default:
// This fallback is for when we encounter lock files or API responses
// with hash schemes that the current version of Terraform isn't
// familiar with. They were presumably introduced in a later version.
return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, h.Value())
}
}
// HashScheme is an enumeration of schemes that are allowed for values of type
// Hash.
type HashScheme string
const (
// HashScheme1 is the scheme identifier for the first hash scheme.
//
// Use HashV1 (or one of its wrapper functions) to calculate hashes with
// this scheme.
HashScheme1 HashScheme = HashScheme("h1:")
// HashSchemeZip is the scheme identifier for the legacy hash scheme that
// applies to distribution archives (.zip files) rather than package
// contents, and can therefore only be verified against the original
// distribution .zip file, not an extracted directory.
//
// Use PackageHashLegacyZipSHA to calculate hashes with this scheme.
HashSchemeZip HashScheme = HashScheme("zh:")
)
// New creates a new Hash value with the receiver as its scheme and the given
// raw string as its value.
//
// It's the caller's responsibility to make sure that the given value makes
// sense for the selected scheme.
func (hs HashScheme) New(value string) Hash {
return Hash(string(hs) + value)
}
// PackageHash computes a hash of the contents of the package at the given
// location, using whichever hash algorithm is the current default.
//
// Currently, this method returns version 1 hashes as produced by the
// function PackageHashV1, but this function may switch to other versions in
// later releases. Call PackageHashV1 directly if you specifically need a V1
// hash.
//
// PackageHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHash(loc PackageLocation) (Hash, error) {
return PackageHashV1(loc)
}
// PackageMatchesHash returns true if the package at the given location matches
// the given hash, or false otherwise.
//
// If it cannot read from the given location, or if the given hash is in an
// unsupported format, PackageMatchesHash returns an error.
//
// There is currently only one hash format, as implemented by HashV1. However,
// if others are introduced in future PackageMatchesHash may accept multiple
// formats, and may generate errors for any formats that become obsolete.
//
// PackageMatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) {
switch want.Scheme() {
case HashScheme1:
got, err := PackageHashV1(loc)
if err != nil {
return false, err
}
return got == want, nil
getproviders: Prepare for having multiple valid hashes per package As we continue iterating towards saving valid hashes for a package in a depsfile lock file after installation and verifying them on future installation, this prepares getproviders for the possibility of having multiple valid hashes per package. This will arise in future commits for two reasons: - We will need to support both the legacy "zip hash" hashing scheme and the new-style content-based hashing scheme because currently the registry protocol is only able to produce the legacy scheme, but our other installation sources prefer the content-based scheme. Therefore packages will typically have a mixture of hashes of both types. - Installing from an upstream registry will save the hashes for the packages across all supported platforms, rather than just the current platform, and we'll consider all of those valid for future installation if we see both successful matching of the current platform checksum and a signature verification for the checksums file as a whole. This also includes some more preparation for the second case above in that signatureAuthentication now supports AcceptableHashes and returns all of the zip-based hashes it can find in the checksums file. This is a bit of an abstraction leak because previously that authenticator considered its "document" to just be opaque bytes, but we want to make sure that we can only end up trusting _all_ of the hashes if we've verified that the document is signed. Hopefully we'll make this better in a future commit with some refactoring, but that's deferred for now in order to minimize disruption to existing codepaths while we work towards a provider locking MVP.
2020-09-24 01:23:00 +02:00
case HashSchemeZip:
archiveLoc, ok := loc.(PackageLocalArchive)
if !ok {
return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`)
}
got, err := PackageHashLegacyZipSHA(archiveLoc)
if err != nil {
return false, err
}
return got == want, nil
default:
return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
}
}
getproviders: Prepare for having multiple valid hashes per package As we continue iterating towards saving valid hashes for a package in a depsfile lock file after installation and verifying them on future installation, this prepares getproviders for the possibility of having multiple valid hashes per package. This will arise in future commits for two reasons: - We will need to support both the legacy "zip hash" hashing scheme and the new-style content-based hashing scheme because currently the registry protocol is only able to produce the legacy scheme, but our other installation sources prefer the content-based scheme. Therefore packages will typically have a mixture of hashes of both types. - Installing from an upstream registry will save the hashes for the packages across all supported platforms, rather than just the current platform, and we'll consider all of those valid for future installation if we see both successful matching of the current platform checksum and a signature verification for the checksums file as a whole. This also includes some more preparation for the second case above in that signatureAuthentication now supports AcceptableHashes and returns all of the zip-based hashes it can find in the checksums file. This is a bit of an abstraction leak because previously that authenticator considered its "document" to just be opaque bytes, but we want to make sure that we can only end up trusting _all_ of the hashes if we've verified that the document is signed. Hopefully we'll make this better in a future commit with some refactoring, but that's deferred for now in order to minimize disruption to existing codepaths while we work towards a provider locking MVP.
2020-09-24 01:23:00 +02:00
// PackageMatchesAnyHash returns true if the package at the given location
// matches at least one of the given hashes, or false otherwise.
//
// If it cannot read from the given location, PackageMatchesAnyHash returns an
// error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash
// considers unsupported hash formats as successfully non-matching, rather
// than returning an error.
//
// PackageMatchesAnyHash can be used only with the two local package location
// types PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) {
// It's likely that we'll have multiple hashes of the same scheme in
// the "allowed" set, in which case we'll avoid repeatedly re-reading the
// given package by caching its result for each of the two
// currently-supported hash formats. These will be NilHash until we
// encounter the first hash of the corresponding scheme.
var v1Hash, zipHash Hash
for _, want := range allowed {
switch want.Scheme() {
case HashScheme1:
if v1Hash == NilHash {
got, err := PackageHashV1(loc)
if err != nil {
return false, err
}
v1Hash = got
}
if v1Hash == want {
return true, nil
}
case HashSchemeZip:
archiveLoc, ok := loc.(PackageLocalArchive)
if !ok {
// A zip hash can never match an unpacked directory
continue
}
if zipHash == NilHash {
got, err := PackageHashLegacyZipSHA(archiveLoc)
if err != nil {
return false, err
}
zipHash = got
}
if zipHash == want {
return true, nil
}
default:
// If it's not a supported format then it can't match.
continue
}
}
return false, nil
}
// PreferredHashes examines all of the given hash strings and returns the one
// that the current version of Terraform considers to provide the strongest
// verification.
//
// Returns an empty string if none of the given hashes are of a supported
// format. If PreferredHash returns a non-empty string then it will be one
// of the hash strings in "given", and that hash is the one that must pass
// verification in order for a package to be considered valid.
func PreferredHashes(given []Hash) []Hash {
getproviders: Prepare for having multiple valid hashes per package As we continue iterating towards saving valid hashes for a package in a depsfile lock file after installation and verifying them on future installation, this prepares getproviders for the possibility of having multiple valid hashes per package. This will arise in future commits for two reasons: - We will need to support both the legacy "zip hash" hashing scheme and the new-style content-based hashing scheme because currently the registry protocol is only able to produce the legacy scheme, but our other installation sources prefer the content-based scheme. Therefore packages will typically have a mixture of hashes of both types. - Installing from an upstream registry will save the hashes for the packages across all supported platforms, rather than just the current platform, and we'll consider all of those valid for future installation if we see both successful matching of the current platform checksum and a signature verification for the checksums file as a whole. This also includes some more preparation for the second case above in that signatureAuthentication now supports AcceptableHashes and returns all of the zip-based hashes it can find in the checksums file. This is a bit of an abstraction leak because previously that authenticator considered its "document" to just be opaque bytes, but we want to make sure that we can only end up trusting _all_ of the hashes if we've verified that the document is signed. Hopefully we'll make this better in a future commit with some refactoring, but that's deferred for now in order to minimize disruption to existing codepaths while we work towards a provider locking MVP.
2020-09-24 01:23:00 +02:00
// For now this is just filtering for the two hash formats we support,
// both of which are considered equally "preferred". If we introduce
// a new scheme like "h2:" in future then, depending on the characteristics
// of that new version, it might make sense to rework this function so
// that it only returns "h1:" hashes if the input has no "h2:" hashes,
// so that h2: is preferred when possible and h1: is only a fallback for
// interacting with older systems that haven't been updated with the new
// scheme yet.
var ret []Hash
for _, hash := range given {
getproviders: Prepare for having multiple valid hashes per package As we continue iterating towards saving valid hashes for a package in a depsfile lock file after installation and verifying them on future installation, this prepares getproviders for the possibility of having multiple valid hashes per package. This will arise in future commits for two reasons: - We will need to support both the legacy "zip hash" hashing scheme and the new-style content-based hashing scheme because currently the registry protocol is only able to produce the legacy scheme, but our other installation sources prefer the content-based scheme. Therefore packages will typically have a mixture of hashes of both types. - Installing from an upstream registry will save the hashes for the packages across all supported platforms, rather than just the current platform, and we'll consider all of those valid for future installation if we see both successful matching of the current platform checksum and a signature verification for the checksums file as a whole. This also includes some more preparation for the second case above in that signatureAuthentication now supports AcceptableHashes and returns all of the zip-based hashes it can find in the checksums file. This is a bit of an abstraction leak because previously that authenticator considered its "document" to just be opaque bytes, but we want to make sure that we can only end up trusting _all_ of the hashes if we've verified that the document is signed. Hopefully we'll make this better in a future commit with some refactoring, but that's deferred for now in order to minimize disruption to existing codepaths while we work towards a provider locking MVP.
2020-09-24 01:23:00 +02:00
switch hash.Scheme() {
case HashScheme1, HashSchemeZip:
ret = append(ret, hash)
}
}
return ret
}
// PackageHashLegacyZipSHA implements the old provider package hashing scheme
// of taking a SHA256 hash of the containing .zip archive itself, rather than
// of the contents of the archive.
//
// The result is a hash string with the "zh:" prefix, which is intended to
// represent "zip hash". After the prefix is a lowercase-hex encoded SHA256
// checksum, intended to exactly match the formatting used in the registry
// API (apart from the prefix) so that checksums can be more conveniently
// compared by humans.
//
// Because this hashing scheme uses the official provider .zip file as its
// input, it accepts only PackageLocalArchive locations.
func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) {
archivePath, err := filepath.EvalSymlinks(string(loc))
if err != nil {
return "", err
}
f, err := os.Open(archivePath)
if err != nil {
return "", err
}
defer f.Close()
h := sha256.New()
_, err = io.Copy(h, f)
if err != nil {
return "", err
}
gotHash := h.Sum(nil)
return HashSchemeZip.New(fmt.Sprintf("%x", gotHash)), nil
}
// HashLegacyZipSHAFromSHA is a convenience method to produce the schemed-string
// hash format from an already-calculated hash of a provider .zip archive.
//
// This just adds the "zh:" prefix and encodes the string in hex, so that the
// result is in the same format as PackageHashLegacyZipSHA.
func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash {
return HashSchemeZip.New(fmt.Sprintf("%x", sum[:]))
}
// PackageHashV1 computes a hash of the contents of the package at the given
// location using hash algorithm 1. The resulting Hash is guaranteed to have
// the scheme HashScheme1.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// This function is named "PackageHashV1" in anticipation of other hashing
// algorithms being added in a backward-compatible way in future. The result
// from PackageHashV1 always begins with the prefix "h1:" so that callers can
// distinguish the results of potentially multiple different hash algorithms in
// future.
//
// PackageHashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHashV1(loc PackageLocation) (Hash, error) {
// Our HashV1 is really just the Go Modules hash version 1, which is
// sufficient for our needs and already well-used for identity of
// Go Modules distribution packages. It is also blocked from incompatible
// changes by being used in a wide array of go.sum files already.
//
// In particular, it also supports computing an equivalent hash from
// an unpacked zip file, which is not important for Terraform workflow
// today but is likely to become so in future if we adopt a top-level
// lockfile mechanism that is intended to be checked in to version control,
// rather than just a transient lock for a particular local cache directory.
// (In that case we'd need to check hashes of _packed_ packages, too.)
//
// Internally, dirhash.Hash1 produces a string containing a sequence of
// newline-separated path+filehash pairs for all of the files in the
// directory, and then finally produces a hash of that string to return.
// In both cases, the hash algorithm is SHA256.
switch loc := loc.(type) {
case PackageLocalDir:
// We'll first dereference a possible symlink at our PackageDir location,
// as would be created if this package were linked in from another cache.
packageDir, err := filepath.EvalSymlinks(string(loc))
if err != nil {
return "", err
}
// The dirhash.HashDir result is already in our expected h1:...
// format, so we can just convert directly to Hash.
s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1)
return Hash(s), err
case PackageLocalArchive:
archivePath, err := filepath.EvalSymlinks(string(loc))
if err != nil {
return "", err
}
// The dirhash.HashDir result is already in our expected h1:...
// format, so we can just convert directly to Hash.
s, err := dirhash.HashZip(archivePath, dirhash.Hash1)
return Hash(s), err
default:
return "", fmt.Errorf("cannot hash package at %s", loc.String())
}
}
// Hash computes a hash of the contents of the package at the location
// associated with the reciever, using whichever hash algorithm is the current
// default.
//
// This method will change to use new hash versions as they are introduced
// in future. If you need a specific hash version, call the method for that
// version directly instead, such as HashV1.
//
// Hash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) Hash() (Hash, error) {
return PackageHash(m.Location)
}
// MatchesHash returns true if the package at the location associated with
// the receiver matches the given hash, or false otherwise.
//
// If it cannot read from the given location, or if the given hash is in an
// unsupported format, MatchesHash returns an error.
//
// MatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) MatchesHash(want Hash) (bool, error) {
return PackageMatchesHash(m.Location, want)
}
// MatchesAnyHash returns true if the package at the location associated with
// the receiver matches at least one of the given hashes, or false otherwise.
//
// If it cannot read from the given location, MatchesHash returns an error.
// Unlike the signular MatchesHash, MatchesAnyHash considers an unsupported
// hash format to be a successful non-match.
func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) {
return PackageMatchesAnyHash(m.Location, acceptable)
}
// HashV1 computes a hash of the contents of the package at the location
// associated with the receiver using hash algorithm 1.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// HashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) HashV1() (Hash, error) {
return PackageHashV1(m.Location)
}