internal/providercache: Hashing of contents of cached packages

For the old-style provider cache directory model we hashed the individual
executable file for each provider. That's no longer appropriate because
we're giving each provider package a whole directory to itself where it
can potentially have many files.

This therefore introduces a new directory-oriented hashing algorithm, and
it's just using the Go Modules directory hashing algorithm directly
because that's already had its cross-platform quirks and other wrinkles
addressed during the Go Modules release process, and is now used
prolifically enough in Go codebases that breaking changes to the upstream
algorithm would be very expensive to the Go ecosystem.

This is also a bit of forward planning, anticipating that later we'll use
hashes in a top-level lock file intended to be checked in to user version
control, and then use those hashes also to verify packages _during_
installation, where we'd need to be able to hash zip files that have not
yet been unpacked. The Go Modules hashing algorithm is already implemented
to produce the same hash for both a zip file and an unpacked version of
that zip file.
This commit is contained in:
Martin Atkins 2020-03-27 15:36:07 -07:00
parent 48bf00a7e2
commit f6a7a4868b
2 changed files with 136 additions and 0 deletions

View File

@ -1,6 +1,12 @@
package providercache
import (
"fmt"
"path/filepath"
"strings"
"golang.org/x/mod/sumdb/dirhash"
"github.com/hashicorp/terraform/addrs"
"github.com/hashicorp/terraform/internal/getproviders"
)
@ -31,3 +37,73 @@ type CachedProvider struct {
// within a particular path string.
ExecutableFile string
}
// Hash returns a hash of the contents of the package directory that belongs
// to the receiving cached provider, computed with the current default hash
// algorithm.
//
// The default is presently the version 1 algorithm implemented by HashV1.
// A later release may change the default, so callers that specifically
// require a version 1 hash should call HashV1 directly instead.
func (cp *CachedProvider) Hash() (string, error) {
	// Version 1 is the only algorithm available, so it is also the default.
	hash, err := cp.HashV1()
	return hash, err
}
// MatchesHash returns true if the package on disk matches the given hash,
// or false otherwise. If it cannot traverse the package directory and read
// all of the files in it, or if the hash is in an unsupported format,
// MatchesHash returns an error.
//
// The hash format is identified by its scheme prefix, which includes the
// trailing colon. There is currently only one hash format, "h1:", as
// implemented by HashV1. However, if others are introduced in future
// MatchesHash may accept multiple formats, and may generate errors for any
// formats that become obsolete.
func (cp *CachedProvider) MatchesHash(want string) (bool, error) {
	switch {
	// Match the full "h1:" scheme prefix, colon included, so that a string
	// such as "h10:..." or a bare "h1" is reported as unsupported rather than
	// being silently treated as a non-matching version 1 hash.
	case strings.HasPrefix(want, "h1:"):
		got, err := cp.HashV1()
		if err != nil {
			return false, err
		}
		return got == want, nil
	default:
		return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
	}
}
// HashV1 returns a hash of the contents of the package directory that
// belongs to the receiving cached provider, computed with hash algorithm 1.
//
// The hash takes into account the path of each file in the directory and
// the content of each file. Other file metadata, such as permissions, does
// not contribute to the result.
//
// The "V1" in the name leaves room for further algorithms to be added later
// in a backward-compatible way. Every result from HashV1 starts with the
// prefix "h1:", which lets callers tell the results of different algorithms
// apart.
func (cp *CachedProvider) HashV1() (string, error) {
	// This is simply the Go Modules version 1 directory hash. It is enough
	// for our purposes, is already widely used to identify Go Modules
	// distribution packages, and is effectively frozen against incompatible
	// changes because so many existing go.sum files depend on it.
	//
	// The same algorithm can also produce an equivalent hash directly from a
	// zip archive. Terraform does not need that today, but it is likely to
	// matter if we later adopt a top-level lock file that is checked in to
	// version control, rather than only a transient lock for one local cache
	// directory, because we would then have to verify packages that are
	// still packed as well.

	// PackageDir may be a symlink, as created when this package is linked in
	// from another cache, so resolve it to the real directory before hashing.
	resolvedDir, err := filepath.EvalSymlinks(cp.PackageDir)
	if err != nil {
		return "", err
	}

	// dirhash.Hash1 builds a newline-separated list of path+filehash pairs
	// covering every file in the directory, and then returns a hash of that
	// list. SHA256 is used both for the per-file hashes and for the final
	// hash.
	return dirhash.HashDir(resolvedDir, "", dirhash.Hash1)
}

View File

@ -0,0 +1,60 @@
package providercache
import (
"testing"
"github.com/hashicorp/terraform/addrs"
"github.com/hashicorp/terraform/internal/getproviders"
)
// TestCachedProviderHash checks that Hash produces the expected version 1
// hash for the darwin_amd64 test fixture, that MatchesHash accepts that hash
// for the same package, and that MatchesHash rejects it for the
// windows_amd64 fixture of the same provider version.
func TestCachedProviderHash(t *testing.T) {
	// Expected hash of the darwin_amd64 fixture directory.
	const wantHash = "h1:qjsREM4DqEWECD43FcPqddZ9oxCG+IaMTxvWPciS05g="

	darwinPkg := &CachedProvider{
		Provider: addrs.NewProvider(
			addrs.DefaultRegistryHost,
			"hashicorp", "null",
		),
		Version:        getproviders.MustParseVersion("2.0.0"),
		PackageDir:     "testdata/cachedir/registry.terraform.io/hashicorp/null/2.0.0/darwin_amd64",
		ExecutableFile: "testdata/cachedir/registry.terraform.io/hashicorp/null/2.0.0/darwin_amd64/terraform-provider-null",
	}

	gotHash, err := darwinPkg.Hash()
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if gotHash != wantHash {
		t.Errorf("wrong Hash result\ngot: %s\nwant: %s", gotHash, wantHash)
	}

	// The package must match the hash that was just computed from it.
	matched, err := darwinPkg.MatchesHash(wantHash)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if expected := true; matched != expected {
		t.Errorf("wrong MatchesHash result\ngot: %#v\nwant: %#v", matched, expected)
	}

	// The windows build has a different hash because its executable filename
	// has a .exe suffix, but the darwin build (hashed above) does not.
	windowsPkg := &CachedProvider{
		Provider: addrs.NewProvider(
			addrs.DefaultRegistryHost,
			"hashicorp", "null",
		),
		Version:        getproviders.MustParseVersion("2.0.0"),
		PackageDir:     "testdata/cachedir/registry.terraform.io/hashicorp/null/2.0.0/windows_amd64",
		ExecutableFile: "testdata/cachedir/registry.terraform.io/hashicorp/null/2.0.0/windows_amd64/terraform-provider-null",
	}

	matched, err = windowsPkg.MatchesHash(wantHash)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if expected := false; matched != expected {
		t.Errorf("wrong MatchesHash result for other package\ngot: %#v\nwant: %#v", matched, expected)
	}
}