getmodules: Re-allow git:: source with ref=COMMIT_ID

Earlier versions of this code allowed "ref" to take any value that would
be accepted by "git checkout" as a valid target of a symbolic ref. We
inadvertently accepted a breaking change to upstream go-getter that broke
that as part of introducing a shallow clone optimization, because shallow
clone requires selecting a single branch.

To restore the previous capabilities while retaining the "depth" argument,
here we accept a compromise where "ref" has the stronger requirement of
being a valid named ref in the remote repository if and only if "depth"
is set to a value greater than zero. If depth isn't set or is less than
one, we will do the old behavior of just cloning all of the refs in the
remote repository in full and then switching to refer to the selected
branch, tag, or naked commit ID as a separate step.

This includes a heuristic to generate an additional error message hint if
we get an error from "git clone" and it looks like the user might've been
trying to use "depth" and "ref=COMMIT" together. We can't recognize that
error accurately because it's only reported as human-oriented git command
output, but this heuristic should hopefully minimize situations where we
show it inappropriately.

For now this is a change in the Terraform repository directly, so that we
can expedite the fix to an already-reported regression. After this is
released I tend to also submit a similar set of changes to upstream
go-getter, at which point we can revert Terraform to using the upstream
getter.GitGetter instead of our own local fork.
This commit is contained in:
Martin Atkins 2021-12-13 17:14:30 -08:00
parent b0ff17ef2a
commit c4d46e7c6b
2 changed files with 140 additions and 3 deletions

View File

@ -80,7 +80,7 @@ func (g *gitGetter) Get(dst string, u *url.URL) error {
// Extract some query parameters we use // Extract some query parameters we use
var ref, sshKey string var ref, sshKey string
var depth int depth := 0 // 0 means "not set"
q := u.Query() q := u.Query()
if len(q) > 0 { if len(q) > 0 {
ref = q.Get("ref") ref = q.Get("ref")
@ -194,20 +194,54 @@ func (g *gitGetter) checkout(dst string, ref string) error {
return getRunCommand(cmd) return getRunCommand(cmd)
} }
// gitCommitIDRegex is a pattern intended to match strings that seem
// "likely to be" git commit IDs, rather than named refs. This cannot be
// an exact decision because it's valid to name a branch or tag after a series
// of hexadecimal digits too.
//
// We require at least 7 digits here because that's the smallest size git
// itself will typically generate, and so it'll reduce the risk of false
// positives on short branch names that happen to also be "hex words".
var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$")
func (g *gitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error { func (g *gitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error {
args := []string{"clone"} args := []string{"clone"}
autoBranch := false
if ref == "" { if ref == "" {
ref = findRemoteDefaultBranch(u) ref = findRemoteDefaultBranch(u)
autoBranch = true
} }
if depth > 0 { if depth > 0 {
args = append(args, "--depth", strconv.Itoa(depth)) args = append(args, "--depth", strconv.Itoa(depth))
args = append(args, "--branch", ref)
} }
args = append(args, u.String(), dst)
args = append(args, "--branch", ref, u.String(), dst)
cmd := exec.CommandContext(ctx, "git", args...) cmd := exec.CommandContext(ctx, "git", args...)
setupGitEnv(cmd, sshKeyFile) setupGitEnv(cmd, sshKeyFile)
return getRunCommand(cmd) err := getRunCommand(cmd)
if err != nil {
if depth > 0 && !autoBranch {
// If we're creating a shallow clone then the given ref must be
// a named ref (branch or tag) rather than a commit directly.
// We can't accurately recognize the resulting error here without
// hard-coding assumptions about git's human-readable output, but
// we can at least try a heuristic.
if gitCommitIDRegex.MatchString(ref) {
return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err)
}
}
return err
}
if depth < 1 && !autoBranch {
// If we didn't add --depth and --branch above then we will now be
// on the remote repository's default branch, rather than the selected
// ref, so we'll need to fix that before we return.
return g.checkout(dst, ref)
}
return nil
} }
func (g *gitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error { func (g *gitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error {

View File

@ -90,6 +90,58 @@ func TestGitGetter_branch(t *testing.T) {
} }
} }
func TestGitGetter_commitID(t *testing.T) {
if !testHasGit {
t.Skip("git not found, skipping")
}
g := new(gitGetter)
dst := tempDir(t)
// We're going to create different content on the main branch vs.
// another branch here, so that below we can recognize if we
// correctly cloned the commit actually requested (from the
// "other branch"), not the one at HEAD.
repo := testGitRepo(t, "commit_id")
repo.git("checkout", "-b", "main-branch")
repo.commitFile("wrong.txt", "Nope")
repo.git("checkout", "-b", "other-branch")
repo.commitFile("hello.txt", "Yep")
commitID, err := repo.latestCommit()
if err != nil {
t.Fatal(err)
}
// Return to the main branch so that HEAD of this repository
// will be that, rather than "test-branch".
repo.git("checkout", "main-branch")
q := repo.url.Query()
q.Add("ref", commitID)
repo.url.RawQuery = q.Encode()
t.Logf("Getting %s", repo.url)
if err := g.Get(dst, repo.url); err != nil {
t.Fatalf("err: %s", err)
}
// Verify the main file exists
mainPath := filepath.Join(dst, "hello.txt")
if _, err := os.Stat(mainPath); err != nil {
t.Fatalf("err: %s", err)
}
// Get again should work
if err := g.Get(dst, repo.url); err != nil {
t.Fatalf("err: %s", err)
}
// Verify the main file exists
mainPath = filepath.Join(dst, "hello.txt")
if _, err := os.Stat(mainPath); err != nil {
t.Fatalf("err: %s", err)
}
}
func TestGitGetter_remoteWithoutMaster(t *testing.T) { func TestGitGetter_remoteWithoutMaster(t *testing.T) {
if !testHasGit { if !testHasGit {
t.Log("git not found, skipping") t.Log("git not found, skipping")
@ -214,6 +266,44 @@ func TestGitGetter_shallowCloneWithTag(t *testing.T) {
} }
} }
func TestGitGetter_shallowCloneWithCommitID(t *testing.T) {
if !testHasGit {
t.Log("git not found, skipping")
t.Skip()
}
g := new(gitGetter)
dst := tempDir(t)
repo := testGitRepo(t, "upstream")
repo.commitFile("v1.0.txt", "0")
repo.git("tag", "v1.0")
repo.commitFile("v1.1.txt", "1")
commitID, err := repo.latestCommit()
if err != nil {
t.Fatal(err)
}
// Specify a clone depth of 1 with a naked commit ID
// This is intentionally invalid: shallow clone always requires a named ref.
q := repo.url.Query()
q.Add("ref", commitID[:8])
q.Add("depth", "1")
repo.url.RawQuery = q.Encode()
t.Logf("Getting %s", repo.url)
err = g.Get(dst, repo.url)
if err == nil {
t.Fatalf("success; want error")
}
// We use a heuristic to generate an extra hint in the error message if
// it looks like the user was trying to combine ref=COMMIT with depth.
if got, want := err.Error(), "(note that setting 'depth' requires 'ref' to be a branch or tag name)"; !strings.Contains(got, want) {
t.Errorf("missing error message hint\ngot: %s\nwant substring: %s", got, want)
}
}
func TestGitGetter_branchUpdate(t *testing.T) { func TestGitGetter_branchUpdate(t *testing.T) {
if !testHasGit { if !testHasGit {
t.Skip("git not found, skipping") t.Skip("git not found, skipping")
@ -664,6 +754,19 @@ func (r *gitRepo) commitFile(file, content string) {
r.git("commit", "-m", "Adding "+file) r.git("commit", "-m", "Adding "+file)
} }
// latestCommit returns the full commit id of the latest commit on the current
// branch.
func (r *gitRepo) latestCommit() (string, error) {
cmd := exec.Command("git", "rev-parse", "HEAD")
cmd.Dir = r.dir
rawOut, err := cmd.Output()
if err != nil {
return "", err
}
rawOut = bytes.TrimSpace(rawOut)
return string(rawOut), nil
}
// This is a read-only deploy key for an empty test repository. // This is a read-only deploy key for an empty test repository.
// Note: This is split over multiple lines to avoid being disabled by key // Note: This is split over multiple lines to avoid being disabled by key
// scanners automatically. // scanners automatically.