From c4d46e7c6b9ed577cae3f1ab149e610fcc44a666 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Mon, 13 Dec 2021 17:14:30 -0800 Subject: [PATCH] getmodules: Re-allow git:: source with ref=COMMIT_ID Earlier versions of this code allowed "ref" to take any value that would be accepted by "git checkout" as a valid target of a symbolic ref. We inadvertently accepted a breaking change to upstream go-getter that broke that as part of introducing a shallow clone optimization, because shallow clone requires selecting a single branch. To restore the previous capabilities while retaining the "depth" argument, here we accept a compromise where "ref" has the stronger requirement of being a valid named ref in the remote repository if and only if "depth" is set to a value greater than zero. If depth isn't set or is less than one, we will do the old behavior of just cloning all of the refs in the remote repository in full and then switching to refer to the selected branch, tag, or naked commit ID as a separate step. This includes a heuristic to generate an additional error message hint if we get an error from "git clone" and it looks like the user might've been trying to use "depth" and "ref=COMMIT" together. We can't recognize that error accurately because it's only reported as human-oriented git command output, but this heuristic should hopefully minimize situations where we show it inappropriately. For now this is a change in the Terraform repository directly, so that we can expedite the fix to an already-reported regression. After this is released I tend to also submit a similar set of changes to upstream go-getter, at which point we can revert Terraform to using the upstream getter.GitGetter instead of our own local fork. --- internal/getmodules/git_getter.go | 40 +++++++++- internal/getmodules/git_getter_test.go | 103 +++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 3 deletions(-) diff --git a/internal/getmodules/git_getter.go b/internal/getmodules/git_getter.go index 535112e7e..1b811b8fb 100644 --- a/internal/getmodules/git_getter.go +++ b/internal/getmodules/git_getter.go @@ -80,7 +80,7 @@ func (g *gitGetter) Get(dst string, u *url.URL) error { // Extract some query parameters we use var ref, sshKey string - var depth int + depth := 0 // 0 means "not set" q := u.Query() if len(q) > 0 { ref = q.Get("ref") @@ -194,20 +194,54 @@ func (g *gitGetter) checkout(dst string, ref string) error { return getRunCommand(cmd) } +// gitCommitIDRegex is a pattern intended to match strings that seem +// "likely to be" git commit IDs, rather than named refs. This cannot be +// an exact decision because it's valid to name a branch or tag after a series +// of hexadecimal digits too. +// +// We require at least 7 digits here because that's the smallest size git +// itself will typically generate, and so it'll reduce the risk of false +// positives on short branch names that happen to also be "hex words". +var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$") + func (g *gitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error { args := []string{"clone"} + autoBranch := false if ref == "" { ref = findRemoteDefaultBranch(u) + autoBranch = true } if depth > 0 { args = append(args, "--depth", strconv.Itoa(depth)) + args = append(args, "--branch", ref) } + args = append(args, u.String(), dst) - args = append(args, "--branch", ref, u.String(), dst) cmd := exec.CommandContext(ctx, "git", args...) setupGitEnv(cmd, sshKeyFile) - return getRunCommand(cmd) + err := getRunCommand(cmd) + if err != nil { + if depth > 0 && !autoBranch { + // If we're creating a shallow clone then the given ref must be + // a named ref (branch or tag) rather than a commit directly. + // We can't accurately recognize the resulting error here without + // hard-coding assumptions about git's human-readable output, but + // we can at least try a heuristic. + if gitCommitIDRegex.MatchString(ref) { + return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err) + } + } + return err + } + + if depth < 1 && !autoBranch { + // If we didn't add --depth and --branch above then we will now be + // on the remote repository's default branch, rather than the selected + // ref, so we'll need to fix that before we return. + return g.checkout(dst, ref) + } + return nil } func (g *gitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error { diff --git a/internal/getmodules/git_getter_test.go b/internal/getmodules/git_getter_test.go index 79b3e1592..5893c9c8e 100644 --- a/internal/getmodules/git_getter_test.go +++ b/internal/getmodules/git_getter_test.go @@ -90,6 +90,58 @@ func TestGitGetter_branch(t *testing.T) { } } +func TestGitGetter_commitID(t *testing.T) { + if !testHasGit { + t.Skip("git not found, skipping") + } + + g := new(gitGetter) + dst := tempDir(t) + + // We're going to create different content on the main branch vs. + // another branch here, so that below we can recognize if we + // correctly cloned the commit actually requested (from the + // "other branch"), not the one at HEAD. + repo := testGitRepo(t, "commit_id") + repo.git("checkout", "-b", "main-branch") + repo.commitFile("wrong.txt", "Nope") + repo.git("checkout", "-b", "other-branch") + repo.commitFile("hello.txt", "Yep") + commitID, err := repo.latestCommit() + if err != nil { + t.Fatal(err) + } + // Return to the main branch so that HEAD of this repository + // will be that, rather than "test-branch". + repo.git("checkout", "main-branch") + + q := repo.url.Query() + q.Add("ref", commitID) + repo.url.RawQuery = q.Encode() + + t.Logf("Getting %s", repo.url) + if err := g.Get(dst, repo.url); err != nil { + t.Fatalf("err: %s", err) + } + + // Verify the main file exists + mainPath := filepath.Join(dst, "hello.txt") + if _, err := os.Stat(mainPath); err != nil { + t.Fatalf("err: %s", err) + } + + // Get again should work + if err := g.Get(dst, repo.url); err != nil { + t.Fatalf("err: %s", err) + } + + // Verify the main file exists + mainPath = filepath.Join(dst, "hello.txt") + if _, err := os.Stat(mainPath); err != nil { + t.Fatalf("err: %s", err) + } +} + func TestGitGetter_remoteWithoutMaster(t *testing.T) { if !testHasGit { t.Log("git not found, skipping") @@ -214,6 +266,44 @@ func TestGitGetter_shallowCloneWithTag(t *testing.T) { } } +func TestGitGetter_shallowCloneWithCommitID(t *testing.T) { + if !testHasGit { + t.Log("git not found, skipping") + t.Skip() + } + + g := new(gitGetter) + dst := tempDir(t) + + repo := testGitRepo(t, "upstream") + repo.commitFile("v1.0.txt", "0") + repo.git("tag", "v1.0") + repo.commitFile("v1.1.txt", "1") + + commitID, err := repo.latestCommit() + if err != nil { + t.Fatal(err) + } + + // Specify a clone depth of 1 with a naked commit ID + // This is intentionally invalid: shallow clone always requires a named ref. + q := repo.url.Query() + q.Add("ref", commitID[:8]) + q.Add("depth", "1") + repo.url.RawQuery = q.Encode() + + t.Logf("Getting %s", repo.url) + err = g.Get(dst, repo.url) + if err == nil { + t.Fatalf("success; want error") + } + // We use a heuristic to generate an extra hint in the error message if + // it looks like the user was trying to combine ref=COMMIT with depth. + if got, want := err.Error(), "(note that setting 'depth' requires 'ref' to be a branch or tag name)"; !strings.Contains(got, want) { + t.Errorf("missing error message hint\ngot: %s\nwant substring: %s", got, want) + } +} + func TestGitGetter_branchUpdate(t *testing.T) { if !testHasGit { t.Skip("git not found, skipping") @@ -664,6 +754,19 @@ func (r *gitRepo) commitFile(file, content string) { r.git("commit", "-m", "Adding "+file) } +// latestCommit returns the full commit id of the latest commit on the current +// branch. +func (r *gitRepo) latestCommit() (string, error) { + cmd := exec.Command("git", "rev-parse", "HEAD") + cmd.Dir = r.dir + rawOut, err := cmd.Output() + if err != nil { + return "", err + } + rawOut = bytes.TrimSpace(rawOut) + return string(rawOut), nil +} + // This is a read-only deploy key for an empty test repository. // Note: This is split over multiple lines to avoid being disabled by key // scanners automatically.