lang/funcs: File hashing functions stream data from disk

Previously our file hashing functions were backed by the same "read file
into memory" function we use for situations like "file" and "templatefile",
meaning that they'd read the entire file into memory first and then
calculate the hash from that buffer.

All of the hash implementations we use here can calculate hashes from a
sequence of smaller buffer writes though, so there's no actual need for
us to create a file-sized temporary buffer here.

This, then, is a small refactoring of our underlying function into two
parts, where one is responsible for deciding the actual filename to load
and opening it, and the other is responsible for buffering the file into
memory. Our hashing functions can then use only the first function and
skip the second.

This then allows us to use io.Copy to stream from the file into the
hashing function in smaller chunks, possibly of a size chosen by the hash
function if it happens to implement io.ReaderFrom.

The new implementation is functionally equivalent to the old but should
use less temporary memory if the user passes a large file to one of the
hashing functions.
This commit is contained in:
Martin Atkins 2021-05-11 14:22:16 -07:00
parent 42e0985839
commit 70fed23be5
2 changed files with 18 additions and 6 deletions

View File

@ -11,6 +11,7 @@ import (
"encoding/hex"
"fmt"
"hash"
"io"
"strings"
uuidv5 "github.com/google/uuid"
@ -243,13 +244,16 @@ func makeFileHashFunction(baseDir string, hf func() hash.Hash, enc func([]byte)
Type: function.StaticReturnType(cty.String),
Impl: func(args []cty.Value, retType cty.Type) (ret cty.Value, err error) {
path := args[0].AsString()
src, err := readFileBytes(baseDir, path)
f, err := openFile(baseDir, path)
if err != nil {
return cty.UnknownVal(cty.String), err
}
h := hf()
h.Write(src)
_, err = io.Copy(h, f)
if err != nil {
return cty.UnknownVal(cty.String), err
}
rv := enc(h.Sum(nil))
return cty.StringVal(rv), nil
},

View File

@ -352,7 +352,7 @@ var PathExpandFunc = function.New(&function.Spec{
},
})
func readFileBytes(baseDir, path string) ([]byte, error) {
func openFile(baseDir, path string) (*os.File, error) {
path, err := homedir.Expand(path)
if err != nil {
return nil, fmt.Errorf("failed to expand ~: %s", err)
@ -365,13 +365,21 @@ func readFileBytes(baseDir, path string) ([]byte, error) {
// Ensure that the path is canonical for the host OS
path = filepath.Clean(path)
src, err := ioutil.ReadFile(path)
return os.Open(path)
}
func readFileBytes(baseDir, path string) ([]byte, error) {
f, err := openFile(baseDir, path)
if err != nil {
// ReadFile does not return Terraform-user-friendly error
// messages, so we'll provide our own.
if os.IsNotExist(err) {
// An extra Terraform-specific hint for this situation
return nil, fmt.Errorf("no file exists at %s; this function works only with files that are distributed as part of the configuration source code, so if this file will be created by a resource in this configuration you must instead obtain this result from an attribute of that resource", path)
}
return nil, err
}
src, err := ioutil.ReadAll(f)
if err != nil {
return nil, fmt.Errorf("failed to read %s", path)
}