diff --git a/go.mod b/go.mod index 96db292b5..2bcf8d7e3 100644 --- a/go.mod +++ b/go.mod @@ -129,6 +129,7 @@ require ( golang.org/x/net v0.0.0-20200602114024-627f9648deb9 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd + golang.org/x/text v0.3.2 golang.org/x/tools v0.0.0-20191203134012-c197fd4bf371 google.golang.org/api v0.9.0 google.golang.org/grpc v1.27.1 diff --git a/lang/funcs/encoding.go b/lang/funcs/encoding.go index d9a0bbb31..2a8e9f592 100644 --- a/lang/funcs/encoding.go +++ b/lang/funcs/encoding.go @@ -11,6 +11,7 @@ import ( "github.com/zclconf/go-cty/cty" "github.com/zclconf/go-cty/cty/function" + "golang.org/x/text/encoding/ianaindex" ) // Base64DecodeFunc constructs a function that decodes a string containing a base64 sequence. @@ -50,6 +51,95 @@ var Base64EncodeFunc = function.New(&function.Spec{ }, }) +// Base64TextEncodeFunc constructs a function that encodes a string to a target encoding and then to a base64 sequence. 
+var Base64TextEncodeFunc = function.New(&function.Spec{ + Params: []function.Parameter{ + { + Name: "string", + Type: cty.String, + }, + { + Name: "encoding", + Type: cty.String, + }, + }, + Type: function.StaticReturnType(cty.String), + Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { + encoding, err := ianaindex.IANA.Encoding(args[1].AsString()) + if err != nil || encoding == nil { + return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", args[1].AsString()) + } + + encName, err := ianaindex.IANA.Name(encoding) + if err != nil { // would be weird, since we just read this encoding out + encName = args[1].AsString() + } + + encoder := encoding.NewEncoder() + encodedInput, err := encoder.Bytes([]byte(args[0].AsString())) + if err != nil { + // The string representations of "err" disclose implementation + // details of the underlying library, and the main error we might + // like to return a special message for is unexported as + // golang.org/x/text/encoding/internal.RepertoireError, so this + // is just a generic error message for now. + // + // We also don't include the string itself in the message because + // it can typically be very large, contain newline characters, + // etc. + return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains characters that cannot be represented in %s", encName) + } + + return cty.StringVal(base64.StdEncoding.EncodeToString(encodedInput)), nil + }, +}) + +// Base64TextDecodeFunc constructs a function that decodes a base64 sequence to a target encoding. 
+var Base64TextDecodeFunc = function.New(&function.Spec{ + Params: []function.Parameter{ + { + Name: "source", + Type: cty.String, + }, + { + Name: "encoding", + Type: cty.String, + }, + }, + Type: function.StaticReturnType(cty.String), + Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { + encoding, err := ianaindex.IANA.Encoding(args[1].AsString()) + if err != nil || encoding == nil { + return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", args[1].AsString()) + } + + encName, err := ianaindex.IANA.Name(encoding) + if err != nil { // would be weird, since we just read this encoding out + encName = args[1].AsString() + } + + s := args[0].AsString() + sDec, err := base64.StdEncoding.DecodeString(s) + if err != nil { + switch err := err.(type) { + case base64.CorruptInputError: + return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given value is has an invalid base64 symbol at offset %d", int(err)) + default: + return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "invalid source string: %T", err) + } + + } + + decoder := encoding.NewDecoder() + decoded, err := decoder.Bytes(sDec) + if err != nil || bytes.ContainsRune(decoded, '�') { + return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains symbols that are not defined for %s", encName) + } + + return cty.StringVal(string(decoded)), nil + }, +}) + // Base64GzipFunc constructs a function that compresses a string with gzip and then encodes the result in // Base64 encoding. var Base64GzipFunc = function.New(&function.Spec{ @@ -138,3 +228,26 @@ func Base64Gzip(str cty.Value) (cty.Value, error) { func URLEncode(str cty.Value) (cty.Value, error) { return URLEncodeFunc.Call([]cty.Value{str}) } + +// Base64TextEncode applies Base64 encoding to a string that was encoded before with a target encoding. 
+// +// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4. +// +// The first step is to apply the target IANA encoding (e.g. UTF-16LE). +// Strings in the Terraform language are sequences of unicode characters rather +// than bytes, so this function will first encode the characters from the string +// in the target encoding, and then apply Base64 encoding to the result. +func Base64TextEncode(str, enc cty.Value) (cty.Value, error) { + return Base64TextEncodeFunc.Call([]cty.Value{str, enc}) +} + +// Base64TextDecode decodes a string containing a base64 sequence and interprets the decoded bytes in the given character encoding. +// +// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4. +// +// Strings in the Terraform language are sequences of unicode characters rather +// than bytes, so this function will also interpret the resulting bytes as +// the target encoding. +func Base64TextDecode(str, enc cty.Value) (cty.Value, error) { + return Base64TextDecodeFunc.Call([]cty.Value{str, enc}) +} diff --git a/lang/funcs/encoding_test.go b/lang/funcs/encoding_test.go index 1bff88544..1ab571bc5 100644 --- a/lang/funcs/encoding_test.go +++ b/lang/funcs/encoding_test.go @@ -163,3 +163,157 @@ func TestURLEncode(t *testing.T) { }) } } + +func TestBase64TextEncode(t *testing.T) { + tests := []struct { + String cty.Value + Encoding cty.Value + Want cty.Value + Err string + }{ + { + cty.StringVal("abc123!?$*&()'-=@~"), + cty.StringVal("UTF-8"), + cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"), + ``, + }, + { + cty.StringVal("abc123!?$*&()'-=@~"), + cty.StringVal("UTF-16LE"), + cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"), + ``, + }, + { + cty.StringVal("abc123!?$*&()'-=@~"), + cty.StringVal("CP936"), + cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"), + ``, + }, + { + cty.StringVal("abc123!?$*&()'-=@~"), + cty.StringVal("NOT-EXISTS"), + cty.UnknownVal(cty.String), + `"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform 
version`, + }, + { + cty.StringVal("🤔"), + cty.StringVal("cp437"), + cty.UnknownVal(cty.String), + `the given string contains characters that cannot be represented in IBM437`, + }, + { + cty.UnknownVal(cty.String), + cty.StringVal("windows-1250"), + cty.UnknownVal(cty.String), + ``, + }, + { + cty.StringVal("hello world"), + cty.UnknownVal(cty.String), + cty.UnknownVal(cty.String), + ``, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("encodetextbase64(%#v, %#v)", test.String, test.Encoding), func(t *testing.T) { + got, err := Base64TextEncode(test.String, test.Encoding) + + if test.Err != "" { + if err == nil { + t.Fatal("succeeded; want error") + } + if got, want := err.Error(), test.Err; got != want { + t.Fatalf("wrong error\ngot: %s\nwant: %s", got, want) + } + return + } else if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + if !got.RawEquals(test.Want) { + t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want) + } + }) + } +} + +func TestBase64TextDecode(t *testing.T) { + tests := []struct { + String cty.Value + Encoding cty.Value + Want cty.Value + Err string + }{ + { + cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"), + cty.StringVal("UTF-8"), + cty.StringVal("abc123!?$*&()'-=@~"), + ``, + }, + { + cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"), + cty.StringVal("UTF-16LE"), + cty.StringVal("abc123!?$*&()'-=@~"), + ``, + }, + { + cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"), + cty.StringVal("CP936"), + cty.StringVal("abc123!?$*&()'-=@~"), + ``, + }, + { + cty.StringVal("doesn't matter"), + cty.StringVal("NOT-EXISTS"), + cty.UnknownVal(cty.String), + `"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform version`, + }, + { + cty.StringVal("<invalid base64>"), + cty.StringVal("cp437"), + cty.UnknownVal(cty.String), + `the given value is has an invalid base64 symbol at offset 0`, + }, + { + cty.StringVal("gQ=="), // this is 0x81, which is not defined in windows-1250 + cty.StringVal("windows-1250"), + 
cty.StringVal("�"), + `the given string contains symbols that are not defined for windows-1250`, + }, + { + cty.UnknownVal(cty.String), + cty.StringVal("windows-1250"), + cty.UnknownVal(cty.String), + ``, + }, + { + cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"), + cty.UnknownVal(cty.String), + cty.UnknownVal(cty.String), + ``, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("decodetextbase64(%#v, %#v)", test.String, test.Encoding), func(t *testing.T) { + got, err := Base64TextDecode(test.String, test.Encoding) + + if test.Err != "" { + if err == nil { + t.Fatal("succeeded; want error") + } + if got, want := err.Error(), test.Err; got != want { + t.Fatalf("wrong error\ngot: %s\nwant: %s", got, want) + } + return + } else if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + if !got.RawEquals(test.Want) { + t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want) + } + }) + } +} diff --git a/lang/functions.go b/lang/functions.go index 24698cda5..5aabbdfbf 100644 --- a/lang/functions.go +++ b/lang/functions.go @@ -54,9 +54,11 @@ func (s *Scope) Functions() map[string]function.Function { "concat": stdlib.ConcatFunc, "contains": stdlib.ContainsFunc, "csvdecode": stdlib.CSVDecodeFunc, + "decodetextbase64": funcs.Base64TextDecodeFunc, "dirname": funcs.DirnameFunc, "distinct": stdlib.DistinctFunc, "element": stdlib.ElementFunc, + "encodetextbase64": funcs.Base64TextEncodeFunc, "chunklist": stdlib.ChunklistFunc, "file": funcs.MakeFileFunc(s.BaseDir, false), "fileexists": funcs.MakeFileExistsFunc(s.BaseDir), diff --git a/lang/functions_test.go b/lang/functions_test.go index 24667d61f..848318ff4 100644 --- a/lang/functions_test.go +++ b/lang/functions_test.go @@ -282,6 +282,13 @@ func TestFunctions(t *testing.T) { }, }, + "decodetextbase64": { + { + `decodetextbase64("dABlAHMAdAA=", "UTF-16LE")`, + cty.StringVal("test"), + }, + }, + "dirname": { { `dirname("testdata/hello.txt")`, @@ -298,6 +305,13 @@ func TestFunctions(t 
*testing.T) { }, }, + "encodetextbase64": { + { + `encodetextbase64("test", "UTF-16LE")`, + cty.StringVal("dABlAHMAdAA="), + }, + }, + "element": { { `element(["hello"], 0)`, diff --git a/website/docs/configuration/functions/base64decode.html.md b/website/docs/configuration/functions/base64decode.html.md index fc15f2a46..ce0d25ace 100644 --- a/website/docs/configuration/functions/base64decode.html.md +++ b/website/docs/configuration/functions/base64decode.html.md @@ -30,6 +30,10 @@ Base64 themselves, which avoids the need to encode or decode it directly in most cases. Various other functions with names containing "base64" can generate or manipulate Base64 data directly. +`base64decode` is, in effect, a shorthand for calling +[`decodetextbase64`](./decodetextbase64.html) with the encoding name set to +`UTF-8`. + ## Examples ``` @@ -41,6 +45,8 @@ Hello World * [`base64encode`](./base64encode.html) performs the opposite operation, encoding the UTF-8 bytes for a string as Base64. +* [`decodetextbase64`](./decodetextbase64.html) is a more general function that + supports character encodings other than UTF-8. * [`base64gzip`](./base64gzip.html) applies gzip compression to a string and returns the result with Base64 encoding. * [`filebase64`](./filebase64.html) reads a file from the local filesystem diff --git a/website/docs/configuration/functions/base64encode.html.md b/website/docs/configuration/functions/base64encode.html.md index 076423272..9737f4da7 100644 --- a/website/docs/configuration/functions/base64encode.html.md +++ b/website/docs/configuration/functions/base64encode.html.md @@ -31,6 +31,10 @@ sequences, and so resource types that accept or return binary data will use Base64 themselves, and so this function exists primarily to allow string data to be easily provided to resource types that expect Base64 bytes. +`base64encode` is, in effect, a shorthand for calling +[`encodetextbase64`](./encodetextbase64.html) with the encoding name set to +`UTF-8`. 
+ ## Examples ``` @@ -42,6 +46,8 @@ SGVsbG8gV29ybGQ= * [`base64decode`](./base64decode.html) performs the opposite operation, decoding Base64 data and interpreting it as a UTF-8 string. +* [`encodetextbase64`](./encodetextbase64.html) is a more general function that + supports character encodings other than UTF-8. * [`base64gzip`](./base64gzip.html) applies gzip compression to a string and returns the result with Base64 encoding all in one operation. * [`filebase64`](./filebase64.html) reads a file from the local filesystem diff --git a/website/docs/configuration/functions/decodetextbase64.html.md b/website/docs/configuration/functions/decodetextbase64.html.md new file mode 100644 index 000000000..18a1949fd --- /dev/null +++ b/website/docs/configuration/functions/decodetextbase64.html.md @@ -0,0 +1,40 @@ +--- +layout: "functions" +page_title: "decodetextbase64 - Functions - Configuration Language" +sidebar_current: "docs-funcs-encoding-decodetextbase64" +description: |- + The decodetextbase64 function decodes a string containing a base64 sequence assuming that the target encoding was used. +--- + +# `decodetextbase64` Function + +-> **Note:** This function is supported only in Terraform v0.14 and later. + +`decodetextbase64` decodes a string that was previously Base64-encoded, +and then interprets the result as characters in a specified character encoding. + +Terraform uses the "standard" Base64 alphabet as defined in +[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4). + +The `encoding_name` argument must contain one of the encoding names or aliases +recorded in +[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml). +Terraform supports only a subset of the registered encodings, and the encoding +support may vary between Terraform versions. + +Terraform accepts the encoding name `UTF-8`, which will produce the same result +as [`base64decode`](./base64decode.html). 
+ +## Examples + +``` +> decodetextbase64("SABlAGwAbABvACAAVwBvAHIAbABkAA==", "UTF-16LE") +Hello World +``` + +## Related Functions + +* [`encodetextbase64`](./encodetextbase64.html) performs the opposite operation, + applying target encoding and then Base64 to a string. +* [`base64decode`](./base64decode.html) is effectively a shorthand for + `decodetextbase64` where the character encoding is fixed as `UTF-8`. diff --git a/website/docs/configuration/functions/encodetextbase64.html.md b/website/docs/configuration/functions/encodetextbase64.html.md new file mode 100644 index 000000000..98e8c6995 --- /dev/null +++ b/website/docs/configuration/functions/encodetextbase64.html.md @@ -0,0 +1,50 @@ +--- +layout: "functions" +page_title: "encodetextbase64 - Functions - Configuration Language" +sidebar_current: "docs-funcs-encoding-encodetextbase64" +description: |- + The encodetextbase64 function encodes a string in a target character encoding and then applies Base64 encoding to the result. +--- + +# `encodetextbase64` Function + +-> **Note:** This function is supported only in Terraform v0.14 and later. + +`encodetextbase64` encodes the unicode characters in a given string using a +specified character encoding, returning the result base64 encoded because +Terraform language strings are always sequences of unicode characters. + +```hcl +encodetextbase64(string, encoding_name) +``` + +Terraform uses the "standard" Base64 alphabet as defined in +[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4). + +The `encoding_name` argument must contain one of the encoding names or aliases +recorded in +[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml). +Terraform supports only a subset of the registered encodings, and the encoding +support may vary between Terraform versions. 
In particular Terraform supports +`UTF-16LE`, which is the native character encoding for the Windows API and +therefore sometimes expected by Windows-originated software such as PowerShell. + +Terraform also accepts the encoding name `UTF-8`, which will produce the same +result as [`base64encode`](./base64encode.html). + +## Examples + +``` +> encodetextbase64("Hello World", "UTF-16LE") +SABlAGwAbABvACAAVwBvAHIAbABkAA== +``` + +## Related Functions + +* [`decodetextbase64`](./decodetextbase64.html) performs the opposite operation, + decoding Base64 data and interpreting it as a particular character encoding. +* [`base64encode`](./base64encode.html) applies Base64 encoding of the UTF-8 + encoding of a string. +* [`filebase64`](./filebase64.html) reads a file from the local filesystem + and returns its raw bytes with Base64 encoding, without creating an + intermediate Unicode string.