lang/funcs: Functions for encoding text in specific character encodings

This commit is contained in:
r0bnet 2020-07-03 14:01:40 +02:00 committed by Martin Atkins
parent 7a31e56cb7
commit 877399c631
9 changed files with 386 additions and 0 deletions

1
go.mod
View File

@ -129,6 +129,7 @@ require (
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd
golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20191203134012-c197fd4bf371
google.golang.org/api v0.9.0
google.golang.org/grpc v1.27.1

View File

@ -11,6 +11,7 @@ import (
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/function"
"golang.org/x/text/encoding/ianaindex"
)
// Base64DecodeFunc constructs a function that decodes a string containing a base64 sequence.
@ -50,6 +51,95 @@ var Base64EncodeFunc = function.New(&function.Spec{
},
})
// Base64TextEncodeFunc constructs a function that encodes the characters of a
// string using a named IANA character encoding and then returns the resulting
// bytes as a base64 sequence.
var Base64TextEncodeFunc = function.New(&function.Spec{
	Params: []function.Parameter{
		{
			Name: "string",
			Type: cty.String,
		},
		{
			Name: "encoding",
			Type: cty.String,
		},
	},
	Type: function.StaticReturnType(cty.String),
	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
		requested := args[1].AsString()

		enc, err := ianaindex.IANA.Encoding(requested)
		if err != nil || enc == nil {
			return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", requested)
		}

		// Error messages use the name the IANA index reports for the
		// encoding; if that lookup fails (unexpected, since the encoding was
		// just resolved) fall back to the name the caller supplied.
		displayName, err := ianaindex.IANA.Name(enc)
		if err != nil {
			displayName = requested
		}

		raw, err := enc.NewEncoder().Bytes([]byte(args[0].AsString()))
		if err != nil {
			// The underlying error text exposes implementation details of
			// the encoding library, and the specific error worth a tailored
			// message (golang.org/x/text/encoding/internal.RepertoireError)
			// is unexported, so a generic message is returned instead.
			//
			// The input string is deliberately left out of the message: it
			// can be very large and may contain newlines.
			return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains characters that cannot be represented in %s", displayName)
		}

		return cty.StringVal(base64.StdEncoding.EncodeToString(raw)), nil
	},
})
// Base64TextDecodeFunc constructs a function that decodes a base64 sequence to a target encoding.
var Base64TextDecodeFunc = function.New(&function.Spec{
Params: []function.Parameter{
{
Name: "source",
Type: cty.String,
},
{
Name: "encoding",
Type: cty.String,
},
},
Type: function.StaticReturnType(cty.String),
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
encoding, err := ianaindex.IANA.Encoding(args[1].AsString())
if err != nil || encoding == nil {
return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", args[1].AsString())
}
encName, err := ianaindex.IANA.Name(encoding)
if err != nil { // would be weird, since we just read this encoding out
encName = args[1].AsString()
}
s := args[0].AsString()
sDec, err := base64.StdEncoding.DecodeString(s)
if err != nil {
switch err := err.(type) {
case base64.CorruptInputError:
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given value is has an invalid base64 symbol at offset %d", int(err))
default:
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "invalid source string: %T", err)
}
}
decoder := encoding.NewDecoder()
decoded, err := decoder.Bytes(sDec)
if err != nil || bytes.ContainsRune(decoded, '<27>') {
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains symbols that are not defined for %s", encName)
}
return cty.StringVal(string(decoded)), nil
},
})
// Base64GzipFunc constructs a function that compresses a string with gzip and then encodes the result in
// Base64 encoding.
var Base64GzipFunc = function.New(&function.Spec{
@ -138,3 +228,26 @@ func Base64Gzip(str cty.Value) (cty.Value, error) {
func URLEncode(str cty.Value) (cty.Value, error) {
	// Thin convenience wrapper: forward the single argument to URLEncodeFunc.
	callArgs := []cty.Value{str}
	return URLEncodeFunc.Call(callArgs)
}
// Base64TextEncode encodes the characters of a string using a named character
// encoding and then applies Base64 encoding to the resulting bytes.
//
// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
//
// Strings in the Terraform language are sequences of unicode characters rather
// than bytes, so this function first encodes the characters from the string
// using the requested IANA encoding (e.g. UTF-16LE), and then applies Base64
// encoding to the result.
//
// str is the string to encode and enc is the encoding name; both are passed
// through to Base64TextEncodeFunc unchanged.
func Base64TextEncode(str, enc cty.Value) (cty.Value, error) {
	callArgs := []cty.Value{str, enc}
	return Base64TextEncodeFunc.Call(callArgs)
}
// Base64TextDecode decodes a string containing a base64 sequence and
// interprets the decoded bytes as text in a named character encoding.
//
// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
//
// Strings in the Terraform language are sequences of unicode characters rather
// than bytes, so after Base64 decoding this function also interprets the
// resulting bytes using the target encoding.
//
// str is the base64 text and enc is the encoding name; both are passed
// through to Base64TextDecodeFunc unchanged.
func Base64TextDecode(str, enc cty.Value) (cty.Value, error) {
	callArgs := []cty.Value{str, enc}
	return Base64TextDecodeFunc.Call(callArgs)
}

View File

@ -163,3 +163,157 @@ func TestURLEncode(t *testing.T) {
})
}
}
// TestBase64TextEncode runs Base64TextEncode over a table of inputs covering
// successful encodings, an unsupported encoding name, a character that the
// target encoding cannot represent, and unknown arguments.
//
// For cases with a non-empty Err only the error message is compared; Want is
// not inspected.
func TestBase64TextEncode(t *testing.T) {
	const sample = "abc123!?$*&()'-=@~"

	tests := []struct {
		String   cty.Value
		Encoding cty.Value
		Want     cty.Value
		Err      string
	}{
		{
			cty.StringVal(sample),
			cty.StringVal("UTF-8"),
			cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
			``,
		},
		{
			cty.StringVal(sample),
			cty.StringVal("UTF-16LE"),
			cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
			``,
		},
		{
			cty.StringVal(sample),
			cty.StringVal("CP936"),
			cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
			``,
		},
		{
			cty.StringVal(sample),
			cty.StringVal("NOT-EXISTS"),
			cty.UnknownVal(cty.String),
			`"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform version`,
		},
		{
			cty.StringVal("🤔"),
			cty.StringVal("cp437"),
			cty.UnknownVal(cty.String),
			`the given string contains characters that cannot be represented in IBM437`,
		},
		{
			cty.UnknownVal(cty.String),
			cty.StringVal("windows-1250"),
			cty.UnknownVal(cty.String),
			``,
		},
		{
			cty.StringVal("hello world"),
			cty.UnknownVal(cty.String),
			cty.UnknownVal(cty.String),
			``,
		},
	}

	for _, test := range tests {
		name := fmt.Sprintf("encodetextbase64(%#v, %#v)", test.String, test.Encoding)
		t.Run(name, func(t *testing.T) {
			got, err := Base64TextEncode(test.String, test.Encoding)

			// Success cases: no error is allowed and the value must match.
			if test.Err == "" {
				if err != nil {
					t.Fatalf("unexpected error: %s", err)
				}
				if !got.RawEquals(test.Want) {
					t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want)
				}
				return
			}

			// Error cases: an error is required and its text must match.
			if err == nil {
				t.Fatal("succeeded; want error")
			}
			if msg := err.Error(); msg != test.Err {
				t.Fatalf("wrong error\ngot: %s\nwant: %s", msg, test.Err)
			}
		})
	}
}
// TestBase64TextDecode runs Base64TextDecode over a table of inputs covering
// successful decodings, an unsupported encoding name, invalid base64 input,
// bytes that are undefined in the target encoding, and unknown arguments.
//
// For cases with a non-empty Err only the error message is compared; Want is
// not inspected.
func TestBase64TextDecode(t *testing.T) {
	tests := []struct {
		String   cty.Value
		Encoding cty.Value
		Want     cty.Value
		Err      string
	}{
		{
			cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
			cty.StringVal("UTF-8"),
			cty.StringVal("abc123!?$*&()'-=@~"),
			``,
		},
		{
			cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
			cty.StringVal("UTF-16LE"),
			cty.StringVal("abc123!?$*&()'-=@~"),
			``,
		},
		{
			cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
			cty.StringVal("CP936"),
			cty.StringVal("abc123!?$*&()'-=@~"),
			``,
		},
		{
			cty.StringVal("doesn't matter"),
			cty.StringVal("NOT-EXISTS"),
			cty.UnknownVal(cty.String),
			`"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform version`,
		},
		{
			cty.StringVal("<invalid base64>"),
			cty.StringVal("cp437"),
			cty.UnknownVal(cty.String),
			`the given value is has an invalid base64 symbol at offset 0`,
		},
		{
			cty.StringVal("gQ=="), // this is 0x81, which is not defined in windows-1250
			cty.StringVal("windows-1250"),
			// U+FFFD, the Unicode replacement character, written as an
			// escape so it survives re-encoding of this file.
			cty.StringVal("\uFFFD"),
			`the given string contains symbols that are not defined for windows-1250`,
		},
		{
			cty.UnknownVal(cty.String),
			cty.StringVal("windows-1250"),
			cty.UnknownVal(cty.String),
			``,
		},
		{
			cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
			cty.UnknownVal(cty.String),
			cty.UnknownVal(cty.String),
			``,
		},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("decodetextbase64(%#v, %#v)", test.String, test.Encoding), func(t *testing.T) {
			got, err := Base64TextDecode(test.String, test.Encoding)

			if test.Err != "" {
				if err == nil {
					t.Fatal("succeeded; want error")
				}
				if got, want := err.Error(), test.Err; got != want {
					t.Fatalf("wrong error\ngot: %s\nwant: %s", got, want)
				}
				return
			}
			if err != nil {
				t.Fatalf("unexpected error: %s", err)
			}

			if !got.RawEquals(test.Want) {
				t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want)
			}
		})
	}
}

View File

@ -54,9 +54,11 @@ func (s *Scope) Functions() map[string]function.Function {
"concat": stdlib.ConcatFunc,
"contains": stdlib.ContainsFunc,
"csvdecode": stdlib.CSVDecodeFunc,
"decodetextbase64": funcs.Base64TextDecodeFunc,
"dirname": funcs.DirnameFunc,
"distinct": stdlib.DistinctFunc,
"element": stdlib.ElementFunc,
"encodetextbase64": funcs.Base64TextEncodeFunc,
"chunklist": stdlib.ChunklistFunc,
"file": funcs.MakeFileFunc(s.BaseDir, false),
"fileexists": funcs.MakeFileExistsFunc(s.BaseDir),

View File

@ -282,6 +282,13 @@ func TestFunctions(t *testing.T) {
},
},
"decodetextbase64": {
{
`decodetextbase64("dABlAHMAdAA=", "UTF-16LE")`,
cty.StringVal("test"),
},
},
"dirname": {
{
`dirname("testdata/hello.txt")`,
@ -298,6 +305,13 @@ func TestFunctions(t *testing.T) {
},
},
"encodetextbase64": {
{
`encodetextbase64("test", "UTF-16LE")`,
cty.StringVal("dABlAHMAdAA="),
},
},
"element": {
{
`element(["hello"], 0)`,

View File

@ -30,6 +30,10 @@ Base64 themselves, which avoids the need to encode or decode it directly in
most cases. Various other functions with names containing "base64" can generate
or manipulate Base64 data directly.
`base64decode` is, in effect, a shorthand for calling
[`decodetextbase64`](./decodetextbase64.html) with the encoding name set to
`UTF-8`.
## Examples
```
@ -41,6 +45,8 @@ Hello World
* [`base64encode`](./base64encode.html) performs the opposite operation,
encoding the UTF-8 bytes for a string as Base64.
* [`decodetextbase64`](./decodetextbase64.html) is a more general function that
supports character encodings other than UTF-8.
* [`base64gzip`](./base64gzip.html) applies gzip compression to a string
and returns the result with Base64 encoding.
* [`filebase64`](./filebase64.html) reads a file from the local filesystem

View File

@ -31,6 +31,10 @@ sequences, and so resource types that accept or return binary data will use
Base64 themselves, and so this function exists primarily to allow string
data to be easily provided to resource types that expect Base64 bytes.
`base64encode` is, in effect, a shorthand for calling
[`encodetextbase64`](./encodetextbase64.html) with the encoding name set to
`UTF-8`.
## Examples
```
@ -42,6 +46,8 @@ SGVsbG8gV29ybGQ=
* [`base64decode`](./base64decode.html) performs the opposite operation,
decoding Base64 data and interpreting it as a UTF-8 string.
* [`encodetextbase64`](./encodetextbase64.html) is a more general function that
supports character encodings other than UTF-8.
* [`base64gzip`](./base64gzip.html) applies gzip compression to a string
and returns the result with Base64 encoding all in one operation.
* [`filebase64`](./filebase64.html) reads a file from the local filesystem

View File

@ -0,0 +1,40 @@
---
layout: "functions"
page_title: "decodetextbase64 - Functions - Configuration Language"
sidebar_current: "docs-funcs-encoding-decodetextbase64"
description: |-
The decodetextbase64 function decodes a string that was previously Base64-encoded, and then interprets the result as characters in a specified character encoding.
---
# `decodetextbase64` Function
-> **Note:** This function is supported only in Terraform v0.14 and later.
`decodetextbase64` function decodes a string that was previously Base64-encoded,
and then interprets the result as characters in a specified character encoding.
Terraform uses the "standard" Base64 alphabet as defined in
[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4).
The `encoding_name` argument must contain one of the encoding names or aliases
recorded in
[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
Terraform supports only a subset of the registered encodings, and the encoding
support may vary between Terraform versions.
Terraform accepts the encoding name `UTF-8`, which will produce the same result
as [`base64decode`](./base64decode.html).
## Examples
```
> decodetextbase64("SABlAGwAbABvACAAVwBvAHIAbABkAA==", "UTF-16LE")
Hello World
```
## Related Functions
* [`encodetextbase64`](./encodetextbase64.html) performs the opposite operation,
applying target encoding and then Base64 to a string.
* [`base64decode`](./base64decode.html) is effectively a shorthand for
`decodetextbase64` where the character encoding is fixed as `UTF-8`.

View File

@ -0,0 +1,50 @@
---
layout: "functions"
page_title: "encodetextbase64 - Functions - Configuration Language"
sidebar_current: "docs-funcs-encoding-encodetextbase64"
description: |-
The encodetextbase64 function encodes the unicode characters in a given string using a specified character encoding, returning the result Base64-encoded.
---
# `encodetextbase64` Function
-> **Note:** This function is supported only in Terraform v0.14 and later.
`encodetextbase64` encodes the unicode characters in a given string using a
specified character encoding, returning the result base64 encoded because
Terraform language strings are always sequences of unicode characters.
```hcl
encodetextbase64(string, encoding_name)
```
Terraform uses the "standard" Base64 alphabet as defined in
[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4).
The `encoding_name` argument must contain one of the encoding names or aliases
recorded in
[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
Terraform supports only a subset of the registered encodings, and the encoding
support may vary between Terraform versions. In particular Terraform supports
`UTF-16LE`, which is the native character encoding for the Windows API and
therefore sometimes expected by Windows-originated software such as PowerShell.
Terraform also accepts the encoding name `UTF-8`, which will produce the same
result as [`base64encode`](./base64encode.html).
## Examples
```
> encodetextbase64("Hello World", "UTF-16LE")
SABlAGwAbABvACAAVwBvAHIAbABkAA==
```
## Related Functions
* [`decodetextbase64`](./decodetextbase64.html) performs the opposite operation,
decoding Base64 data and interpreting it as a particular character encoding.
* [`base64encode`](./base64encode.html) applies Base64 encoding of the UTF-8
encoding of a string.
* [`filebase64`](./filebase64.html) reads a file from the local filesystem
and returns its raw bytes with Base64 encoding, without creating an
intermediate Unicode string.