lang/funcs: Functions for encoding text in specific character encodings
This commit is contained in:
parent
7a31e56cb7
commit
877399c631
1
go.mod
1
go.mod
|
@ -129,6 +129,7 @@ require (
|
|||
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd
|
||||
golang.org/x/text v0.3.2
|
||||
golang.org/x/tools v0.0.0-20191203134012-c197fd4bf371
|
||||
google.golang.org/api v0.9.0
|
||||
google.golang.org/grpc v1.27.1
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
|
||||
"github.com/zclconf/go-cty/cty"
|
||||
"github.com/zclconf/go-cty/cty/function"
|
||||
"golang.org/x/text/encoding/ianaindex"
|
||||
)
|
||||
|
||||
// Base64DecodeFunc constructs a function that decodes a string containing a base64 sequence.
|
||||
|
@ -50,6 +51,95 @@ var Base64EncodeFunc = function.New(&function.Spec{
|
|||
},
|
||||
})
|
||||
|
||||
// Base64TextDecodeFunc constructs a function that encodes a string to a target encoding and then to a base64 sequence.
|
||||
var Base64TextEncodeFunc = function.New(&function.Spec{
|
||||
Params: []function.Parameter{
|
||||
{
|
||||
Name: "string",
|
||||
Type: cty.String,
|
||||
},
|
||||
{
|
||||
Name: "encoding",
|
||||
Type: cty.String,
|
||||
},
|
||||
},
|
||||
Type: function.StaticReturnType(cty.String),
|
||||
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
|
||||
encoding, err := ianaindex.IANA.Encoding(args[1].AsString())
|
||||
if err != nil || encoding == nil {
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", args[1].AsString())
|
||||
}
|
||||
|
||||
encName, err := ianaindex.IANA.Name(encoding)
|
||||
if err != nil { // would be weird, since we just read this encoding out
|
||||
encName = args[1].AsString()
|
||||
}
|
||||
|
||||
encoder := encoding.NewEncoder()
|
||||
encodedInput, err := encoder.Bytes([]byte(args[0].AsString()))
|
||||
if err != nil {
|
||||
// The string representations of "err" disclose implementation
|
||||
// details of the underlying library, and the main error we might
|
||||
// like to return a special message for is unexported as
|
||||
// golang.org/x/text/encoding/internal.RepertoireError, so this
|
||||
// is just a generic error message for now.
|
||||
//
|
||||
// We also don't include the string itself in the message because
|
||||
// it can typically be very large, contain newline characters,
|
||||
// etc.
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains characters that cannot be represented in %s", encName)
|
||||
}
|
||||
|
||||
return cty.StringVal(base64.StdEncoding.EncodeToString(encodedInput)), nil
|
||||
},
|
||||
})
|
||||
|
||||
// Base64TextDecodeFunc constructs a function that decodes a base64 sequence to a target encoding.
|
||||
var Base64TextDecodeFunc = function.New(&function.Spec{
|
||||
Params: []function.Parameter{
|
||||
{
|
||||
Name: "source",
|
||||
Type: cty.String,
|
||||
},
|
||||
{
|
||||
Name: "encoding",
|
||||
Type: cty.String,
|
||||
},
|
||||
},
|
||||
Type: function.StaticReturnType(cty.String),
|
||||
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
|
||||
encoding, err := ianaindex.IANA.Encoding(args[1].AsString())
|
||||
if err != nil || encoding == nil {
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Terraform version", args[1].AsString())
|
||||
}
|
||||
|
||||
encName, err := ianaindex.IANA.Name(encoding)
|
||||
if err != nil { // would be weird, since we just read this encoding out
|
||||
encName = args[1].AsString()
|
||||
}
|
||||
|
||||
s := args[0].AsString()
|
||||
sDec, err := base64.StdEncoding.DecodeString(s)
|
||||
if err != nil {
|
||||
switch err := err.(type) {
|
||||
case base64.CorruptInputError:
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given value is has an invalid base64 symbol at offset %d", int(err))
|
||||
default:
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "invalid source string: %T", err)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
decoder := encoding.NewDecoder()
|
||||
decoded, err := decoder.Bytes(sDec)
|
||||
if err != nil || bytes.ContainsRune(decoded, '<27>') {
|
||||
return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains symbols that are not defined for %s", encName)
|
||||
}
|
||||
|
||||
return cty.StringVal(string(decoded)), nil
|
||||
},
|
||||
})
|
||||
|
||||
// Base64GzipFunc constructs a function that compresses a string with gzip and then encodes the result in
|
||||
// Base64 encoding.
|
||||
var Base64GzipFunc = function.New(&function.Spec{
|
||||
|
@ -138,3 +228,26 @@ func Base64Gzip(str cty.Value) (cty.Value, error) {
|
|||
func URLEncode(str cty.Value) (cty.Value, error) {
|
||||
return URLEncodeFunc.Call([]cty.Value{str})
|
||||
}
|
||||
|
||||
// Base64TextEncode applies Base64 encoding to a string that was encoded before with a target encoding.
|
||||
//
|
||||
// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
|
||||
//
|
||||
// First step is to apply the target IANA encoding (e.g. UTF-16LE).
|
||||
// Strings in the Terraform language are sequences of unicode characters rather
|
||||
// than bytes, so this function will first encode the characters from the string
|
||||
// as UTF-8, and then apply Base64 encoding to the result.
|
||||
func Base64TextEncode(str, enc cty.Value) (cty.Value, error) {
|
||||
return Base64TextEncodeFunc.Call([]cty.Value{str, enc})
|
||||
}
|
||||
|
||||
// Base64TextDecode decodes a string containing a base64 sequence whereas a specific encoding of the string is expected.
|
||||
//
|
||||
// Terraform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
|
||||
//
|
||||
// Strings in the Terraform language are sequences of unicode characters rather
|
||||
// than bytes, so this function will also interpret the resulting bytes as
|
||||
// the target encoding.
|
||||
func Base64TextDecode(str, enc cty.Value) (cty.Value, error) {
|
||||
return Base64TextDecodeFunc.Call([]cty.Value{str, enc})
|
||||
}
|
||||
|
|
|
@ -163,3 +163,157 @@ func TestURLEncode(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBase64TextEncode(t *testing.T) {
|
||||
tests := []struct {
|
||||
String cty.Value
|
||||
Encoding cty.Value
|
||||
Want cty.Value
|
||||
Err string
|
||||
}{
|
||||
{
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
cty.StringVal("UTF-8"),
|
||||
cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
cty.StringVal("UTF-16LE"),
|
||||
cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
cty.StringVal("CP936"),
|
||||
cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
cty.StringVal("NOT-EXISTS"),
|
||||
cty.UnknownVal(cty.String),
|
||||
`"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform version`,
|
||||
},
|
||||
{
|
||||
cty.StringVal("🤔"),
|
||||
cty.StringVal("cp437"),
|
||||
cty.UnknownVal(cty.String),
|
||||
`the given string contains characters that cannot be represented in IBM437`,
|
||||
},
|
||||
{
|
||||
cty.UnknownVal(cty.String),
|
||||
cty.StringVal("windows-1250"),
|
||||
cty.UnknownVal(cty.String),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("hello world"),
|
||||
cty.UnknownVal(cty.String),
|
||||
cty.UnknownVal(cty.String),
|
||||
``,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("encodetextbase64(%#v, %#v)", test.String, test.Encoding), func(t *testing.T) {
|
||||
got, err := Base64TextEncode(test.String, test.Encoding)
|
||||
|
||||
if test.Err != "" {
|
||||
if err == nil {
|
||||
t.Fatal("succeeded; want error")
|
||||
}
|
||||
if got, want := err.Error(), test.Err; got != want {
|
||||
t.Fatalf("wrong error\ngot: %s\nwant: %s", got, want)
|
||||
}
|
||||
return
|
||||
} else if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !got.RawEquals(test.Want) {
|
||||
t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBase64TextDecode(t *testing.T) {
|
||||
tests := []struct {
|
||||
String cty.Value
|
||||
Encoding cty.Value
|
||||
Want cty.Value
|
||||
Err string
|
||||
}{
|
||||
{
|
||||
cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
|
||||
cty.StringVal("UTF-8"),
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
|
||||
cty.StringVal("UTF-16LE"),
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("YWJjMTIzIT8kKiYoKSctPUB+"),
|
||||
cty.StringVal("CP936"),
|
||||
cty.StringVal("abc123!?$*&()'-=@~"),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("doesn't matter"),
|
||||
cty.StringVal("NOT-EXISTS"),
|
||||
cty.UnknownVal(cty.String),
|
||||
`"NOT-EXISTS" is not a supported IANA encoding name or alias in this Terraform version`,
|
||||
},
|
||||
{
|
||||
cty.StringVal("<invalid base64>"),
|
||||
cty.StringVal("cp437"),
|
||||
cty.UnknownVal(cty.String),
|
||||
`the given value is has an invalid base64 symbol at offset 0`,
|
||||
},
|
||||
{
|
||||
cty.StringVal("gQ=="), // this is 0x81, which is not defined in windows-1250
|
||||
cty.StringVal("windows-1250"),
|
||||
cty.StringVal("<22>"),
|
||||
`the given string contains symbols that are not defined for windows-1250`,
|
||||
},
|
||||
{
|
||||
cty.UnknownVal(cty.String),
|
||||
cty.StringVal("windows-1250"),
|
||||
cty.UnknownVal(cty.String),
|
||||
``,
|
||||
},
|
||||
{
|
||||
cty.StringVal("YQBiAGMAMQAyADMAIQA/ACQAKgAmACgAKQAnAC0APQBAAH4A"),
|
||||
cty.UnknownVal(cty.String),
|
||||
cty.UnknownVal(cty.String),
|
||||
``,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(fmt.Sprintf("decodetextbase64(%#v, %#v)", test.String, test.Encoding), func(t *testing.T) {
|
||||
got, err := Base64TextDecode(test.String, test.Encoding)
|
||||
|
||||
if test.Err != "" {
|
||||
if err == nil {
|
||||
t.Fatal("succeeded; want error")
|
||||
}
|
||||
if got, want := err.Error(), test.Err; got != want {
|
||||
t.Fatalf("wrong error\ngot: %s\nwant: %s", got, want)
|
||||
}
|
||||
return
|
||||
} else if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !got.RawEquals(test.Want) {
|
||||
t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, test.Want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,9 +54,11 @@ func (s *Scope) Functions() map[string]function.Function {
|
|||
"concat": stdlib.ConcatFunc,
|
||||
"contains": stdlib.ContainsFunc,
|
||||
"csvdecode": stdlib.CSVDecodeFunc,
|
||||
"decodetextbase64": funcs.Base64TextDecodeFunc,
|
||||
"dirname": funcs.DirnameFunc,
|
||||
"distinct": stdlib.DistinctFunc,
|
||||
"element": stdlib.ElementFunc,
|
||||
"encodetextbase64": funcs.Base64TextEncodeFunc,
|
||||
"chunklist": stdlib.ChunklistFunc,
|
||||
"file": funcs.MakeFileFunc(s.BaseDir, false),
|
||||
"fileexists": funcs.MakeFileExistsFunc(s.BaseDir),
|
||||
|
|
|
@ -282,6 +282,13 @@ func TestFunctions(t *testing.T) {
|
|||
},
|
||||
},
|
||||
|
||||
"decodetextbase64": {
|
||||
{
|
||||
`decodetextbase64("dABlAHMAdAA=", "UTF-16LE")`,
|
||||
cty.StringVal("test"),
|
||||
},
|
||||
},
|
||||
|
||||
"dirname": {
|
||||
{
|
||||
`dirname("testdata/hello.txt")`,
|
||||
|
@ -298,6 +305,13 @@ func TestFunctions(t *testing.T) {
|
|||
},
|
||||
},
|
||||
|
||||
"encodetextbase64": {
|
||||
{
|
||||
`encodetextbase64("test", "UTF-16LE")`,
|
||||
cty.StringVal("dABlAHMAdAA="),
|
||||
},
|
||||
},
|
||||
|
||||
"element": {
|
||||
{
|
||||
`element(["hello"], 0)`,
|
||||
|
|
|
@ -30,6 +30,10 @@ Base64 themselves, which avoids the need to encode or decode it directly in
|
|||
most cases. Various other functions with names containing "base64" can generate
|
||||
or manipulate Base64 data directly.
|
||||
|
||||
`base64decode` is, in effect, a shorthand for calling
|
||||
[`decodetextbase64`](./decodetextbase64.html) with the encoding name set to
|
||||
`UTF-8`.
|
||||
|
||||
## Examples
|
||||
|
||||
```
|
||||
|
@ -41,6 +45,8 @@ Hello World
|
|||
|
||||
* [`base64encode`](./base64encode.html) performs the opposite operation,
|
||||
encoding the UTF-8 bytes for a string as Base64.
|
||||
* [`decodetextbase64`](./decodetextbase64.html) is a more general function that
|
||||
supports character encodings other than UTF-8.
|
||||
* [`base64gzip`](./base64gzip.html) applies gzip compression to a string
|
||||
and returns the result with Base64 encoding.
|
||||
* [`filebase64`](./filebase64.html) reads a file from the local filesystem
|
||||
|
|
|
@ -31,6 +31,10 @@ sequences, and so resource types that accept or return binary data will use
|
|||
Base64 themselves, and so this function exists primarily to allow string
|
||||
data to be easily provided to resource types that expect Base64 bytes.
|
||||
|
||||
`base64encode` is, in effect, a shorthand for calling
|
||||
[`encodetextbase64`](./encodetextbase64.html) with the encoding name set to
|
||||
`UTF-8`.
|
||||
|
||||
## Examples
|
||||
|
||||
```
|
||||
|
@ -42,6 +46,8 @@ SGVsbG8gV29ybGQ=
|
|||
|
||||
* [`base64decode`](./base64decode.html) performs the opposite operation,
|
||||
decoding Base64 data and interpreting it as a UTF-8 string.
|
||||
* [`encodetextbase64`](./encodetextbase64.html) is a more general function that
|
||||
supports character encodings other than UTF-8.
|
||||
* [`base64gzip`](./base64gzip.html) applies gzip compression to a string
|
||||
and returns the result with Base64 encoding all in one operation.
|
||||
* [`filebase64`](./filebase64.html) reads a file from the local filesystem
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
layout: "functions"
|
||||
page_title: "decodetextbase64 - Functions - Configuration Language"
|
||||
sidebar_current: "docs-funcs-encoding-decodetextbase64"
|
||||
description: |-
|
||||
The decodetextbase64 function decodes a string that was previously Base64-encoded, and then interprets the result as characters in a specified character encoding.
|
||||
---
|
||||
|
||||
# `decodetextbase64` Function
|
||||
|
||||
-> **Note:** This function is supported only in Terraform v0.14 and later.
|
||||
|
||||
`decodetextbase64` function decodes a string that was previously Base64-encoded,
|
||||
and then interprets the result as characters in a specified character encoding.
|
||||
|
||||
Terraform uses the "standard" Base64 alphabet as defined in
|
||||
[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4).
|
||||
|
||||
The `encoding_name` argument must contain one of the encoding names or aliases
|
||||
recorded in
|
||||
[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
|
||||
Terraform supports only a subset of the registered encodings, and the encoding
|
||||
support may vary between Terraform versions.
|
||||
|
||||
Terraform accepts the encoding name `UTF-8`, which will produce the same result
|
||||
as [`base64decode`](./base64decode.html).
|
||||
|
||||
## Examples
|
||||
|
||||
```
|
||||
> decodetextbase64("SABlAGwAbABvACAAVwBvAHIAbABkAA==", "UTF-16LE")
|
||||
Hello World
|
||||
```
|
||||
|
||||
## Related Functions
|
||||
|
||||
* [`encodetextbase64`](./encodetextbase64.html) performs the opposite operation,
|
||||
applying target encoding and then Base64 to a string.
|
||||
* [`base64decode`](./base64decode.html) is effectively a shorthand for
|
||||
`decodetextbase64` where the character encoding is fixed as `UTF-8`.
|
|
@ -0,0 +1,50 @@
|
|||
---
|
||||
layout: "functions"
|
||||
page_title: "encodetextbase64 - Functions - Configuration Language"
|
||||
sidebar_current: "docs-funcs-encoding-encodetextbase64"
|
||||
description: |-
|
||||
The encodetextbase64 function encodes the unicode characters in a given string using a specified character encoding, returning the result base64 encoded.
|
||||
---
|
||||
|
||||
# `encodetextbase64` Function
|
||||
|
||||
-> **Note:** This function is supported only in Terraform v0.14 and later.
|
||||
|
||||
`encodetextbase64` encodes the unicode characters in a given string using a
|
||||
specified character encoding, returning the result base64 encoded because
|
||||
Terraform language strings are always sequences of unicode characters.
|
||||
|
||||
```hcl
|
||||
encodetextbase64(string, encoding_name)
|
||||
```
|
||||
|
||||
Terraform uses the "standard" Base64 alphabet as defined in
|
||||
[RFC 4648 section 4](https://tools.ietf.org/html/rfc4648#section-4).
|
||||
|
||||
The `encoding_name` argument must contain one of the encoding names or aliases
|
||||
recorded in
|
||||
[the IANA character encoding registry](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
|
||||
Terraform supports only a subset of the registered encodings, and the encoding
|
||||
support may vary between Terraform versions. In particular Terraform supports
|
||||
`UTF-16LE`, which is the native character encoding for the Windows API and
|
||||
therefore sometimes expected by Windows-originated software such as PowerShell.
|
||||
|
||||
Terraform also accepts the encoding name `UTF-8`, which will produce the same
|
||||
result as [`base64encode`](./base64encode.html).
|
||||
|
||||
## Examples
|
||||
|
||||
```
|
||||
> encodetextbase64("Hello World", "UTF-16LE")
|
||||
SABlAGwAbABvACAAVwBvAHIAbABkAA==
|
||||
```
|
||||
|
||||
## Related Functions
|
||||
|
||||
* [`decodetextbase64`](./decodetextbase64.html) performs the opposite operation,
|
||||
decoding Base64 data and interpreting it as a particular character encoding.
|
||||
* [`base64encode`](./base64encode.html) applies Base64 encoding of the UTF-8
|
||||
encoding of a string.
|
||||
* [`filebase64`](./filebase64.html) reads a file from the local filesystem
|
||||
and returns its raw bytes with Base64 encoding, without creating an
|
||||
intermediate Unicode string.
|
Loading…
Reference in New Issue