vendor: go get github.com/hashicorp/hcl/v2@v2.4.0
This commit is contained in:
parent
65f9de04af
commit
9128ce611a
2
go.mod
2
go.mod
|
@ -69,7 +69,7 @@ require (
|
|||
github.com/hashicorp/go-uuid v1.0.1
|
||||
github.com/hashicorp/go-version v1.2.0
|
||||
github.com/hashicorp/hcl v0.0.0-20170504190234-a4b07c25de5f
|
||||
github.com/hashicorp/hcl/v2 v2.3.0
|
||||
github.com/hashicorp/hcl/v2 v2.4.0
|
||||
github.com/hashicorp/hil v0.0.0-20190212112733-ab17b08d6590
|
||||
github.com/hashicorp/memberlist v0.1.0 // indirect
|
||||
github.com/hashicorp/serf v0.0.0-20160124182025-e4ec8cc423bb // indirect
|
||||
|
|
2
go.sum
2
go.sum
|
@ -246,6 +246,8 @@ github.com/hashicorp/hcl v0.0.0-20170504190234-a4b07c25de5f/go.mod h1:oZtUIOe8dh
|
|||
github.com/hashicorp/hcl/v2 v2.0.0/go.mod h1:oVVDG71tEinNGYCxinCYadcmKU9bglqW9pV3txagJ90=
|
||||
github.com/hashicorp/hcl/v2 v2.3.0 h1:iRly8YaMwTBAKhn1Ybk7VSdzbnopghktCD031P8ggUE=
|
||||
github.com/hashicorp/hcl/v2 v2.3.0/go.mod h1:d+FwDBbOLvpAM3Z6J7gPj/VoAGkNe/gm352ZhjJ/Zv8=
|
||||
github.com/hashicorp/hcl/v2 v2.4.0 h1:xwVa1aj4nCSoAjUnFPBAIfqlzPgSZEVMdkJv/mgj4jY=
|
||||
github.com/hashicorp/hcl/v2 v2.4.0/go.mod h1:bQTN5mpo+jewjJgh8jr0JUguIi7qPHUF6yIfAEN3jqY=
|
||||
github.com/hashicorp/hil v0.0.0-20190212112733-ab17b08d6590 h1:2yzhWGdgQUWZUCNK+AoO35V+HTsgEmcM4J9IkArh7PI=
|
||||
github.com/hashicorp/hil v0.0.0-20190212112733-ab17b08d6590/go.mod h1:n2TSygSNwsLJ76m8qFXTSc7beTb+auJxYdqrnoqwZWE=
|
||||
github.com/hashicorp/memberlist v0.1.0 h1:qSsCiC0WYD39lbSitKNt40e30uorm2Ss/d4JGU1hzH8=
|
||||
|
|
|
@ -1,95 +0,0 @@
|
|||
Copyright (c) 2017 Martin Atkins
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---------
|
||||
|
||||
Unicode table generation programs are under a separate copyright and license:
|
||||
|
||||
Copyright (c) 2014 Couchbase, Inc.
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
except in compliance with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
either express or implied. See the License for the specific language governing permissions
|
||||
and limitations under the License.
|
||||
|
||||
---------
|
||||
|
||||
Grapheme break data is provided as part of the Unicode character database,
|
||||
copright 2016 Unicode, Inc, which is provided with the following license:
|
||||
|
||||
Unicode Data Files include all data files under the directories
|
||||
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
|
||||
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
|
||||
http://www.unicode.org/utility/trac/browser/.
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the
|
||||
directory http://www.unicode.org/Public/.
|
||||
|
||||
Software includes any source code published in the Unicode Standard
|
||||
or under the directories
|
||||
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
|
||||
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
|
||||
http://www.unicode.org/utility/trac/browser/.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
|
@ -1,30 +0,0 @@
|
|||
package textseg
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
|
||||
// all of the recognized tokens in the given buffer.
|
||||
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
|
||||
scanner := bufio.NewScanner(bytes.NewReader(buf))
|
||||
scanner.Split(splitFunc)
|
||||
var ret [][]byte
|
||||
for scanner.Scan() {
|
||||
ret = append(ret, scanner.Bytes())
|
||||
}
|
||||
return ret, scanner.Err()
|
||||
}
|
||||
|
||||
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
|
||||
// recognized tokens in the given buffer.
|
||||
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
|
||||
scanner := bufio.NewScanner(bytes.NewReader(buf))
|
||||
scanner.Split(splitFunc)
|
||||
var ret int
|
||||
for scanner.Scan() {
|
||||
ret++
|
||||
}
|
||||
return ret, scanner.Err()
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
package textseg
|
||||
|
||||
//go:generate go run make_tables.go -output tables.go
|
||||
//go:generate go run make_test_tables.go -output tables_test.go
|
||||
//go:generate ruby unicode2ragel.rb --url=http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,E_Base,E_Modifier,ZWJ,Glue_After_Zwj,E_Base_GAZ" -o grapheme_clusters_table.rl
|
||||
//go:generate ragel -Z grapheme_clusters.rl
|
||||
//go:generate gofmt -w grapheme_clusters.go
|
File diff suppressed because it is too large
Load Diff
|
@ -1,132 +0,0 @@
|
|||
package textseg
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Generated from grapheme_clusters.rl. DO NOT EDIT
|
||||
%%{
|
||||
# (except you are actually in grapheme_clusters.rl here, so edit away!)
|
||||
|
||||
machine graphclust;
|
||||
write data;
|
||||
}%%
|
||||
|
||||
var Error = errors.New("invalid UTF8 text")
|
||||
|
||||
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
|
||||
// on grapheme cluster boundaries.
|
||||
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
|
||||
if len(data) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Ragel state
|
||||
cs := 0 // Current State
|
||||
p := 0 // "Pointer" into data
|
||||
pe := len(data) // End-of-data "pointer"
|
||||
ts := 0
|
||||
te := 0
|
||||
act := 0
|
||||
eof := pe
|
||||
|
||||
// Make Go compiler happy
|
||||
_ = ts
|
||||
_ = te
|
||||
_ = act
|
||||
_ = eof
|
||||
|
||||
startPos := 0
|
||||
endPos := 0
|
||||
|
||||
%%{
|
||||
include GraphemeCluster "grapheme_clusters_table.rl";
|
||||
|
||||
action start {
|
||||
startPos = p
|
||||
}
|
||||
|
||||
action end {
|
||||
endPos = p
|
||||
}
|
||||
|
||||
action emit {
|
||||
return endPos+1, data[startPos:endPos+1], nil
|
||||
}
|
||||
|
||||
ZWJGlue = ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?)?;
|
||||
AnyExtender = Extend | ZWJGlue | SpacingMark;
|
||||
Extension = AnyExtender*;
|
||||
ReplacementChar = (0xEF 0xBF 0xBD);
|
||||
|
||||
CRLFSeq = CR LF;
|
||||
ControlSeq = Control | ReplacementChar;
|
||||
HangulSeq = (
|
||||
L+ (((LV? V+ | LVT) T*)?|LV?) |
|
||||
LV V* T* |
|
||||
V+ T* |
|
||||
LVT T* |
|
||||
T+
|
||||
) Extension;
|
||||
EmojiSeq = (E_Base | E_Base_GAZ) Extend* E_Modifier? Extension;
|
||||
ZWJSeq = ZWJGlue Extension;
|
||||
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
|
||||
|
||||
UTF8Cont = 0x80 .. 0xBF;
|
||||
AnyUTF8 = (
|
||||
0x00..0x7F |
|
||||
0xC0..0xDF . UTF8Cont |
|
||||
0xE0..0xEF . UTF8Cont . UTF8Cont |
|
||||
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
|
||||
);
|
||||
|
||||
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
|
||||
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|E_Base|E_Base_GAZ|ZWJ|Regional_Indicator|Prepend)) Extension;
|
||||
|
||||
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
|
||||
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
|
||||
|
||||
CRLFTok = CRLFSeq >start @end;
|
||||
ControlTok = ControlSeq >start @end;
|
||||
HangulTok = HangulSeq >start @end;
|
||||
EmojiTok = EmojiSeq >start @end;
|
||||
ZWJTok = ZWJSeq >start @end;
|
||||
EmojiFlagTok = EmojiFlagSeq >start @end;
|
||||
OtherTok = OtherSeq >start @end;
|
||||
PrependTok = PrependSeq >start @end;
|
||||
|
||||
main := |*
|
||||
CRLFTok => emit;
|
||||
ControlTok => emit;
|
||||
HangulTok => emit;
|
||||
EmojiTok => emit;
|
||||
ZWJTok => emit;
|
||||
EmojiFlagTok => emit;
|
||||
PrependTok => emit;
|
||||
OtherTok => emit;
|
||||
|
||||
# any single valid UTF-8 character would also be valid per spec,
|
||||
# but we'll handle that separately after the loop so we can deal
|
||||
# with requesting more bytes if we're not at EOF.
|
||||
*|;
|
||||
|
||||
write init;
|
||||
write exec;
|
||||
}%%
|
||||
|
||||
// If we fall out here then we were unable to complete a sequence.
|
||||
// If we weren't able to complete a sequence then either we've
|
||||
// reached the end of a partial buffer (so there's more data to come)
|
||||
// or we have an isolated symbol that would normally be part of a
|
||||
// grapheme cluster but has appeared in isolation here.
|
||||
|
||||
if !atEOF {
|
||||
// Request more
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Just take the first UTF-8 sequence and return that.
|
||||
_, seqLen := utf8.DecodeRune(data)
|
||||
return seqLen, data[:seqLen], nil
|
||||
}
|
1583
vendor/github.com/apparentlymart/go-textseg/textseg/grapheme_clusters_table.rl
generated
vendored
1583
vendor/github.com/apparentlymart/go-textseg/textseg/grapheme_clusters_table.rl
generated
vendored
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,335 +0,0 @@
|
|||
#!/usr/bin/env ruby
|
||||
#
|
||||
# This scripted has been updated to accept more command-line arguments:
|
||||
#
|
||||
# -u, --url URL to process
|
||||
# -m, --machine Machine name
|
||||
# -p, --properties Properties to add to the machine
|
||||
# -o, --output Write output to file
|
||||
#
|
||||
# Updated by: Marty Schoch <marty.schoch@gmail.com>
|
||||
#
|
||||
# This script uses the unicode spec to generate a Ragel state machine
|
||||
# that recognizes unicode alphanumeric characters. It generates 5
|
||||
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
|
||||
# Currently supported encodings are UTF-8 [default] and UCS-4.
|
||||
#
|
||||
# Usage: unicode2ragel.rb [options]
|
||||
# -e, --encoding [ucs4 | utf8] Data encoding
|
||||
# -h, --help Show this message
|
||||
#
|
||||
# This script was originally written as part of the Ferret search
|
||||
# engine library.
|
||||
#
|
||||
# Author: Rakan El-Khalil <rakan@well.com>
|
||||
|
||||
require 'optparse'
|
||||
require 'open-uri'
|
||||
|
||||
ENCODINGS = [ :utf8, :ucs4 ]
|
||||
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
|
||||
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
|
||||
DEFAULT_MACHINE_NAME= "WChar"
|
||||
|
||||
###
|
||||
# Display vars & default option
|
||||
|
||||
TOTAL_WIDTH = 80
|
||||
RANGE_WIDTH = 23
|
||||
@encoding = :utf8
|
||||
@chart_url = DEFAULT_CHART_URL
|
||||
machine_name = DEFAULT_MACHINE_NAME
|
||||
properties = []
|
||||
@output = $stdout
|
||||
|
||||
###
|
||||
# Option parsing
|
||||
|
||||
cli_opts = OptionParser.new do |opts|
|
||||
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
|
||||
@encoding = o.downcase.to_sym
|
||||
end
|
||||
opts.on("-h", "--help", "Show this message") do
|
||||
puts opts
|
||||
exit
|
||||
end
|
||||
opts.on("-u", "--url URL", "URL to process") do |o|
|
||||
@chart_url = o
|
||||
end
|
||||
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
|
||||
machine_name = o
|
||||
end
|
||||
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
|
||||
properties = o
|
||||
end
|
||||
opts.on("-o", "--output FILE", "output file") do |o|
|
||||
@output = File.new(o, "w+")
|
||||
end
|
||||
end
|
||||
|
||||
cli_opts.parse(ARGV)
|
||||
unless ENCODINGS.member? @encoding
|
||||
puts "Invalid encoding: #{@encoding}"
|
||||
puts cli_opts
|
||||
exit
|
||||
end
|
||||
|
||||
##
|
||||
# Downloads the document at url and yields every alpha line's hex
|
||||
# range and description.
|
||||
|
||||
def each_alpha( url, property )
|
||||
open( url ) do |file|
|
||||
file.each_line do |line|
|
||||
next if line =~ /^#/;
|
||||
next if line !~ /; #{property} #/;
|
||||
|
||||
range, description = line.split(/;/)
|
||||
range.strip!
|
||||
description.gsub!(/.*#/, '').strip!
|
||||
|
||||
if range =~ /\.\./
|
||||
start, stop = range.split '..'
|
||||
else start = stop = range
|
||||
end
|
||||
|
||||
yield start.hex .. stop.hex, description
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
###
|
||||
# Formats to hex at minimum width
|
||||
|
||||
def to_hex( n )
|
||||
r = "%0X" % n
|
||||
r = "0#{r}" unless (r.length % 2).zero?
|
||||
r
|
||||
end
|
||||
|
||||
###
|
||||
# UCS4 is just a straight hex conversion of the unicode codepoint.
|
||||
|
||||
def to_ucs4( range )
|
||||
rangestr = "0x" + to_hex(range.begin)
|
||||
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
|
||||
[ rangestr ]
|
||||
end
|
||||
|
||||
##
|
||||
# 0x00 - 0x7f -> 0zzzzzzz[7]
|
||||
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
|
||||
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
|
||||
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
|
||||
|
||||
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
|
||||
|
||||
def to_utf8_enc( n )
|
||||
r = 0
|
||||
if n <= 0x7f
|
||||
r = n
|
||||
elsif n <= 0x7ff
|
||||
y = 0xc0 | (n >> 6)
|
||||
z = 0x80 | (n & 0x3f)
|
||||
r = y << 8 | z
|
||||
elsif n <= 0xffff
|
||||
x = 0xe0 | (n >> 12)
|
||||
y = 0x80 | (n >> 6) & 0x3f
|
||||
z = 0x80 | n & 0x3f
|
||||
r = x << 16 | y << 8 | z
|
||||
elsif n <= 0x10ffff
|
||||
w = 0xf0 | (n >> 18)
|
||||
x = 0x80 | (n >> 12) & 0x3f
|
||||
y = 0x80 | (n >> 6) & 0x3f
|
||||
z = 0x80 | n & 0x3f
|
||||
r = w << 24 | x << 16 | y << 8 | z
|
||||
end
|
||||
|
||||
to_hex(r)
|
||||
end
|
||||
|
||||
def from_utf8_enc( n )
|
||||
n = n.hex
|
||||
r = 0
|
||||
if n <= 0x7f
|
||||
r = n
|
||||
elsif n <= 0xdfff
|
||||
y = (n >> 8) & 0x1f
|
||||
z = n & 0x3f
|
||||
r = y << 6 | z
|
||||
elsif n <= 0xefffff
|
||||
x = (n >> 16) & 0x0f
|
||||
y = (n >> 8) & 0x3f
|
||||
z = n & 0x3f
|
||||
r = x << 10 | y << 6 | z
|
||||
elsif n <= 0xf7ffffff
|
||||
w = (n >> 24) & 0x07
|
||||
x = (n >> 16) & 0x3f
|
||||
y = (n >> 8) & 0x3f
|
||||
z = n & 0x3f
|
||||
r = w << 18 | x << 12 | y << 6 | z
|
||||
end
|
||||
r
|
||||
end
|
||||
|
||||
###
|
||||
# Given a range, splits it up into ranges that can be continuously
|
||||
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
|
||||
# This is not strictly needed since the current [5.1] unicode standard
|
||||
# doesn't have ranges that straddle utf8 boundaries. This is included
|
||||
# for completeness as there is no telling if that will ever change.
|
||||
|
||||
def utf8_ranges( range )
|
||||
ranges = []
|
||||
UTF8_BOUNDARIES.each do |max|
|
||||
if range.begin <= max
|
||||
if range.end <= max
|
||||
ranges << range
|
||||
return ranges
|
||||
end
|
||||
|
||||
ranges << (range.begin .. max)
|
||||
range = (max + 1) .. range.end
|
||||
end
|
||||
end
|
||||
ranges
|
||||
end
|
||||
|
||||
def build_range( start, stop )
|
||||
size = start.size/2
|
||||
left = size - 1
|
||||
return [""] if size < 1
|
||||
|
||||
a = start[0..1]
|
||||
b = stop[0..1]
|
||||
|
||||
###
|
||||
# Shared prefix
|
||||
|
||||
if a == b
|
||||
return build_range(start[2..-1], stop[2..-1]).map do |elt|
|
||||
"0x#{a} " + elt
|
||||
end
|
||||
end
|
||||
|
||||
###
|
||||
# Unshared prefix, end of run
|
||||
|
||||
return ["0x#{a}..0x#{b} "] if left.zero?
|
||||
|
||||
###
|
||||
# Unshared prefix, not end of run
|
||||
# Range can be 0x123456..0x56789A
|
||||
# Which is equivalent to:
|
||||
# 0x123456 .. 0x12FFFF
|
||||
# 0x130000 .. 0x55FFFF
|
||||
# 0x560000 .. 0x56789A
|
||||
|
||||
ret = []
|
||||
ret << build_range(start, a + "FF" * left)
|
||||
|
||||
###
|
||||
# Only generate middle range if need be.
|
||||
|
||||
if a.hex+1 != b.hex
|
||||
max = to_hex(b.hex - 1)
|
||||
max = "FF" if b == "FF"
|
||||
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
|
||||
end
|
||||
|
||||
###
|
||||
# Don't generate last range if it is covered by first range
|
||||
|
||||
ret << build_range(b + "00" * left, stop) unless b == "FF"
|
||||
ret.flatten!
|
||||
end
|
||||
|
||||
def to_utf8( range )
|
||||
utf8_ranges( range ).map do |r|
|
||||
begin_enc = to_utf8_enc(r.begin)
|
||||
end_enc = to_utf8_enc(r.end)
|
||||
build_range begin_enc, end_enc
|
||||
end.flatten!
|
||||
end
|
||||
|
||||
##
|
||||
# Perform a 3-way comparison of the number of codepoints advertised by
|
||||
# the unicode spec for the given range, the originally parsed range,
|
||||
# and the resulting utf8 encoded range.
|
||||
|
||||
def count_codepoints( code )
|
||||
code.split(' ').inject(1) do |acc, elt|
|
||||
if elt =~ /0x(.+)\.\.0x(.+)/
|
||||
if @encoding == :utf8
|
||||
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
|
||||
else
|
||||
acc * ($2.hex - $1.hex + 1)
|
||||
end
|
||||
else
|
||||
acc
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def is_valid?( range, desc, codes )
|
||||
spec_count = 1
|
||||
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
|
||||
range_count = range.end - range.begin + 1
|
||||
|
||||
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
|
||||
sum == spec_count and sum == range_count
|
||||
end
|
||||
|
||||
##
|
||||
# Generate the state maching to stdout
|
||||
|
||||
def generate_machine( name, property )
|
||||
pipe = " "
|
||||
@output.puts " #{name} = "
|
||||
each_alpha( @chart_url, property ) do |range, desc|
|
||||
|
||||
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
|
||||
|
||||
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
|
||||
# is_valid? range, desc, codes
|
||||
|
||||
range_width = codes.map { |a| a.size }.max
|
||||
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
|
||||
|
||||
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
|
||||
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
|
||||
|
||||
if desc.size > desc_width
|
||||
desc = desc[0..desc_width - 4] + "..."
|
||||
end
|
||||
|
||||
codes.each_with_index do |r, idx|
|
||||
desc = "" unless idx.zero?
|
||||
code = "%-#{range_width}s" % r
|
||||
@output.puts " #{pipe} #{code} ##{desc}"
|
||||
pipe = "|"
|
||||
end
|
||||
end
|
||||
@output.puts " ;"
|
||||
@output.puts ""
|
||||
end
|
||||
|
||||
@output.puts <<EOF
|
||||
# The following Ragel file was autogenerated with #{$0}
|
||||
# from: #{@chart_url}
|
||||
#
|
||||
# It defines #{properties}.
|
||||
#
|
||||
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
|
||||
# and that your input is in #{@encoding}.
|
||||
|
||||
%%{
|
||||
machine #{machine_name};
|
||||
|
||||
EOF
|
||||
|
||||
properties.each { |x| generate_machine( x, x ) }
|
||||
|
||||
@output.puts <<EOF
|
||||
}%%
|
||||
EOF
|
|
@ -1,19 +0,0 @@
|
|||
package textseg
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
|
||||
// on UTF8 sequence boundaries.
|
||||
//
|
||||
// This is included largely for completeness, since this behavior is already
|
||||
// built in to Go when ranging over a string.
|
||||
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
|
||||
if len(data) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
r, seqLen := utf8.DecodeRune(data)
|
||||
if r == utf8.RuneError && !atEOF {
|
||||
return 0, nil, nil
|
||||
}
|
||||
return seqLen, data[:seqLen], nil
|
||||
}
|
|
@ -1,5 +1,15 @@
|
|||
# HCL Changelog
|
||||
|
||||
## v2.4.0 (Apr 13, 2020)
|
||||
|
||||
### Enhancements
|
||||
|
||||
* The Unicode data tables that HCL uses to produce user-perceived "column" positions in diagnostics and other source ranges are now updated to Unicode 12.0.0, which will cause HCL to produce more accurate column numbers for combining characters introduced to Unicode since Unicode 9.0.0.
|
||||
|
||||
### Bugs Fixed
|
||||
|
||||
* json: Fix panic when parsing malformed JSON. ([#358](https://github.com/hashicorp/hcl/pull/358))
|
||||
|
||||
## v2.3.0 (Jan 3, 2020)
|
||||
|
||||
### Enhancements
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
module github.com/hashicorp/hcl/v2
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/agext/levenshtein v1.2.1
|
||||
github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3
|
||||
github.com/apparentlymart/go-textseg v1.0.0
|
||||
github.com/apparentlymart/go-textseg/v12 v12.0.0
|
||||
github.com/davecgh/go-spew v1.1.1
|
||||
github.com/go-test/deep v1.0.3
|
||||
github.com/google/go-cmp v0.3.1
|
||||
|
|
|
@ -4,6 +4,8 @@ github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3 h1:ZSTrOEhi
|
|||
github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
|
||||
github.com/apparentlymart/go-textseg v1.0.0 h1:rRmlIsPEEhUTIKQb7T++Nz/A5Q6C9IuX2wFoYVvnCs0=
|
||||
github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
|
||||
github.com/apparentlymart/go-textseg/v12 v12.0.0 h1:bNEQyAGak9tojivJNkoqWErVCQbjdL7GzRt3F8NvfJ0=
|
||||
github.com/apparentlymart/go-textseg/v12 v12.0.0/go.mod h1:S/4uRK2UtaQttw1GenVJEynmyUenKwP++x/+DdGV/Ec=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
// attr (the default) indicates that the value is to be populated from an attribute
|
||||
// block indicates that the value is to populated from a block
|
||||
// label indicates that the value is to populated from a block label
|
||||
// optional is the same as attr, but the field is optional
|
||||
// remain indicates that the value is to be populated from the remaining body after populating other fields
|
||||
//
|
||||
// "attr" fields may either be of type *hcl.Expression, in which case the raw
|
||||
|
@ -34,6 +35,9 @@
|
|||
// the blocks being decoded. In this case, the name token is used only as
|
||||
// an identifier for the label in diagnostic messages.
|
||||
//
|
||||
// "optional" fields behave like "attr" fields, but they are optional
|
||||
// and will not give parsing errors if they are missing.
|
||||
//
|
||||
// "remain" can be placed on a single field that may be either of type
|
||||
// hcl.Body or hcl.Attributes, in which case any remaining body content is
|
||||
// placed into this field for delayed processing. If no "remain" field is
|
||||
|
|
|
@ -6,7 +6,7 @@ import (
|
|||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
"github.com/hashicorp/hcl/v2"
|
||||
"github.com/zclconf/go-cty/cty"
|
||||
)
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
"github.com/hashicorp/hcl/v2"
|
||||
"github.com/zclconf/go-cty/cty"
|
||||
)
|
||||
|
|
|
@ -273,7 +273,7 @@ tuple = "[" (
|
|||
object = "{" (
|
||||
(objectelem ("," objectelem)* ","?)?
|
||||
) "}";
|
||||
objectelem = (Identifier | Expression) "=" Expression;
|
||||
objectelem = (Identifier | Expression) ("=" | ":") Expression;
|
||||
```
|
||||
|
||||
Only tuple and object values can be directly constructed via native syntax.
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
"github.com/hashicorp/hcl/v2"
|
||||
)
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"bytes"
|
||||
"io"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
"github.com/hashicorp/hcl/v2"
|
||||
"github.com/hashicorp/hcl/v2/hclsyntax"
|
||||
)
|
||||
|
|
|
@ -3,7 +3,7 @@ package json
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
"github.com/hashicorp/hcl/v2"
|
||||
)
|
||||
|
||||
|
@ -107,6 +107,15 @@ func scan(buf []byte, start pos) []token {
|
|||
})
|
||||
// If we've encountered an invalid then we might as well stop
|
||||
// scanning since the parser won't proceed beyond this point.
|
||||
// We insert a synthetic EOF marker here to match the expectations
|
||||
// of consumers of this data structure.
|
||||
p.Pos.Column++
|
||||
p.Pos.Byte++
|
||||
tokens = append(tokens, token{
|
||||
Type: tokenEOF,
|
||||
Bytes: nil,
|
||||
Range: posRange(p, p),
|
||||
})
|
||||
return tokens
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"bufio"
|
||||
"bytes"
|
||||
|
||||
"github.com/apparentlymart/go-textseg/textseg"
|
||||
"github.com/apparentlymart/go-textseg/v12/textseg"
|
||||
)
|
||||
|
||||
// RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
|
||||
|
|
|
@ -95,8 +95,6 @@ github.com/apparentlymart/go-cidr/cidr
|
|||
# github.com/apparentlymart/go-dump v0.0.0-20190214190832-042adf3cf4a0
|
||||
## explicit
|
||||
github.com/apparentlymart/go-dump/dump
|
||||
# github.com/apparentlymart/go-textseg v1.0.0
|
||||
github.com/apparentlymart/go-textseg/textseg
|
||||
# github.com/apparentlymart/go-textseg/v12 v12.0.0
|
||||
github.com/apparentlymart/go-textseg/v12/textseg
|
||||
# github.com/apparentlymart/go-userdirs v0.0.0-20190512014041-4a23807e62b9
|
||||
|
@ -443,7 +441,7 @@ github.com/hashicorp/hcl/hcl/token
|
|||
github.com/hashicorp/hcl/json/parser
|
||||
github.com/hashicorp/hcl/json/scanner
|
||||
github.com/hashicorp/hcl/json/token
|
||||
# github.com/hashicorp/hcl/v2 v2.3.0
|
||||
# github.com/hashicorp/hcl/v2 v2.4.0
|
||||
## explicit
|
||||
github.com/hashicorp/hcl/v2
|
||||
github.com/hashicorp/hcl/v2/ext/customdecode
|
||||
|
|
Loading…
Reference in New Issue