// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package language import ( "errors" "strconv" "strings" "golang.org/x/text/internal/language" ) // ValueError is returned by any of the parsing functions when the // input is well-formed but the respective subtag is not recognized // as a valid value. type ValueError interface { error // Subtag returns the subtag for which the error occurred. Subtag() string } // Parse parses the given BCP 47 string and returns a valid Tag. If parsing // failed it returns an error and any part of the tag that could be parsed. // If parsing succeeded but an unknown value was found, it returns // ValueError. The Tag returned in this case is just stripped of the unknown // value. All other values are preserved. It accepts tags in the BCP 47 format // and extensions to this standard defined in // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // The resulting tag is canonicalized using the default canonicalization type. func Parse(s string) (t Tag, err error) { return Default.Parse(s) } // Parse parses the given BCP 47 string and returns a valid Tag. If parsing // failed it returns an error and any part of the tag that could be parsed. // If parsing succeeded but an unknown value was found, it returns // ValueError. The Tag returned in this case is just stripped of the unknown // value. All other values are preserved. It accepts tags in the BCP 47 format // and extensions to this standard defined in // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // The resulting tag is canonicalized using the the canonicalization type c. func (c CanonType) Parse(s string) (t Tag, err error) { tt, err := language.Parse(s) if err != nil { return makeTag(tt), err } tt, changed := canonicalize(c, tt) if changed { tt.RemakeString() } return makeTag(tt), err } // Compose creates a Tag from individual parts, which may be of type Tag, Base, // Script, Region, Variant, []Variant, Extension, []Extension or error. If a // Base, Script or Region or slice of type Variant or Extension is passed more // than once, the latter will overwrite the former. Variants and Extensions are // accumulated, but if two extensions of the same type are passed, the latter // will replace the former. For -u extensions, though, the key-type pairs are // added, where later values overwrite older ones. A Tag overwrites all former // values and typically only makes sense as the first argument. The resulting // tag is returned after canonicalizing using the Default CanonType. If one or // more errors are encountered, one of the errors is returned. func Compose(part ...interface{}) (t Tag, err error) { return Default.Compose(part...) } // Compose creates a Tag from individual parts, which may be of type Tag, Base, // Script, Region, Variant, []Variant, Extension, []Extension or error. If a // Base, Script or Region or slice of type Variant or Extension is passed more // than once, the latter will overwrite the former. Variants and Extensions are // accumulated, but if two extensions of the same type are passed, the latter // will replace the former. For -u extensions, though, the key-type pairs are // added, where later values overwrite older ones. A Tag overwrites all former // values and typically only makes sense as the first argument. The resulting // tag is returned after canonicalizing using CanonType c. If one or more errors // are encountered, one of the errors is returned. func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { var b language.Builder if err = update(&b, part...); err != nil { return und, err } b.Tag, _ = canonicalize(c, b.Tag) return makeTag(b.Make()), err } var errInvalidArgument = errors.New("invalid Extension or Variant") func update(b *language.Builder, part ...interface{}) (err error) { for _, x := range part { switch v := x.(type) { case Tag: b.SetTag(v.tag()) case Base: b.Tag.LangID = v.langID case Script: b.Tag.ScriptID = v.scriptID case Region: b.Tag.RegionID = v.regionID case Variant: if v.variant == "" { err = errInvalidArgument break } b.AddVariant(v.variant) case Extension: if v.s == "" { err = errInvalidArgument break } b.SetExt(v.s) case []Variant: b.ClearVariants() for _, v := range v { b.AddVariant(v.variant) } case []Extension: b.ClearExtensions() for _, e := range v { b.SetExt(e.s) } // TODO: support parsing of raw strings based on morphology or just extensions? case error: if v != nil { err = v } } } return } var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") // ParseAcceptLanguage parses the contents of an Accept-Language header as // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and // a list of corresponding quality weights. It is more permissive than RFC 2616 // and may return non-nil slices even if the input is not valid. // The Tags will be sorted by highest weight first and then by first occurrence. // Tags with a weight of zero will be dropped. An error will be returned if the // input could not be parsed. func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { var entry string for s != "" { if entry, s = split(s, ','); entry == "" { continue } entry, weight := split(entry, ';') // Scan the language. t, err := Parse(entry) if err != nil { id, ok := acceptFallback[entry] if !ok { return nil, nil, err } t = makeTag(language.Tag{LangID: id}) } // Scan the optional weight. w := 1.0 if weight != "" { weight = consume(weight, 'q') weight = consume(weight, '=') // consume returns the empty string when a token could not be // consumed, resulting in an error for ParseFloat. if w, err = strconv.ParseFloat(weight, 32); err != nil { return nil, nil, errInvalidWeight } // Drop tags with a quality weight of 0. if w <= 0 { continue } } tag = append(tag, t) q = append(q, float32(w)) } sortStable(&tagSort{tag, q}) return tag, q, nil } // consume removes a leading token c from s and returns the result or the empty // string if there is no such token. func consume(s string, c byte) string { if s == "" || s[0] != c { return "" } return strings.TrimSpace(s[1:]) } func split(s string, c byte) (head, tail string) { if i := strings.IndexByte(s, c); i >= 0 { return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) } return strings.TrimSpace(s), "" } // Add hack mapping to deal with a small number of cases that that occur // in Accept-Language (with reasonable frequency). var acceptFallback = map[string]language.Language{ "english": _en, "deutsch": _de, "italian": _it, "french": _fr, "*": _mul, // defined in the spec to match all languages. } type tagSort struct { tag []Tag q []float32 } func (s *tagSort) Len() int { return len(s.q) } func (s *tagSort) Less(i, j int) bool { return s.q[i] > s.q[j] } func (s *tagSort) Swap(i, j int) { s.tag[i], s.tag[j] = s.tag[j], s.tag[i] s.q[i], s.q[j] = s.q[j], s.q[i] }