420 lines
7.3 KiB
Go
420 lines
7.3 KiB
Go
|
package lexer
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
const (
|
||
|
//XItemError is an error with the parser input
|
||
|
XItemError XItemType = "Error"
|
||
|
//XItemAbsLocPath is an absolute path
|
||
|
XItemAbsLocPath = "Absolute path"
|
||
|
//XItemAbbrAbsLocPath represents an abbreviated absolute path
|
||
|
XItemAbbrAbsLocPath = "Abbreviated absolute path"
|
||
|
//XItemAbbrRelLocPath marks the start of a path expression
|
||
|
XItemAbbrRelLocPath = "Abbreviated relative path"
|
||
|
//XItemRelLocPath represents a relative location path
|
||
|
XItemRelLocPath = "Relative path"
|
||
|
//XItemEndPath marks the end of a path
|
||
|
XItemEndPath = "End path instruction"
|
||
|
//XItemAxis marks an axis specifier of a path
|
||
|
XItemAxis = "Axis"
|
||
|
//XItemAbbrAxis marks an abbreviated axis specifier (just @ at this point)
|
||
|
XItemAbbrAxis = "Abbreviated attribute axis"
|
||
|
//XItemNCName marks a namespace name in a node test
|
||
|
XItemNCName = "Namespace"
|
||
|
//XItemQName marks the local name in an a node test
|
||
|
XItemQName = "Local name"
|
||
|
//XItemNodeType marks a node type in a node test
|
||
|
XItemNodeType = "Node type"
|
||
|
//XItemProcLit marks a processing-instruction literal
|
||
|
XItemProcLit = "processing-instruction"
|
||
|
//XItemFunction marks a function call
|
||
|
XItemFunction = "function"
|
||
|
//XItemArgument marks a function argument
|
||
|
XItemArgument = "function argument"
|
||
|
//XItemEndFunction marks the end of a function
|
||
|
XItemEndFunction = "end of function"
|
||
|
//XItemPredicate marks a predicate in an axis
|
||
|
XItemPredicate = "predicate"
|
||
|
//XItemEndPredicate marks a predicate in an axis
|
||
|
XItemEndPredicate = "end of predicate"
|
||
|
//XItemStrLit marks a string literal
|
||
|
XItemStrLit = "string literal"
|
||
|
//XItemNumLit marks a numeric literal
|
||
|
XItemNumLit = "numeric literal"
|
||
|
//XItemOperator marks an operator
|
||
|
XItemOperator = "operator"
|
||
|
//XItemVariable marks a variable reference
|
||
|
XItemVariable = "variable"
|
||
|
)
|
||
|
|
||
|
const (
|
||
|
eof = -(iota + 1)
|
||
|
)
|
||
|
|
||
|
//XItemType is the parser token types
|
||
|
type XItemType string
|
||
|
|
||
|
//XItem is the token emitted from the parser
|
||
|
type XItem struct {
|
||
|
Typ XItemType
|
||
|
Val string
|
||
|
}
|
||
|
|
||
|
type stateFn func(*Lexer) stateFn
|
||
|
|
||
|
//Lexer lexes out XPath expressions
|
||
|
type Lexer struct {
|
||
|
input string
|
||
|
start int
|
||
|
pos int
|
||
|
width int
|
||
|
items chan XItem
|
||
|
}
|
||
|
|
||
|
//Lex an XPath expresion on the io.Reader
|
||
|
func Lex(xpath string) chan XItem {
|
||
|
l := &Lexer{
|
||
|
input: xpath,
|
||
|
items: make(chan XItem),
|
||
|
}
|
||
|
go l.run()
|
||
|
return l.items
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) run() {
|
||
|
for state := startState; state != nil; {
|
||
|
state = state(l)
|
||
|
}
|
||
|
|
||
|
if l.peek() != eof {
|
||
|
l.errorf("Malformed XPath expression")
|
||
|
}
|
||
|
|
||
|
close(l.items)
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) emit(t XItemType) {
|
||
|
l.items <- XItem{t, l.input[l.start:l.pos]}
|
||
|
l.start = l.pos
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) emitVal(t XItemType, val string) {
|
||
|
l.items <- XItem{t, val}
|
||
|
l.start = l.pos
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) next() (r rune) {
|
||
|
if l.pos >= len(l.input) {
|
||
|
l.width = 0
|
||
|
return eof
|
||
|
}
|
||
|
|
||
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
||
|
|
||
|
l.pos += l.width
|
||
|
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) ignore() {
|
||
|
l.start = l.pos
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) backup() {
|
||
|
l.pos -= l.width
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) peek() rune {
|
||
|
r := l.next()
|
||
|
|
||
|
l.backup()
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) peekAt(n int) rune {
|
||
|
if n <= 1 {
|
||
|
return l.peek()
|
||
|
}
|
||
|
|
||
|
width := 0
|
||
|
var ret rune
|
||
|
|
||
|
for count := 0; count < n; count++ {
|
||
|
r, s := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
||
|
width += s
|
||
|
|
||
|
if l.pos+width > len(l.input) {
|
||
|
return eof
|
||
|
}
|
||
|
|
||
|
ret = r
|
||
|
}
|
||
|
|
||
|
return ret
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) accept(valid string) bool {
|
||
|
if strings.ContainsRune(valid, l.next()) {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
l.backup()
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) acceptRun(valid string) {
|
||
|
for strings.ContainsRune(valid, l.next()) {
|
||
|
}
|
||
|
l.backup()
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) skip(num int) {
|
||
|
for i := 0; i < num; i++ {
|
||
|
l.next()
|
||
|
}
|
||
|
l.ignore()
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) skipWS(ig bool) {
|
||
|
for {
|
||
|
n := l.next()
|
||
|
|
||
|
if n == eof || !unicode.IsSpace(n) {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
l.backup()
|
||
|
|
||
|
if ig {
|
||
|
l.ignore()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
||
|
l.items <- XItem{
|
||
|
XItemError,
|
||
|
fmt.Sprintf(format, args...),
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func isElemChar(r rune) bool {
|
||
|
return string(r) != ":" && string(r) != "/" &&
|
||
|
(unicode.Is(first, r) || unicode.Is(second, r) || string(r) == "*") &&
|
||
|
r != eof
|
||
|
}
|
||
|
|
||
|
func startState(l *Lexer) stateFn {
|
||
|
l.skipWS(true)
|
||
|
|
||
|
if string(l.peek()) == "/" {
|
||
|
l.next()
|
||
|
l.ignore()
|
||
|
|
||
|
if string(l.next()) == "/" {
|
||
|
l.ignore()
|
||
|
return abbrAbsLocPathState
|
||
|
}
|
||
|
|
||
|
l.backup()
|
||
|
return absLocPathState
|
||
|
} else if string(l.peek()) == `'` || string(l.peek()) == `"` {
|
||
|
if err := getStrLit(l, XItemStrLit); err != nil {
|
||
|
return l.errorf(err.Error())
|
||
|
}
|
||
|
|
||
|
if l.peek() != eof {
|
||
|
return startState
|
||
|
}
|
||
|
} else if getNumLit(l) {
|
||
|
l.skipWS(true)
|
||
|
if l.peek() != eof {
|
||
|
return startState
|
||
|
}
|
||
|
} else if string(l.peek()) == "$" {
|
||
|
l.next()
|
||
|
l.ignore()
|
||
|
r := l.peek()
|
||
|
for unicode.Is(first, r) || unicode.Is(second, r) {
|
||
|
l.next()
|
||
|
r = l.peek()
|
||
|
}
|
||
|
tok := l.input[l.start:l.pos]
|
||
|
if len(tok) == 0 {
|
||
|
return l.errorf("Empty variable name")
|
||
|
}
|
||
|
l.emit(XItemVariable)
|
||
|
l.skipWS(true)
|
||
|
if l.peek() != eof {
|
||
|
return startState
|
||
|
}
|
||
|
} else if st := findOperatorState(l); st != nil {
|
||
|
return st
|
||
|
} else {
|
||
|
if isElemChar(l.peek()) {
|
||
|
colons := 0
|
||
|
|
||
|
for {
|
||
|
if isElemChar(l.peek()) {
|
||
|
l.next()
|
||
|
} else if string(l.peek()) == ":" {
|
||
|
l.next()
|
||
|
colons++
|
||
|
} else {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if string(l.peek()) == "(" && colons <= 1 {
|
||
|
tok := l.input[l.start:l.pos]
|
||
|
err := procFunc(l, tok)
|
||
|
if err != nil {
|
||
|
return l.errorf(err.Error())
|
||
|
}
|
||
|
|
||
|
l.skipWS(true)
|
||
|
|
||
|
if string(l.peek()) == "/" {
|
||
|
l.next()
|
||
|
l.ignore()
|
||
|
|
||
|
if string(l.next()) == "/" {
|
||
|
l.ignore()
|
||
|
return abbrRelLocPathState
|
||
|
}
|
||
|
|
||
|
l.backup()
|
||
|
return relLocPathState
|
||
|
}
|
||
|
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
l.pos = l.start
|
||
|
return relLocPathState
|
||
|
} else if string(l.peek()) == "@" {
|
||
|
return relLocPathState
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func strPeek(str string, l *Lexer) bool {
|
||
|
for i := 0; i < len(str); i++ {
|
||
|
if string(l.peekAt(i+1)) != string(str[i]) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
func findOperatorState(l *Lexer) stateFn {
|
||
|
l.skipWS(true)
|
||
|
|
||
|
switch string(l.peek()) {
|
||
|
case ">", "<", "!":
|
||
|
l.next()
|
||
|
if string(l.peek()) == "=" {
|
||
|
l.next()
|
||
|
}
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
case "|", "+", "-", "*", "=":
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
case "(":
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
for state := startState; state != nil; {
|
||
|
state = state(l)
|
||
|
}
|
||
|
l.skipWS(true)
|
||
|
if string(l.next()) != ")" {
|
||
|
return l.errorf("Missing end )")
|
||
|
}
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
if strPeek("and", l) {
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
if strPeek("or", l) {
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
if strPeek("mod", l) {
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
if strPeek("div", l) {
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.next()
|
||
|
l.emit(XItemOperator)
|
||
|
return startState
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func getStrLit(l *Lexer, tok XItemType) error {
|
||
|
q := l.next()
|
||
|
var r rune
|
||
|
|
||
|
l.ignore()
|
||
|
|
||
|
for r != q {
|
||
|
r = l.next()
|
||
|
if r == eof {
|
||
|
return fmt.Errorf("Unexpected end of string literal.")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
l.backup()
|
||
|
l.emit(tok)
|
||
|
l.next()
|
||
|
l.ignore()
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func getNumLit(l *Lexer) bool {
|
||
|
const dig = "0123456789"
|
||
|
l.accept("-")
|
||
|
start := l.pos
|
||
|
l.acceptRun(dig)
|
||
|
|
||
|
if l.pos == start {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
if l.accept(".") {
|
||
|
l.acceptRun(dig)
|
||
|
}
|
||
|
|
||
|
l.emit(XItemNumLit)
|
||
|
return true
|
||
|
}
|