kuvia2/vendor/github.com/alecthomas/participle/v2/grammar.go
2022-01-15 00:09:03 +01:00

383 lines
9.6 KiB
Go

package participle
import (
"fmt"
"reflect"
"text/scanner"
"github.com/alecthomas/participle/v2/lexer"
)
// generatorContext holds the state shared by the grammar-building methods
// below while annotated Go types are turned into grammar nodes.
type generatorContext struct {
// Embedded lexer definition; its Symbols() method is used to resolve token
// names referenced from grammar tags.
lexer.Definition
// typeNodes remembers the node built for each type so that parseType can
// return it directly on a repeated (including recursive) reference.
typeNodes map[reflect.Type]node
// symbolsToIDs is filled from lexer.SymbolsByRune in newGeneratorContext and
// is consulted by parseLiteral to obtain the string stored with a literal's
// token type.
symbolsToIDs map[lexer.TokenType]string
}
// newGeneratorContext returns a generatorContext bound to the lexer definition
// lex, with an empty type-node cache and the symbol table obtained from
// lexer.SymbolsByRune(lex).
func newGeneratorContext(lex lexer.Definition) *generatorContext {
	ctx := &generatorContext{Definition: lex}
	ctx.typeNodes = make(map[reflect.Type]node)
	ctx.symbolsToIDs = lexer.SymbolsByRune(lex)
	return ctx
}
// parseType takes a Go type and builds a tree of grammar nodes out of it.
//
// Pointer and slice wrappers are stripped first, so *T, []T and []*T all
// resolve to the node for T. A node already recorded in g.typeNodes is
// returned as is. Types implementing Parseable (directly or through their
// pointer type) become parseable nodes; struct types have their grammar tags
// lexed and parsed into an expression.
//
// An error is returned when t is neither a struct nor Parseable, when the
// struct has no fields, when no grammar is found, or when input remains after
// the grammar expression. Once grammar parsing has started, the returned error
// is handed to decorate together with the name of the field being lexed.
func (g *generatorContext) parseType(t reflect.Type) (_ node, returnedError error) {
	t = indirectType(t)
	if n, ok := g.typeNodes[t]; ok {
		return n, nil
	}
	// NOTE(review): t has already had pointers and slices stripped here, and
	// reflect.Type.Elem panics for kinds other than array, chan, map, pointer
	// and slice. Confirm this branch is only reached for types where Elem is
	// valid.
	if t.Implements(parseableType) {
		return &parseable{t.Elem()}, nil
	}
	if reflect.PtrTo(t).Implements(parseableType) {
		return &parseable{t}, nil
	}
	switch t.Kind() { // nolint: exhaustive
	case reflect.Slice, reflect.Ptr:
		// Defensive only: indirectType above already unwraps pointers and
		// slices, so t normally never has one of these kinds at this point.
		t = indirectType(t.Elem())
		if t.Kind() != reflect.Struct {
			// Use %s so the message names the offending type; %T would print
			// the concrete type of the reflect.Type value itself.
			return nil, fmt.Errorf("expected a struct but got %s", t)
		}
		fallthrough
	case reflect.Struct:
		slexer, err := lexStruct(t)
		if err != nil {
			return nil, err
		}
		out := newStrct(t)
		g.typeNodes[t] = out // Ensure we avoid infinite recursion.
		if slexer.NumField() == 0 {
			return nil, fmt.Errorf("can not parse into empty struct %s", t)
		}
		// From here on, errors are decorated with the field currently being lexed.
		defer decorate(&returnedError, func() string { return slexer.Field().Name })
		e, err := g.parseDisjunction(slexer)
		if err != nil {
			return nil, err
		}
		if e == nil {
			return nil, fmt.Errorf("no grammar found in %s", t)
		}
		// Anything left over after the top-level expression is a grammar error.
		if token, _ := slexer.Peek(); !token.EOF() {
			return nil, fmt.Errorf("unexpected input %q", token.Value)
		}
		out.expr = e
		return out, nil
	}
	return nil, fmt.Errorf("%s should be a struct or should implement the Parseable interface", t)
}
// parseDisjunction parses one or more sequences separated by '|'.
//
// A single alternative is returned directly rather than wrapped in a
// disjunction node. An empty alternative is reported as an error that carries
// its 1-based position.
func (g *generatorContext) parseDisjunction(slexer *structLexer) (node, error) {
	alts := &disjunction{}
	for more := true; more; {
		seq, err := g.parseSequence(slexer)
		if err != nil {
			return nil, err
		}
		if seq == nil {
			return nil, fmt.Errorf("alternative expression %d cannot be empty", len(alts.nodes)+1)
		}
		alts.nodes = append(alts.nodes, seq)

		// The peek error is intentionally not inspected here; only the token
		// type decides whether another alternative follows.
		next, _ := slexer.Peek()
		more = next.Type == '|'
		if more {
			// Consume the '|' separator.
			if _, err := slexer.Next(); err != nil {
				return nil, err
			}
		}
	}
	if len(alts.nodes) != 1 {
		return alts, nil
	}
	return alts.nodes[0], nil
}
// parseSequence parses consecutive terms until the end of input or until a
// token that does not start a term is reached.
//
// It returns nil when no term was parsed, the term itself when exactly one was
// parsed, and otherwise the head of a linked list of sequence nodes.
func (g *generatorContext) parseSequence(slexer *structLexer) (node, error) {
	first := &sequence{}
	last := first
	for {
		token, err := slexer.Peek()
		if err != nil {
			return nil, err
		}
		if token.Type == lexer.EOF {
			break
		}
		term, err := g.parseTerm(slexer, true)
		if err != nil {
			return nil, err
		}
		if term == nil {
			break
		}
		if last.node != nil {
			// Append a new link after the current tail.
			last.next = &sequence{node: term}
			last = last.next
			continue
		}
		// The very first term is stored in the head link itself.
		last.head = true
		last.node = term
	}
	switch {
	case first.node == nil:
		return nil, nil
	case first.next == nil:
		return first.node, nil
	default:
		return first, nil
	}
}
// parseTermNoModifiers parses a single term, without any trailing modifier,
// dispatching on the next token.
//
// At end of input the EOF token is consumed and (nil, nil) is returned. For a
// token that starts no known term, (nil, nil) is returned when allowUnknown is
// true and an error otherwise.
func (g *generatorContext) parseTermNoModifiers(slexer *structLexer, allowUnknown bool) (node, error) {
	tok, err := slexer.Peek()
	if err != nil {
		return nil, err
	}
	switch tok.Type {
	case lexer.EOF:
		_, _ = slexer.Next()
		return nil, nil
	case scanner.Ident:
		return g.parseReference(slexer)
	case scanner.String, scanner.RawString, scanner.Char:
		return g.parseLiteral(slexer)
	case '@':
		return g.parseCapture(slexer)
	case '!', '~':
		return g.parseNegation(slexer)
	case '(':
		// Covers both plain groups and "(?" lookahead groups.
		return g.parseGroup(slexer)
	case '[':
		return g.parseOptional(slexer)
	case '{':
		return g.parseRepetition(slexer)
	}
	if allowUnknown {
		return nil, nil
	}
	return nil, fmt.Errorf("unexpected token %v", tok)
}
// parseTerm parses a single term together with an optional trailing modifier
// (?, *, + or !).
//
// It returns (nil, nil) when no term starts at the current position, which is
// only possible at end of input or, with allowUnknown set, on a token that
// begins no term.
func (g *generatorContext) parseTerm(slexer *structLexer, allowUnknown bool) (node, error) {
	out, err := g.parseTermNoModifiers(slexer, allowUnknown)
	if err != nil {
		return nil, err
	}
	if out == nil {
		// No term was parsed. Do not look for a modifier: a stray ?, * or +
		// would otherwise be consumed and wrapped around a nil expression.
		// Leaving it unconsumed lets the caller report it.
		return nil, nil
	}
	return g.parseModifier(slexer, out)
}
// parseModifier looks for a single modifier token after expr and, if one is
// present, consumes it and wraps expr in a group with the matching mode:
//
//	!  groupMatchNonEmpty
//	+  groupMatchOneOrMore
//	*  groupMatchZeroOrMore
//	?  groupMatchZeroOrOne
//
// Without a modifier, expr is returned unchanged and nothing is consumed.
func (g *generatorContext) parseModifier(slexer *structLexer, expr node) (node, error) {
	tok, err := slexer.Peek()
	if err != nil {
		return nil, err
	}
	wrapped := &group{expr: expr}
	switch tok.Type {
	case '?':
		wrapped.mode = groupMatchZeroOrOne
	case '*':
		wrapped.mode = groupMatchZeroOrMore
	case '+':
		wrapped.mode = groupMatchOneOrMore
	case '!':
		wrapped.mode = groupMatchNonEmpty
	default:
		return expr, nil
	}
	_, _ = slexer.Next() // consume the modifier token that was just peeked
	return wrapped, nil
}
// parseCapture parses a capture.
//
// @<expression> captures <expression> into the current field, and @@ captures
// the result of recursively parsing the field's own type.
//
// An error is returned when a struct-typed field is captured with a single @
// without being a token type or implementing the Capture or
// encoding.TextUnmarshaler interfaces, and when @ is not followed by an
// expression.
func (g *generatorContext) parseCapture(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume the leading '@'
	token, err := slexer.Peek()
	if err != nil {
		return nil, err
	}
	field := slexer.Field()
	if token.Type == '@' {
		_, _ = slexer.Next() // consume the second '@'
		n, err := g.parseType(field.Type)
		if err != nil {
			return nil, err
		}
		return &capture{field, n}, nil
	}
	ft := indirectType(field.Type)
	if ft.Kind() == reflect.Struct && ft != tokenType && ft != tokensType && !implements(ft, captureType) && !implements(ft, textUnmarshalerType) {
		return nil, fmt.Errorf("structs can only be parsed with @@ or by implementing the Capture or encoding.TextUnmarshaler interfaces")
	}
	n, err := g.parseTermNoModifiers(slexer, false)
	if err != nil {
		return nil, err
	}
	if n == nil {
		// parseTermNoModifiers yields (nil, nil) at end of input; a capture
		// without an expression must not be turned into a node.
		return nil, fmt.Errorf("expected an expression after @ but reached the end of the grammar")
	}
	return &capture{field, n}, nil
}
// parseReference parses a reference of the form <identifier>, which names a
// token type known to the lexer definition.
//
// It fails when the next token is not an identifier or when the identifier is
// not among the lexer's symbols.
func (g *generatorContext) parseReference(slexer *structLexer) (node, error) { // nolint: interfacer
	tok, err := slexer.Next()
	switch {
	case err != nil:
		return nil, err
	case tok.Type != scanner.Ident:
		return nil, fmt.Errorf("expected identifier but got %q", tok)
	}
	if typ, known := g.Symbols()[tok.Value]; known {
		return &reference{typ: typ, identifier: tok.Value}, nil
	}
	return nil, fmt.Errorf("unknown token type %q", tok)
}
// parseOptional parses [ <expression> ], which optionally matches
// <expression>. The opening '[' is expected to be the next token.
func (g *generatorContext) parseOptional(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume '['
	inner, err := g.parseDisjunction(slexer)
	if err != nil {
		return nil, err
	}
	closing, err := slexer.Next()
	if err != nil {
		return nil, err
	}
	if closing.Type == ']' {
		return &group{expr: inner, mode: groupMatchZeroOrOne}, nil
	}
	return nil, fmt.Errorf("expected ] but got %q", closing)
}
// parseRepetition parses { <expression> }, which matches zero or more
// repetitions of <expression>. The opening '{' is expected to be the next
// token.
func (g *generatorContext) parseRepetition(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume '{'
	inner, err := g.parseDisjunction(slexer)
	if err != nil {
		return nil, err
	}
	closing, err := slexer.Next()
	if err != nil {
		return nil, err
	}
	if closing.Type == '}' {
		return &group{expr: inner, mode: groupMatchZeroOrMore}, nil
	}
	return nil, fmt.Errorf("expected } but got %q", closing)
}
// parseGroup parses ( <expression> ), which groups a sub-expression. When the
// token after '(' is '?', parsing is delegated to subparseLookaheadGroup.
func (g *generatorContext) parseGroup(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume '('
	next, err := slexer.Peek()
	if err != nil {
		return nil, err
	}
	if next.Type != '?' {
		body, err := g.subparseGroup(slexer)
		if err != nil {
			return nil, err
		}
		return &group{expr: body}, nil
	}
	return g.subparseLookaheadGroup(slexer)
}
// subparseLookaheadGroup parses the remainder of (?= <expression> ) or
// (?! <expression> ): a grouped sub-expression that must match ('=') or must
// not match ('!'), without being consumed.
//
// The opening '(' has already been consumed by parseGroup; the '?' is still
// pending when this method is entered.
func (g *generatorContext) subparseLookaheadGroup(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume '?'
	marker, err := slexer.Next()
	if err != nil {
		return nil, err
	}
	if marker.Type != '=' && marker.Type != '!' {
		return nil, fmt.Errorf("expected = or ! but got %q", marker)
	}
	inner, err := g.subparseGroup(slexer)
	if err != nil {
		return nil, err
	}
	return &lookaheadGroup{expr: inner, negative: marker.Type == '!'}, nil
}
// subparseGroup is the shared tail of group and lookahead-group parsing: it
// parses <expression> ) and returns the expression, failing if the closing
// parenthesis is missing.
func (g *generatorContext) subparseGroup(slexer *structLexer) (node, error) {
	body, err := g.parseDisjunction(slexer)
	if err != nil {
		return nil, err
	}
	closing, err := slexer.Next()
	if err != nil {
		return nil, err
	}
	if closing.Type == ')' {
		return body, nil
	}
	return nil, fmt.Errorf("expected ) but got %q", closing)
}
// parseNegation parses a token negation.
//
// Accepts both the form !"some-literal" and !SomeNamedToken; the ~ prefix is
// dispatched here as well and is treated identically. An error is returned
// when the prefix is not followed by a term.
func (g *generatorContext) parseNegation(slexer *structLexer) (node, error) {
	_, _ = slexer.Next() // consume the '!' or '~' prefix that was just peeked
	next, err := g.parseTermNoModifiers(slexer, false)
	if err != nil {
		return nil, err
	}
	if next == nil {
		// parseTermNoModifiers yields (nil, nil) at end of input; negating
		// nothing must not produce a node.
		return nil, fmt.Errorf("expected an expression after negation but reached the end of the grammar")
	}
	return &negation{next}, nil
}
// parseLiteral parses a literal string, optionally followed by a type
// constraint of the form "literal":TokenName.
//
// Note that for a literal to match, the tokeniser must be able to produce that
// exact string; a tokeniser that only emits single characters will never match
// the literal "hello", and vice versa.
//
// Without a constraint the literal's token type is -1.
func (g *generatorContext) parseLiteral(lex *structLexer) (node, error) { // nolint: interfacer
	tok, err := lex.Next()
	if err != nil {
		return nil, err
	}
	value := tok.Value
	typ := lexer.TokenType(-1)

	if tok, err = lex.Peek(); err != nil {
		return nil, err
	}
	if tok.Type != ':' {
		// No type constraint follows the literal.
		return &literal{s: value, t: typ, tt: g.symbolsToIDs[typ]}, nil
	}

	_, _ = lex.Next() // consume ':'
	if tok, err = lex.Next(); err != nil {
		return nil, err
	}
	if tok.Type != scanner.Ident {
		return nil, fmt.Errorf("expected identifier for literal type constraint but got %q", tok)
	}
	typ, known := g.Symbols()[tok.Value]
	if !known {
		return nil, fmt.Errorf("unknown token type %q in literal type constraint", tok)
	}
	return &literal{s: value, t: typ, tt: g.symbolsToIDs[typ]}, nil
}
// indirectType strips every leading pointer and slice layer from t and returns
// the first underlying type that is neither, e.g. []*T yields T.
func indirectType(t reflect.Type) reflect.Type {
	for {
		switch t.Kind() { // nolint: exhaustive
		case reflect.Ptr, reflect.Slice:
			t = t.Elem()
		default:
			return t
		}
	}
}
// implements reports whether t, or a pointer to t, implements the interface
// type i.
func implements(t, i reflect.Type) bool {
	if t.Implements(i) {
		return true
	}
	return reflect.PtrTo(t).Implements(i)
}