elvish/pkg/parse/parse.go
2021-10-13 22:47:16 +01:00

1049 lines
24 KiB
Go

// Package parse implements the elvish parser.
//
// The parser builds a hybrid of AST (abstract syntax tree) and parse tree
// (a.k.a. concrete syntax tree). The AST part only includes parts that are
// semantically significant -- i.e. skipping whitespaces and symbols that do not
// alter the semantics, and is embodied in the fields of each *Node type. The
// parse tree part corresponds to all the text in the original source text, and
// is embodied in the children of each *Node type.
package parse
//go:generate stringer -type=PrimaryType,RedirMode,ExprCtx -output=string.go
import (
"bytes"
"fmt"
"io"
"unicode"
"src.elv.sh/pkg/diag"
)
// Tree represents a parsed tree.
type Tree struct {
// Root is the root node of the tree. Parse fills it with a *Chunk.
Root *Chunk
// Source is the source that was given to the parser.
Source Source
}
// Config keeps configuration options when parsing.
type Config struct {
// Destination of warnings. If nil, warnings are suppressed.
// ParseAs hands this writer to the parser as its warning sink.
WarningWriter io.Writer
}
// Parse parses src as a Chunk and returns the resulting tree together with any
// parse error. A non-nil error always has type *Error. The tree is returned
// even when an error occurred.
func Parse(src Source, cfg Config) (Tree, error) {
	root := &Chunk{}
	err := ParseAs(src, root, cfg)
	return Tree{Root: root, Source: src}, err
}
// ParseAs parses src into the node n; the grammar used is determined by the
// dynamic type of n. A non-nil error always has type *Error.
func ParseAs(src Source, n Node, cfg Config) error {
	ps := &parser{
		srcName: src.Name,
		src:     src.Code,
		warn:    cfg.WarningWriter,
	}
	ps.parse(n)
	ps.done()
	return ps.assembleError()
}
// Errors.
//
// NOTE(review): newError is defined outside this section. Judging from the
// call sites, the first argument is a message and the remaining arguments list
// what the parser expected to see -- confirm against newError.
var (
errShouldBeForm = newError("", "form")
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
errShouldBeFD = newError("", "a composite term representing fd")
errShouldBeFilename = newError("", "a composite term representing filename")
errShouldBeArray = newError("", "spaced")
errStringUnterminated = newError("string not terminated")
errInvalidEscape = newError("invalid escape sequence")
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
errInvalidEscapeControl = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F")
errShouldBePrimary = newError("", "single-quoted string", "double-quoted string", "bareword")
errShouldBeVariableName = newError("", "variable name")
errShouldBeRBracket = newError("", "']'")
errShouldBeRBrace = newError("", "'}'")
// NOTE(review): despite "RBracket" in the name, the alternatives listed are
// ',' and '}'. lbrace reports this error when a braced list is not closed.
errShouldBeBraceSepOrRBracket = newError("", "','", "'}'")
errShouldBeRParen = newError("", "')'")
errShouldBeCompound = newError("", "compound")
errShouldBeEqual = newError("", "'='")
errShouldBePipe = newError("", "'|'")
errBothElementsAndPairs = newError("cannot contain both list elements and map pairs")
errShouldBeNewline = newError("", "newline")
)
// Chunk = { PipelineSep | Space } { Pipeline { PipelineSep | Space } }
//
// A Chunk is a sequence of pipelines separated by pipeline separators
// (carriage return, newline or semicolon) and whitespace.
type Chunk struct {
node
// Pipelines holds the parsed pipelines in source order.
Pipelines []*Pipeline
}
// parse parses a chunk: leading separators, then pipelines for as long as the
// next rune can start one and the previous pipeline was followed by at least
// one pipeline separator.
func (bn *Chunk) parse(ps *parser) {
	bn.parseSeps(ps)
	for {
		if !startsPipeline(ps.peek()) {
			return
		}
		ps.parse(&Pipeline{}).addTo(&bn.Pipelines, bn)
		// Two pipelines must be separated by at least one separator.
		if bn.parseSeps(ps) == 0 {
			return
		}
	}
}
// isPipelineSep reports whether r separates pipelines: a carriage return, a
// newline or a semicolon.
func isPipelineSep(r rune) bool {
	switch r {
	case '\r', '\n', ';':
		return true
	}
	return false
}
// parseSeps consumes any mix of pipeline separators, inline whitespace and
// comments, adding them to bn as Sep children. It returns how many pipeline
// separators were consumed; whitespace and comments are not counted.
func (bn *Chunk) parseSeps(ps *parser) int {
	count := 0
	for {
		switch r := ps.peek(); {
		case isPipelineSep(r):
			// Each separator becomes its own Sep.
			parseSep(bn, ps, r)
			count++
		case IsInlineWhitespace(r), r == '#':
			// A run of spaces and comments becomes a single Sep.
			parseSpaces(bn, ps)
		default:
			return count
		}
	}
}
// Pipeline = Form { '|' Form }
//
// The parser additionally accepts a trailing '&', which marks the pipeline as
// a background pipeline.
type Pipeline struct {
node
// Forms holds the forms of the pipeline in source order.
Forms []*Form
// Background is set when the pipeline is followed by '&'.
Background bool
}
// parse parses a pipeline: one form, then any number of '|'-prefixed forms
// (whitespace and newlines are allowed after each '|'), then an optional '&'
// that marks the pipeline as a background pipeline.
func (pn *Pipeline) parse(ps *parser) {
	ps.parse(&Form{}).addTo(&pn.Forms, pn)
	for {
		if !parseSep(pn, ps, '|') {
			break
		}
		parseSpacesAndNewlines(pn, ps)
		if !startsForm(ps.peek()) {
			// A '|' must be followed by another form.
			ps.error(errShouldBeForm)
			return
		}
		ps.parse(&Form{}).addTo(&pn.Forms, pn)
	}
	parseSpaces(pn, ps)
	// parseSep consumes the '&' and records it as a Sep when it is present.
	if parseSep(pn, ps, '&') {
		pn.Background = true
		parseSpaces(pn, ps)
	}
}
// startsPipeline reports whether r can start a Pipeline, which is the case
// exactly when it can start a Form.
func startsPipeline(r rune) bool {
return startsForm(r)
}
// Form = { Space } { { Assignment } { Space } }
// { Compound } { Space } { ( Compound | MapPair | Redir ) { Space } }
type Form struct {
node
// Assignments holds the leading assignments, in source order.
Assignments []*Assignment
// Head is the command, parsed in the CmdExpr context. It is left nil for a
// form that consists only of assignments.
Head *Compound
// Args holds the arguments following the head.
Args []*Compound
// Opts holds the options, written as '&'-prefixed map pairs.
Opts []*MapPair
// Redirs holds the IO redirections.
Redirs []*Redir
}
// parse parses a form: optional leading assignments, the head, and then any
// mix of arguments, options and redirections until a rune that cannot continue
// the form is seen.
func (fn *Form) parse(ps *parser) {
parseSpaces(fn, ps)
// Consume as many leading assignments as can be parsed without error.
for fn.tryAssignment(ps) {
parseSpaces(fn, ps)
}
// Parse head.
if !startsCompound(ps.peek(), CmdExpr) {
if len(fn.Assignments) > 0 {
// Assignment-only form.
return
}
// Bad form. The error is recorded, but parsing of the head is still
// attempted below.
ps.error(fmt.Errorf("bad rune at form head: %q", ps.peek()))
}
ps.parse(&Compound{ExprCtx: CmdExpr}).addAs(&fn.Head, fn)
parseSpaces(fn, ps)
for {
r := ps.peek()
switch {
case r == '&':
// Look one rune past the '&' to tell an option (map pair) apart from
// a background indicator, then step back so that whoever handles the
// '&' sees it again.
ps.next()
hasMapPair := startsCompound(ps.peek(), LHSExpr)
ps.backup()
if !hasMapPair {
// background indicator
return
}
ps.parse(&MapPair{}).addTo(&fn.Opts, fn)
case startsCompound(r, NormalExpr):
// A compound is either an argument or, when immediately followed by a
// redirection sign, the left-hand side of a redirection.
cn := &Compound{}
ps.parse(cn)
if isRedirSign(ps.peek()) {
// Redir
ps.parse(&Redir{Left: cn}).addTo(&fn.Redirs, fn)
} else {
parsed{cn}.addTo(&fn.Args, fn)
}
case isRedirSign(r):
// Redirection without a left-hand side.
ps.parse(&Redir{}).addTo(&fn.Redirs, fn)
default:
return
}
parseSpaces(fn, ps)
}
}
// tryAssignment tries to parse an assignment. If succeeded, it adds the parsed
// assignment to fn.Assignments and returns true. Otherwise it rewinds the
// parser and returns false.
func (fn *Form) tryAssignment(ps *parser) bool {
if !startsIndexing(ps.peek(), LHSExpr) {
return false
}
// Remember the position and the recorded errors so that a failed attempt
// can be undone.
pos := ps.pos
errorEntries := ps.errors.Entries
parsedAssignment := ps.parse(&Assignment{})
// If errors were added, revert
if len(ps.errors.Entries) > len(errorEntries) {
ps.errors.Entries = errorEntries
ps.pos = pos
return false
}
parsedAssignment.addTo(&fn.Assignments, fn)
return true
}
// startsForm reports whether r can start a Form: inline whitespace, or any
// rune that can start a compound in the CmdExpr context.
func startsForm(r rune) bool {
return IsInlineWhitespace(r) || startsCompound(r, CmdExpr)
}
// Assignment = Indexing '=' Compound
type Assignment struct {
node
// Left is the assignment target, parsed in the LHSExpr context.
Left *Indexing
// Right is the value being assigned.
Right *Compound
}
// parse parses an assignment: the left-hand side indexing, a '=' and the
// right-hand side compound. Errors are recorded when the head of the
// left-hand side is not a valid variable name or the '=' is missing; parsing
// continues in both cases.
func (an *Assignment) parse(ps *parser) {
	ps.parse(&Indexing{ExprCtx: LHSExpr}).addAs(&an.Left, an)
	if head := an.Left.Head; !ValidLHSVariable(head, true) {
		ps.errorp(head, errShouldBeVariableName)
	}
	if sawEqual := parseSep(an, ps, '='); !sawEqual {
		ps.error(errShouldBeEqual)
	}
	ps.parse(&Compound{}).addAs(&an.Right, an)
}
// ValidLHSVariable reports whether p may be used as a variable on the
// left-hand side of an assignment.
//
// Braced and quoted primaries are always accepted. A bareword is accepted when
// it is non-empty and consists only of runes allowed in variable names; if
// allowSigil is true, one leading '@' is ignored for that check. Every other
// primary type is rejected.
func ValidLHSVariable(p *Primary, allowSigil bool) bool {
	switch p.Type {
	case Braced, SingleQuoted, DoubleQuoted:
		// Quoted names may contain anything.
		// TODO(xiaq): check further inside braced expression
		return true
	case Bareword:
		name := p.Value
		if name == "" {
			return false
		}
		if allowSigil && name[0] == '@' {
			name = name[1:]
		}
		for _, r := range name {
			if !allowedInVariableName(r) {
				return false
			}
		}
		return true
	}
	return false
}
// Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound )
type Redir struct {
node
// Left is the optional compound written directly before the redirection
// sign. It is parsed by Form.parse and handed over via this field.
Left *Compound
// Mode is the redirection mode derived from the redirection sign.
Mode RedirMode
// RightIsFd is set when the right-hand side is prefixed with '&'.
RightIsFd bool
// Right is the compound following the redirection sign.
Right *Compound
}
// parse parses a redirection, starting at the redirection sign. If rn.Left was
// already set by Form.parse, it is adopted as the first child and the range of
// rn is extended to start where Left starts.
func (rn *Redir) parse(ps *parser) {
	if left := rn.Left; left != nil {
		addChild(rn, left)
		rn.From = left.From
	}

	// Collect the full run of '<' and '>' runes and map it to a mode.
	start := ps.pos
	for isRedirSign(ps.peek()) {
		ps.next()
	}
	switch ps.src[start:ps.pos] {
	case "<":
		rn.Mode = Read
	case ">":
		rn.Mode = Write
	case ">>":
		rn.Mode = Append
	case "<>":
		rn.Mode = ReadWrite
	default:
		ps.error(errBadRedirSign)
	}
	addSep(rn, ps)
	parseSpaces(rn, ps)

	// A '&' before the right-hand side marks it as a file descriptor.
	if parseSep(rn, ps, '&') {
		rn.RightIsFd = true
	}
	ps.parse(&Compound{}).addAs(&rn.Right, rn)
	if len(rn.Right.Indexings) > 0 {
		return
	}
	// The right-hand side must not be empty.
	if rn.RightIsFd {
		ps.error(errShouldBeFD)
	} else {
		ps.error(errShouldBeFilename)
	}
}
// isRedirSign reports whether r is one of the runes that make up a
// redirection sign, '<' or '>'.
func isRedirSign(r rune) bool {
	switch r {
	case '<', '>':
		return true
	default:
		return false
	}
}
// RedirMode records the mode of an IO redirection.
type RedirMode int
// Possible values for RedirMode.
const (
// BadRedirMode is the zero value; Redir.parse leaves Mode at this value
// when the redirection sign is not recognized.
BadRedirMode RedirMode = iota
// Read is the mode for "<".
Read
// Write is the mode for ">".
Write
// ReadWrite is the mode for "<>".
ReadWrite
// Append is the mode for ">>".
Append
)
// Filter is the Elvish filter DSL. It uses the same syntax as arguments and
// options to a command.
type Filter struct {
node
// Args holds the compounds of the filter, in source order.
Args []*Compound
// Opts holds the '&'-prefixed map pairs of the filter, in source order.
Opts []*MapPair
}
// parse parses a filter: a whitespace-separated sequence of compounds and
// '&'-prefixed map pairs. It stops at the first rune that starts neither.
func (qn *Filter) parse(ps *parser) {
	parseSpaces(qn, ps)
	for {
		r := ps.peek()
		if r == '&' {
			ps.parse(&MapPair{}).addTo(&qn.Opts, qn)
		} else if startsCompound(r, NormalExpr) {
			ps.parse(&Compound{}).addTo(&qn.Args, qn)
		} else {
			return
		}
		parseSpaces(qn, ps)
	}
}
// Compound = { Indexing }
type Compound struct {
node
// ExprCtx is the expression context; it is passed on to every Indexing
// parsed as part of this compound.
ExprCtx ExprCtx
// Indexings holds the parts of the compound in source order. A leading
// tilde is stored as the first element.
Indexings []*Indexing
}
// ExprCtx represents special contexts of expression parsing.
type ExprCtx int
// Possible values for ExprCtx.
const (
// NormalExpr represents a normal expression, namely none of the special
// ones below. It is the default value.
NormalExpr ExprCtx = iota
// CmdExpr represents an expression used as the command in a form. In this
// context, unquoted <>*^ are treated as bareword characters.
CmdExpr
// LHSExpr represents an expression used as the left-hand-side in either
// assignments or map pairs. In this context, an unquoted = serves as an
// expression terminator and is thus not treated as a bareword character.
LHSExpr
// BracedElemExpr represents an expression used as an element in a braced
// expression. In this context, an unquoted , serves as an expression
// terminator and is thus not treated as a bareword character.
BracedElemExpr
// strictExpr is only meaningful to allowedInBareword, where it excludes
// both = and , from the bareword characters.
strictExpr
)
// parse parses a compound: an optional leading tilde followed by as many
// indexings as the expression context allows. The result may be empty.
func (cn *Compound) parse(ps *parser) {
	cn.tilde(ps)
	for {
		if !startsIndexing(ps.peek(), cn.ExprCtx) {
			return
		}
		ps.parse(&Indexing{ExprCtx: cn.ExprCtx}).addTo(&cn.Indexings, cn)
	}
}
// tilde parses a tilde if there is one. It is implemented here instead of
// within Primary since a tilde can only appear as the first part of a
// Compound. Elsewhere tildes are barewords.
func (cn *Compound) tilde(ps *parser) {
if ps.peek() == '~' {
ps.next()
// The Primary and the Indexing wrapping it cover the same one-rune range,
// so both start out as copies of the same base node.
base := node{Ranging: diag.Ranging{From: ps.pos - 1, To: ps.pos},
sourceText: "~", parent: nil, children: nil}
pn := &Primary{node: base, Type: Tilde, Value: "~"}
in := &Indexing{node: base}
// The nodes are built by hand rather than via ps.parse, so they are
// wrapped in parsed{} to attach them to their parents.
parsed{pn}.addAs(&in.Head, in)
parsed{in}.addTo(&cn.Indexings, cn)
}
}
// startsCompound reports whether r can start a Compound in the context ctx,
// which is the case exactly when it can start an Indexing.
func startsCompound(r rune, ctx ExprCtx) bool {
return startsIndexing(r, ctx)
}
// Indexing = Primary { '[' Array ']' }
type Indexing struct {
node
// ExprCtx is the expression context; it is passed on to the head.
ExprCtx ExprCtx
// Head is the primary expression being indexed.
Head *Primary
// Indices holds one Array per bracketed index, in source order.
Indices []*Array
}
// parse parses an indexing: a primary followed by any number of bracketed
// index arrays. Parsing stops after the first index that is not closed by ']'.
func (in *Indexing) parse(ps *parser) {
	ps.parse(&Primary{ExprCtx: in.ExprCtx}).addAs(&in.Head, in)
	for {
		if !parseSep(in, ps, '[') {
			return
		}
		if !startsArray(ps.peek()) {
			// Recorded, but the array is still parsed below.
			ps.error(errShouldBeArray)
		}
		ps.parse(&Array{}).addTo(&in.Indices, in)
		if !parseSep(in, ps, ']') {
			ps.error(errShouldBeRBracket)
			return
		}
	}
}
// startsIndexing reports whether r can start an Indexing in the context ctx,
// which is the case exactly when it can start a Primary.
func startsIndexing(r rune, ctx ExprCtx) bool {
return startsPrimary(r, ctx)
}
// Array = { Space | '\n' } { Compound { Space | '\n' } }
type Array struct {
node
// Compounds holds the elements of the array in source order.
Compounds []*Compound
// When non-empty, records the occurrences of semicolons by the indices of
// the compounds they appear before. For instance, [; ; a b; c d;] results
// in Semicolons={0 0 2 4}.
// NOTE(review): Array.parse in this file never assigns this field -- confirm
// whether anything still populates it.
Semicolons []int
}
// parse parses an array: compounds separated by whitespace and newlines, with
// optional whitespace and newlines before the first one.
func (sn *Array) parse(ps *parser) {
	parseSpacesAndNewlines(sn, ps)
	for startsCompound(ps.peek(), NormalExpr) {
		ps.parse(&Compound{}).addTo(&sn.Compounds, sn)
		parseSpacesAndNewlines(sn, ps)
	}
}
// startsArray reports whether r can start an Array: any whitespace, or any
// rune that can start an indexing in the NormalExpr context.
func startsArray(r rune) bool {
return IsWhitespace(r) || startsIndexing(r, NormalExpr)
}
// Primary is the smallest expression unit.
type Primary struct {
node
// ExprCtx is the expression context the primary is parsed in; it decides
// which runes count as bareword characters.
ExprCtx ExprCtx
// Type is the kind of primary expression that was parsed.
Type PrimaryType
// Legacy lambda uses [args]{ body } instead of { |args| body }
LegacyLambda bool
// The unquoted string value. Valid for Bareword, SingleQuoted,
// DoubleQuoted, Variable, Wildcard and Tilde.
Value string
Elements []*Compound // Valid for List and Lambda
Chunk *Chunk // Valid for OutputCapture, ExceptionCapture and Lambda
MapPairs []*MapPair // Valid for Map and Lambda
Braced []*Compound // Valid for Braced
}
// PrimaryType is the type of a Primary.
type PrimaryType int
// Possible values for PrimaryType.
const (
// BadPrimary is the zero value, left in place when Primary.parse rejects
// the first rune.
BadPrimary PrimaryType = iota
Bareword
SingleQuoted
DoubleQuoted
Variable
Wildcard
Tilde
// ExceptionCapture is the type of a "?(" ... ")" expression.
ExceptionCapture
// OutputCapture is the type of a "(" ... ")" expression.
OutputCapture
List
Lambda
Map
Braced
)
// parse parses a primary expression, dispatching on the first rune to the
// method that handles that kind of primary.
func (pn *Primary) parse(ps *parser) {
r := ps.peek()
if !startsPrimary(r, pn.ExprCtx) {
ps.error(errShouldBePrimary)
return
}
// Try bareword early, since it has precedence over wildcard on *
// when ctx = CmdExpr.
if allowedInBareword(r, pn.ExprCtx) {
pn.bareword(ps)
return
}
switch r {
case '\'':
pn.singleQuoted(ps)
case '"':
pn.doubleQuoted(ps)
case '$':
pn.variable(ps)
case '*':
pn.starWildcard(ps)
case '?':
// "?(" starts an exception capture; any other '?' is a wildcard.
if ps.hasPrefix("?(") {
pn.exitusCapture(ps)
} else {
pn.questionWildcard(ps)
}
case '(':
pn.outputCapture(ps)
case '[':
pn.lbracket(ps)
case '{':
pn.lbrace(ps)
default:
// Parse an empty bareword.
// NOTE(review): every rune accepted by startsPrimary appears to be
// handled above, so this branch looks unreachable -- confirm.
pn.Type = Bareword
}
}
// singleQuoted parses a single-quoted string. The parser is positioned at the
// opening quote, which is consumed here before the rest is handed to
// singleQuotedInner.
func (pn *Primary) singleQuoted(ps *parser) {
pn.Type = SingleQuoted
ps.next()
pn.singleQuotedInner(ps)
}
// singleQuotedInner parses the rest of a single-quoted string, the opening
// quote having been consumed already. It sets pn.Value but not pn.Type.
//
// Two consecutive single quotes stand for one literal quote; a lone single
// quote ends the string. Reaching the end of input first is an error.
func (pn *Primary) singleQuotedInner(ps *parser) {
	var buf bytes.Buffer
	// Deferred so that the value is stored on every return path.
	defer func() { pn.Value = buf.String() }()
	for {
		r := ps.next()
		if r == eof {
			ps.error(errStringUnterminated)
			return
		}
		if r != '\'' {
			buf.WriteRune(r)
			continue
		}
		if ps.peek() != '\'' {
			// A lone quote closes the string.
			return
		}
		// A doubled quote is an escaped quote.
		ps.next()
		buf.WriteByte('\'')
	}
}
// doubleQuoted parses a double-quoted string. The parser is positioned at the
// opening quote, which is consumed here before the rest is handed to
// doubleQuotedInner.
func (pn *Primary) doubleQuoted(ps *parser) {
pn.Type = DoubleQuoted
ps.next()
pn.doubleQuotedInner(ps)
}
// Parses a double-quoted string after the opening quote. Sets pn.Value but not
// pn.Type.
//
// Backslash escapes recognized:
//
//   - \cX and \^X: control sequence, X between 0x3F and 0x5F
//   - \xHH, \uHHHH, \UHHHHHHHH: two, four or eight hex digits
//   - \NNN: three octal digits
//   - the single-character escapes listed in doubleEscape
//
// Invalid escapes are recorded as errors and parsing of the string continues.
func (pn *Primary) doubleQuotedInner(ps *parser) {
var buf bytes.Buffer
// Deferred so that the value is stored on every return path.
defer func() { pn.Value = buf.String() }()
for {
switch r := ps.next(); r {
case eof:
ps.error(errStringUnterminated)
return
case '"':
// Closing quote.
return
case '\\':
switch r := ps.next(); r {
case 'c', '^': // control sequence
r := ps.next()
if r < 0x3F || r > 0x5F {
// Step back before recording the error, then consume the rune again;
// presumably this makes the error point at the offending rune.
ps.backup()
ps.error(errInvalidEscapeControl)
ps.next()
}
// NOTE(review): a byte is written below even when the rune was just
// rejected as out of range -- confirm that this is intended.
if byte(r) == '?' { // special-case: \c? => del
buf.WriteByte(byte(0x7F))
} else {
buf.WriteByte(byte(r - 0x40))
}
case 'x', 'u', 'U': // two, four, or eight hex digits
var n int
switch r {
case 'x':
n = 2
case 'u':
n = 4
case 'U':
n = 8
}
var rr rune
for i := 0; i < n; i++ {
d, ok := hexToDigit(ps.next())
if !ok {
// Leave the non-digit for the outer loop and stop early; the digits
// read so far are still written below.
ps.backup()
ps.error(errInvalidEscapeHex)
break
}
rr = rr*16 + d
}
buf.WriteRune(rr)
case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits
// The first digit has already been read; read up to two more.
rr := r - '0'
for i := 0; i < 2; i++ {
r := ps.next()
if r < '0' || r > '7' {
ps.backup()
ps.error(errInvalidEscapeOct)
break
}
rr = rr*8 + (r - '0')
}
buf.WriteRune(rr)
default:
if rr, ok := doubleEscape[r]; ok {
buf.WriteRune(rr)
} else {
// Unknown escape: record the error and skip the rune.
ps.backup()
ps.error(errInvalidEscape)
ps.next()
}
}
default:
buf.WriteRune(r)
}
}
}
// doubleEscape maps the character following a backslash in a double-quoted
// string to the rune it stands for. Only the simple one-character escapes are
// listed here.
var doubleEscape = map[rune]rune{
	// Same as in Go.
	'a':  '\a',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
	'v':  '\v',
	'\\': '\\',
	'"':  '"',
	// Additional: escape.
	'e': '\033',
}

// doubleUnescape is the inverse of doubleEscape; it is filled in by init.
var doubleUnescape = map[rune]rune{}

func init() {
	for escapeChar, value := range doubleEscape {
		doubleUnescape[value] = escapeChar
	}
}
// hexToDigit converts a hexadecimal digit, in either case, to its numeric
// value. The second result is false, and the first is -1, when r is not a
// hexadecimal digit.
func hexToDigit(r rune) (rune, bool) {
	if r >= '0' && r <= '9' {
		return r - '0', true
	}
	if r >= 'a' && r <= 'f' {
		return 10 + (r - 'a'), true
	}
	if r >= 'A' && r <= 'F' {
		return 10 + (r - 'A'), true
	}
	return -1, false
}
// variable parses a variable reference. The parser is positioned at the '$'.
// The name may be single-quoted, double-quoted or bare; a bare name may start
// with '@'.
func (pn *Primary) variable(ps *parser) {
pn.Type = Variable
// Consume the '$'.
ps.next()
switch r := ps.next(); r {
case eof:
ps.backup()
ps.error(errShouldBeVariableName)
ps.next()
case '\'':
pn.singleQuotedInner(ps)
case '"':
pn.doubleQuotedInner(ps)
default:
// The value is everything after the first byte of the node, taken once
// the whole name has been consumed.
// NOTE(review): assumes pn.From is the position of the '$' -- confirm.
defer func() { pn.Value = ps.src[pn.From+1 : ps.pos] }()
if !allowedInVariableName(r) && r != '@' {
ps.backup()
ps.error(errShouldBeVariableName)
}
for allowedInVariableName(ps.peek()) {
ps.next()
}
}
}
// allowedInVariableName reports whether r may appear in an unquoted variable
// name. The following are allowed:
//
//   - Anything beyond ASCII that is printable
//   - Letters and numbers
//   - The symbols "-_:~"
func allowedInVariableName(r rune) bool {
	switch {
	case r >= 0x80:
		return unicode.IsPrint(r)
	case '0' <= r && r <= '9', 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	}
	switch r {
	case '-', '_', ':', '~':
		return true
	}
	return false
}
// starWildcard parses a wildcard made up of a run of '*' runes and stores the
// text consumed as pn.Value.
func (pn *Primary) starWildcard(ps *parser) {
	pn.Type = Wildcard
	for {
		if ps.peek() != '*' {
			break
		}
		ps.next()
	}
	pn.Value = ps.src[pn.From:ps.pos]
}
// questionWildcard parses a '?' wildcard: at most one '?' is consumed, and the
// text consumed is stored as pn.Value.
func (pn *Primary) questionWildcard(ps *parser) {
	pn.Type = Wildcard
	if next := ps.peek(); next == '?' {
		ps.next()
	}
	text := ps.src[pn.From:ps.pos]
	pn.Value = text
}
// exitusCapture parses an exception capture, "?(" Chunk ")". The parser is
// positioned at the '?'; the caller has already checked for the "?(" prefix.
func (pn *Primary) exitusCapture(ps *parser) {
	// Consume the two runes of "?(" and record them as a single Sep.
	for i := 0; i < 2; i++ {
		ps.next()
	}
	addSep(pn, ps)

	pn.Type = ExceptionCapture

	ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
	if closed := parseSep(pn, ps, ')'); !closed {
		ps.error(errShouldBeRParen)
	}
}
// outputCapture parses an output capture, "(" Chunk ")". A missing closing
// parenthesis is recorded as an error.
func (pn *Primary) outputCapture(ps *parser) {
	pn.Type = OutputCapture

	parseSep(pn, ps, '(')
	ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
	if closed := parseSep(pn, ps, ')'); !closed {
		ps.error(errShouldBeRParen)
	}
}
// List   = '[' { Space } { Compound { Space } } ']'
// Map    = '[' { Space } { MapPair { Space } } ']'
//        | '[' { Space } '&' { Space } ']'
// Lambda = '[' { Space } { (Compound | MapPair) { Space } } ']' '{' Chunk '}'
//
// Space here includes newlines. Which of the three is meant is only known once
// the closing bracket has been passed: a '{' directly after it makes the
// expression a legacy lambda; otherwise the presence of map pairs, or of a
// lone '&', makes it a map.
func (pn *Primary) lbracket(ps *parser) {
parseSep(pn, ps, '[')
parseSpacesAndNewlines(pn, ps)
loneAmpersand := false
items:
for {
r := ps.peek()
switch {
case r == '&':
// Look one rune past the '&' to tell a map pair apart from the lone
// '&' of an empty map.
ps.next()
hasMapPair := startsCompound(ps.peek(), LHSExpr)
if !hasMapPair {
loneAmpersand = true
addSep(pn, ps)
parseSpacesAndNewlines(pn, ps)
break items
}
// Step back so that MapPair.parse sees the '&' itself.
ps.backup()
ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn)
case startsCompound(r, NormalExpr):
ps.parse(&Compound{}).addTo(&pn.Elements, pn)
default:
break items
}
parseSpacesAndNewlines(pn, ps)
}
if !parseSep(pn, ps, ']') {
ps.error(errShouldBeRBracket)
}
if parseSep(pn, ps, '{') {
pn.LegacyLambda = true
pn.lambda(ps)
} else {
if loneAmpersand || len(pn.MapPairs) > 0 {
if len(pn.Elements) > 0 {
// TODO(xiaq): Add correct position information.
ps.error(errBothElementsAndPairs)
}
pn.Type = Map
} else {
pn.Type = List
}
}
}
// lambda parses a lambda expression. The opening brace has been seen.
//
// For a legacy lambda the signature has already been parsed by lbracket, so
// only the body and the closing brace are parsed here. Otherwise an optional
// signature enclosed in '|' is parsed first; its compounds go to pn.Elements
// and its map pairs to pn.MapPairs.
func (pn *Primary) lambda(ps *parser) {
pn.Type = Lambda
if !pn.LegacyLambda {
parseSpacesAndNewlines(pn, ps)
if parseSep(pn, ps, '|') {
parseSpacesAndNewlines(pn, ps)
items:
for {
r := ps.peek()
switch {
case r == '&':
ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn)
case startsCompound(r, NormalExpr):
ps.parse(&Compound{}).addTo(&pn.Elements, pn)
default:
break items
}
parseSpacesAndNewlines(pn, ps)
}
// The signature must be closed by a second '|'.
if !parseSep(pn, ps, '|') {
ps.error(errShouldBePipe)
}
}
}
ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
if !parseSep(pn, ps, '}') {
ps.error(errShouldBeRBrace)
}
}
// Braced = '{' Compound { BracedSep Compounds } '}'
// BracedSep = { Space | '\n' } [ ',' ] { Space | '\n' }
//
// lbrace parses an expression starting with '{'. If the brace is directly
// followed by a pipeline separator, a '|' or inline whitespace, the expression
// is a lambda; otherwise it is a braced list.
func (pn *Primary) lbrace(ps *parser) {
parseSep(pn, ps, '{')
if r := ps.peek(); r == ';' || r == '\r' || r == '\n' || r == '|' || IsInlineWhitespace(r) {
pn.lambda(ps)
return
}
pn.Type = Braced
// TODO(xiaq): The compound can be empty, which allows us to parse {,foo}.
// Allowing compounds to be empty can be fragile in other cases.
ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn)
for isBracedSep(ps.peek()) {
parseSpacesAndNewlines(pn, ps)
// optional, so ignore the return value
parseSep(pn, ps, ',')
parseSpacesAndNewlines(pn, ps)
ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn)
}
if !parseSep(pn, ps, '}') {
ps.error(errShouldBeBraceSepOrRBracket)
}
}
// isBracedSep reports whether r can start a separator between the elements of
// a braced list: a comma or any whitespace.
func isBracedSep(r rune) bool {
return r == ',' || IsWhitespace(r)
}
// bareword parses a bareword: the longest run of runes that are allowed in
// barewords in the expression context of pn. The text consumed is stored as
// pn.Value.
func (pn *Primary) bareword(ps *parser) {
	pn.Type = Bareword
	for {
		if !allowedInBareword(ps.peek(), pn.ExprCtx) {
			break
		}
		ps.next()
	}
	pn.Value = ps.src[pn.From:ps.pos]
}
// allowedInBareword reports whether a rune is allowed in barewords in the
// given expression context. The special strictExpr context queries whether the
// rune is allowed in all contexts.
//
// The following are allowed in barewords:
//
//   - Anything allowed in variable names
//   - The symbols "./\@%+!"
//   - The symbol "=", if ctx != LHSExpr && ctx != strictExpr
//   - The symbol ",", if ctx != BracedElemExpr && ctx != strictExpr
//   - The symbols "<>*^", if ctx == CmdExpr
//
// The seemingly weird inclusion of \ is for easier path manipulation in
// Windows.
func allowedInBareword(r rune, ctx ExprCtx) bool {
	if allowedInVariableName(r) {
		return true
	}
	switch r {
	case '.', '/', '\\', '@', '%', '+', '!':
		return true
	case '=':
		return ctx != LHSExpr && ctx != strictExpr
	case ',':
		return ctx != BracedElemExpr && ctx != strictExpr
	case '<', '>', '*', '^':
		return ctx == CmdExpr
	}
	return false
}
// startsPrimary reports whether r can start a Primary in the context ctx: a
// quote, '$', '?', '*', an opening parenthesis, bracket or brace, or any rune
// allowed in barewords in that context.
func startsPrimary(r rune, ctx ExprCtx) bool {
	switch r {
	case '\'', '"', '$', '?', '*', '(', '[', '{':
		return true
	}
	return allowedInBareword(r, ctx)
}
// MapPair = '&' Compound [ '=' { Space | '\n' } Compound ]
type MapPair struct {
node
// Key is parsed in the LHSExpr context, so an unquoted '=' terminates it.
// Value is left nil when the key is not followed by '='.
Key, Value *Compound
}
// parse parses a map pair: '&', a non-empty key, and optionally '=' followed
// by whitespace, newlines and a value. The value may be an empty compound.
func (mpn *MapPair) parse(ps *parser) {
	parseSep(mpn, ps, '&')

	ps.parse(&Compound{ExprCtx: LHSExpr}).addAs(&mpn.Key, mpn)
	if len(mpn.Key.Indexings) == 0 {
		ps.error(errShouldBeCompound)
	}

	if !parseSep(mpn, ps, '=') {
		// Key only; Value stays nil.
		return
	}
	parseSpacesAndNewlines(mpn, ps)
	ps.parse(&Compound{}).addAs(&mpn.Value, mpn)
}
// Sep is the catch-all node type for leaf nodes that lack internal structure
// and semantics, and serve solely syntactic purposes. The parsing of
// separators depends on the parent node; as such it lacks a genuine parse
// method.
type Sep struct {
node
}
// NewSep makes a new Sep covering src[begin:end]. The new node has no parent
// and no children.
func NewSep(src string, begin, end int) *Sep {
	n := node{
		Ranging:    diag.Ranging{From: begin, To: end},
		sourceText: src[begin:end],
	}
	return &Sep{node: n}
}
// parse does nothing. Sep nodes are created by addSep rather than parsed.
func (*Sep) parse(*parser) {
// A no-op, only to satisfy the Node interface.
}
// addSep adds to n a Sep child covering the source text between the end of
// the last child of n (or the start of n, if it has no children yet) and the
// current parser position. Nothing is added when that span is empty.
func addSep(n Node, ps *parser) {
	var start int
	if kids := Children(n); len(kids) == 0 {
		start = n.Range().From
	} else {
		start = kids[len(kids)-1].Range().To
	}
	if start >= ps.pos {
		return
	}
	addChild(n, NewSep(ps.src, start, ps.pos))
}
// parseSep consumes the next rune if it equals sep, records the text consumed
// as a Sep child of n, and reports whether it did so. The parser is left
// untouched when the next rune is something else.
func parseSep(n Node, ps *parser, sep rune) bool {
	if ps.peek() != sep {
		return false
	}
	ps.next()
	addSep(n, ps)
	return true
}
// parseSpaces consumes inline whitespace, comments and line continuations,
// and adds what was consumed to n as a Sep. Newlines are not consumed.
func parseSpaces(n Node, ps *parser) {
parseSpacesInner(n, ps, false)
}
// parseSpacesAndNewlines consumes whitespace including newlines, comments and
// line continuations, and adds what was consumed to n as a Sep.
func parseSpacesAndNewlines(n Node, ps *parser) {
parseSpacesInner(n, ps, true)
}
// parseSpacesInner consumes a run of inline whitespace, comments and line
// continuations, and adds what was consumed to n as a single Sep. If newlines
// is true, carriage returns and newlines are consumed as well.
func parseSpacesInner(n Node, ps *parser, newlines bool) {
spaces:
for {
r := ps.peek()
switch {
case IsInlineWhitespace(r):
ps.next()
case newlines && IsWhitespace(r):
ps.next()
case r == '#':
// Comment is like inline whitespace as long as we don't include the
// trailing newline.
ps.next()
for {
r := ps.peek()
if r == eof || r == '\r' || r == '\n' {
break
}
ps.next()
}
case r == '^':
// Line continuation is like inline whitespace.
ps.next()
switch ps.peek() {
case '\r':
// Accept both "\r" and "\r\n" as the line ending.
ps.next()
if ps.peek() == '\n' {
ps.next()
}
case '\n':
ps.next()
case eof:
ps.error(errShouldBeNewline)
default:
// Not a line continuation after all: put the '^' back and stop.
ps.backup()
break spaces
}
default:
break spaces
}
}
addSep(n, ps)
}
// IsInlineWhitespace reports whether r is an inline whitespace character.
// Currently this includes space (Unicode 0x20) and tab (Unicode 0x9).
func IsInlineWhitespace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
// IsWhitespace reports whether r is a whitespace. Currently this includes
// inline whitespace characters, carriage return (Unicode 0xd) and newline
// (Unicode 0xa).
func IsWhitespace(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return IsInlineWhitespace(r)
}
// addChild appends ch to the children of p and sets p as the parent of ch.
func addChild(p Node, ch Node) {
p.n().addChild(ch)
ch.n().parent = p
}