parse: Simplify parsing expressions in special contexts.

This commit is contained in:
Qi Xiao 2017-12-31 18:51:26 +00:00
parent 64fd1fc3cd
commit d8dc0b6cf2
6 changed files with 80 additions and 133 deletions

View File

@ -115,7 +115,7 @@ var primaryTests = []emitTests{
func TestPrimary(t *testing.T) {
test(t, "primary", primaryTests,
func(e *Emitter, ps *parse.Parser) {
e.primary(parse.ParsePrimary(ps, false))
e.primary(parse.ParsePrimary(ps, parse.NormalExpr))
})
}

View File

@ -207,9 +207,9 @@ func (n *Compound) addToIndexings(ch *Indexing) {
addChild(n, ch)
}
func ParseCompound(ps *Parser, head bool) *Compound {
func ParseCompound(ps *Parser, ctx ExprCtx) *Compound {
n := &Compound{node: node{begin: ps.pos}}
n.parse(ps, head)
n.parse(ps, ctx)
n.end = ps.pos
n.sourceText = ps.src[n.begin:n.end]
return n
@ -237,9 +237,9 @@ func (n *Indexing) addToIndicies(ch *Array) {
addChild(n, ch)
}
func ParseIndexing(ps *Parser, head bool) *Indexing {
func ParseIndexing(ps *Parser, ctx ExprCtx) *Indexing {
n := &Indexing{node: node{begin: ps.pos}}
n.parse(ps, head)
n.parse(ps, ctx)
n.end = ps.pos
n.sourceText = ps.src[n.begin:n.end]
return n
@ -302,9 +302,9 @@ func (n *Primary) addToBraced(ch *Compound) {
addChild(n, ch)
}
func ParsePrimary(ps *Parser, head bool) *Primary {
func ParsePrimary(ps *Parser, ctx ExprCtx) *Primary {
n := &Primary{node: node{begin: ps.pos}}
n.parse(ps, head)
n.parse(ps, ctx)
n.end = ps.pos
n.sourceText = ps.src[n.begin:n.end]
return n

View File

@ -150,7 +150,7 @@ func (fn *Form) parse(ps *Parser) {
}
// Parse head.
if !startsCompound(ps.peek(), true) {
if !startsCompound(ps.peek(), CmdExpr) {
if len(fn.Assignments) > 0 {
// Assignment-only form.
return
@ -158,7 +158,7 @@ func (fn *Form) parse(ps *Parser) {
// Bad form.
ps.error(fmt.Errorf("bad rune at form head: %q", ps.peek()))
}
fn.setHead(ParseCompound(ps, true))
fn.setHead(ParseCompound(ps, CmdExpr))
parseSpaces(fn, ps)
for {
@ -166,14 +166,14 @@ func (fn *Form) parse(ps *Parser) {
switch {
case r == '&':
ps.next()
hasMapPair := startsCompound(ps.peek(), false)
hasMapPair := startsCompound(ps.peek(), LHSExpr)
ps.backup()
if !hasMapPair {
// background indicator
return
}
fn.addToOpts(ParseMapPair(ps))
case startsCompound(r, false):
case startsCompound(r, NormalExpr):
if ps.hasPrefix("?>") {
if fn.ExitusRedir != nil {
ps.error(errDuplicateExitusRedir)
@ -184,7 +184,7 @@ func (fn *Form) parse(ps *Parser) {
}
continue
}
cn := ParseCompound(ps, false)
cn := ParseCompound(ps, NormalExpr)
if isRedirSign(ps.peek()) {
// Redir
fn.addToRedirs(ParseRedir(ps, cn))
@ -226,7 +226,7 @@ func (fn *Form) parse(ps *Parser) {
// assignment to fn.Assignments and returns true. Otherwise it rewinds the
// parser and returns false.
func (fn *Form) tryAssignment(ps *Parser) bool {
if !startsIndexing(ps.peek(), false) || ps.peek() == '=' {
if !startsIndexing(ps.peek(), LHSExpr) {
return false
}
@ -244,7 +244,7 @@ func (fn *Form) tryAssignment(ps *Parser) bool {
}
func startsForm(r rune) bool {
return IsSpace(r) || startsCompound(r, true)
return IsSpace(r) || startsCompound(r, CmdExpr)
}
// Assignment = Indexing '=' Compound
@ -255,18 +255,16 @@ type Assignment struct {
}
func (an *Assignment) parse(ps *Parser) {
ps.cut('=')
an.setLeft(ParseIndexing(ps, false))
an.setLeft(ParseIndexing(ps, LHSExpr))
head := an.Left.Head
if !checkVariableInAssignment(head, ps) {
ps.errorp(head.Begin(), head.End(), errShouldBeVariableName)
}
ps.uncut('=')
if !parseSep(an, ps, '=') {
ps.error(errShouldBeEqual)
}
an.setRight(ParseCompound(ps, false))
an.setRight(ParseCompound(ps, NormalExpr))
}
func checkVariableInAssignment(p *Primary, ps *Parser) bool {
@ -300,7 +298,7 @@ func (ern *ExitusRedir) parse(ps *Parser) {
ps.next()
addSep(ern, ps)
parseSpaces(ern, ps)
ern.setDest(ParseCompound(ps, false))
ern.setDest(ParseCompound(ps, NormalExpr))
}
// Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound )
@ -341,7 +339,7 @@ func (rn *Redir) parse(ps *Parser, dest *Compound) {
if parseSep(rn, ps, '&') {
rn.RightIsFd = true
}
rn.setRight(ParseCompound(ps, false))
rn.setRight(ParseCompound(ps, NormalExpr))
if len(rn.Right.Indexings) == 0 {
if rn.RightIsFd {
ps.error(errShouldBeFD)
@ -374,10 +372,30 @@ type Compound struct {
Indexings []*Indexing
}
func (cn *Compound) parse(ps *Parser, head bool) {
// ExprCtx represents special contexts of expression parsing.
type ExprCtx int
const (
// NormalExpr represents a normal expression, namely none of the special
// ones below.
NormalExpr ExprCtx = iota
// CmdExpr represents an expression used as the command in a form. In this
// context, unquoted <>*^ are treated as bareword characters.
CmdExpr
// LHSExpr represents an expression used as the left-hand-side in either
// assignments or map pairs. In this context, an unquoted = serves as an
// expression terminator and is thus not treated as a bareword character.
LHSExpr
// BracedElemExpr represents an expression used as an element in a braced
// expression. In this context, an unquoted , serves as an expression
// terminator and is thus not treated as a bareword character.
BracedElemExpr
)
func (cn *Compound) parse(ps *Parser, ctx ExprCtx) {
cn.tilde(ps)
for startsIndexing(ps.peek(), head) {
cn.addToIndexings(ParseIndexing(ps, head))
for startsIndexing(ps.peek(), ctx) {
cn.addToIndexings(ParseIndexing(ps, ctx))
}
}
@ -395,8 +413,8 @@ func (cn *Compound) tilde(ps *Parser) {
}
}
func startsCompound(r rune, head bool) bool {
return startsIndexing(r, head)
func startsCompound(r rune, ctx ExprCtx) bool {
return startsIndexing(r, ctx)
}
// Indexing = Primary { '[' Array ']' }
@ -406,16 +424,14 @@ type Indexing struct {
Indicies []*Array
}
func (in *Indexing) parse(ps *Parser, head bool) {
in.setHead(ParsePrimary(ps, head))
func (in *Indexing) parse(ps *Parser, ctx ExprCtx) {
in.setHead(ParsePrimary(ps, ctx))
for parseSep(in, ps, '[') {
if !startsArray(ps.peek()) {
ps.error(errShouldBeArray)
}
ps.pushCutset()
in.addToIndicies(ParseArray(ps, false))
ps.popCutset()
if !parseSep(in, ps, ']') {
ps.error(errShouldBeRBracket)
@ -424,8 +440,8 @@ func (in *Indexing) parse(ps *Parser, head bool) {
}
}
func startsIndexing(r rune, head bool) bool {
return startsPrimary(r, head)
func startsIndexing(r rune, ctx ExprCtx) bool {
return startsPrimary(r, ctx)
}
// Array = { Space | '\n' } { Compound { Space | '\n' } }
@ -450,8 +466,8 @@ func (sn *Array) parse(ps *Parser, allowSemicolon bool) {
}
parseSep()
for startsCompound(ps.peek(), false) {
sn.addToCompounds(ParseCompound(ps, false))
for startsCompound(ps.peek(), NormalExpr) {
sn.addToCompounds(ParseCompound(ps, NormalExpr))
parseSep()
}
}
@ -461,7 +477,7 @@ func IsSpace(r rune) bool {
}
func startsArray(r rune) bool {
return IsSpaceOrNewline(r) || startsIndexing(r, false)
return IsSpaceOrNewline(r) || startsIndexing(r, NormalExpr)
}
// Primary is the smallest expression unit.
@ -497,17 +513,17 @@ const (
Braced
)
func (pn *Primary) parse(ps *Parser, head bool) {
func (pn *Primary) parse(ps *Parser, ctx ExprCtx) {
r := ps.peek()
if !startsPrimary(r, head) {
if !startsPrimary(r, ctx) {
ps.error(errShouldBePrimary)
return
}
// Try bareword early, since it has precedence over wildcard on *
// when head is true.
if allowedInBareword(r, head) {
pn.bareword(ps, head)
// when ctx = commandExpr.
if allowedInBareword(r, ctx) {
pn.bareword(ps, ctx)
return
}
@ -711,9 +727,7 @@ func (pn *Primary) exitusCapture(ps *Parser) {
pn.Type = ExceptionCapture
ps.pushCutset()
pn.setChunk(ParseChunk(ps))
ps.popCutset()
if !parseSep(pn, ps, ')') {
ps.error(errShouldBeRParen)
@ -724,9 +738,7 @@ func (pn *Primary) outputCapture(ps *Parser) {
pn.Type = OutputCapture
parseSep(pn, ps, '(')
ps.pushCutset()
pn.setChunk(ParseChunk(ps))
ps.popCutset()
if !parseSep(pn, ps, ')') {
ps.error(errShouldBeRParen)
@ -743,14 +755,13 @@ func (pn *Primary) lbracket(ps *Parser) {
parseSpacesAndNewlines(pn, ps)
loneAmpersand := false
ps.pushCutset()
items:
for {
r := ps.peek()
switch {
case r == '&':
ps.next()
hasMapPair := startsCompound(ps.peek(), false)
hasMapPair := startsCompound(ps.peek(), LHSExpr)
if !hasMapPair {
loneAmpersand = true
addSep(pn, ps)
@ -759,14 +770,13 @@ items:
}
ps.backup()
pn.addToMapPairs(ParseMapPair(ps))
case startsCompound(r, false):
pn.addToElements(ParseCompound(ps, false))
case startsCompound(r, NormalExpr):
pn.addToElements(ParseCompound(ps, NormalExpr))
default:
break items
}
parseSpacesAndNewlines(pn, ps)
}
ps.popCutset()
if !parseSep(pn, ps, ']') {
ps.error(errShouldBeRBracket)
@ -788,9 +798,7 @@ items:
// lambda parses a lambda expression. The opening brace has been seen.
func (pn *Primary) lambda(ps *Parser) {
pn.Type = Lambda
ps.pushCutset()
pn.setChunk(ParseChunk(ps))
ps.popCutset()
if !parseSep(pn, ps, '}') {
ps.error(errShouldBeRBrace)
}
@ -808,14 +816,9 @@ func (pn *Primary) lbrace(ps *Parser) {
pn.Type = Braced
ps.pushCutset()
defer ps.popCutset()
// XXX: The compound can be empty, which allows us to parse {,foo}.
// Allowing compounds to be empty can be fragile in other cases.
ps.cut(',')
pn.addToBraced(ParseCompound(ps, false))
ps.uncut(',')
pn.addToBraced(ParseCompound(ps, BracedElemExpr))
for isBracedSep(ps.peek()) {
parseSpacesAndNewlines(pn, ps)
@ -823,9 +826,7 @@ func (pn *Primary) lbrace(ps *Parser) {
parseSep(pn, ps, ',')
parseSpacesAndNewlines(pn, ps)
ps.cut(',')
pn.addToBraced(ParseCompound(ps, false))
ps.uncut(',')
pn.addToBraced(ParseCompound(ps, BracedElemExpr))
}
if !parseSep(pn, ps, '}') {
ps.error(errShouldBeBraceSepOrRBracket)
@ -836,30 +837,33 @@ func isBracedSep(r rune) bool {
return r == ',' || IsSpaceOrNewline(r)
}
func (pn *Primary) bareword(ps *Parser, head bool) {
func (pn *Primary) bareword(ps *Parser, ctx ExprCtx) {
pn.Type = Bareword
defer func() { pn.Value = ps.src[pn.begin:ps.pos] }()
for allowedInBareword(ps.peek(), head) {
for allowedInBareword(ps.peek(), ctx) {
ps.next()
}
}
// The following are allowed in barewords:
// * Anything allowed in variable names
// * The symbols "%+,./=@!\"
// * The symbols "<>*^", if the bareword is in head
// * The symbols "./\@%+!"
// * The symbol "=", if ctx != lhsExpr
// * The symbol ",", if ctx != bracedExpr
// * The symbols "<>*^", if ctx = commandExpr
//
// The seemingly weird inclusion of \ is for easier path manipulation in
// Windows.
func allowedInBareword(r rune, head bool) bool {
return allowedInVariableName(r) ||
r == '%' || r == '+' || r == ',' || r == '.' ||
r == '/' || r == '=' || r == '@' || r == '!' || r == '\\' ||
(head && (r == '<' || r == '>' || r == '*' || r == '^'))
func allowedInBareword(r rune, ctx ExprCtx) bool {
return allowedInVariableName(r) || r == '.' || r == '/' || r == '\\' ||
r == '@' || r == '%' || r == '+' || r == '!' ||
(ctx != LHSExpr && r == '=') ||
(ctx != BracedElemExpr && r == ',') ||
(ctx == CmdExpr && (r == '<' || r == '>' || r == '*' || r == '^'))
}
func startsPrimary(r rune, head bool) bool {
return r == '\'' || r == '"' || r == '$' || allowedInBareword(r, head) ||
func startsPrimary(r rune, ctx ExprCtx) bool {
return r == '\'' || r == '"' || r == '$' || allowedInBareword(r, ctx) ||
r == '?' || r == '*' || r == '(' || r == '[' || r == '{'
}
@ -872,18 +876,15 @@ type MapPair struct {
func (mpn *MapPair) parse(ps *Parser) {
parseSep(mpn, ps, '&')
// Parse key part, cutting on '='.
ps.cut('=')
mpn.setKey(ParseCompound(ps, false))
mpn.setKey(ParseCompound(ps, LHSExpr))
if len(mpn.Key.Indexings) == 0 {
ps.error(errShouldBeCompound)
}
ps.uncut('=')
if parseSep(mpn, ps, '=') {
parseSpacesAndNewlines(mpn, ps)
// Parse value part.
mpn.setValue(ParseCompound(ps, false))
mpn.setValue(ParseCompound(ps, NormalExpr))
// The value part can be empty.
}
}

View File

@ -56,9 +56,6 @@ func (ps *Parser) peek() rune {
return eof
}
r, _ := utf8.DecodeRuneInString(ps.src[ps.pos:])
if ps.currentCutset()[r] > 0 {
return eof
}
return r
}
@ -66,34 +63,12 @@ func (ps *Parser) hasPrefix(prefix string) bool {
return strings.HasPrefix(ps.src[ps.pos:], prefix)
}
// findWord looks ahead for [a-z]* that is also a valid compound. If the
// lookahead fails, it returns an empty string. It is useful for looking for
// command leaders.
func (ps *Parser) findPossibleLeader() string {
rest := ps.src[ps.pos:]
i := strings.IndexFunc(rest, func(r rune) bool {
return r < 'a' || r > 'z'
})
if i == -1 {
// The whole rest is just one possible leader.
return rest
}
r, _ := utf8.DecodeRuneInString(rest[i:])
if startsPrimary(r, false) {
return ""
}
return rest[:i]
}
func (ps *Parser) next() rune {
if ps.pos == len(ps.src) {
ps.overEOF++
return eof
}
r, s := utf8.DecodeRuneInString(ps.src[ps.pos:])
if ps.currentCutset()[r] > 0 {
return eof
}
ps.pos += s
return r
}
@ -127,35 +102,6 @@ func (ps *Parser) error(e error) {
ps.errorp(ps.pos, end, e)
}
func (ps *Parser) pushCutset(rs ...rune) {
ps.cutsets = append(ps.cutsets, map[rune]int{})
ps.cut(rs...)
}
func (ps *Parser) popCutset() {
n := len(ps.cutsets)
ps.cutsets[n-1] = nil
ps.cutsets = ps.cutsets[:n-1]
}
func (ps *Parser) currentCutset() map[rune]int {
return ps.cutsets[len(ps.cutsets)-1]
}
func (ps *Parser) cut(rs ...rune) {
cutset := ps.currentCutset()
for _, r := range rs {
cutset[r]++
}
}
func (ps *Parser) uncut(rs ...rune) {
cutset := ps.currentCutset()
for _, r := range rs {
cutset[r]--
}
}
func newError(text string, shouldbe ...string) error {
if len(shouldbe) == 0 {
return errors.New(text)

View File

@ -39,7 +39,7 @@ func QuoteAs(s string, q PrimaryType) (string, PrimaryType) {
// Contains unprintable character; force double quote.
return quoteDouble(s), DoubleQuoted
}
if !allowedInBareword(r, false) {
if !allowedInBareword(r, NormalExpr) {
bare = false
}
}

View File

@ -2,7 +2,7 @@
package parse
import "fmt"
import "strconv"
const _PrimaryType_name = "BadPrimaryBarewordSingleQuotedDoubleQuotedVariableWildcardTildeExceptionCaptureOutputCaptureListLambdaMapBraced"
@ -10,7 +10,7 @@ var _PrimaryType_index = [...]uint8{0, 10, 18, 30, 42, 50, 58, 63, 79, 92, 96, 1
func (i PrimaryType) String() string {
if i < 0 || i >= PrimaryType(len(_PrimaryType_index)-1) {
return fmt.Sprintf("PrimaryType(%d)", i)
return "PrimaryType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _PrimaryType_name[_PrimaryType_index[i]:_PrimaryType_index[i+1]]
}
@ -21,7 +21,7 @@ var _RedirMode_index = [...]uint8{0, 12, 16, 21, 30, 36}
func (i RedirMode) String() string {
if i < 0 || i >= RedirMode(len(_RedirMode_index)-1) {
return fmt.Sprintf("RedirMode(%d)", i)
return "RedirMode(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _RedirMode_name[_RedirMode_index[i]:_RedirMode_index[i+1]]
}