Preliminary parsing of control structures.

This commit is contained in:
Qi Xiao 2016-02-14 15:28:14 +01:00
parent fe2b07e936
commit 0e981ee6de
7 changed files with 314 additions and 27 deletions

View File

@ -41,6 +41,19 @@ var styleForSep = map[string]string{
"}": "1", "}": "1",
"&": "1", "&": "1",
"if": "33",
"then": "33",
"elif": "33",
"else": "33",
"fi": "33",
"while": "33",
"do": "33",
"done": "33",
"for": "33",
"in": "33",
"begin": "33",
"end": "33",
} }
// Styles for semantic coloring. // Styles for semantic coloring.

View File

@ -194,6 +194,11 @@ func (cp *compiler) form(n *parse.Form) Op {
} }
} }
if n.Control != nil {
cp.errorf(n.Control.Begin(), "control structure not yet implemented")
return func(ec *EvalCtx) {}
}
headStr, ok := oneString(n.Head) headStr, ok := oneString(n.Head)
if ok { if ok {
compileForm, ok := builtinSpecials[headStr] compileForm, ok := builtinSpecials[headStr]

View File

@ -31,6 +31,11 @@ func (n *Form) addToAssignments(ch *Assignment) {
addChild(n, ch) addChild(n, ch)
} }
// setControl stores ch in the Control field of the Form and then passes it to
// addChild together with the Form.
func (n *Form) setControl(ch *Control) {
n.Control = ch
addChild(n, ch)
}
func (n *Form) setHead(ch *Compound) { func (n *Form) setHead(ch *Compound) {
n.Head = ch n.Head = ch
addChild(n, ch) addChild(n, ch)
@ -82,6 +87,49 @@ func parseAssignment(ps *parser) *Assignment {
return n return n
} }
// setCondition stores ch in the Condition field of the Control and then passes
// it to addChild together with the Control.
func (n *Control) setCondition(ch *Chunk) {
n.Condition = ch
addChild(n, ch)
}
// setIterator stores ch in the Iterator field of the Control and then passes
// it to addChild together with the Control.
func (n *Control) setIterator(ch *Primary) {
n.Iterator = ch
addChild(n, ch)
}
// setArray stores ch in the Array field of the Control and then passes it to
// addChild together with the Control.
func (n *Control) setArray(ch *Array) {
n.Array = ch
addChild(n, ch)
}
// setBody stores ch in the Body field of the Control and then passes it to
// addChild together with the Control.
func (n *Control) setBody(ch *Chunk) {
n.Body = ch
addChild(n, ch)
}
// addToElifConditions appends ch to the ElifConditions slice of the Control and
// then passes it to addChild together with the Control.
func (n *Control) addToElifConditions(ch *Chunk) {
n.ElifConditions = append(n.ElifConditions, ch)
addChild(n, ch)
}
// addToElifBodies appends ch to the ElifBodies slice of the Control and then
// passes it to addChild together with the Control.
func (n *Control) addToElifBodies(ch *Chunk) {
n.ElifBodies = append(n.ElifBodies, ch)
addChild(n, ch)
}
// setElseBody stores ch in the ElseBody field of the Control and then passes
// it to addChild together with the Control.
func (n *Control) setElseBody(ch *Chunk) {
n.ElseBody = ch
addChild(n, ch)
}
// parseControl builds a Control node starting at the current parser position.
// It delegates the actual parsing to (*Control).parse, passing leader through,
// and afterwards records where the node ends and the source text it spans.
func parseControl(ps *parser, leader string) *Control {
	ctrl := new(Control)
	ctrl.begin = ps.pos
	ctrl.parse(ps, leader)
	ctrl.end = ps.pos
	ctrl.sourceText = ps.src[ctrl.begin:ctrl.end]
	return ctrl
}
func (n *ExitusRedir) setDest(ch *Compound) { func (n *ExitusRedir) setDest(ch *Compound) {
n.Dest = ch n.Dest = ch
addChild(n, ch) addChild(n, ch)

View File

@ -2,17 +2,18 @@
package parse package parse
//go:generate ./boilerplate.py //go:generate ./boilerplate.py
//go:generate stringer -type=PrimaryType,RedirMode -output=string.go //go:generate stringer -type=PrimaryType,RedirMode,ControlKind -output=string.go
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"unicode" "unicode"
) )
// Parse parses elvish source. // Parse parses elvish source.
func Parse(src string) (*Chunk, error) { func Parse(src string) (*Chunk, error) {
ps := &parser{src, 0, 0, []map[rune]int{{}}, nil} ps := &parser{src, 0, 0, []map[rune]int{{}}, 0, nil}
bn := parseChunk(ps) bn := parseChunk(ps)
if ps.pos != len(src) { if ps.pos != len(src) {
ps.error(errUnexpectedRune) ps.error(errUnexpectedRune)
@ -26,19 +27,27 @@ func Parse(src string) (*Chunk, error) {
// Errors. // Errors.
var ( var (
errUnexpectedRune = errors.New("unexpected rune") errUnexpectedRune = errors.New("unexpected rune")
errShouldBeForm = newError("", "form") errShouldBeForm = newError("", "form")
errDuplicateExitusRedir = newError("duplicate exitus redir") errDuplicateExitusRedir = newError("duplicate exitus redir")
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'") errShouldBeThen = newError("", "then")
errShouldBeFD = newError("", "a composite term representing fd") errShouldBeElifOrElseOrFi = newError("", "elif", "else", "fi")
errShouldBeFilename = newError("", "a composite term representing filename") errShouldBeFi = newError("", "fi")
errShouldBeArray = newError("", "spaced") errShouldBeDo = newError("", "do")
errStringUnterminated = newError("string not terminated") errShouldBeDone = newError("", "done")
errInvalidEscape = newError("invalid escape sequence") errShouldBeIn = newError("", "in")
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit") errShouldBePipelineSep = newError("", "';'", "newline")
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit") errShouldBeEnd = newError("", "end")
errInvalidEscapeControl = newError("invalid control sequence", "a rune between @ (0x40) and _(0x5F)") errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
errShouldBePrimary = newError("", errShouldBeFD = newError("", "a composite term representing fd")
errShouldBeFilename = newError("", "a composite term representing filename")
errShouldBeArray = newError("", "spaced")
errStringUnterminated = newError("string not terminated")
errInvalidEscape = newError("invalid escape sequence")
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
errInvalidEscapeControl = newError("invalid control sequence", "a rune between @ (0x40) and _(0x5F)")
errShouldBePrimary = newError("",
"single-quoted string", "double-quoted string", "bareword") "single-quoted string", "double-quoted string", "bareword")
errShouldBeVariableName = newError("", "variable name") errShouldBeVariableName = newError("", "variable name")
errShouldBeRBracket = newError("", "']'") errShouldBeRBracket = newError("", "']'")
@ -60,6 +69,18 @@ type Chunk struct {
func (bn *Chunk) parse(ps *parser) { func (bn *Chunk) parse(ps *parser) {
bn.parseSeps(ps) bn.parseSeps(ps)
for startsPipeline(ps.peek()) { for startsPipeline(ps.peek()) {
leader, starter := findLeader(ps)
if leader != "" && !starter && ps.controls > 0 {
// We found a non-starting leader and there is a control block that
// has not been closed. Stop parsing this chunk. We don't check the
// validity of the leader here; that is done where the control block
// is parsed, in (*Control).parse.
break
}
// We could check the validity of the leader here as well, but it will
// eventually be checked in (*Form).parse. So we don't check it here,
// for more uniform error reporting and recovery.
bn.addToPipelines(parsePipeline(ps)) bn.addToPipelines(parsePipeline(ps))
if bn.parseSeps(ps) == 0 { if bn.parseSeps(ps) == 0 {
break break
@ -126,10 +147,27 @@ func startsPipeline(r rune) bool {
return startsForm(r) return startsForm(r)
} }
// Form = { Space } { { Assignment } { Space } } Compound { Space } { ( Compound | MapPair | Redir | ExitusRedir ) { Space } } // findLeader looks ahead for a command leader. It returns the leader and whether
// it starts a control block.
func findLeader(ps *parser) (string, bool) {
	// The lookahead does not consume any input; callers decide whether to
	// advance past the leader.
	switch leader := ps.findPossibleLeader(); leader {
	case "if", "while", "for", "begin":
		// Starting leaders open a new control block and are always legal.
		return leader, true
	case "then", "elif", "else", "fi", "do", "done", "end":
		// Non-starting leaders continue or close a control block that is
		// already open. "do" belongs here: it never opens a block by itself,
		// it only introduces the body of a "while" or "for" block, so the
		// chunk before it must stop when it is seen.
		return leader, false
	default:
		// There is no leader.
		return "", false
	}
}
// Form = { Space } { { Assignment } { Space } }
// { Compound | Control } { Space } { ( Compound | MapPair | Redir | ExitusRedir ) { Space } }
type Form struct { type Form struct {
node node
Assignments []*Assignment Assignments []*Assignment
Control *Control
Head *Compound Head *Compound
Args []*Compound Args []*Compound
NamedArgs []*MapPair NamedArgs []*MapPair
@ -142,14 +180,24 @@ func (fn *Form) parse(ps *parser) {
for fn.tryAssignment(ps) { for fn.tryAssignment(ps) {
parseSpaces(fn, ps) parseSpaces(fn, ps)
} }
if !startsCompound(ps.peek()) { leader, starter := findLeader(ps)
if leader != "" {
// Parse Control.
if starter {
fn.setControl(parseControl(ps, leader))
} else {
ps.error(fmt.Errorf("bogus command leader %q ignored", leader))
}
} else if startsCompound(ps.peek()) {
// Parse Head.
fn.setHead(parseCompound(ps))
parseSpaces(fn, ps)
} else {
if len(fn.Assignments) > 0 { if len(fn.Assignments) > 0 {
return return
} }
ps.error(errShouldBeCompound) ps.error(errShouldBeCompound)
} }
fn.setHead(parseCompound(ps))
parseSpaces(fn, ps)
for { for {
r := ps.peek() r := ps.peek()
@ -203,6 +251,9 @@ func (fn *Form) tryAssignment(ps *parser) bool {
return true return true
} }
// parseIf is an empty placeholder that does nothing; "if" control structures
// are parsed by (*Control).parse.
func (fn *Form) parseIf(ps *parser) {
}
func startsForm(r rune) bool { func startsForm(r rune) bool {
return isSpace(r) || startsCompound(r) return isSpace(r) || startsCompound(r)
} }
@ -225,6 +276,128 @@ func (an *Assignment) parse(ps *parser) {
an.setSrc(parseCompound(ps)) an.setSrc(parseCompound(ps))
} }
// Control is the AST node of a control structure. Which of its fields are
// populated depends on Kind. Its grammar is:
//
// Control = IfControl | WhileControl | ForControl | BeginControl
// IfControl = If Chunk Then Chunk { Elif Chunk Then Chunk } [ Else Chunk ] Fi
// WhileControl = While Chunk Do Chunk [ Else Chunk ] Done
// ForControl = For Primary In Array PipelineSep Do Chunk [ Else Chunk ] Done
// BeginControl = Begin Chunk End
// If = "if" Space { Space }
// (Similar for Then, Elif, Else, Fi, While, Do, Done, For, Begin, End)
type Control struct {
node
Kind ControlKind
Condition *Chunk // Valid for IfControl and WhileControl.
Iterator *Primary // Valid for ForControl.
Array *Array // Valid for ForControl.
Body *Chunk // Valid for all.
ElifConditions []*Chunk // Valid for IfControl.
ElifBodies []*Chunk // Valid for IfControl.
ElseBody *Chunk // Valid for IfControl, WhileControl and ForControl.
}
// ControlKind identifies which control structure a Control represents.
type ControlKind int
// Possible values of ControlKind.
const (
// BadControl is the zero value; a Control keeps it when its leader is not
// one of the leaders handled by (*Control).parse.
BadControl ControlKind = iota
// IfControl is set for a control structure led by "if".
IfControl
// WhileControl is set for a control structure led by "while".
WhileControl
// ForControl is set for a control structure led by "for".
ForControl
// BeginControl is set for a control structure led by "begin".
BeginControl
)
// parse parses a control structure whose starting leader has been detected at
// the current parser position but not yet consumed. leader is that starting
// leader ("if", "while", "for" or "begin"); any other value is reported as a
// parser error. Syntax errors are reported through ps.error and parsing
// continues as far as possible.
func (ctrl *Control) parse(ps *parser, leader string) {
	ps.advance(len(leader))
	addSep(ctrl, ps)

	// While this counter is positive, (*Chunk).parse stops at non-starting
	// leaders instead of trying to parse them as commands.
	ps.controls++
	defer func() { ps.controls-- }()

	// consumeLeader consumes the command leader at the current position, if
	// there is one, and returns it. It returns "" and consumes nothing when
	// there is no leader.
	consumeLeader := func() string {
		found, _ := findLeader(ps)
		if len(found) > 0 {
			ps.advance(len(found))
			addSep(ctrl, ps)
		}
		return found
	}

	// doElseDone parses the common tail of "while" and "for":
	// Do Chunk [ Else Chunk ] Done.
	doElseDone := func() {
		if consumeLeader() != "do" {
			ps.error(errShouldBeDo)
		}
		ctrl.setBody(parseChunk(ps))
		if next, _ := findLeader(ps); next == "else" {
			consumeLeader()
			ctrl.setElseBody(parseChunk(ps))
		}
		if consumeLeader() != "done" {
			ps.error(errShouldBeDone)
		}
	}

	switch leader {
	case "if":
		ctrl.Kind = IfControl
		ctrl.setCondition(parseChunk(ps))
		if consumeLeader() != "then" {
			ps.error(errShouldBeThen)
		}
		ctrl.setBody(parseChunk(ps))
	Elifs:
		for {
			switch consumeLeader() {
			case "fi":
				break Elifs
			case "elif":
				ctrl.addToElifConditions(parseChunk(ps))
				if consumeLeader() != "then" {
					ps.error(errShouldBeThen)
				}
				ctrl.addToElifBodies(parseChunk(ps))
			case "else":
				ctrl.setElseBody(parseChunk(ps))
				if consumeLeader() != "fi" {
					ps.error(errShouldBeFi)
				}
				break Elifs
			default:
				ps.error(errShouldBeElifOrElseOrFi)
				break Elifs
			}
		}
	case "while":
		ctrl.Kind = WhileControl
		ctrl.setCondition(parseChunk(ps))
		doElseDone()
	case "for":
		ctrl.Kind = ForControl
		parseSpaces(ctrl, ps)
		ctrl.setIterator(parsePrimary(ps))
		parseSpaces(ctrl, ps)
		// "in" is not one of the words findLeader reports as a leader, so
		// consumeLeader can never return it. Look for the word directly.
		if ps.findPossibleLeader() == "in" {
			ps.advance(len("in"))
			addSep(ctrl, ps)
		} else {
			ps.error(errShouldBeIn)
		}
		ctrl.setArray(parseArray(ps))
		switch ps.peek() {
		case '\n', ';':
			ps.next()
		default:
			ps.error(errShouldBePipelineSep)
		}
		doElseDone()
	case "begin":
		ctrl.Kind = BeginControl
		ctrl.setBody(parseChunk(ps))
		if consumeLeader() != "end" {
			ps.error(errShouldBeEnd)
		}
	default:
		ps.error(fmt.Errorf("unknown leader %q; parser error", leader))
	}
}
// ExitusRedir = '?' '>' { Space } Compound // ExitusRedir = '?' '>' { Space } Compound
type ExitusRedir struct { type ExitusRedir struct {
node node

View File

@ -31,6 +31,14 @@ var goodCases = []struct {
// Lots of unnecessary whitespaces // Lots of unnecessary whitespaces
{" ;\n\n ls \t ;\n", ast{"Chunk", fs{"Pipelines": []string{"ls \t "}}}}, {" ;\n\n ls \t ;\n", ast{"Chunk", fs{"Pipelines": []string{"ls \t "}}}},
// Control structures.
/*
{"if true; then echo then; else echo else; fi",
ast{"Chunk/Pipeline/Form", fs{
"Control": "233",
}}},
*/
// Form // Form
{"ls x y", ast{"Chunk/Pipeline/Form", fs{ {"ls x y", ast{"Chunk/Pipeline/Form", fs{
"Head": "ls", "Head": "ls",

View File

@ -13,11 +13,12 @@ import (
// //
// NOTE: The src member is assumed to be valid UTF-8. // NOTE: The src member is assumed to be valid UTF-8.
type parser struct { type parser struct {
src string src string
pos int pos int
overEOF int overEOF int
cutsets []map[rune]int cutsets []map[rune]int
errors *errutil.Errors controls int
errors *errutil.Errors
} }
const eof rune = -1 const eof rune = -1
@ -41,6 +42,26 @@ func (ps *parser) hasPrefix(prefix string) bool {
return strings.HasPrefix(ps.src[ps.pos:], prefix) return strings.HasPrefix(ps.src[ps.pos:], prefix)
} }
// findPossibleLeader looks ahead, without consuming input, for a run of runes
// in [a-z] that is followed by ' ', '\t' or '\n', or that extends to the end
// of the source. It returns that run, which may be empty. If the run is
// followed by any other rune, it returns an empty string. It is useful for
// looking for command leaders.
func (ps *parser) findPossibleLeader() string {
	remaining := ps.src[ps.pos:]
	end := strings.IndexFunc(remaining, func(r rune) bool {
		return !('a' <= r && r <= 'z')
	})
	if end < 0 {
		// Nothing but [a-z] up to the end of the source.
		return remaining
	}
	if c := remaining[end]; c == ' ' || c == '\t' || c == '\n' {
		return remaining[:end]
	}
	return ""
}
func (ps *parser) next() rune { func (ps *parser) next() rune {
if ps.pos == len(ps.src) { if ps.pos == len(ps.src) {
ps.overEOF++ ps.overEOF++
@ -63,6 +84,14 @@ func (ps *parser) backup() {
ps.pos -= s ps.pos -= s
} }
// advance moves the parser position forward by c bytes. If that would move
// past the end of the source, the position is clamped to the end and the
// number of bytes of overshoot is stored in overEOF.
func (ps *parser) advance(c int) {
	target, limit := ps.pos+c, len(ps.src)
	if target <= limit {
		ps.pos = target
		return
	}
	ps.overEOF = target - limit
	ps.pos = limit
}
func (ps *parser) error(e error) { func (ps *parser) error(e error) {
if ps.errors == nil { if ps.errors == nil {
ps.errors = &errutil.Errors{} ps.errors = &errutil.Errors{}

View File

@ -1,4 +1,4 @@
// generated by stringer -type=PrimaryType,RedirMode -output=string.go; DO NOT EDIT // Code generated by "stringer -type=PrimaryType,RedirMode,ControlKind -output=string.go"; DO NOT EDIT
package parse package parse
@ -9,7 +9,7 @@ const _PrimaryType_name = "BadPrimaryBarewordSingleQuotedDoubleQuotedVariableWil
var _PrimaryType_index = [...]uint8{0, 10, 18, 30, 42, 50, 58, 63, 75, 88, 92, 98, 101, 107} var _PrimaryType_index = [...]uint8{0, 10, 18, 30, 42, 50, 58, 63, 75, 88, 92, 98, 101, 107}
func (i PrimaryType) String() string { func (i PrimaryType) String() string {
if i < 0 || i+1 >= PrimaryType(len(_PrimaryType_index)) { if i < 0 || i >= PrimaryType(len(_PrimaryType_index)-1) {
return fmt.Sprintf("PrimaryType(%d)", i) return fmt.Sprintf("PrimaryType(%d)", i)
} }
return _PrimaryType_name[_PrimaryType_index[i]:_PrimaryType_index[i+1]] return _PrimaryType_name[_PrimaryType_index[i]:_PrimaryType_index[i+1]]
@ -20,8 +20,19 @@ const _RedirMode_name = "BadRedirModeReadWriteReadWriteAppend"
var _RedirMode_index = [...]uint8{0, 12, 16, 21, 30, 36} var _RedirMode_index = [...]uint8{0, 12, 16, 21, 30, 36}
func (i RedirMode) String() string { func (i RedirMode) String() string {
if i < 0 || i+1 >= RedirMode(len(_RedirMode_index)) { if i < 0 || i >= RedirMode(len(_RedirMode_index)-1) {
return fmt.Sprintf("RedirMode(%d)", i) return fmt.Sprintf("RedirMode(%d)", i)
} }
return _RedirMode_name[_RedirMode_index[i]:_RedirMode_index[i+1]] return _RedirMode_name[_RedirMode_index[i]:_RedirMode_index[i+1]]
} }
const _ControlKind_name = "BadControlIfControlWhileControlForControlBeginControl"
var _ControlKind_index = [...]uint8{0, 10, 19, 31, 41, 53}
// String returns the name of the ControlKind constant, or "ControlKind(N)"
// when i is outside the range covered by _ControlKind_index.
func (i ControlKind) String() string {
if i < 0 || i >= ControlKind(len(_ControlKind_index)-1) {
return fmt.Sprintf("ControlKind(%d)", i)
}
return _ControlKind_name[_ControlKind_index[i]:_ControlKind_index[i+1]]
}