2016-02-24 20:00:11 +08:00
|
|
|
package parse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"unicode"
|
2022-05-01 08:32:05 +08:00
|
|
|
"unicode/utf8"
|
2016-02-24 20:00:11 +08:00
|
|
|
)
|
|
|
|
|
2018-10-14 00:52:54 +08:00
|
|
|
// Quote returns a valid Elvish expression that evaluates to the given string.
|
|
|
|
// If s is a valid bareword, it is returned as is; otherwise it is quoted,
|
|
|
|
// preferring the use of single quotes.
|
2016-02-24 20:00:11 +08:00
|
|
|
func Quote(s string) string {
|
|
|
|
s, _ = QuoteAs(s, Bareword)
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2021-01-13 07:05:01 +08:00
|
|
|
// QuoteVariableName is like Quote, but quotes s if it contains any character
|
|
|
|
// that may not appear unquoted in variable names.
|
|
|
|
func QuoteVariableName(s string) string {
|
|
|
|
if s == "" {
|
|
|
|
return "''"
|
|
|
|
}
|
|
|
|
|
2021-05-19 03:32:13 +08:00
|
|
|
// Keep track of whether it is a valid (unquoted) variable name.
|
2021-01-13 07:05:01 +08:00
|
|
|
bare := true
|
2022-06-12 04:25:17 +08:00
|
|
|
for _, r := range s {
|
|
|
|
if r == unicode.ReplacementChar || !unicode.IsPrint(r) {
|
|
|
|
// Contains invalid UTF-8 sequence or unprintable character; force
|
|
|
|
// double quote.
|
2021-01-13 07:05:01 +08:00
|
|
|
return quoteDouble(s)
|
|
|
|
}
|
|
|
|
if !allowedInVariableName(r) {
|
|
|
|
bare = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if bare {
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
return quoteSingle(s)
|
|
|
|
}
|
|
|
|
|
2022-06-12 04:25:17 +08:00
|
|
|
// QuoteAs returns a representation of s in Elvish syntax, preferring the syntax
|
2018-10-14 00:52:54 +08:00
|
|
|
// specified by q, which must be one of Bareword, SingleQuoted, or DoubleQuoted.
|
|
|
|
// It returns the quoted string and the actual quoting.
|
2016-02-24 20:00:11 +08:00
|
|
|
func QuoteAs(s string, q PrimaryType) (string, PrimaryType) {
|
|
|
|
if q == DoubleQuoted {
|
|
|
|
// Everything can be quoted using double quotes, return directly.
|
|
|
|
return quoteDouble(s), DoubleQuoted
|
|
|
|
}
|
|
|
|
if s == "" {
|
|
|
|
return "''", SingleQuoted
|
|
|
|
}
|
|
|
|
|
|
|
|
// Keep track of whether it is a valid bareword.
|
|
|
|
bare := s[0] != '~'
|
2022-06-12 04:25:17 +08:00
|
|
|
for _, r := range s {
|
|
|
|
if r == unicode.ReplacementChar || !unicode.IsPrint(r) {
|
|
|
|
// Contains invalid UTF-8 sequence or unprintable character; force
|
|
|
|
// double quote.
|
2016-02-24 20:00:11 +08:00
|
|
|
return quoteDouble(s), DoubleQuoted
|
|
|
|
}
|
2018-01-01 03:12:36 +08:00
|
|
|
if !allowedInBareword(r, strictExpr) {
|
2016-02-24 20:00:11 +08:00
|
|
|
bare = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if q == Bareword && bare {
|
|
|
|
return s, Bareword
|
|
|
|
}
|
|
|
|
return quoteSingle(s), SingleQuoted
|
|
|
|
}
|
|
|
|
|
|
|
|
// quoteSingle wraps s in single quotes, writing every single quote inside s
// twice. The string is copied rune by rune, so a byte that is not valid UTF-8
// is emitted as U+FFFD.
func quoteSingle(s string) string {
	const quote = '\''

	var out bytes.Buffer
	out.WriteByte(quote)
	for _, ch := range s {
		if ch == quote {
			// An embedded quote is represented by two consecutive quotes.
			out.WriteByte(quote)
		}
		out.WriteRune(ch)
	}
	out.WriteByte(quote)
	return out.String()
}
|
|
|
|
|
2022-05-01 08:32:05 +08:00
|
|
|
// rtohex formats r as exactly w lowercase hexadecimal digits, padding with
// leading zeros. If r needs more than w digits, only the w least significant
// ones are kept. It is tailored to the common cases met when encoding Elvish
// strings and is meant to be cheaper than fmt.Sprintf("%x").
func rtohex(r rune, w int) []byte {
	digits := make([]byte, w)
	// Fill from the least significant digit backwards, consuming one
	// hexadecimal digit of r per step.
	for pos := len(digits) - 1; pos >= 0; pos, r = pos-1, r/16 {
		switch nibble := byte(r % 16); {
		case nibble <= 9:
			digits[pos] = '0' + nibble
		default:
			digits[pos] = 'a' + nibble - 10
		}
	}
	return digits
}
|
|
|
|
|
|
|
|
func quoteDouble(s string) string {
|
|
|
|
var buf bytes.Buffer
|
|
|
|
buf.WriteByte('"')
|
2022-06-12 04:25:17 +08:00
|
|
|
for s != "" {
|
2022-05-01 08:32:05 +08:00
|
|
|
r, w := utf8.DecodeRuneInString(s)
|
2022-06-12 04:25:17 +08:00
|
|
|
if r == utf8.RuneError && w == 1 {
|
2022-05-01 08:32:05 +08:00
|
|
|
// An invalid UTF-8 sequence was seen -- encode first byte as a hex literal.
|
|
|
|
buf.WriteByte('\\')
|
|
|
|
buf.WriteByte('x')
|
|
|
|
buf.Write(rtohex(rune(s[0]), 2))
|
2022-06-12 04:25:17 +08:00
|
|
|
} else if e, ok := doubleUnescape[r]; ok {
|
|
|
|
// This handles the escaping of " and \ too.
|
2016-02-24 20:00:11 +08:00
|
|
|
buf.WriteByte('\\')
|
2016-04-12 23:38:21 +08:00
|
|
|
buf.WriteRune(e)
|
2022-06-12 04:25:17 +08:00
|
|
|
} else if unicode.IsPrint(r) && r != utf8.RuneError {
|
|
|
|
// RuneError is technically printable, but don't print it directly
|
|
|
|
// to avoid confusion.
|
2022-05-01 08:32:05 +08:00
|
|
|
buf.WriteRune(r)
|
2022-06-12 04:25:17 +08:00
|
|
|
} else if r <= 0x7f {
|
|
|
|
// Unprintable characters in the ASCII range can be escaped with \x
|
|
|
|
// since they are one byte in UTF-8.
|
2022-05-01 08:32:05 +08:00
|
|
|
buf.WriteByte('\\')
|
|
|
|
buf.WriteByte('x')
|
|
|
|
buf.Write(rtohex(r, 2))
|
|
|
|
} else if r <= 0xffff {
|
2016-02-24 20:00:11 +08:00
|
|
|
buf.WriteByte('\\')
|
2022-05-01 08:32:05 +08:00
|
|
|
buf.WriteByte('u')
|
|
|
|
buf.Write(rtohex(r, 4))
|
2016-02-24 20:00:11 +08:00
|
|
|
} else {
|
2022-05-01 08:32:05 +08:00
|
|
|
buf.WriteByte('\\')
|
|
|
|
buf.WriteByte('U')
|
|
|
|
buf.Write(rtohex(r, 8))
|
2016-02-24 20:00:11 +08:00
|
|
|
}
|
2022-05-01 08:32:05 +08:00
|
|
|
s = s[w:]
|
2016-02-24 20:00:11 +08:00
|
|
|
}
|
|
|
|
buf.WriteByte('"')
|
|
|
|
return buf.String()
|
|
|
|
}
|