elvish/pkg/parse/quote.go
Qi Xiao ae9e6d1565 Fixup for #1531.
- Fix the handling of actual occurrences of U+FFFD.

- Fix an existing bug of QuoteVariableName("$\n").
2022-06-11 21:26:32 +01:00

141 lines
3.4 KiB
Go

package parse
import (
"bytes"
"unicode"
"unicode/utf8"
)
// Quote renders s as Elvish source text that evaluates back to s. It asks
// QuoteAs for the bareword form, so a string that qualifies as a bareword comes
// back unchanged and anything else comes back quoted; the quoting style that
// was actually chosen is discarded.
func Quote(s string) string {
	quoted, _ := QuoteAs(s, Bareword)
	return quoted
}
// QuoteVariableName renders s so that it can be used as a variable name in
// Elvish source. The result is:
//
//   - "''" when s is empty;
//   - the double-quoted form when s contains U+FFFD (which is also what an
//     invalid UTF-8 sequence decodes to) or any unprintable rune;
//   - s itself when every rune is allowed in an unquoted variable name;
//   - the single-quoted form otherwise.
func QuoteVariableName(s string) string {
	if len(s) == 0 {
		return "''"
	}

	// Set as soon as a rune is seen that cannot appear unquoted.
	needsQuote := false
	for _, c := range s {
		if c == unicode.ReplacementChar || !unicode.IsPrint(c) {
			// Single quotes cannot express escapes, so fall back to double
			// quotes for the whole string.
			return quoteDouble(s)
		}
		if !allowedInVariableName(c) {
			needsQuote = true
		}
	}

	if needsQuote {
		return quoteSingle(s)
	}
	return s
}
// QuoteAs renders s in Elvish syntax, using the style requested by q when
// possible. q must be one of Bareword, SingleQuoted, or DoubleQuoted. The
// second result reports the style that was actually used:
//
//   - DoubleQuoted is always honored.
//   - An empty string becomes "''" (SingleQuoted).
//   - A string containing U+FFFD (also the decoding of invalid UTF-8) or an
//     unprintable rune is forced to DoubleQuoted.
//   - Bareword is honored only if s does not start with '~' and every rune is
//     allowed in a bareword; otherwise the result is SingleQuoted.
func QuoteAs(s string, q PrimaryType) (string, PrimaryType) {
	switch {
	case q == DoubleQuoted:
		// Double quotes can represent any string, so no inspection is needed.
		return quoteDouble(s), DoubleQuoted
	case s == "":
		return "''", SingleQuoted
	}

	// A leading tilde alone is enough to rule out the bareword form.
	mustQuote := s[0] == '~'
	for _, c := range s {
		if c == unicode.ReplacementChar || !unicode.IsPrint(c) {
			// Only double quotes can escape these runes.
			return quoteDouble(s), DoubleQuoted
		}
		if !allowedInBareword(c, strictExpr) {
			mustQuote = true
		}
	}

	if mustQuote || q != Bareword {
		return quoteSingle(s), SingleQuoted
	}
	return s, Bareword
}
// quoteSingle wraps s in single quotes, doubling every single quote inside it.
// The input is walked rune by rune, so bytes that do not decode as UTF-8 are
// written out as U+FFFD.
func quoteSingle(s string) string {
	var out bytes.Buffer
	out.WriteByte('\'')
	for _, c := range s {
		if c == '\'' {
			// A literal single quote is spelled as two single quotes.
			out.WriteString("''")
			continue
		}
		out.WriteRune(c)
	}
	out.WriteByte('\'')
	return out.String()
}
// rtohex formats r as exactly w lowercase hexadecimal digits, zero-padded on
// the left. If r needs more than w digits, only the w least significant ones
// are kept. It is a hand-rolled alternative to fmt.Sprintf("%x") intended for
// the short, fixed-width escapes produced when encoding Elvish strings.
func rtohex(r rune, w int) []byte {
	out := make([]byte, w)
	// Fill from the rightmost position, peeling off one hex digit per step.
	for pos := w; pos > 0; pos-- {
		digit := byte(r % 16)
		r /= 16
		ch := 'a' + digit - 10
		if digit <= 9 {
			ch = '0' + digit
		}
		out[pos-1] = ch
	}
	return out
}
func quoteDouble(s string) string {
var buf bytes.Buffer
buf.WriteByte('"')
for s != "" {
r, w := utf8.DecodeRuneInString(s)
if r == utf8.RuneError && w == 1 {
// An invalid UTF-8 sequence was seen -- encode first byte as a hex literal.
buf.WriteByte('\\')
buf.WriteByte('x')
buf.Write(rtohex(rune(s[0]), 2))
} else if e, ok := doubleUnescape[r]; ok {
// This handles the escaping of " and \ too.
buf.WriteByte('\\')
buf.WriteRune(e)
} else if unicode.IsPrint(r) && r != utf8.RuneError {
// RuneError is technically printable, but don't print it directly
// to avoid confusion.
buf.WriteRune(r)
} else if r <= 0x7f {
// Unprintable characters in the ASCII range can be escaped with \x
// since they are one byte in UTF-8.
buf.WriteByte('\\')
buf.WriteByte('x')
buf.Write(rtohex(r, 2))
} else if r <= 0xffff {
buf.WriteByte('\\')
buf.WriteByte('u')
buf.Write(rtohex(r, 4))
} else {
buf.WriteByte('\\')
buf.WriteByte('U')
buf.Write(rtohex(r, 8))
}
s = s[w:]
}
buf.WriteByte('"')
return buf.String()
}