elvish/pkg/mods/str/str.go
2021-10-23 13:19:14 +01:00

622 lines
13 KiB
Go

// Package str exposes functionality from Go's strings package as an Elvish
// module.
package str
import (
"bytes"
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"src.elv.sh/pkg/eval"
"src.elv.sh/pkg/eval/errs"
"src.elv.sh/pkg/eval/vals"
)
//elvdoc:fn compare
//
// ```elvish
// str:compare $a $b
// ```
//
// Compares two strings and outputs an integer that will be 0 if a == b,
// -1 if a < b, and +1 if a > b.
//
// ```elvish-transcript
// ~> str:compare a a
// ▶ 0
// ~> str:compare a b
// ▶ -1
// ~> str:compare b a
// ▶ 1
// ```
//elvdoc:fn contains
//
// ```elvish
// str:contains $str $substr
// ```
//
// Outputs whether `$str` contains `$substr` as a substring.
//
// ```elvish-transcript
// ~> str:contains abcd x
// ▶ $false
// ~> str:contains abcd bc
// ▶ $true
// ```
//elvdoc:fn contains-any
//
// ```elvish
// str:contains-any $str $chars
// ```
//
// Outputs whether `$str` contains any Unicode code points in `$chars`.
//
// ```elvish-transcript
// ~> str:contains-any abcd x
// ▶ $false
// ~> str:contains-any abcd xby
// ▶ $true
// ```
//elvdoc:fn count
//
// ```elvish
// str:count $str $substr
// ```
//
// Outputs the number of non-overlapping instances of `$substr` in `$str`.
// If `$substr` is an empty string, outputs 1 + the number of Unicode code
// points in `$str`.
//
// ```elvish-transcript
// ~> str:count abcdefabcdef bc
// ▶ 2
// ~> str:count abcdef ''
// ▶ 7
// ```
//elvdoc:fn equal-fold
//
// ```elvish
// str:equal-fold $str1 $str2
// ```
//
// Outputs if `$str1` and `$str2`, interpreted as UTF-8 strings, are equal
// under Unicode case-folding.
//
// ```elvish-transcript
// ~> str:equal-fold ABC abc
// ▶ $true
// ~> str:equal-fold abc ab
// ▶ $false
// ```
//elvdoc:fn from-codepoints
//
// ```elvish
// str:from-codepoints $number...
// ```
//
// Outputs a string consisting of the given Unicode codepoints. Example:
//
// ```elvish-transcript
// ~> str:from-codepoints 0x61
// ▶ a
// ~> str:from-codepoints 0x4f60 0x597d
// ▶ 你好
// ```
//
// @cf str:to-codepoints
func fromCodepoints(nums ...int) (string, error) {
	// Builds a string from the given codepoints. Stops at the first number
	// that is outside [0, unicode.MaxRune] (OutOfRange) or that
	// utf8.ValidRune rejects (BadValue) and returns that error with an empty
	// string.
	var out bytes.Buffer
	for _, cp := range nums {
		switch {
		case cp < 0 || cp > unicode.MaxRune:
			return "", errs.OutOfRange{
				What:      "codepoint",
				ValidLow:  "0",
				ValidHigh: strconv.Itoa(unicode.MaxRune),
				Actual:    hex(cp),
			}
		case !utf8.ValidRune(rune(cp)):
			return "", errs.BadValue{
				What:   "argument to str:from-codepoints",
				Valid:  "valid Unicode codepoint",
				Actual: hex(cp),
			}
		}
		out.WriteRune(rune(cp))
	}
	return out.String(), nil
}
// hex formats i as a hexadecimal literal with lowercase digits: "0x..." for
// non-negative values and "-0x..." for negative ones.
func hex(i int) string {
	digits := strconv.FormatInt(int64(i), 16)
	if i < 0 {
		// FormatInt already produced "-<digits>"; drop its sign and put it in
		// front of the "0x" prefix. Formatting the signed value directly
		// (rather than negating first) keeps the minimum int correct, since
		// negating it overflows back to itself.
		return "-0x" + digits[1:]
	}
	return "0x" + digits
}
//elvdoc:fn from-utf8-bytes
//
// ```elvish
// str:from-utf8-bytes $number...
// ```
//
// Outputs a string consisting of the given UTF-8 bytes. Example:
//
// ```elvish-transcript
// ~> str:from-utf8-bytes 0x61
// ▶ a
// ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd
// ▶ 你好
// ```
//
// @cf str:to-utf8-bytes
func fromUtf8Bytes(nums ...int) (string, error) {
var b bytes.Buffer
for _, num := range nums {
if num < 0 || num > 255 {
return "", errs.OutOfRange{
What: "byte",
ValidLow: "0", ValidHigh: "255",
Actual: strconv.Itoa(num)}
}
b.WriteByte(byte(num))
}
if !utf8.Valid(b.Bytes()) {
return "", errs.BadValue{
What: "arguments to str:from-utf8-bytes",
Valid: "valid UTF-8 sequence",
Actual: fmt.Sprint(b.Bytes())}
}
return b.String(), nil
}
//elvdoc:fn has-prefix
//
// ```elvish
// str:has-prefix $str $prefix
// ```
//
// Outputs if `$str` begins with `$prefix`.
//
// ```elvish-transcript
// ~> str:has-prefix abc ab
// ▶ $true
// ~> str:has-prefix abc bc
// ▶ $false
// ```
//elvdoc:fn has-suffix
//
// ```elvish
// str:has-suffix $str $suffix
// ```
//
// Outputs if `$str` ends with `$suffix`.
//
// ```elvish-transcript
// ~> str:has-suffix abc ab
// ▶ $false
// ~> str:has-suffix abc bc
// ▶ $true
// ```
//elvdoc:fn index
//
// ```elvish
// str:index $str $substr
// ```
//
// Outputs the index of the first instance of `$substr` in `$str`, or -1
// if `$substr` is not present in `$str`.
//
// ```elvish-transcript
// ~> str:index abcd cd
// ▶ 2
// ~> str:index abcd xyz
// ▶ -1
// ```
//elvdoc:fn index-any
//
// ```elvish
// str:index-any $str $chars
// ```
//
// Outputs the index of the first instance of any Unicode code point
// from `$chars` in `$str`, or -1 if no Unicode code point from `$chars` is
// present in `$str`.
//
// ```elvish-transcript
// ~> str:index-any "chicken" "aeiouy"
// ▶ 2
// ~> str:index-any l33t aeiouy
// ▶ -1
// ```
//elvdoc:fn join
//
// ```elvish
// str:join $sep $input-list?
// ```
//
// Joins inputs with `$sep`. Examples:
//
// ```elvish-transcript
// ~> put lorem ipsum | str:join ,
// ▶ lorem,ipsum
// ~> str:join , [lorem ipsum]
// ▶ lorem,ipsum
// ~> str:join '' [lorem ipsum]
// ▶ loremipsum
// ~> str:join '...' [lorem ipsum]
// ▶ lorem...ipsum
// ```
//
// Etymology: Various languages, in particular
// [Python](https://docs.python.org/3.6/library/stdtypes.html#str.join).
//
// @cf str:split
func join(sep string, inputs eval.Inputs) (string, error) {
	// Concatenates every string supplied by inputs, separated by sep. The
	// first non-string value is recorded as a BadValue error and all later
	// values are ignored; the text joined up to that point is still returned
	// alongside the error.
	var (
		out     bytes.Buffer
		joinErr error
		written int
	)
	inputs(func(item interface{}) {
		// The callback has no return value to signal a stop, so the error is
		// latched and subsequent items are skipped.
		if joinErr != nil {
			return
		}
		str, isString := item.(string)
		if !isString {
			joinErr = errs.BadValue{
				What: "input to str:join", Valid: "string", Actual: vals.Kind(item)}
			return
		}
		if written > 0 {
			out.WriteString(sep)
		}
		out.WriteString(str)
		written++
	})
	return out.String(), joinErr
}
//elvdoc:fn last-index
//
// ```elvish
// str:last-index $str $substr
// ```
//
// Outputs the index of the last instance of `$substr` in `$str`,
// or -1 if `$substr` is not present in `$str`.
//
// ```elvish-transcript
// ~> str:last-index "elven speak elvish" elv
// ▶ 12
// ~> str:last-index "elven speak elvish" romulan
// ▶ -1
// ```
//elvdoc:fn replace
//
// ```elvish
// str:replace &max=-1 $old $repl $source
// ```
//
// Replaces all occurrences of `$old` with `$repl` in `$source`. If `$max` is
// non-negative, it determines the max number of substitutions.
//
// **Note**: This command does not support searching by regular expressions, `$old`
// is always interpreted as a plain string. Use [re:replace](re.html#replace) if
// you need to search by regex.
type maxOpt struct {
	// Max is the limit handed to strings.Replace / strings.SplitN by the
	// commands that take this option struct; a negative value means no limit.
	// NOTE(review): presumably populated from the `&max` option by the eval
	// framework - confirm against eval's option binding.
	Max int
}

// SetDefaultOptions resets Max to -1, i.e. "no limit".
func (o *maxOpt) SetDefaultOptions() {
	o.Max = -1
}
// replace returns s with occurrences of old replaced by repl. A negative
// opts.Max replaces every occurrence; otherwise at most opts.Max replacements
// are made.
func replace(opts maxOpt, old, repl, s string) string {
	if limit := opts.Max; limit >= 0 {
		return strings.Replace(s, old, repl, limit)
	}
	return strings.ReplaceAll(s, old, repl)
}
//elvdoc:fn split
//
// ```elvish
// str:split &max=-1 $sep $string
// ```
//
// Splits `$string` by `$sep`. If `$sep` is an empty string, split it into
// codepoints.
//
// If the `&max` option is non-negative, stops after producing the maximum
// number of results.
//
// ```elvish-transcript
// ~> str:split , lorem,ipsum
// ▶ lorem
// ▶ ipsum
// ~> str:split '' 你好
// ▶ 你
// ▶ 好
// ~> str:split &max=2 ' ' 'a b c d'
// ▶ a
// ▶ 'b c d'
// ```
//
// **Note**: This command does not support splitting by regular expressions,
// `$sep` is always interpreted as a plain string. Use [re:split](re.html#split)
// if you need to split by regex.
//
// Etymology: Various languages, in particular
// [Python](https://docs.python.org/3.6/library/stdtypes.html#str.split).
//
// @cf str:join
func split(fm *eval.Frame, opts maxOpt, sep, s string) error {
	// Splits s around sep, passing opts.Max to strings.SplitN as the maximum
	// number of pieces, and writes each piece to the frame's value output.
	// Returns the first error reported by the output, if any.
	sink := fm.ValueOutput()
	for _, piece := range strings.SplitN(s, sep, opts.Max) {
		if err := sink.Put(piece); err != nil {
			return err
		}
	}
	return nil
}
//elvdoc:fn title
//
// ```elvish
// str:title $str
// ```
//
// Outputs `$str` with all Unicode letters that begin words mapped to their
// Unicode title case.
//
// ```elvish-transcript
// ~> str:title "her royal highness"
// ▶ Her Royal Highness
// ```
//elvdoc:fn to-codepoints
//
// ```elvish
// str:to-codepoints $string
// ```
//
// Outputs value of each codepoint in `$string`, in hexadecimal. Examples:
//
// ```elvish-transcript
// ~> str:to-codepoints a
// ▶ 0x61
// ~> str:to-codepoints 你好
// ▶ 0x4f60
// ▶ 0x597d
// ```
//
// The output format is subject to change.
//
// @cf str:from-codepoints
func toCodepoints(fm *eval.Frame, s string) error {
	// Writes every rune of s to the frame's value output as a "0x"-prefixed
	// lowercase hexadecimal string, stopping at the first output error.
	sink := fm.ValueOutput()
	for _, cp := range s {
		text := "0x" + strconv.FormatInt(int64(cp), 16)
		if err := sink.Put(text); err != nil {
			return err
		}
	}
	return nil
}
//elvdoc:fn to-lower
//
// ```elvish
// str:to-lower $str
// ```
//
// Outputs `$str` with all Unicode letters mapped to their lower-case
// equivalent.
//
// ```elvish-transcript
// ~> str:to-lower 'ABC!123'
// ▶ abc!123
// ```
//elvdoc:fn to-utf8-bytes
//
// ```elvish
// str:to-utf8-bytes $string
// ```
//
// Outputs value of each byte in `$string`, in hexadecimal. Examples:
//
// ```elvish-transcript
// ~> str:to-utf8-bytes a
// ▶ 0x61
// ~> str:to-utf8-bytes 你好
// ▶ 0xe4
// ▶ 0xbd
// ▶ 0xa0
// ▶ 0xe5
// ▶ 0xa5
// ▶ 0xbd
// ```
//
// The output format is subject to change.
//
// @cf str:from-utf8-bytes
func toUtf8Bytes(fm *eval.Frame, s string) error {
	// Writes every byte of s to the frame's value output as a "0x"-prefixed
	// lowercase hexadecimal string, stopping at the first output error.
	sink := fm.ValueOutput()
	for i := 0; i < len(s); i++ {
		text := "0x" + strconv.FormatInt(int64(s[i]), 16)
		if err := sink.Put(text); err != nil {
			return err
		}
	}
	return nil
}
//elvdoc:fn to-title
//
// ```elvish
// str:to-title $str
// ```
//
// Outputs `$str` with all Unicode letters mapped to their Unicode title case.
//
// ```elvish-transcript
// ~> str:to-title "her royal highness"
// ▶ HER ROYAL HIGHNESS
// ~> str:to-title "хлеб"
// ▶ ХЛЕБ
// ```
//elvdoc:fn to-upper
//
// ```elvish
// str:to-upper $str
// ```
//
// Outputs `$str` with all Unicode letters mapped to their upper-case
// equivalent.
//
// ```elvish-transcript
// ~> str:to-upper 'abc!123'
// ▶ ABC!123
// ```
//elvdoc:fn trim
//
// ```elvish
// str:trim $str $cutset
// ```
//
// Outputs `$str` with all leading and trailing Unicode code points contained
// in `$cutset` removed.
//
// ```elvish-transcript
// ~> str:trim "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven'
// ```
//elvdoc:fn trim-left
//
// ```elvish
// str:trim-left $str $cutset
// ```
//
// Outputs `$str` with all leading Unicode code points contained in `$cutset`
// removed. To remove a prefix string use [`str:trim-prefix`](#strtrim-prefix).
//
// ```elvish-transcript
// ~> str:trim-left "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven!!!'
// ```
//elvdoc:fn trim-prefix
//
// ```elvish
// str:trim-prefix $str $prefix
// ```
//
// Outputs `$str` minus the leading `$prefix` string. If `$str` doesn't begin
// with `$prefix`, `$str` is output unchanged.
//
// ```elvish-transcript
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hello, "
// ▶ Elven!!!
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hola, "
// ▶ '¡¡¡Hello, Elven!!!'
// ```
//elvdoc:fn trim-right
//
// ```elvish
// str:trim-right $str $cutset
// ```
//
// Outputs `$str` with all trailing Unicode code points contained in `$cutset`
// removed. To remove a suffix string use [`str:trim-suffix`](#strtrim-suffix).
//
// ```elvish-transcript
// ~> str:trim-right "¡¡¡Hello, Elven!!!" "!¡"
// ▶ '¡¡¡Hello, Elven'
// ```
//elvdoc:fn trim-space
//
// ```elvish
// str:trim-space $str
// ```
//
// Outputs `$str` with all leading and trailing white space removed as defined
// by Unicode.
//
// ```elvish-transcript
// ~> str:trim-space " \t\n Hello, Elven \n\t\r\n"
// ▶ 'Hello, Elven'
// ```
//elvdoc:fn trim-suffix
//
// ```elvish
// str:trim-suffix $str $suffix
// ```
//
// Outputs `$str` minus the trailing `$suffix` string. If `$str` doesn't end
// with `$suffix`, `$str` is output unchanged.
//
// ```elvish-transcript
// ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Elven!!!"
// ▶ ¡¡¡Hello
// ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Klingons!!!"
// ▶ '¡¡¡Hello, Elven!!!'
// ```
var (
	// Ns is the module namespace, built by handing fns to AddGoFns with the
	// "str:" prefix.
	Ns = eval.NsBuilder{}.AddGoFns("str:", fns).Ns()

	// fns maps each command name (without the "str:" prefix) to the Go
	// function implementing it. Most entries are functions from Go's strings
	// package used directly; the rest are the wrappers defined in this file.
	// TODO comments mark strings functions that have no binding yet.
	fns = map[string]interface{}{
		"compare":      strings.Compare,
		"contains":     strings.Contains,
		"contains-any": strings.ContainsAny,
		"count":        strings.Count,
		"equal-fold":   strings.EqualFold,
		// TODO: Fields, FieldsFunc
		"from-codepoints": fromCodepoints,
		"from-utf8-bytes": fromUtf8Bytes,
		"has-prefix":      strings.HasPrefix,
		"has-suffix":      strings.HasSuffix,
		"index":           strings.Index,
		"index-any":       strings.IndexAny,
		// TODO: IndexFunc
		"join":       join,
		"last-index": strings.LastIndex,
		// TODO: LastIndexFunc, Map, Repeat
		"replace": replace,
		"split":   split,
		// TODO: SplitAfter
		"title":         strings.Title,
		"to-codepoints": toCodepoints,
		"to-lower":      strings.ToLower,
		"to-title":      strings.ToTitle,
		"to-upper":      strings.ToUpper,
		"to-utf8-bytes": toUtf8Bytes,
		// TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial
		"trim":       strings.Trim,
		"trim-left":  strings.TrimLeft,
		"trim-right": strings.TrimRight,
		// TODO: {TrimLeft,Right}Func
		"trim-space":  strings.TrimSpace,
		"trim-prefix": strings.TrimPrefix,
		"trim-suffix": strings.TrimSuffix,
	}
)