mirror of
https://github.com/go-sylixos/elvish.git
synced 2024-12-05 03:17:50 +08:00
Add str:from-utf8-bytes, str:to-utf8-bytes and move builtin ord and chr to str module (#1081)
* pkg/eval/str: move builtin ord and chr to str Move the builtin string functions ord and chr to the str module and rename them to str:to-codepoints and str:from-codepoints respectively, as suggested in #851. * pkg/eval/str: add from-utf8-bytes & to-utf8-bytes Add from-utf8-bytes and to-utf8-bytes functions to the str module. These functions differ from their *-codepoints counterparts in that they handle UTF-8 bytes instead of whole codepoints. Closes #851 * pkg/eval/str: range check for codepoint and bytes str:from-codepoints Add a check that the codepoint arguments are within the valid Unicode range; return an OutOfRange error otherwise. Return a BadValue error if the codepoint isn't valid. Add/change testcases. str:from-utf8-bytes Add a check that the byte arguments are within the valid range; return an OutOfRange error otherwise. Return a BadValue error if the byte sequence isn't a valid UTF-8 sequence. Add/change testcases. Add an additional test that piping from str:to-codepoints/str:to-utf8-bytes to str:from-codepoints/str:from-utf8-bytes returns the original input.
This commit is contained in:
parent
c42755892e
commit
1124c10b56
|
@ -1,2 +1,8 @@
|
|||
This is the draft release notes for 0.15.0, scheduled to be released on
|
||||
2021-01-01.
|
||||
|
||||
# Deprecated features
|
||||
|
||||
- The `chr` command is now deprecated. Use `str:from-codepoints` instead.
|
||||
|
||||
- The `ord` command is now deprecated. Use `str:to-codepoints` instead.
|
||||
|
|
|
@ -62,6 +62,8 @@ var ErrInputOfEawkMustBeString = errors.New("input of eawk must be string")
|
|||
// ord $string
|
||||
// ```
|
||||
//
|
||||
// This function is deprecated; use [str:to-codepoints](str.html#strto-codepoints) instead.
|
||||
//
|
||||
// Output value of each codepoint in `$string`, in hexadecimal. Examples:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
|
@ -84,6 +86,8 @@ var ErrInputOfEawkMustBeString = errors.New("input of eawk must be string")
|
|||
// chr $number...
|
||||
// ```
|
||||
//
|
||||
// This function is deprecated; use [str:from-codepoints](str.html#strfrom-codepoints) instead.
|
||||
//
|
||||
// Outputs a string consisting of the given Unicode codepoints. Example:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
|
|
|
@ -164,6 +164,10 @@ func (cp *compiler) checkDeprecatedBuiltin(name string, r diag.Ranger) {
|
|||
msg = `the "-time" command is deprecated; use "time" instead`
|
||||
case "^~":
|
||||
msg = `the "^" command is deprecated; use "math:pow" or "math:pow10" instead`
|
||||
case "ord":
|
||||
msg = `the "ord" command is deprecated; use "str:to-codepoints" instead`
|
||||
case "chr":
|
||||
msg = `the "chr" command is deprecated; use "str:from-codepoints" instead`
|
||||
default:
|
||||
return
|
||||
}
|
||||
|
|
|
@ -4,7 +4,11 @@ package str
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/elves/elvish/pkg/eval"
|
||||
"github.com/elves/elvish/pkg/eval/errs"
|
||||
|
@ -92,6 +96,80 @@ import (
|
|||
// ▶ $false
|
||||
// ```
|
||||
|
||||
//elvdoc:fn from-codepoints
|
||||
//
|
||||
// ```elvish
|
||||
// str:from-codepoints $number...
|
||||
// ```
|
||||
//
|
||||
// Outputs a string consisting of the given Unicode codepoints. Example:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
// ~> str:from-codepoints 0x61
|
||||
// ▶ a
|
||||
// ~> str:from-codepoints 0x4f60 0x597d
|
||||
// ▶ 你好
|
||||
// ```
|
||||
//
|
||||
// @cf str:to-codepoints
|
||||
|
||||
func fromCodepoints(nums ...int) (string, error) {
|
||||
var b bytes.Buffer
|
||||
for _, num := range nums {
|
||||
if num < 0 || num > unicode.MaxRune {
|
||||
return "", errs.OutOfRange{
|
||||
What: "codepoint",
|
||||
ValidLow: 0, ValidHigh: unicode.MaxRune,
|
||||
Actual: strconv.Itoa(num)}
|
||||
}
|
||||
if !utf8.ValidRune(rune(num)) {
|
||||
return "", errs.BadValue{
|
||||
What: "argument to str:from-codepoints",
|
||||
Valid: "valid Unicode codepoint",
|
||||
Actual: "0x" + strconv.FormatInt(int64(num), 16)}
|
||||
}
|
||||
b.WriteRune(rune(num))
|
||||
}
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
//elvdoc:fn from-utf8-bytes
|
||||
//
|
||||
// ```elvish
|
||||
// str:from-utf8-bytes $number...
|
||||
// ```
|
||||
//
|
||||
// Outputs a string consisting of the given UTF-8 bytes. Example:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
// ~> str:from-utf8-bytes 0x61
|
||||
// ▶ a
|
||||
// ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd
|
||||
// ▶ 你好
|
||||
// ```
|
||||
//
|
||||
// @cf str:to-utf8-bytes
|
||||
|
||||
func fromUtf8Bytes(nums ...int) (string, error) {
|
||||
var b bytes.Buffer
|
||||
for _, num := range nums {
|
||||
if num < 0 || num > 255 {
|
||||
return "", errs.OutOfRange{
|
||||
What: "byte",
|
||||
ValidLow: 0, ValidHigh: 255,
|
||||
Actual: strconv.Itoa(num)}
|
||||
}
|
||||
b.WriteByte(byte(num))
|
||||
}
|
||||
if !utf8.Valid(b.Bytes()) {
|
||||
return "", errs.BadValue{
|
||||
What: "arguments to str:from-utf8-bytes",
|
||||
Valid: "valid UTF-8 sequence",
|
||||
Actual: fmt.Sprint(b.Bytes())}
|
||||
}
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
//elvdoc:fn has-prefix
|
||||
//
|
||||
// ```elvish
|
||||
|
@ -288,6 +366,33 @@ func split(fm *eval.Frame, opts maxOpt, sep, s string) {
|
|||
// ▶ Her Royal Highness
|
||||
// ```
|
||||
|
||||
//elvdoc:fn to-codepoints
|
||||
//
|
||||
// ```elvish
|
||||
// str:to-codepoints $string
|
||||
// ```
|
||||
//
|
||||
// Outputs the value of each codepoint in `$string`, in hexadecimal. Examples:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
// ~> str:to-codepoints a
|
||||
// ▶ 0x61
|
||||
// ~> str:to-codepoints 你好
|
||||
// ▶ 0x4f60
|
||||
// ▶ 0x597d
|
||||
// ```
|
||||
//
|
||||
// The output format is subject to change.
|
||||
//
|
||||
// @cf from-codepoints
|
||||
|
||||
func toCodepoints(fm *eval.Frame, s string) {
|
||||
out := fm.OutputChan()
|
||||
for _, r := range s {
|
||||
out <- "0x" + strconv.FormatInt(int64(r), 16)
|
||||
}
|
||||
}
|
||||
|
||||
//elvdoc:fn to-lower
|
||||
//
|
||||
// ```elvish
|
||||
|
@ -302,6 +407,37 @@ func split(fm *eval.Frame, opts maxOpt, sep, s string) {
|
|||
// ▶ abc!123
|
||||
// ```
|
||||
|
||||
//elvdoc:fn to-utf8-bytes
|
||||
//
|
||||
// ```elvish
|
||||
// str:to-utf8-bytes $string
|
||||
// ```
|
||||
//
|
||||
// Outputs the value of each byte in `$string`, in hexadecimal. Examples:
|
||||
//
|
||||
// ```elvish-transcript
|
||||
// ~> str:to-utf8-bytes a
|
||||
// ▶ 0x61
|
||||
// ~> str:to-utf8-bytes 你好
|
||||
// ▶ 0xe4
|
||||
// ▶ 0xbd
|
||||
// ▶ 0xa0
|
||||
// ▶ 0xe5
|
||||
// ▶ 0xa5
|
||||
// ▶ 0xbd
|
||||
// ```
|
||||
//
|
||||
// The output format is subject to change.
|
||||
//
|
||||
// @cf from-utf8-bytes
|
||||
|
||||
func toUtf8Bytes(fm *eval.Frame, s string) {
|
||||
out := fm.OutputChan()
|
||||
for _, r := range []byte(s) {
|
||||
out <- "0x" + strconv.FormatInt(int64(r), 16)
|
||||
}
|
||||
}
|
||||
|
||||
//elvdoc:fn to-title
|
||||
//
|
||||
// ```elvish
|
||||
|
@ -428,10 +564,12 @@ var fns = map[string]interface{}{
|
|||
"count": strings.Count,
|
||||
"equal-fold": strings.EqualFold,
|
||||
// TODO: Fields, FieldsFunc
|
||||
"has-prefix": strings.HasPrefix,
|
||||
"has-suffix": strings.HasSuffix,
|
||||
"index": strings.Index,
|
||||
"index-any": strings.IndexAny,
|
||||
"from-codepoints": fromCodepoints,
|
||||
"from-utf8-bytes": fromUtf8Bytes,
|
||||
"has-prefix": strings.HasPrefix,
|
||||
"has-suffix": strings.HasSuffix,
|
||||
"index": strings.Index,
|
||||
"index-any": strings.IndexAny,
|
||||
// TODO: IndexFunc
|
||||
"join": join,
|
||||
"last-index": strings.LastIndex,
|
||||
|
@ -439,10 +577,12 @@ var fns = map[string]interface{}{
|
|||
"replace": replace,
|
||||
"split": split,
|
||||
// TODO: SplitAfter
|
||||
"title": strings.Title,
|
||||
"to-lower": strings.ToLower,
|
||||
"to-title": strings.ToTitle,
|
||||
"to-upper": strings.ToUpper,
|
||||
"title": strings.Title,
|
||||
"to-codepoints": toCodepoints,
|
||||
"to-lower": strings.ToLower,
|
||||
"to-title": strings.ToTitle,
|
||||
"to-upper": strings.ToUpper,
|
||||
"to-utf8-bytes": toUtf8Bytes,
|
||||
// TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial
|
||||
"trim": strings.Trim,
|
||||
"trim-left": strings.TrimLeft,
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
package str
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
"unicode"
|
||||
|
||||
"github.com/elves/elvish/pkg/eval"
|
||||
"github.com/elves/elvish/pkg/eval/errs"
|
||||
|
@ -31,6 +34,35 @@ func TestStr(t *testing.T) {
|
|||
That(`str:equal-fold abc ABC`).Puts(true),
|
||||
That(`str:equal-fold abc A`).Puts(false),
|
||||
|
||||
That(`str:from-codepoints 0x61`).Puts("a"),
|
||||
That(`str:from-codepoints 0x4f60 0x597d`).Puts("你好"),
|
||||
That(`str:from-codepoints -0x1`).ThrowsCause(errs.OutOfRange{
|
||||
What: "codepoint",
|
||||
ValidLow: 0, ValidHigh: unicode.MaxRune,
|
||||
Actual: strconv.Itoa(-1)}),
|
||||
That(fmt.Sprintf(`str:from-codepoints 0x%x`, unicode.MaxRune+1)).ThrowsCause(errs.OutOfRange{
|
||||
What: "codepoint",
|
||||
ValidLow: 0, ValidHigh: unicode.MaxRune,
|
||||
Actual: strconv.Itoa(unicode.MaxRune + 1)}),
|
||||
That(`str:from-codepoints 0xd800`).ThrowsCause(errs.BadValue{
|
||||
What: "argument to str:from-codepoints",
|
||||
Valid: "valid Unicode codepoint",
|
||||
Actual: "0xd800"}),
|
||||
That(`str:from-utf8-bytes 0x61`).Puts("a"),
|
||||
That(`str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd`).Puts("你好"),
|
||||
That(`str:from-utf8-bytes -1`).ThrowsCause(errs.OutOfRange{
|
||||
What: "byte",
|
||||
ValidLow: 0, ValidHigh: 255,
|
||||
Actual: strconv.Itoa(-1)}),
|
||||
That(`str:from-utf8-bytes 256`).ThrowsCause(errs.OutOfRange{
|
||||
What: "byte",
|
||||
ValidLow: 0, ValidHigh: 255,
|
||||
Actual: strconv.Itoa(256)}),
|
||||
That(`str:from-utf8-bytes 0xff 0x3 0xaa`).ThrowsCause(errs.BadValue{
|
||||
What: "arguments to str:from-utf8-bytes",
|
||||
Valid: "valid UTF-8 sequence",
|
||||
Actual: "[255 3 170]"}),
|
||||
|
||||
That(`str:has-prefix abc`).ThrowsAny(),
|
||||
That(`str:has-prefix abcd ab`).Puts(true),
|
||||
That(`str:has-prefix abcd cd`).Puts(false),
|
||||
|
@ -62,6 +94,14 @@ func TestStr(t *testing.T) {
|
|||
That(`str:split : /usr:/bin:/tmp`).Puts("/usr", "/bin", "/tmp"),
|
||||
That(`str:split : /usr:/bin:/tmp &max=2`).Puts("/usr", "/bin:/tmp"),
|
||||
|
||||
That(`str:to-codepoints a`).Puts("0x61"),
|
||||
That(`str:to-codepoints 你好`).Puts("0x4f60", "0x597d"),
|
||||
That(`str:to-codepoints 你好 | str:from-codepoints (all)`).Puts("你好"),
|
||||
|
||||
That(`str:to-utf8-bytes a`).Puts("0x61"),
|
||||
That(`str:to-utf8-bytes 你好`).Puts("0xe4", "0xbd", "0xa0", "0xe5", "0xa5", "0xbd"),
|
||||
That(`str:to-utf8-bytes 你好 | str:from-utf8-bytes (all)`).Puts("你好"),
|
||||
|
||||
That(`str:title abc`).Puts("Abc"),
|
||||
That(`str:title "abc def"`).Puts("Abc Def"),
|
||||
That(`str:to-lower abc def`).ThrowsAny(),
|
||||
|
|
Loading…
Reference in New Issue
Block a user