Add from-terminated and to-terminated commands

This change makes feeding output to commands which handle NUL-terminated
"lines" (e.g., `fzf --read0` or `xargs -0`) extremely fast compared to
using an explicit Elvish loop that does `print $val"\x00"`. Similarly for
handling input from commands that produce NUL-terminated "lines" (e.g.,
`find . -print0`) compared to an Elvish loop using `read-upto "\x00"`.

Resolves #1070
Related #1053
This commit is contained in:
Kurtis Rader 2021-05-15 19:27:39 -07:00 committed by Qi Xiao
parent 9b733b14d8
commit 267e015bd6
5 changed files with 132 additions and 20 deletions

View File

@ -73,6 +73,10 @@ New features in the standard library:
- New options to the `edit:command-history` command: `&dedup`,
`&newest-first`, and `&cmd-only` ([#1053](https://b.elv.sh/1053)).
- New `from-terminated` and `to-terminated` commands to allow efficient
streaming of lines terminated by ASCII NUL or other line terminators
([#1070](https://b.elv.sh/1070)).
New features in the interactive editor:
- The editor now supports setting global bindings via `$edit:global-binding`.

View File

@ -2,6 +2,7 @@ package eval
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
@ -43,13 +44,15 @@ func init() {
"only-values": onlyValues,
// Bytes to value
"slurp": slurp,
"from-lines": fromLines,
"from-json": fromJSON,
"slurp": slurp,
"from-lines": fromLines,
"from-json": fromJSON,
"from-terminated": fromTerminated,
// Value to bytes
"to-lines": toLines,
"to-json": toJSON,
"to-lines": toLines,
"to-json": toJSON,
"to-terminated": toTerminated,
// File and pipe
"fopen": fopen,
@ -94,14 +97,14 @@ func put(fm *Frame, args ...interface{}) {
//elvdoc:fn read-upto
//
// ```elvish
// read-upto $delim
// read-upto $terminator
// ```
//
// Reads byte input until `$delim` or end-of-file is encountered, and outputs
// the part of the input read as a string value. The output contains the
// trailing `$delim`, unless `read-upto` terminated at end-of-file.
// Reads byte input until `$terminator` or end-of-file is encountered. It outputs the part of the
// input read as a string value. The output contains the trailing `$terminator`, unless `read-upto`
// terminated at end-of-file.
//
// The `$delim` argument must be a single rune in the ASCII range.
// The `$terminator` must be a single ASCII character such as `"\x00"` (NUL).
//
// Examples:
//
@ -116,8 +119,8 @@ func put(fm *Frame, args ...interface{}) {
// ▶ foobar
// ```
func readUpto(fm *Frame, last string) (string, error) {
if len(last) != 1 {
func readUpto(fm *Frame, terminator string) (string, error) {
if len(terminator) != 1 || terminator[0] > 127 {
return "", ErrArgs
}
in := fm.InputFile()
@ -132,7 +135,7 @@ func readUpto(fm *Frame, last string) (string, error) {
return "", err
}
buf = append(buf, b[0])
if b[0] == last[0] {
if b[0] == terminator[0] {
break
}
}
@ -594,7 +597,7 @@ func slurp(fm *Frame) (string, error) {
// ▶ a
// ```
//
// @cf to-lines
// @cf from-terminated read-upto to-lines
func fromLines(fm *Frame) {
linesToChan(fm.InputFile(), fm.OutputChan())
@ -687,6 +690,37 @@ func fromJSONInterface(v interface{}) (interface{}, error) {
}
}
//elvdoc:fn from-terminated
//
// ```elvish
// from-terminated $terminator
// ```
//
// Splits byte input into lines at each `$terminator` character, and writes them to the value
// output. Value input is ignored.
//
// The `$terminator` must be a single ASCII character such as `"\x00"` (NUL).
//
// ```elvish-transcript
// ~> { echo a; echo b } | from-terminated "\x00"
// ▶ "a\nb\n"
// ~> print "a\000b" | from-terminated "\x00"
// ▶ a
// ▶ b
// ```
//
// @cf from-lines read-upto to-terminated

// ErrInvalidTerminator is returned by from-terminated and to-terminated when
// the terminator argument is not exactly one byte in the ASCII range.
var ErrInvalidTerminator = errors.New("terminator must be a single ASCII char")

func fromTerminated(fm *Frame, terminator string) error {
	// Accept only a one-byte string whose byte is in the ASCII range (0-127).
	// The length test comes first so the index is never evaluated on an empty
	// string.
	validTerminator := len(terminator) == 1 && terminator[0] <= 127
	if !validTerminator {
		return ErrInvalidTerminator
	}
	delim := terminator[0]
	terminatedToChan(fm.InputFile(), fm.OutputChan(), delim)
	return nil
}
//elvdoc:fn to-lines
//
// ```elvish
@ -708,16 +742,48 @@ func fromJSONInterface(v interface{}) (interface{}, error) {
// a
// ```
//
// @cf from-lines
// @cf from-lines to-terminated
func toLines(fm *Frame, inputs Inputs) {
	// Each input value is stringified and written to the byte output followed
	// by a newline.
	w := fm.OutputFile()
	writeLine := func(val interface{}) {
		line := vals.ToString(val)
		fmt.Fprintln(w, line)
	}
	inputs(writeLine)
}
//elvdoc:fn to-terminated
//
// ```elvish
// to-terminated $terminator $input?
// ```
//
// Writes each value input to the byte output, followed by the specified terminator character.
// Byte input is ignored. This behavior is useful, for example, when feeding output into a program
// that accepts NUL-terminated lines, to avoid ambiguities if the values contain newline characters.
//
// The `$terminator` must be a single ASCII character such as `"\x00"` (NUL).
//
// ```elvish-transcript
// ~> put a b | to-terminated "\x00" | cat -evt
// a^@b^@
// ~> to-terminated "\x00" [a b] | cat -evt
// a^@b^@
// ```
//
// @cf from-terminated to-lines

func toTerminated(fm *Frame, terminator string, inputs Inputs) error {
	// Reject anything other than a single byte in the ASCII range (0-127).
	if !(len(terminator) == 1 && terminator[0] <= 127) {
		return ErrInvalidTerminator
	}
	w := fm.OutputFile()
	emit := func(val interface{}) {
		// Both operands are strings, so Fprint inserts no space between the
		// value and its terminator.
		fmt.Fprint(w, vals.ToString(val), terminator)
	}
	inputs(emit)
	return nil
}
//elvdoc:fn to-json
//
// ```elvish

View File

@ -3,6 +3,7 @@ package eval_test
import (
"testing"
"src.elv.sh/pkg/eval"
. "src.elv.sh/pkg/eval/evaltest"
"src.elv.sh/pkg/eval/vals"
)
@ -87,6 +88,14 @@ func TestFromLines(t *testing.T) {
)
}
// TestFromTerminated checks the from-terminated builtin against Elvish snippets.
func TestFromTerminated(t *testing.T) {
Test(t,
// NUL terminator: the embedded newline stays inside the first value, the two
// adjacent terminators produce an empty value, and the trailing unterminated
// "d" is still output.
That(`print "a\nb\x00\x00c\x00d" | from-terminated "\x00"`).Puts("a\nb", "", "c", "d"),
// A printable ASCII character works as a terminator too; "XX" yields an
// empty value between "c" and "d".
That(`print aXbXcXXd | from-terminated "X"`).Puts("a", "b", "c", "", "d"),
// A terminator longer than one character is rejected.
That(`from-terminated "xyz"`).Throws(eval.ErrInvalidTerminator),
)
}
func TestFromJson(t *testing.T) {
Test(t,
That(`echo '{"k": "v", "a": [1, 2]}' '"foo"' | from-json`).
@ -104,6 +113,14 @@ func TestToLines(t *testing.T) {
)
}
// TestToTerminated checks the to-terminated builtin against Elvish snippets.
func TestToTerminated(t *testing.T) {
Test(t,
// Values from the pipeline: each value, including one containing a newline,
// is printed followed by the NUL terminator.
That(`put "l\norem" ipsum | to-terminated "\x00"`).Prints("l\norem\x00ipsum\x00"),
// Values from a list argument: every element, including the last, is
// followed by the terminator.
That(`to-terminated "X" [a b c]`).Prints("aXbXcX"),
// A terminator longer than one character is rejected.
That(`to-terminated "XYZ" [a b c]`).Throws(eval.ErrInvalidTerminator),
)
}
func TestToJson(t *testing.T) {
Test(t,
That(`put [&k=v &a=[1 2]] foo | to-json`).

View File

@ -152,6 +152,22 @@ func linesToChan(r io.Reader, ch chan<- interface{}) {
}
}
// terminatedToChan reads r through a buffered reader, splitting it at each
// terminator byte, and sends every non-empty read to ch as a string with the
// trailing terminator removed. It returns at end of input or on the first read
// error; errors other than io.EOF are logged.
func terminatedToChan(r io.Reader, ch chan<- interface{}, terminator byte) {
	reader := bufio.NewReader(r)
	for {
		chunk, err := reader.ReadString(terminator)
		// ReadString may return data together with an error (e.g. a final
		// chunk without a terminator), so forward the data before looking at
		// err.
		if len(chunk) > 0 {
			ch <- strutil.ChopTerminator(chunk, terminator)
		}
		switch err {
		case nil:
			continue
		case io.EOF:
			return
		default:
			logger.Println("error on reading:", err)
			return
		}
	}
}
// fork returns a modified copy of ec. The ports are forked, and the name is
// changed to the given value. Other fields are copied shallowly.
func (fm *Frame) fork(name string) *Frame {

View File

@ -1,11 +1,20 @@
package strutil
// ChopLineEnding removes a line ending ("\r\n" or "\n") from the end of s. It
// returns itself if it doesn't end with a line ending.
// ChopLineEnding removes a line ending ("\r\n" or "\n") from the end of `s`. It returns `s` if it
// doesn't end with a line ending.
func ChopLineEnding(s string) string {
if len(s) >= 2 && s[len(s)-2:] == "\r\n" {
if len(s) >= 2 && s[len(s)-2:] == "\r\n" { // Windows line ending
return s[:len(s)-2]
} else if len(s) >= 1 && s[len(s)-1] == '\n' {
} else if len(s) >= 1 && s[len(s)-1] == '\n' { // UNIX line ending
return s[:len(s)-1]
}
return s
}
// ChopTerminator removes a specific `terminator` byte from the end of `s`. It returns `s` if it
// doesn't end with the specified terminator.
func ChopTerminator(s string, terminator byte) string {
if len(s) >= 1 && s[len(s)-1] == terminator {
return s[:len(s)-1]
}
return s