elvish/pkg/glob/glob.go
2021-08-30 08:57:34 +01:00

280 lines
7.8 KiB
Go

// Package glob implements globbing for elvish.
package glob
import (
"os"
"runtime"
"unicode/utf8"
)
// TODO: On Windows, preserve the original path separator (/ or \) specified in
// the glob pattern.
// PathInfo keeps a path resulting from glob expansion and its FileInfo. The
// FileInfo is useful for efficiently determining if a given pathname satisfies
// a particular constraint without doing an extra stat.
type PathInfo struct {
// The generated path, consistent with the original glob pattern. It cannot
// be replaced by Info.Name(), which is just the final path component.
Path string
Info os.FileInfo
}
// Glob returns a list of file names satisfying the given pattern.
func Glob(p string, cb func(PathInfo) bool) bool {
return Parse(p).Glob(cb)
}
// Glob returns a list of file names satisfying the Pattern.
func (p Pattern) Glob(cb func(PathInfo) bool) bool {
segs := p.Segments
dir := ""
// TODO(xiaq): This is a hack solely for supporting globs that start with
// ~ (tilde) in the eval package.
if p.DirOverride != "" {
dir = p.DirOverride
}
if len(segs) > 0 && IsSlash(segs[0]) {
segs = segs[1:]
dir += "/"
} else if runtime.GOOS == "windows" && len(segs) > 1 && IsLiteral(segs[0]) && IsSlash(segs[1]) {
// TODO: Handle Windows UNC paths.
elem := segs[0].(Literal).Data
if isDrive(elem) {
segs = segs[2:]
dir = elem + "/"
}
}
return glob(segs, dir, cb)
}
// isLetter returns true if the byte is an ASCII letter.
func isLetter(chr byte) bool {
return ('a' <= chr && chr <= 'z') || ('A' <= chr && chr <= 'Z')
}
// isDrive returns true if the string looks like a Windows drive letter path prefix.
func isDrive(s string) bool {
return len(s) == 2 && s[1] == ':' && isLetter(s[0])
}
// glob finds all filenames matching the given Segments in the given dir, and
// calls the callback on all of them. If the callback returns false, globbing is
// interrupted, and glob returns false. Otherwise it returns true.
func glob(segs []Segment, dir string, cb func(PathInfo) bool) bool {
// Consume non-wildcard path elements simply by following the path. This may
// seem like an optimization, but is actually required for "." and ".." to
// be used as path elements, as they do not appear in the result of ReadDir.
for len(segs) > 1 && IsLiteral(segs[0]) && IsSlash(segs[1]) {
elem := segs[0].(Literal).Data
segs = segs[2:]
dir += elem + "/"
if info, err := os.Stat(dir); err != nil || !info.IsDir() {
return true
}
}
if len(segs) == 0 {
if info, err := os.Stat(dir); err == nil {
return cb(PathInfo{dir, info})
}
return true
} else if len(segs) == 1 && IsLiteral(segs[0]) {
path := dir + segs[0].(Literal).Data
if info, err := os.Stat(path); err == nil {
return cb(PathInfo{path, info})
}
return true
}
infos, err := readDir(dir)
if err != nil {
// TODO(xiaq): Silently drop the error.
return true
}
i := -1
// nexti moves i to the next index in segs that is either / or ** (in other
// words, something that matches /).
nexti := func() {
for i++; i < len(segs); i++ {
if IsSlash(segs[i]) || IsWild1(segs[i], StarStar) {
break
}
}
}
nexti()
// Enumerate the position of the first slash. In the presence of multiple
// **'s in the pattern, the first slash may be in any of those.
//
// For instance, in x**y**z, the first slash may be in the first ** or the
// second:
// 1) If it is in the first, then pattern is equivalent to x*/**y**z. We
// match directories with x* and recurse in each subdirectory with the
// pattern **y**z.
// 2) If it is the in the second, we know that since the first ** can no
// longer contain any slashes, we treat it as * (this is done in
// matchElement). The pattern is now equivalent to x*y*/**z. We match
// directories with x*y* and recurse in each subdirectory with the
// pattern **z.
//
// The rules are:
// 1) For each **, we treat it as */** and all previous ones as *. We match
// subdirectories with the part before /, and recurse in subdirectories
// with the pattern after /.
// 2) If a literal / is encountered, we return after recursing in the
// subdirectories.
for i < len(segs) {
slash := IsSlash(segs[i])
var first, rest []Segment
if slash {
// segs = x/y. Match dir with x, recurse on y.
first, rest = segs[:i], segs[i+1:]
} else {
// segs = x**y. Match dir with x*, recurse on **y.
first, rest = segs[:i+1], segs[i:]
}
for _, info := range infos {
name := info.Name()
if matchElement(first, name) && info.IsDir() {
if !glob(rest, dir+name+"/", cb) {
return false
}
}
}
if slash {
// First slash cannot appear later than a slash in the pattern.
return true
}
nexti()
}
// If we reach here, it is possible to have no slashes at all. Simply match
// the entire pattern with all files.
for _, info := range infos {
name := info.Name()
if matchElement(segs, name) {
dirname := dir + name
info, err := os.Stat(dirname)
if err != nil {
return true
}
if !cb(PathInfo{dirname, info}) {
return false
}
}
}
return true
}
// readDir is just like os.ReadDir except that it treats an argument of "" as ".".
func readDir(dir string) ([]os.DirEntry, error) {
if dir == "" {
dir = "."
}
return os.ReadDir(dir)
}
// matchElement matches a path element against segments, which may not contain
// any Slash segments. It treats StarStar segments as they are Star segments.
func matchElement(segs []Segment, name string) bool {
if len(segs) == 0 {
return name == ""
}
// If the name start with "." and the first segment is a Wild, only match
// when MatchHidden is true.
if len(name) > 0 && name[0] == '.' && IsWild(segs[0]) && !segs[0].(Wild).MatchHidden {
return false
}
segs:
for len(segs) > 0 {
// Find a chunk. A chunk is an optional Star followed by a run of
// fixed-length segments (Literal and Question).
var i int
for i = 1; i < len(segs); i++ {
if IsWild2(segs[i], Star, StarStar) {
break
}
}
chunk := segs[:i]
startsWithStar := IsWild2(chunk[0], Star, StarStar)
var startingStar Wild
if startsWithStar {
startingStar = chunk[0].(Wild)
chunk = chunk[1:]
}
segs = segs[i:]
// TODO: Implement a quick path when len(segs) == 0 by matching
// backwards.
// Match at the current position. If this is the last chunk, we need to
// make sure name is exhausted by the matching.
ok, rest := matchFixedLength(chunk, name)
if ok && (rest == "" || len(segs) > 0) {
name = rest
continue
}
if startsWithStar {
// TODO: Optimize by stopping at len(name) - LB(# bytes segs can
// match) rather than len(names)
for i := 0; i < len(name); {
r, rsize := utf8.DecodeRuneInString(name[i:])
j := i + rsize
// Match name[:j] with the starting *, and the rest with chunk.
if !startingStar.Match(r) {
break
}
ok, rest := matchFixedLength(chunk, name[j:])
if ok && (rest == "" || len(segs) > 0) {
name = rest
continue segs
}
i = j
}
}
return false
}
return name == ""
}
// matchFixedLength returns whether a run of fixed-length segments (Literal and
// Question) matches a prefix of name. It returns whether the match is
// successful and if if it is, the remaining part of name.
func matchFixedLength(segs []Segment, name string) (bool, string) {
for _, seg := range segs {
if name == "" {
return false, ""
}
switch seg := seg.(type) {
case Literal:
n := len(seg.Data)
if len(name) < n || name[:n] != seg.Data {
return false, ""
}
name = name[n:]
case Wild:
if seg.Type == Question {
r, n := utf8.DecodeRuneInString(name)
if !seg.Match(r) {
return false, ""
}
name = name[n:]
} else {
panic("matchFixedLength given non-question wild segment")
}
default:
panic("matchFixedLength given non-literal non-wild segment")
}
}
return true, name
}