pkg/md: Add TTYCodec.

This commit is contained in:
Qi Xiao 2022-11-12 22:15:03 +00:00
parent d811c8dd37
commit 0af790d133
7 changed files with 649 additions and 51 deletions

View File

@ -64,7 +64,7 @@ type FmtCodec struct {
// last Op was OpBulletListEnd or OpOrderedListEnd. Used to alternate list
// punctuation when a list follows directly after another of the same type.
poppedListPunct rune
// Whether a new stanza was already started.
// Value of op.Type of the last Do call.
lastOpType OpType
}
@ -103,16 +103,8 @@ func (c *FmtCodec) Do(op Op) {
c.poppedListPunct = poppedListPunct
}()
switch op.Type {
case OpThematicBreak, OpHeading, OpCodeBlock, OpHTMLBlock, OpParagraph,
OpBlockquoteStart, OpBulletListStart, OpOrderedListStart:
if c.sb.Len() > 0 && c.lastOpType != OpBlockquoteStart && c.lastOpType != OpListItemStart {
c.writeLine("")
}
case OpListItemStart:
if c.sb.Len() > 0 && c.lastOpType != OpBulletListStart && c.lastOpType != OpOrderedListStart {
c.writeLine("")
}
if c.sb.Len() > 0 && needNewStanza(op.Type, c.lastOpType) {
c.writeLine("")
}
defer func() {
c.lastOpType = op.Type
@ -130,7 +122,7 @@ func (c *FmtCodec) Do(op Op) {
case OpHeading:
c.startLine()
c.write(strings.Repeat("#", op.Number) + " ")
c.doInlineContent(op.Content, true)
c.writeSegmentsATXHeading(c.buildSegments(op.Content))
if op.Info != "" {
c.write(" {" + op.Info + "}")
}
@ -169,7 +161,12 @@ func (c *FmtCodec) Do(op Op) {
}
case OpParagraph:
c.startLine()
c.doInlineContent(op.Content, false)
segs := c.buildSegments(op.Content)
if c.Width > 0 {
c.writeSegmentsParagraphReflow(segs, c.Width)
} else {
c.writeSegmentsParagraph(segs)
}
c.finishLine()
case OpBlockquoteStart:
c.containerStart = c.sb.Len()
@ -232,6 +229,21 @@ func (c *FmtCodec) Do(op Op) {
}
}
// needNewStanza reports whether an op of type cur, arriving right after an op
// of type last, should be preceded by a blank line.
func needNewStanza(cur, last OpType) bool {
	switch cur {
	case OpListItemStart:
		// Only list items after the first one need a separator: the first
		// item is already covered by the OpBulletListStart or
		// OpOrderedListStart that precedes it.
		if last == OpBulletListStart || last == OpOrderedListStart {
			return false
		}
		return true
	case OpThematicBreak, OpHeading, OpCodeBlock, OpHTMLBlock, OpParagraph,
		OpBlockquoteStart, OpBulletListStart, OpOrderedListStart:
		// A new block needs a separator unless it is the first thing inside
		// an outer block that has just been opened.
		if last == OpBlockquoteStart || last == OpListItemStart {
			return false
		}
		return true
	default:
		return false
	}
}
func codeFences(info string, lines []string) (string, string) {
var fenceRune rune
var runLens map[int]bool
@ -290,17 +302,6 @@ const (
segLinkOrImageEnd
)
func (c *FmtCodec) doInlineContent(ops []InlineOp, atxHeading bool) {
segs := c.buildSegments(ops)
if atxHeading {
c.writeSegmentsATXHeading(segs)
} else if c.Width > 0 {
c.writeSegmentsParagraphReflow(segs, c.Width)
} else {
c.writeSegmentsParagraph(segs)
}
}
func (c *FmtCodec) buildSegments(ops []InlineOp) []segment {
var segs []segment
write := func(s string) {
@ -953,37 +954,38 @@ func escapeAmpersandBackslash(s, set string) string {
return sb.String()
}
func (c *FmtCodec) startLine() {
for _, container := range c.containers {
c.write(container.useMarker())
// startLine writes the markers of all active containers to the output.
func (c *FmtCodec) startLine() { startLine(c, c.containers) }
// writeLine writes s as a full line, delegating to the shared writeLine
// helper with the codec's active containers.
func (c *FmtCodec) writeLine(s string) { writeLine(c, c.containers, s) }
// finishLine terminates the current line by writing a newline.
func (c *FmtCodec) finishLine() { c.write("\n") }
// write appends s to the codec's string builder. It makes *FmtCodec satisfy
// the writer interface.
func (c *FmtCodec) write(s string) { c.sb.WriteString(s) }
// writer is the output sink accepted by the package-level startLine and
// writeLine helpers, so that they can be shared by codecs that store their
// output differently.
type writer interface{ write(string) }
// startLine begins a line by writing to w the marker of every container in
// containers, in stack order. Each marker is obtained with useMarker.
func startLine(w writer, containers stack[*fmtContainer]) {
	for i := range containers {
		marker := containers[i].useMarker()
		w.write(marker)
	}
}
func (c *FmtCodec) finishLine() {
c.write("\n")
}
func (c *FmtCodec) writeLine(s string) {
func writeLine(w writer, containers stack[*fmtContainer], s string) {
if s == "" {
// When writing a blank line, trim trailing spaces from the markers.
//
// This duplicates startLine, but merges the markers for ease of
// trimming.
var markers strings.Builder
for _, container := range c.containers {
for _, container := range containers {
markers.WriteString(container.useMarker())
}
c.write(strings.TrimRight(markers.String(), " "))
c.finishLine()
w.write(strings.TrimRight(markers.String(), " "))
w.write("\n")
return
}
c.startLine()
c.write(s)
c.finishLine()
startLine(w, containers)
w.write(s)
w.write("\n")
}
func (c *FmtCodec) write(s string) { c.sb.WriteString(s) }
type fmtContainer struct {
typ fmtContainerType
punct rune // punctuation used to build the marker
@ -1002,7 +1004,7 @@ const (
func (ct *fmtContainer) useMarker() string {
m := ct.marker
if ct.typ != fmtBlockquote {
ct.marker = strings.Repeat(" ", len(m))
ct.marker = strings.Repeat(" ", wcwidth.Of(m))
}
return m
}

View File

@ -69,14 +69,15 @@ type inlineParser struct {
buf buffer
}
const emailLocalPuncts = ".!#$%&'*+/=?^_`{|}~-"
const (
scheme = `[a-zA-Z][a-zA-Z0-9+.-]{1,31}`
emailLocalPuncts = ".!#$%&'*+/=?^_`{|}~-"
)
var (
// https://spec.commonmark.org/0.30/#uri-autolink
uriAutolinkRegexp = regexp.MustCompile(`^<` +
`[a-zA-Z][a-zA-Z0-9+.-]{1,31}` + // scheme
`:[^\x00-\x19 <>]*` +
`>`)
uriAutolinkRegexp = regexp.MustCompile(
`^<` + scheme + `:[^\x00-\x19 <>]*` + `>`)
// https://spec.commonmark.org/0.30/#email-autolink
emailAutolinkRegexp = regexp.MustCompile(
`^<[a-zA-Z0-9` + emailLocalPuncts + `]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>`)

View File

@ -8,7 +8,9 @@
// - [FmtCodec] formats Markdown. This is used in [src.elv.sh/cmd/elvmdfmt],
// used for formatting Markdown files in the Elvish repo.
//
// Another Codec for rendering Markdown in the terminal will be added in future.
// - [TTYCodec] renders Markdown in the terminal. This will be used in a help
// system that can be used directly from Elvish to render documentation of
// Elvish modules.
//
// # Why another Markdown implementation?
//

View File

@ -303,9 +303,9 @@ func concat[T any](a, b []T) []T {
return c
}
var (
hr = strings.Repeat("═", 40)
)
var hr = strings.Repeat("═", 40)
// hrFence puts s between two hr lines, with a newline before the opening hr
// and no newline after the closing one.
func hrFence(s string) string {
	return strings.Join([]string{"", hr, s, hr}, "\n")
}
func dedent(text string) string {
lines := strings.Split(strings.TrimPrefix(text, "\n"), "\n")

282
pkg/md/tty.go Normal file
View File

@ -0,0 +1,282 @@
package md
import (
"fmt"
"regexp"
"strings"
"src.elv.sh/pkg/ui"
"src.elv.sh/pkg/wcwidth"
)
// TTYCodec renders Markdown in a terminal.
//
// The rendered text uses the following style:
//
//   - Adjacent blocks are always separated with one blank line.
//
//   - Thematic breaks are rendered as "────" (four U+2500 "box drawing light
//     horizontal").
//
//   - Headings are rendered like "# Heading" in bold, with the same number of
//     hashes as in Markdown.
//
//   - Code blocks are indented two spaces. Syntax highlighting is not
//     implemented yet.
//
//   - HTML blocks are ignored.
//
//   - Paragraphs are reflowed to fit the Width field when it is non-zero. When
//     Width is 0, the line breaks of the Markdown source are kept.
//
//   - Blockquotes start with "│ " (U+2502 "box drawing light vertical", then a
//     space) on each line.
//
//   - Bullet list items start with "• " (U+2022 "bullet", then a space) on the
//     first line. Continuation lines are indented two spaces.
//
//   - Ordered list items start with "X. " (where X is a number) on the first
//     line. Continuation lines are indented by the width of that marker
//     (three spaces for a single-digit number).
//
//   - Code spans are underlined.
//
//   - Emphasis makes the text italic. (Some terminal emulators turn italic text
//     into inverse text, which is not ideal but fine.)
//
//   - Strong emphasis makes the text bold.
//
//   - Links are rendered with their text content underlined. If the link is
//     absolute (starts with scheme:), the destination is rendered like
//     " (https://example.com)" after the text content.
//
//     Relative links like "language.html#exactness" assume HTML output, so
//     the destination is not useful when reading in a terminal.
//
//     The link description is ignored for now since Elvish's Markdown sources
//     never use them.
//
//   - Images are rendered like "Image: alt text (https://example.com/a.png)".
//
//   - Autolinks have their text content rendered.
//
//   - Raw HTML is mostly ignored, except that text between <kbd> and </kbd>
//     becomes inverse video.
//
//   - Hard line breaks are respected in paragraphs and ignored in headings.
//
// The structure of the implementation closely mirrors [FmtCodec] in a lot of
// places, without the complexity of handling all edge cases correctly, but with
// the slight complexity of handling styles.
type TTYCodec struct {
	// Width is the width that paragraphs are reflowed to. If it is 0,
	// paragraphs are not reflowed.
	Width int
	// Rendering result accumulated so far.
	buf ui.Text
	// Current active container blocks. The punct field is not used; the
	// TTYCodec uses fixed punctuations for each type.
	containers stack[*fmtContainer]
	// Value of op.Type of the last Do call.
	lastOpType OpType
}
// Text returns what has been rendered so far as a [ui.Text].
func (c *TTYCodec) Text() ui.Text {
	return c.buf
}
// String returns what has been rendered so far, converted to a string with
// ANSI escape sequences.
func (c *TTYCodec) String() string {
	rendered := c.buf
	return rendered.String()
}
// Do processes an Op, appending its rendering to the result.
//
// Blocks that need to be separated from the preceding output (as decided by
// needNewStanza) are preceded by a blank line. Container ops (blockquotes and
// lists) only update the container stack; their markers are written when the
// lines of the blocks they contain are started.
func (c *TTYCodec) Do(op Op) {
	if op.Type == OpHTMLBlock {
		// HTML blocks are not rendered at all. Return before the op type is
		// recorded, so that an ignored block has no influence on whether the
		// next block starts a new stanza. Otherwise a block following an HTML
		// block at the very start of a blockquote or list item would get a
		// spurious blank line, which in a list item also uses up the item
		// marker.
		return
	}
	defer func() {
		c.lastOpType = op.Type
	}()
	if len(c.buf) > 0 && needNewStanza(op.Type, c.lastOpType) {
		c.writeLine("")
	}
	switch op.Type {
	case OpThematicBreak:
		c.writeLine("────")
	case OpHeading:
		c.startLine()
		c.writeStyled(ui.T(strings.Repeat("#", op.Number)+" ", ui.Bold))
		c.doInlineContent(op.Content, true)
		c.finishLine()
	case OpCodeBlock:
		// TODO: Highlight
		for _, line := range op.Lines {
			// Code blocks are indented two spaces.
			c.writeLine("  " + line)
		}
	case OpParagraph:
		c.startLine()
		c.doInlineContent(op.Content, false)
		c.finishLine()
	case OpBlockquoteStart:
		c.containers.push(&fmtContainer{typ: fmtBlockquote, marker: "│ "})
	case OpBlockquoteEnd:
		c.containers.pop()
	case OpListItemStart:
		// Arm the marker of the enclosing list; it is written when the first
		// line of the item is started.
		if ct := c.containers.peek(); ct.typ == fmtBulletItem {
			ct.marker = "• "
		} else {
			ct.marker = fmt.Sprintf("%d. ", ct.number)
		}
	case OpListItemEnd:
		// Clear the marker and advance the number for the next item.
		ct := c.containers.peek()
		ct.marker = ""
		ct.number++
	case OpBulletListStart:
		c.containers.push(&fmtContainer{typ: fmtBulletItem})
	case OpBulletListEnd:
		c.containers.pop()
	case OpOrderedListStart:
		c.containers.push(&fmtContainer{typ: fmtOrderedItem, number: op.Number})
	case OpOrderedListEnd:
		c.containers.pop()
	}
}
// absoluteDest matches link destinations that begin with a URI scheme
// followed by a colon.
var absoluteDest = regexp.MustCompile("^" + scheme + ":")
// doInlineContent renders the inline content of a heading (when heading is
// true) or a paragraph into the output.
//
// The caller starts the line before calling this method and finishes it
// afterwards. Heading content is written in bold. Paragraph content is
// reflowed to c.Width when c.Width is non-zero; otherwise, and for headings,
// text is written out as it arrives.
func (c *TTYCodec) doInlineContent(ops []InlineOp, heading bool) {
	var stylings stack[ui.Styling]
	if heading {
		stylings.push(ui.Bold)
	}
	// Number of <kbd> tags that are currently open. Raw HTML tags are not
	// guaranteed to be balanced, so this is used to ignore a </kbd> that has
	// no matching <kbd>.
	kbdDepth := 0

	var (
		write         func(string)
		hardLineBreak func()
	)
	if heading || c.Width == 0 {
		write = func(s string) {
			c.writeStyled(ui.T(s, stylings...))
		}
		// When writing heading, ignore hard line break.
		//
		// When writing paragraph without reflowing, a hard line break will be
		// followed by an OpNewline, which will result in a line break.
		hardLineBreak = func() {}
	} else {
		// The width available to text is what remains after the markers of
		// all the containers.
		maxWidth := c.Width
		for _, ct := range c.containers {
			maxWidth -= wcwidth.Of(ct.marker)
		}
		// The reflowing algorithm below is very similar to
		// [FmtCodec.writeSegmentsParagraphReflow], except that the step to
		// build spans and the step to arrange spans on lines are combined, and
		// the span is a ui.Text rather than a strings.Builder.
		currentLineWidth := 0
		var currentSpan ui.Text
		var prefixSpace ui.Text
		// writeSpan puts a span on the current line if it fits after a
		// separating space, and on a new line otherwise.
		writeSpan := func(t ui.Text) {
			if len(t) == 0 {
				return
			}
			w := wcwidthOfText(t)
			if currentLineWidth == 0 {
				c.writeStyled(t)
				currentLineWidth = w
			} else if currentLineWidth+1+w <= maxWidth {
				c.writeStyled(prefixSpace)
				c.writeStyled(t)
				currentLineWidth += w + 1
			} else {
				c.finishLine()
				c.startLine()
				c.writeStyled(t)
				currentLineWidth = w
			}
		}
		write = func(s string) {
			parts := whitespaceRunRegexp.Split(s, -1)
			// The first part continues the span being built, which may
			// already contain text in other styles.
			currentSpan = append(currentSpan, ui.T(parts[0], stylings...)...)
			if len(parts) > 1 {
				// s contains whitespace: the span being built is complete,
				// the parts in the middle are complete spans on their own,
				// and the last part starts a new span.
				writeSpan(currentSpan)
				prefixSpace = ui.T(" ", stylings...)
				for _, s := range parts[1 : len(parts)-1] {
					writeSpan(ui.T(s, stylings...))
				}
				currentSpan = ui.T(parts[len(parts)-1], stylings...)
			}
		}
		hardLineBreak = func() {
			writeSpan(currentSpan)
			currentSpan = nil
			currentLineWidth = 0
			c.finishLine()
			c.startLine()
		}
		// Flush the last span after all the ops have been processed.
		defer func() {
			writeSpan(currentSpan)
		}()
	}
	// writeLinkDest writes the destination of a link or image, but only if it
	// is absolute.
	writeLinkDest := func(dest string) {
		if absoluteDest.MatchString(dest) {
			write(" (")
			write(dest)
			write(")")
		}
	}

	for _, op := range ops {
		switch op.Type {
		case OpText:
			write(op.Text)
		case OpRawHTML:
			switch op.Text {
			case "<kbd>":
				stylings.push(ui.Inverse)
				kbdDepth++
			case "</kbd>":
				// Only undo a styling pushed by a <kbd>. A stray </kbd> must
				// not remove an unrelated styling (such as the bold of a
				// heading) or pop an empty stack.
				if kbdDepth > 0 {
					stylings.pop()
					kbdDepth--
				}
			}
		case OpNewLine:
			if heading || c.Width > 0 {
				write(" ")
			} else {
				c.finishLine()
				c.startLine()
			}
		case OpCodeSpan:
			stylings.push(ui.Underlined)
			write(op.Text)
			stylings.pop()
		case OpEmphasisStart:
			stylings.push(ui.Italic)
		case OpEmphasisEnd:
			stylings.pop()
		case OpStrongEmphasisStart:
			stylings.push(ui.Bold)
		case OpStrongEmphasisEnd:
			stylings.pop()
		case OpLinkStart:
			stylings.push(ui.Underlined)
		case OpLinkEnd:
			stylings.pop()
			writeLinkDest(op.Dest)
		case OpImage:
			write("Image: ")
			write(op.Alt)
			writeLinkDest(op.Dest)
		case OpAutolink:
			write(op.Text)
		case OpHardLineBreak:
			hardLineBreak()
		}
	}
}
// wcwidthOfText returns the sum of the display widths, as computed by
// wcwidth.Of, of the text of all segments in t.
func wcwidthOfText(t ui.Text) int {
	total := 0
	for i := range t {
		total += wcwidth.Of(t[i].Text)
	}
	return total
}
// startLine writes the markers of all active containers.
func (c *TTYCodec) startLine() {
	startLine(c, c.containers)
}

// writeLine writes s as a full line using the shared writeLine helper.
func (c *TTYCodec) writeLine(s string) {
	writeLine(c, c.containers, s)
}

// finishLine ends the current line with a newline.
func (c *TTYCodec) finishLine() {
	c.write("\n")
}

// write appends s to the result as unstyled text. It makes *TTYCodec satisfy
// the writer interface.
func (c *TTYCodec) write(s string) {
	c.writeStyled(ui.T(s))
}

// writeStyled appends the segments of t to the result.
func (c *TTYCodec) writeStyled(t ui.Text) {
	c.buf = append(c.buf, t...)
}

311
pkg/md/tty_test.go Normal file
View File

@ -0,0 +1,311 @@
package md_test
import (
"reflect"
"testing"
. "src.elv.sh/pkg/md"
"src.elv.sh/pkg/ui"
)
// stylesheet maps the runes used in the style lines passed to markLines to
// the stylings they stand for.
var stylesheet = ui.RuneStylesheet{
	'/': ui.Italic,
	'#': ui.Bold,
	'^': ui.Inverse,
	'_': ui.Underlined,
}
// ttyTests is the table of cases run by TestTTYCodec. Each case gives a
// Markdown source and the text that TTYCodec is expected to render it to.
var ttyTests = []struct {
// Name of the subtest.
name string
// Markdown source to render.
markdown string
// Value for TTYCodec.Width; left as 0 in cases that don't exercise
// reflowing.
width int
// Expected rendering result.
ttyRender ui.Text
}{
// Blocks
{
name: "thematic break",
markdown: "---",
ttyRender: ui.T("────\n"),
},
{
name: "heading",
markdown: dedent(`
# h1
## h2
content
`),
ttyRender: markLines(
"# h1", stylesheet,
"####",
"",
"## h2", stylesheet,
"#####",
"",
"content",
),
},
{
name: "code block",
markdown: dedent(`
Run this:
~~~
echo foo
~~~
`),
ttyRender: ui.T(dedent(`
Run this:
echo foo
`)),
},
{
name: "HTML block",
markdown: dedent(`
foo
<!-- comment -->
bar
`),
ttyRender: ui.T(dedent(`
foo
bar
`)),
},
{
name: "blockquote",
markdown: dedent(`
Quote:
> foo
>> lorem
>
> bar
`),
ttyRender: ui.T(dedent(`
Quote:
foo
lorem
bar
`)),
},
{
name: "bullet list",
markdown: dedent(`
List:
- one
more
- two
more
`),
ttyRender: ui.T(dedent(`
List:
one
more
two
more
`)),
},
{
name: "ordered list",
markdown: dedent(`
List:
1. one
more
1. two
more
`),
ttyRender: ui.T(dedent(`
List:
1. one
more
2. two
more
`)),
},
{
name: "nested blocks",
markdown: dedent(`
> foo
> - item
> 1. one
> 1. another
> - another item
`),
ttyRender: ui.T(dedent(`
foo
item
1. one
2. another
another item
`)),
},
// Inline
{
name: "text",
markdown: "foo bar",
ttyRender: ui.T("foo bar\n"),
},
{
name: "inline kbd tag",
markdown: "Press <kbd>Enter</kbd>.",
ttyRender: markLines(
"Press Enter.", stylesheet,
" ^^^^^ "),
},
{
name: "code span",
markdown: "Use `put`.",
ttyRender: markLines(
"Use put.", stylesheet,
" ___ "),
},
{
name: "emphasis",
markdown: "Try *this*.",
ttyRender: markLines(
"Try this.", stylesheet,
" //// "),
},
{
name: "strong emphasis",
markdown: "Try **that**.",
ttyRender: markLines(
"Try that.", stylesheet,
" #### "),
},
{
name: "link with absolute destination",
markdown: "Visit [example](https://example.com).",
ttyRender: markLines(
"Visit example (https://example.com).", stylesheet,
" _______ "),
},
{
name: "link with relative destination",
markdown: "See [section X](#x) and [page Y](y.html).",
ttyRender: markLines(
"See section X and page Y.", stylesheet,
" _________ ______ "),
},
{
name: "image",
markdown: "![Example logo](https://example.com/logo.png)",
ttyRender: ui.T("Image: Example logo (https://example.com/logo.png)\n"),
},
{
name: "autolink",
markdown: "Visit <https://example.com>.",
ttyRender: ui.T("Visit https://example.com.\n"),
},
{
name: "hard line break",
markdown: dedent(`
foo\
bar
`),
ttyRender: ui.T("foo\nbar\n"),
},
// Reflow
{
name: "reflow text",
markdown: "foo bar lorem ipsum",
width: 8,
ttyRender: ui.T(dedent(`
foo bar
lorem
ipsum
`)),
},
{
name: "styled text on the same line when reflowing",
markdown: "*foo bar* lorem ipsum",
width: 8,
ttyRender: markLines(
"foo bar", stylesheet,
"///////",
"lorem",
"ipsum"),
},
{
name: "styled text broken up when reflowing",
markdown: "foo bar *lorem ipsum*",
width: 8,
ttyRender: markLines(
"foo bar",
"lorem", stylesheet,
"/////",
"ipsum", stylesheet,
"/////"),
},
{
name: "multiple lines merged when reflowing",
markdown: dedent(`
foo
bar
`),
width: 8,
ttyRender: ui.T("foo bar\n"),
},
{
name: "hard line break when reflowing",
markdown: dedent(`
foo\
bar
`),
width: 8,
ttyRender: ui.T(dedent(`
foo
bar
`)),
},
}
// TestTTYCodec renders the Markdown of every case in ttyTests with a TTYCodec
// of the case's width, and compares the normalized result with the expected
// text.
func TestTTYCodec(t *testing.T) {
	for _, test := range ttyTests {
		t.Run(test.name, func(t *testing.T) {
			codec := &TTYCodec{Width: test.width}
			Render(test.markdown, codec)

			want := test.ttyRender
			got := ui.NormalizeText(codec.Text())
			if reflect.DeepEqual(got, want) {
				return
			}
			t.Errorf("markdown: %s\ngot: %s\nwant:%s",
				hrFence(test.markdown),
				hrFence(got.VTString()), hrFence(want.VTString()))
		})
	}
}
// markLines is like ui.MarkLines, but appends a newline to every string
// argument that is not the style line following a ui.RuneStylesheet, and
// normalizes the result. The arguments are modified in place.
//
// TODO: Change ui.MarkLines to do this.
func markLines(args ...any) ui.Text {
	for i := 0; i < len(args); i++ {
		if line, isString := args[i].(string); isString {
			args[i] = line + "\n"
			continue
		}
		if _, isStylesheet := args[i].(ui.RuneStylesheet); isStylesheet {
			// The argument after a stylesheet is the style line; leave it
			// untouched by stepping over it.
			i++
		}
	}
	marked := ui.MarkLines(args...)
	return ui.NormalizeText(marked)
}

View File

@ -18,7 +18,7 @@ type RuneStylesheet map[rune]Styling
// This function is mainly useful for constructing multi-line Text's with
// alignment across those lines. An example:
//
// var stylesheet = map[rune]string{
// var stylesheet = RuneStylesheet{
// '-': Reverse,
// 'x': Stylings(Blue, BgGreen),
// }