pkg/md: Parse multiple paragraphs.

This commit is contained in:
Qi Xiao 2022-10-01 21:57:56 +01:00
parent 9f6a55ce30
commit befab9b5f0
2 changed files with 81 additions and 17 deletions

View File

@ -40,25 +40,81 @@ type TagPair struct {
// Render parses markdown and renders it according to the output syntax.
func Render(text string, syntax OutputSyntax) string {
var sb strings.Builder
lines := lineSplitter{text, 0}
root := blockNode{}
for lines.more() {
line := lines.next()
root.contentBuilder.WriteString(line)
p := blockParser{
lines: lineSplitter{text, 0},
syntax: syntax,
blocks: []block{{typ: documentBlock}},
}
content := strings.Trim(strings.TrimSuffix(root.contentBuilder.String(), "\n"), " \t")
sb.WriteString(syntax.Paragraph.Start)
sb.WriteString(renderInline(content, syntax))
sb.WriteString(syntax.Paragraph.End)
sb.WriteByte('\n')
return sb.String()
p.render()
return p.sb.String()
}
type blockNode struct {
contentBuilder strings.Builder
type blockParser struct {
lines lineSplitter
syntax OutputSyntax
blocks []block
sb strings.Builder
}
// render consumes every line from p.lines, opening and closing blocks as it
// goes and writing the rendered output to p.sb.
//
// A blank line ends the paragraph that is currently open, if any. Any other
// line either starts a new paragraph (when the innermost block is the
// document) or is appended to the open paragraph. Once the input is
// exhausted, every block still on the stack is popped so that a paragraph not
// followed by a blank line is still flushed.
func (p *blockParser) render() {
	for p.lines.more() {
		line := p.lines.next()
		leaf := p.leaf()
		// A line counts as blank when it holds nothing but spaces, tabs and
		// the line terminator. Comparing against "\n" alone would treat a
		// whitespace-only line as paragraph text, and would open an empty
		// paragraph for a trailing whitespace-only line without a newline.
		blank := strings.Trim(line, " \t\n") == ""
		switch {
		case blank && leaf.typ == paragraphBlock:
			p.pop()
		case blank:
			// Blank lines directly inside the document produce no output.
		case leaf.typ == documentBlock:
			p.push(paragraphBlock).text.WriteString(line)
		case leaf.typ == paragraphBlock:
			leaf.text.WriteString(line)
		}
	}
	for len(p.blocks) > 0 {
		p.pop()
	}
}
// push opens a block of the given type on top of the stack and returns a
// pointer to it. Opening a paragraph first writes the paragraph start marker
// of the output syntax; other block types write nothing.
func (p *blockParser) push(typ blockType) *block {
	if typ == paragraphBlock {
		p.sb.WriteString(p.syntax.Paragraph.Start)
	}
	p.blocks = append(p.blocks, block{typ: typ})
	return &p.blocks[len(p.blocks)-1]
}
// leaf returns a pointer to the innermost open block, i.e. the last element
// of the block stack. The stack must not be empty.
func (p *blockParser) leaf() *block {
	last := len(p.blocks) - 1
	return &p.blocks[last]
}
// pop closes the innermost open block and removes it from the stack.
//
// Closing a paragraph emits its content: one trailing newline is dropped from
// the collected text, surrounding spaces and tabs are trimmed, the result is
// rendered as inline markup, and the paragraph end marker plus a newline
// follow. Other block types emit nothing when closed.
func (p *blockParser) pop() {
	last := len(p.blocks) - 1
	if closing := &p.blocks[last]; closing.typ == paragraphBlock {
		raw := strings.TrimSuffix(closing.text.String(), "\n")
		inline := renderInline(strings.Trim(raw, " \t"), p.syntax)
		p.sb.WriteString(inline)
		p.sb.WriteString(p.syntax.Paragraph.End)
		p.sb.WriteByte('\n')
	}
	p.blocks = p.blocks[:last]
}
// block is one entry on the parser's stack of open blocks.
type block struct {
// typ says what kind of block this is.
typ blockType
// text collects the raw source lines of a paragraph until the paragraph is
// closed; it is not written to for the document block.
text strings.Builder
}
// blockType identifies the kind of a block on the parser's stack.
type blockType uint
// Kinds of block known to the parser.
const (
// documentBlock is the root block; Render seeds the stack with it.
documentBlock blockType = iota
// paragraphBlock is a paragraph, opened by the first non-blank line seen
// while the document block is innermost.
paragraphBlock
)
// Splits a string into lines, preserving the trailing newlines.
type lineSplitter struct {
text string

View File

@ -4,6 +4,7 @@ import (
_ "embed"
"encoding/json"
"fmt"
"regexp"
"strings"
"testing"
@ -58,6 +59,11 @@ var htmlSyntax = OutputSyntax{
Escape: escapeHTML,
}
// Patterns TestRender uses to recognise spec examples that rely on
// constructs it skips as unsupported.
var (
// linkRef matches, at the start of the text or right after a newline, a
// bracketed label followed by a colon. The label is one or more characters,
// each either a character other than a backslash or square bracket, or a
// backslash-escaped backslash or square bracket.
linkRef = regexp.MustCompile(`(^|\n)\[([^\\\[\]]|\\[\\\[\]])+\]:`)
// listItem matches "* " at the start of the text or right after a newline.
listItem = regexp.MustCompile(`(^|\n)\* `)
)
func TestRender(t *testing.T) {
for _, tc := range spec {
t.Run(fmt.Sprintf("%s/%d", tc.Section, tc.Example), func(t *testing.T) {
@ -70,8 +76,11 @@ func TestRender(t *testing.T) {
if strings.HasPrefix(tc.Markdown, "```") || strings.HasPrefix(tc.Markdown, "~~~") || strings.HasPrefix(tc.Markdown, " ") {
t.Skipf("Code block not supported")
}
if strings.Contains(tc.Markdown, "\n\n") {
t.Skipf("Multiple blocks not supported")
if linkRef.MatchString(tc.Markdown) {
t.Skipf("Link reference not supported")
}
if listItem.MatchString(tc.Markdown) {
t.Skipf("List item not supported")
}
if strings.HasPrefix(tc.Markdown, "<a ") {
t.Skipf("HTML block not supported")
@ -96,7 +105,6 @@ func supportedSection(section string) bool {
"Fenced code blocks",
"HTML blocks",
"Link reference definitions",
"Paragraphs",
"Blank lines",
"Block quotes",
"List items",