From 383a7f2696ca969f69216645dd08eb864ecf218f Mon Sep 17 00:00:00 2001 From: Qi Xiao Date: Mon, 26 Sep 2022 01:07:51 +0100 Subject: [PATCH] pkg/md: Support HTML entities. --- pkg/md/md.go | 14 ++++++++++++-- pkg/md/md_test.go | 30 +++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pkg/md/md.go b/pkg/md/md.go index 2781eeb6..41ebf5a6 100644 --- a/pkg/md/md.go +++ b/pkg/md/md.go @@ -15,6 +15,7 @@ package md import ( "fmt" + "html" "regexp" "strings" "unicode" @@ -130,6 +131,7 @@ var isASCIIPunct = map[byte]bool{ } var ( + entityRegexp = regexp.MustCompile(`^&(?:[a-zA-Z0-9]+|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});`) openTagRegexp = regexp.MustCompile(fmt.Sprintf(`^<`+ `[a-zA-Z][a-zA-Z0-9-]*`+ // tag name (`(?:`+ @@ -348,6 +350,14 @@ func (p *inlineParser) render() { } } parseText() + case '&': + entity := entityRegexp.FindString(p.text[begin:]) + if entity != "" { + p.buf.push(piece{text: p.syntax.escape(html.UnescapeString(entity))}) + p.pos = begin + len(entity) + } else { + parseText() + } case '\\': if p.pos < len(p.text) && isASCIIPunct[p.text[p.pos]] { begin++ @@ -552,7 +562,7 @@ func (p *linkTailParser) parse() (n int, dest, title string) { if p.pos == len(p.text) || p.text[p.pos] != ')' { return -1, "", "" } - return p.pos + 1, destBuilder.String(), titleBuilder.String() + return p.pos + 1, html.UnescapeString(destBuilder.String()), html.UnescapeString(titleBuilder.String()) } func (p *linkTailParser) skipWhitespaces() { @@ -590,7 +600,7 @@ func isASCIIControl(b byte) bool { func isMeta(b byte) bool { switch b { - case '[', ']', '*', '_', '`', '\\', '!', '<', '\n': + case '!', '[', ']', '*', '_', '`', '\\', '&', '<', '\n': return true default: return false diff --git a/pkg/md/md_test.go b/pkg/md/md_test.go index c16a2eb5..f62beb7c 100644 --- a/pkg/md/md_test.go +++ b/pkg/md/md_test.go @@ -26,11 +26,12 @@ func init() { var ( escapeHTML = strings.NewReplacer( - `"`, "&quot;", "<", "&lt;", ">", "&gt;").Replace + "&", "&amp;", `"`, "&quot;",
"<", "&lt;", ">", "&gt;").Replace escapeDest = strings.NewReplacer( `"`, "%22", `\`, "%5C", " ", "%20", "`", "%60", "[", "%5B", "]", "%5D", - "ä", "%C3%A4", " ", "%C2%A0").Replace + "ö", "%C3%B6", + "ä", "%C3%A4", " ", "%C2%A0").Replace ) var htmlSyntax = outSyntax{ @@ -67,17 +68,18 @@ func TestConvertInline(t *testing.T) { if strings.HasPrefix(tc.Markdown, "#") { t.Skipf("Header not supported") } + if strings.HasPrefix(tc.Markdown, "```") || strings.HasPrefix(tc.Markdown, " ") { + t.Skipf("Code block not supported") + } if strings.Contains(tc.Markdown, "\n\n") { t.Skipf("Multiple blocks not supported") } - if strings.Contains(tc.HTML, "&") { - t.Skipf("Ampersand escape not implemented correctly yet") - } - want := strings.TrimSuffix(strings.TrimPrefix(tc.HTML, "<p>"), "</p>\n") + "\n" - got := renderInline(tc.Markdown, htmlSyntax) + want := strings.TrimSuffix(strings.TrimPrefix( + strings.TrimRight(tc.HTML, "\n"), "<p>"), "</p>") + got := strings.TrimRight(renderInline(tc.Markdown, htmlSyntax), "\n") if want != got { - t.Errorf("input:\n%swant:\n%sgot:\n%s", tc.Markdown, want, got) + t.Errorf("input:\n%swant:\n%s\ngot:\n%s", tc.Markdown, want, got) } }) } @@ -85,7 +87,17 @@ func TestConvertInline(t *testing.T) { func supportedSection(section string) bool { switch section { - case "Inlines", "Code spans", "Emphasis and strong emphasis", "Links", "Autolinks", "Images", "Raw HTML", "Hard line breaks", "Soft line breaks", "Textual content": + case "Entity and numeric character references", + "Inlines", + "Code spans", + "Emphasis and strong emphasis", + "Links", + "Autolinks", + "Images", + "Raw HTML", + "Hard line breaks", + "Soft line breaks", + "Textual content": return true default: return false