pkg/md: Fix more bugs discovered by fuzzing.

- When parsing starting markers, consider the rest of the line to be in a new paragraph after one marker is parsed.
- FmtCodec: Escape info string in fenced code blocks.
- FmtCodec: Use "_" as emphasis marker also after end of emphasis or strong emphasis.
2024-12-13 01:47:51 +08:00 · 2022-11-02 13:13:33 +00:00 · 2022-11-02 13:13:33 +00:00 · 1320f16b5f
commit 1320f16b5f
parent d1f09b47de
6 changed files with 95 additions and 23 deletions
--- a/pkg/md/fmt.go
+++ b/pkg/md/fmt.go
@ -192,9 +192,9 @@ func codeFences(info string, lines []string) (string, string) {
 	}
 	fence := strings.Repeat(string(fenceRune), l)
 	if fenceRune == '~' && strings.HasPrefix(info, "~") {
-		return fence + " " + info, fence
+		return fence + " " + escapeText(info), fence
 	}
-	return fence + info, fence
+	return fence + escapeText(info), fence
 }

 func identicalBulletMarkers(containers []*fmtContainer) bool {
@ -330,8 +330,15 @@ func (c *FmtCodec) doInlineContent(ops []InlineOp, atxHeading bool) {
 			c.write(delim)
 		case OpEmphasisStart:
 			marker := '*'
-			if i > 0 && ops[i-1].Type == OpEmphasisStart {
-				marker = pickPunct('*', '_', c.emphasisMarkers.peek())
+			if len(c.pieces) > 0 {
+				// Use "_" instead if this follows immediately after another
+				// OpEmphasisStart/End or OpStrongEmphasisStart/End that already
+				// uses "*". In all cases the marker is written as a standalone
+				// piece.
+				last := c.pieces[len(c.pieces)-1]
+				if last == "*" || last == "**" {
+					marker = '_'
+				}
 			}
 			c.emphasisMarkers.push(marker)
 			c.write(string(marker))
--- a/pkg/md/md.go
+++ b/pkg/md/md.go
@ -575,6 +575,11 @@ func (t *blockTree) parseStartingMarkers(line string, newParagraph bool) (string

 		line = line[len(marker):]
 		containers = append(containers, c)
+		// After parsing at least one starting marker, the rest of the line is
+		// in a new paragraph. This means that a bullet list marker can be
+		// terminated by end of line or tab (instead of space), and ordered list
+		// markers with a number other than 1 are allowed.
+		newParagraph = true
 	}
 	return line, containers
 }
--- a/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/722391ce85af71be128f8d7aa7b3620e9d4810458b8148c119256a93647fea3e
+++ b/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/722391ce85af71be128f8d7aa7b3620e9d4810458b8148c119256a93647fea3e
@ -0,0 +1,2 @@
+go test fuzz v1
+string("00000000000000000000000000000000 _0000_*0*")
--- a/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/85278dc7594f426d6b7b4412586b262b25229d360f6c5fd07cc57839c048abee
+++ b/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/85278dc7594f426d6b7b4412586b262b25229d360f6c5fd07cc57839c048abee
@ -0,0 +1,2 @@
+go test fuzz v1
+string("0\n* 0) 00")
--- a/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/d66d86f848073d5c90685009906e1fb0840748fd8d3a452ee8cb3e6d3f1b29e9
+++ b/pkg/md/testdata/fuzz/FuzzFmtPreservesHTMLRender/d66d86f848073d5c90685009906e1fb0840748fd8d3a452ee8cb3e6d3f1b29e9
@ -0,0 +1,2 @@
+go test fuzz v1
+string("~~~\\\\!")
--- a/pkg/md/testutils_test.go
+++ b/pkg/md/testutils_test.go
@ -21,7 +21,7 @@ type testCase struct {

 func (tc *testCase) testName() string {
 	if tc.Name != "" {
-		return tc.Name
+		return fmt.Sprintf("%s/%s", tc.Section, tc.Name)
 	}
 	return fmt.Sprintf("%s/Example %d", tc.Section, tc.Example)
 }
@ -66,7 +66,8 @@ var htmlTestCases = concat(specTestCases, supplementalHTMLTestCases)
 // https://github.com/commonmark/commonmark.js) is the most convenient.
 var supplementalHTMLTestCases = []testCase{
 	{
-		Name:     "Fenced code blocks supplemental/Empty line in list item",
+		Section:  "Fenced code blocks supplemental",
+		Name:     "Empty line in list item",
 		Markdown: "- ```\n  a\n\n  ```\n",
 		HTML: dedent(`
 			<ul>
@ -79,7 +80,8 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name: "HTML blocks supplemental/Closed by lack of blockquote marker",
+		Section: "HTML blocks supplemental",
+		Name:    "Closed by lack of blockquote marker",
 		Markdown: dedent(`
 			> <pre>

@ -93,7 +95,8 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name: "HTML blocks supplemental/Closed by insufficient list item indentation",
+		Section: "HTML blocks supplemental",
+		Name:    "Closed by insufficient list item indentation",
 		Markdown: dedent(`
 			- <pre>
 			 a
@ -108,7 +111,8 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name: "Blockquotes supplemental/Increasing level",
+		Section: "Blockquotes supplemental",
+		Name:    "Increasing level",
 		Markdown: dedent(`
 			> a
 			>> b
@ -123,7 +127,8 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name: "Blockquotes supplemental/Reducing level",
+		Section: "Blockquotes supplemental",
+		Name:    "Reducing level",
 		Markdown: dedent(`
 			>> a
 			>
@ -139,7 +144,8 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name: "List items supplemental/Two leading empty lines with spaces",
+		Section: "List items supplemental",
+		Name:    "Two leading empty lines with spaces",
 		Markdown: dedent(`
 			- 
 			  
@ -153,37 +159,80 @@ var supplementalHTMLTestCases = []testCase{
 			`),
 	},
 	{
-		Name:     "Emphasis and strong emphasis supplemental/Star after letter before punctuation does not start emphasis",
+		Section: "List supplemental",
+		Name:    "Two-level bullet list with no content interrupting paragraph",
+		Markdown: dedent(`
+			a
+			- -
+			`),
+		HTML: dedent(`
+			<p>a</p>
+			<ul>
+			<li>
+			<ul>
+			<li></li>
+			</ul>
+			</li>
+			</ul>
+			`),
+	},
+	{
+		Section: "List supplemental",
+		Name:    "Ordered list with non-1 start in bullet list interrupting paragraph",
+		Markdown: dedent(`
+			a
+			- 2.
+			`),
+		HTML: dedent(`
+			<p>a</p>
+			<ul>
+			<li>
+			<ol start="2">
+			<li></li>
+			</ol>
+			</li>
+			</ul>
+			`),
+	},
+	{
+		Section:  "Emphasis and strong emphasis supplemental",
+		Name:     "Star after letter before punctuation does not start emphasis",
 		Markdown: `a*$*`,
 		HTML:     `<p>a*$*</p>` + "\n",
 	},
 	{
-		Name:     "Links supplemental/Backslash and entity in destination",
+		Section:  "Links supplemental",
+		Name:     "Backslash and entity in destination",
 		Markdown: `[a](\&gt;)`,
 		HTML:     `<p><a href="&amp;gt;">a</a></p>` + "\n",
 	},
 	{
-		Name:     "Links supplemental/Backslash and entity in title",
+		Section:  "Links supplemental",
+		Name:     "Backslash and entity in title",
 		Markdown: `[a](b (\&gt;))`,
 		HTML:     `<p><a href="b" title="&amp;gt;">a</a></p>` + "\n",
 	},
 	{
-		Name:     "Links supplemental/Unmatched ( in destination, with title",
+		Section:  "Links supplemental",
+		Name:     "Unmatched ( in destination, with title",
 		Markdown: `[a](http://( "b")`,
 		HTML:     "<p>[a](http://( &quot;b&quot;)</p>\n",
 	},
 	{
-		Name:     "Links supplemental/Unescaped ( in title started with (",
+		Section:  "Links supplemental",
+		Name:     "Unescaped ( in title started with (",
 		Markdown: `[a](b (()))`,
 		HTML:     "<p>[a](b (()))</p>\n",
 	},
 	{
-		Name:     "Links supplemental/Literal & in destination",
+		Section:  "Links supplemental",
+		Name:     "Literal & in destination",
 		Markdown: `[a](http://b?c&d)`,
 		HTML:     `<p><a href="http://b?c&amp;d">a</a></p>` + "\n",
 	},
 	{
-		Name: "Image supplemental/Omit hard line break tag in alt",
+		Section: "Image supplemental",
+		Name:    "Omit hard line break tag in alt",
 		Markdown: dedent(`
 			![a\
 			b](c.png)
@ -200,29 +249,34 @@ var supplementalHTMLTestCases = []testCase{
 	// CommonMark.js is inconsistent here and does not escape the < and > in the
 	// alt attribute: https://github.com/commonmark/commonmark.js/issues/264
 	{
-		Name:     "Image supplemental/Keep raw HTML in alt",
+		Section:  "Image supplemental",
+		Name:     "Keep raw HTML in alt",
 		Markdown: "![a <a></a>](b.png)",
 		HTML:     `<p><img src="b.png" alt="a &lt;a&gt;&lt;/a&gt;" /></p>` + "\n",
 	},
 	// CommonMark.js has a bug and will not generate the expected output:
 	// https://github.com/commonmark/commonmark.js/issues/263
 	{
-		Name:     "Autolinks supplemental/Entity",
+		Section:  "Autolinks supplemental",
+		Name:     "Entity",
 		Markdown: `<http://&gt;>`,
 		HTML:     `<p><a href="http://%3E">http://&gt;</a></p>` + "\n",
 	},
 	{
-		Name:     "Raw HTML supplemental/unclosed <",
+		Section:  "Raw HTML supplemental",
+		Name:     "unclosed <",
 		Markdown: `a<`,
 		HTML:     "<p>a&lt;</p>\n",
 	},
 	{
-		Name:     "Raw HTML supplemental/unclosed <!--",
+		Section:  "Raw HTML supplemental",
+		Name:     "unclosed <!--",
 		Markdown: `a<!--`,
 		HTML:     "<p>a&lt;!--</p>\n",
 	},
 	{
-		Name:     "Soft line breaks supplemental/trailing spaces in last line",
+		Section:  "Soft line breaks supplemental",
+		Name:     "trailing spaces in last line",
 		Markdown: "a  \n",
 		HTML:     "<p>a</p>\n",
 	},