def test_emphasis_384():
    """
    Test case 384:  (part 2) Underscores that sit inside a word do not
    produce emphasis (non-ASCII word characters).
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_пристаням_стремятся"
    expected_gfm = "<p>_пристаням_стремятся</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:_:]",
        "[text:пристаням:]",
        "[text:_:]",
        "[text:стремятся:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_145():
    """
    Test case 145:  (part 1) The end tag can occur on the same line as the
    start tag:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The paragraph token below is positioned at (2,1), so the emphasis text
    # has to sit on its own second line, after the one-line HTML block.
    source_markdown = "<style>p{color:red;}</style>\n*foo*"
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<style>p{color:red;}</style>:]",
        "[end-html-block]",
        "[para(2,1):]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_gfm = "<style>p{color:red;}</style>\n<p><em>foo</em></p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_147():
    """
    Test case 147:  Note that anything on the last line after the end tag
    will be included in the HTML block:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The expected text token carries "\n" between the three parts, so the
    # input and the rendered output must be three separate lines.
    source_markdown = "<script>\nfoo\n</script>1. *bar*"
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<script>\nfoo\n</script>1. *bar*:]",
        "[end-html-block]",
    ]
    expected_gfm = "<script>\nfoo\n</script>1. *bar*"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_456():
    """
    Test case 456:  (part 6) When delimiters do not match evenly, Rule 11
    puts the surplus literal * characters outside of the emphasis rather
    than inside it.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "*foo****"
    expected_gfm = "<p><em>foo</em>***</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[text:***:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_extra_005():
    """
    When link characters are encoded, the % character needs special care
    because the CommonMark parser treats "%<hex-char><hex-char>" as
    non-encodable.  This covers a % that is the last character of the link.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "[link](http://google.com/search%)"
    expected_gfm = '<p><a href="http://google.com/search%25">link</a></p>'
    expected_tokens = [
        "[para(1,1):]",
        "[link:inline:http://google.com/search%25::http://google.com/search%:::link]",
        "[text:link:]",
        "[end-link::]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
def test_emphasis_363():
    """
    Test case 363:  Unicode non-breaking spaces are whitespace as well, so
    the * characters around them stay literal.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "*\u00A0a\u00A0*"
    expected_gfm = "<p>*\u00A0a\u00A0*</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:*:]",
        "[text:\u00A0a\u00A0:]",
        "[text:*:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_361():
    """
    Test case 361:  No emphasis here: the opening * is followed by
    whitespace, so it is not part of a left-flanking delimiter run.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "a * foo bar*"
    expected_gfm = "<p>a * foo bar*</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:a :]",
        "[text:*:]",
        "[text: foo bar:]",
        "[text:*:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_344():
    """
    Test case 344:  No stripping occurs if the code span contains only
    spaces:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The paragraph token records one line break and a "\n" text token sits
    # between the spans, so each code span is on its own line.
    # NOTE(review): the second span holds two spaces, as in GFM example 344;
    # a one-space span would merely repeat the first case - confirm.
    source_markdown = "` `\n`  `"
    expected_tokens = [
        "[para(1,1):\n]",
        "[icode-span: :`::]",
        "[text:\n::\n]",
        "[icode-span:  :`::]",
        "[end-para]",
    ]
    expected_gfm = "<p><code> </code>\n<code>  </code></p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_346():
    """
    Test case 346:  (part 2) Line endings are treated like spaces:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # "[para(1,1):\n\n]" records two line breaks, and both code span markers
    # map a newline to a space, so the backtick fences are on their own
    # lines.  Written with explicit escapes so the trailing space after
    # "foo" cannot be stripped by an editor.
    source_markdown = "``\nfoo \n``"
    expected_tokens = [
        "[para(1,1):\n\n]",
        "[icode-span:foo :``:\a\n\a \a:\a\n\a \a]",
        "[end-para]",
    ]
    expected_gfm = "<p><code>foo </code></p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_381():
    """
    Test case 381:  No emphasis here: the second _ is preceded by
    punctuation and followed by an alphanumeric.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_(_foo)"
    expected_gfm = "<p>_(_foo)</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:_:]",
        "[text:(:]",
        "[text:_:]",
        "[text:foo):]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_382():
    """
    Test case 382:  Emphasis nested inside emphasis.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_(_foo_)_"
    expected_gfm = "<p><em>(<em>foo</em>)</em></p>"
    expected_tokens = [
        "[para(1,1):]",
        "[emphasis:1:_]",
        "[text:(:]",
        "[emphasis:1:_]",
        "[text:foo:]",
        "[end-emphasis::1:_]",
        "[text:):]",
        "[end-emphasis::1:_]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_386():
    """
    Test case 386:  This is emphasis even though the closing delimiter is
    both left- and right-flanking, because punctuation follows it.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_(bar)_."
    expected_gfm = "<p><em>(bar)</em>.</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[emphasis:1:_]",
        "[text:(bar):]",
        "[end-emphasis::1:_]",
        "[text:.:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_385():
    """
    Test case 385:  (part 3) Underscores inside a word do not produce
    emphasis; only the outermost pair does.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_foo_bar_baz_"
    expected_gfm = "<p><em>foo_bar_baz</em></p>"
    expected_tokens = [
        "[para(1,1):]",
        "[emphasis:1:_]",
        "[text:foo:]",
        "[text:_:]",
        "[text:bar:]",
        "[text:_:]",
        "[text:baz:]",
        "[end-emphasis::1:_]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_380():
    """
    Test case 380:  Rule 4: no emphasis here, because the closing _ is
    preceded by whitespace.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "_foo bar _"
    expected_gfm = "<p>_foo bar _</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:_:]",
        "[text:foo bar :]",
        "[text:_:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_356():
    """
    Test case 356:  Here the angle brackets win and the text is an autolink.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "<http://foo.bar.`baz>`"
    expected_gfm = '<p><a href="http://foo.bar.%60baz">http://foo.bar.`baz</a>`</p>'
    expected_tokens = [
        "[para(1,1):]",
        "[uri-autolink:http://foo.bar.`baz]",
        "[text:`:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_347():
    """
    Test case 347:  Interior spaces are not collapsed:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The paragraph token records one line break and the span's marker maps
    # a newline to a space, so "baz" starts the second line.  Explicit
    # escapes keep the trailing space before the line break intact.
    # NOTE(review): the three-space run between "foo" and "bar" follows GFM
    # example 347; with single spaces the test would not exercise
    # "not collapsed" at all - confirm.
    source_markdown = "`foo   bar \nbaz`"
    expected_tokens = [
        "[para(1,1):\n]",
        "[icode-span:foo   bar \a\n\a \abaz:`::]",
        "[end-para]",
    ]
    expected_gfm = "<p><code>foo   bar  baz</code></p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_359():
    """
    Test case 359:  Another illustration that the opening and closing
    backtick strings must be equal in length.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "`foo``bar``"
    expected_gfm = "<p>`foo<code>bar</code></p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:`foo:]",
        "[icode-span:bar:``::]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_348():
    """
    Test case 348:  Backslash escapes do not work inside code spans; every
    backslash is taken literally.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "`foo\\`bar`"
    expected_gfm = "<p><code>foo\\</code>bar`</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[icode-span:foo\\:`::]",
        "[text:bar`:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_365():
    """
    Test case 365:  (part 2) Emphasis with * is allowed inside a word.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "5*6*78"
    expected_gfm = "<p>5<em>6</em>78</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:5:]",
        "[emphasis:1:*]",
        "[text:6:]",
        "[end-emphasis::1:*]",
        "[text:78:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_351():
    """
    Test case 351:  Code span backticks take precedence over every other
    inline construct except HTML tags and autolinks.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "*foo`*`"
    expected_gfm = "<p>*foo<code>*</code></p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:*:]",
        "[text:foo:]",
        "[icode-span:*:`::]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_362():
    """
    Test case 362:  This is not emphasis, because the opening * is preceded
    by an alphanumeric and followed by punctuation, and hence not part of a
    left-flanking delimiter run:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = 'a*"foo"*'
    # A double quote in paragraph text is rendered as the &quot; entity
    # (GFM example 362), so the expected HTML must contain the entity.
    # NOTE(review): the text token is assumed to record that substitution in
    # the \a<original>\a<replacement>\a marker form - confirm against the
    # tokenizer.
    expected_tokens = [
        "[para(1,1):]",
        "[text:a:]",
        "[text:*:]",
        '[text:\a"\a&quot;\afoo\a"\a&quot;\a:]',
        "[text:*:]",
        "[end-para]",
    ]
    expected_gfm = "<p>a*&quot;foo&quot;*</p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_352():
    """
    Test case 352:  The code span wins here too, so this is not parsed as a
    link.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "[not a `link](/foo`)"
    expected_gfm = "<p>[not a <code>link](/foo</code>)</p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:[:]",
        "[text:not a :]",
        "[icode-span:link](/foo:`::]",
        "[text:):]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_447():
    """
    Test case 447:  (part 3) Rule 11: a lone _ between * delimiters is
    emphasized as literal text.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = "foo *_*"
    expected_gfm = "<p>foo <em>_</em></p>"
    expected_tokens = [
        "[para(1,1):]",
        "[text:foo :]",
        "[emphasis:1:*]",
        "[text:_:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_353():
    """
    Test case 353:  Code spans, HTML tags, and autolinks have the same
    precedence.  Thus, this is code:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = '`<a href="`">`'
    # Inside the code span and in the trailing text, <, > and " are rendered
    # as &lt;, &gt; and &quot; (GFM example 353); unescaped they would form
    # a real <a> tag in the output instead of code.
    # NOTE(review): the tokens are assumed to record each substitution in
    # the \a<original>\a<replacement>\a marker form - confirm against the
    # tokenizer.
    expected_tokens = [
        "[para(1,1):]",
        '[icode-span:\a<\a&lt;\aa href=\a"\a&quot;\a:`::]',
        '[text:\a"\a&quot;\a\a>\a&gt;\a`:]',
        "[end-para]",
    ]
    expected_gfm = "<p><code>&lt;a href=&quot;</code>&quot;&gt;`</p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_extra_004():
    """
    Test to make sure the wide range of characters meets the GFM/CommonMark
    encodings.  Note that since % is followed by a 2 digit hex value, it is
    encoded per the CommonMark libraries except for the % and the 2 digit
    hex value following it.

    Another example of this is example 511:
    https://github.github.com/gfm/#example-511
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = (
        "[link](!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~)"
    )
    # An ampersand inside an href attribute has to be emitted as &amp; for
    # the rendered HTML to be valid, so the expected output uses the entity.
    # NOTE(review): the link token's URI field is assumed to hold the same,
    # already-encoded value as the rendered href (its percent-encoding
    # matches the href exactly) - confirm that this includes the &amp;.
    expected_tokens = [
        "[para(1,1):]",
        "[link:inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link]",
        "[text:link:]",
        "[end-link::]",
        "[end-para]",
    ]
    expected_gfm = '<p><a href="!%22#$%12&amp;\'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~">link</a></p>'

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
def test_code_spans_354():
    """
    Test case 354:  Here the tag starts first, so this is an HTML tag.
    """

    # Arrange
    markdown_parser = TokenizedMarkdown()
    gfm_renderer = TransformToGfm()
    source_markdown = '<a href="`">`'
    expected_gfm = '<p><a href="`">`</p>'
    expected_tokens = [
        "[para(1,1):]",
        '[raw-html:a href="`"]',
        "[text:`:]",
        "[end-para]",
    ]

    # Act
    actual_tokens = markdown_parser.transform(source_markdown)
    actual_gfm = gfm_renderer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_146():
    """
    Test case 146:  (part 2) The end tag can occur on the same line as the
    start tag:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The HTML block text ends after "*bar*" and the paragraph token is at
    # (2,1), so "*baz*" has to be on a second line of its own.
    source_markdown = "<!-- foo -->*bar*\n*baz*"
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<!-- foo -->*bar*:]",
        "[end-html-block]",
        "[para(2,1):]",
        "[emphasis:1:*]",
        "[text:baz:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_gfm = "<!-- foo -->*bar*\n<p><em>baz</em></p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_355():
    """
    Test case 355:  And this is code:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = "`<http://foo.bar.`baz>`"
    # The < inside the code span and the > in the trailing text are rendered
    # as &lt; and &gt; (GFM example 355).
    # NOTE(review): the tokens are assumed to record each substitution in
    # the \a<original>\a<replacement>\a marker form - confirm against the
    # tokenizer.
    expected_tokens = [
        "[para(1,1):]",
        "[icode-span:\a<\a&lt;\ahttp://foo.bar.:`::]",
        "[text:baz\a>\a&gt;\a`:]",
        "[end-para]",
    ]
    expected_gfm = "<p><code>&lt;http://foo.bar.</code>baz&gt;`</p>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_120():
    """
    Test case 120:  (part 2) Some simple examples follow.  Here are some
    basic HTML blocks of type 6:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The expected text token separates the three parts with "\n", so the
    # input is three lines; its trailing " " field matches the single space
    # in front of "<div>".
    # NOTE(review): the two- and nine-space indents of the later lines
    # follow GFM example 120 - confirm.
    source_markdown = " <div>\n  *hello*\n         <foo><a>"
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<div>\n  *hello*\n         <foo><a>: ]",
        "[end-html-block]",
    ]
    expected_gfm = " <div>\n  *hello*\n         <foo><a>"

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_link_reference_definitions_187():
    """
    Test case 187:  Link reference definitions can occur inside block
    containers, like lists and block quotations.  They affect the entire
    document, not just the container in which they are defined:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    # The expected tokens place a blank line at (2,1) and the block quote at
    # (3,1), so the reference and the quoted definition are separated by an
    # empty line.
    source_markdown = "[foo]\n\n> [foo]: /url"
    expected_tokens = [
        "[para(1,1):]",
        "[link:shortcut:/url:::::foo]",
        "[text:foo:]",
        "[end-link::]",
        "[end-para]",
        "[BLANK(2,1):]",
        "[block-quote(3,1):]",
        "[link-ref-def(3,3):True::foo:: :/url:::::]",
        "[end-block-quote]",
    ]
    expected_gfm = '<p><a href="/url">foo</a></p>\n<blockquote>\n</blockquote>'

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)