def test_remove_head(self):
     """Expect only the body paragraph to survive for a full HTML document.

     The doctype, <head> and <title> of the input have no counterpart in the
     expected node list.
     """
     markup = (
         '<!doctype html>'
         '<html><head><title>Title text</title></head><body><p>Para text</p></body></html>'
     )
     expected = [{'children': ['Para text'], 'tag': 'p'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
    def test_multiple_pre(self):
        html3 = '''
<pre><code class="python hljs">my_list = [<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">4</span>, <span class="hljs-number">5</span>, <span class="hljs-number">6</span>, <span class="hljs-number">7</span>]
EVEN = slice(<span class="hljs-number">1</span>, <span class="hljs-keyword">None</span>, <span class="hljs-number">2</span>)
print(my_list[EVEN])     <span class="hljs-comment"># [2, 4, 6]</span>
</code></pre>
<p> paragraph splitter</p>
<pre> String anotherCodeBlock = "separated code block"</pre>
<pre>  String anotherCodeBlock2 = "separated code block2"</pre>
<pre>  String anotherCodeBlock3 = "separated code block3"</pre>
<p> paragraph splitter</p>
<pre>  String anotherCodeBlock4 = "separated code block4"</pre>
<pre>  String anotherCodeBlock5 = "separated code block5"</pre>
<p> paragraph splitter</p>
<pre>  String anotherCodeBlock6 = "separated code block6"</pre>
                '''
        self.assertJson(
            [
                {"tag": "pre", "children": [
                    "my_list = [1, 2, 3, 4, 5, 6, 7]\nEVEN = slice(1, None, 2)\nprint(my_list[EVEN])     # [2, 4, 6]\n"]},
                {"tag": "p", "children": [" paragraph splitter"]},
                {"tag": "pre", "children": [
                    " String anotherCodeBlock = \"separated code block\"\n  String anotherCodeBlock2 = \"separated code block2\"\n  String anotherCodeBlock3 = \"separated code block3\""]},
                {"tag": "p", "children": [" paragraph splitter"]},
                {"tag": "pre", "children": [
                    "  String anotherCodeBlock4 = \"separated code block4\"\n  String anotherCodeBlock5 = \"separated code block5\""]},
                {"tag": "p", "children": [" paragraph splitter"]},
                {"tag": "pre", "children": ["  String anotherCodeBlock6 = \"separated code block6\""]}
            ],
            convert_html_to_telegraph_format(html3, clean_html=True)
        )
 def test_em_text_after_html(self):
     """Expect a text node that follows an inline <em> to share its paragraph."""
     markup = '<em> Em text </em>Text node after'
     em_node = {"tag": "em", "children": [" Em text "]}
     # Both the em element and the trailing text sit inside one "p" node.
     expected = [{"children": [em_node, "Text node after"], "tag": "p"}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_duplicated_bad_tags(self):
     """Expect <aside> and the <p> inside <figcaption> to be unwrapped.

     Only figure > figcaption > text is expected in the result.
     """
     # paragraph appears twice in bad_tags list
     markup = '<aside><figure><figcaption><p>Text</p></figcaption></figure></aside>'
     caption_node = {'children': ['Text'], 'tag': 'figcaption'}
     expected = [{'children': [caption_node], 'tag': 'figure'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_bad_para_inside_figcaption_with_link(self):
     """Expect paragraphs (and a link) inside <figcaption> to flatten to text.

     The two paragraphs are expected to become one string joined by a newline,
     with the link reduced to its text.
     """
     markup = (
         '<figure><figcaption>'
         '<p><a href="https://telegram.org/">Telegram</a></p><p>Text after link</p>'
         '</figcaption></figure>'
     )
     caption_node = {"tag": "figcaption", "children": ["Telegram\nText after link"]}
     expected = [{"tag": "figure", "children": [caption_node]}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_bad_para(self):
     """Expect paragraphs inside <aside> to collapse into newline-joined text."""
     markup = '<aside><p>text inside para</p><p>another para</p></aside>'
     # TODO: write html6 test
     # (html6 is not asserted on yet; it is kept as the fixture for that test.)
     html6 = '''<blockquote class="cut">\n<p>text inside</p>\n</blockquote>'''
     expected = [{'children': ['text inside para\nanother para'], 'tag': 'aside'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_figure_inside_with_img(self):
     """Expect a <figure> with text and an image to be lifted out of its <div>."""
     markup = '<div><figure>Some figure content <img src="image.png"/></figure></div>'
     image_node = {"attrs": {"src": "image.png"}, "tag": "img"}
     expected = [
         {"children": ["Some figure content ", image_node], "tag": "figure"},
     ]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_iframe_with_text(self):
     """Expect the ``iframe_with_text`` fixture to yield one YouTube embed figure.

     The fixture itself is an attribute of the test case defined outside this
     method.
     """
     embed_src = '/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef'
     iframe_node = {'tag': 'iframe', 'attrs': {'src': embed_src}}
     expected = [{'tag': 'figure', 'children': [iframe_node]}]
     converted = convert_html_to_telegraph_format(self.iframe_with_text, clean_html=True)
     self.assertJson(expected, converted)
 def test_em_text_before_html(self):
     """Expect a text node before an inline <em> to get its own paragraph.

     The leading text and the em element are expected in two separate "p"
     nodes.
     """
     markup = 'text node before <em> Em text </em>'
     text_paragraph = {'children': ["text node before "], "tag": "p"}
     em_paragraph = {"children": [{"tag": "em", "children": [" Em text "]}], "tag": "p"}
     expected = [text_paragraph, em_paragraph]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_html_with_text_after(self):
     """Expect the ``html_with_text_after`` fixture to give a figure, then a paragraph.

     The fixture is an attribute of the test case defined outside this method.
     """
     image_figure = {"tag": "figure", "children": [{"tag": "img", "attrs": {"src": "image1.jpg"}}]}
     trailing_paragraph = {"tag": "p", "children": [" Text after"]}
     expected = [image_figure, trailing_paragraph]
     converted = convert_html_to_telegraph_format(self.html_with_text_after, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_html_with_text_before(self):
     """Expect the ``html_with_text_before`` fixture to give a paragraph, then a figure.

     The fixture is an attribute of the test case defined outside this method.
     """
     leading_paragraph = {"children": ["Text before "], "tag": "p"}
     image_figure = {"children": [{"attrs": {"src": "image0.jpg"}, "tag": "img"}], "tag": "figure"}
     expected = [leading_paragraph, image_figure]
     converted = convert_html_to_telegraph_format(self.html_with_text_before, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_tag_at_the_top(self):
     """Expect a bare top-level <img> to be wrapped in a figure.

     Only the ``src`` attribute is expected on the resulting img node; the
     ``title`` attribute of the input is absent from the expectation.
     """
     markup = '<img src="image.jpg" title="image"/>'
     image_node = {"attrs": {"src": "image.jpg"}, "tag": "img"}
     expected = [{"children": [image_node], "tag": "figure"}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_iframe_text_before(self):
     """Expect an <iframe> without ``src`` to vanish, leaving the text paragraph."""
     markup = 'text before <iframe></iframe>'
     expected = [{'children': ['text before '], 'tag': 'p'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_twitter_links(self):
     """Compare a converted twitter-tweet blockquote against ``self.assert_with``.

     The expected node list lives in the ``assert_with`` attribute, which is
     defined outside this method.
     """
     tweet_markup = '''
     <blockquote class="twitter-tweet"><p>
     <a href="https://twitter.com/JoshConstine">@JoshConstine</a>
     <a href="https://twitter.com/TechCrunch">@TechCrunch</a> The distribution of games</p>
     <a href="https://twitter.com/durov/status/803680844200210432"></a>
     <a name="no_href"></a></blockquote>
     '''
     expected = self.assert_with
     converted = convert_html_to_telegraph_format(tweet_markup, clean_html=True)
     self.assertJson(expected, converted)
    def test_br_tags(self):
        """Expect <br> tags and an empty <div> around a paragraph to produce nothing.

        Only the paragraph itself is expected in the result, regardless of the
        attributes carried by the surrounding <br> tags.
        """
        markup = (
            '<br><br /> <br class="somebrclass">  <div>'
            '</div> <br id="somebrid"/> <p>text</p> <br>'
        )
        expected = [{'tag': 'p', 'children': ['text']}]

        converted = convert_html_to_telegraph_format(markup, clean_html=True)
        self.assertJson(expected, converted)
 def test_br_tags_two(self):
     """Expect <br> tags adjacent to a bare text node to become newlines.

     The text is expected in one paragraph with a single newline on each side.
     """
     markup = (
         '<br><br /> <br clear="someoldattribute">  <div>'
         '</div> <br/> text <br>'
     )
     expected = [{'children': ['\ntext \n'], 'tag': 'p'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_iframe_with_figure(self):
     """Expect a YouTube iframe already inside <figure> to stay there with its text.

     The iframe ``src`` is expected to be rewritten to the ``/embed/youtube``
     form, and the trailing text remains a child of the same figure.
     """
     markup = '<figure><iframe src="//www.youtube.com/embed/abcdef"></iframe>Text after </figure>'
     embed_src = u'/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef'
     iframe_node = {u'tag': u'iframe', u'attrs': {u'src': embed_src}}
     expected = [{u'tag': u'figure', u'children': [iframe_node, u'Text after ']}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_figure_inside(self):
     """Expect a <figure> and a <p> inside a <div> to become top-level siblings."""
     markup = '<div><figure>Some figure content</figure><p>paragraph text</p></div>'
     figure_node = {"children": ["Some figure content"], "tag": "figure"}
     paragraph_node = {"children": ["paragraph text"], "tag": "p"}
     expected = [figure_node, paragraph_node]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_iframe_telegram(self):
     """Expect a t.me iframe to become a figure holding an ``/embed/telegram`` iframe."""
     markup = '<iframe src="https://t.me/tginfo/1220?embed=1"></iframe>'
     embed_src = '/embed/telegram?url=https%3A%2F%2Ft.me%2Ftginfo%2F1220%3Fembed%3D1'
     iframe_node = {'tag': 'iframe', 'attrs': {'src': embed_src}}
     expected = [{'tag': 'figure', 'children': [iframe_node]}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_inside_paragraph_with_figure(self):
     """Expect a <figure> nested in <p> to come out as the only top-level node.

     The whitespace text nodes inside the figure are expected to be kept as
     single-space children around the image.
     """
     markup = '<p> <figure> <img src="image0.jpg"/> <figcaption>test</figcaption></figure> </p>'
     image_node = {'tag': 'img', 'attrs': {'src': 'image0.jpg'}}
     caption_node = {'tag': 'figcaption', 'children': ['test']}
     expected = [
         {'tag': 'figure', 'children': [' ', image_node, ' ', caption_node]},
     ]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
    def test_empty_links(self):
        """Expect a link holding only whitespace and an image to yield just the figure.

        No anchor node appears in the expected output.
        """
        markup = '<a href="http://example.com/">   <img src="http://httpbin.org/image/jpeg"/>   </a>'
        image_node = {'tag': 'img', 'attrs': {'src': 'http://httpbin.org/image/jpeg'}}
        expected = [{'tag': 'figure', 'children': [image_node]}]

        converted = convert_html_to_telegraph_format(markup, clean_html=True)
        self.assertJson(expected, converted)
 def test_iframe_vimeo(self):
     """Expect a Vimeo player iframe to become a figure with an ``/embed/vimeo`` iframe.

     The expected embed URL points at ``vimeo.com/<id>`` rather than the
     player host of the input.
     """
     markup = '<iframe src="https://player.vimeo.com/video/1185346"></iframe>'
     iframe_node = {'tag': 'iframe', 'attrs': {'src': '/embed/vimeo?url=https%3A%2F%2Fvimeo.com%2F1185346'}}
     expected = [{'tag': 'figure', 'children': [iframe_node]}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_h3_after_text_with_br(self):
     """Expect text after an <h3> to form one paragraph with <br> turned into a newline."""
     markup = '<h3> H3 header</h3> text after h3 header<br/> and new line'
     header_node = {"children": [" H3 header"], "tag": "h3"}
     paragraph_node = {"children": [" text after h3 header\nand new line"], "tag": "p"}
     expected = [header_node, paragraph_node]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_inside_paragraph_data_img(self):
     """Expect an <img> with a ``data:`` URI source to be dropped from the paragraph.

     Only the paragraph text is expected to remain.
     """
     markup = (
         '<p> Text <img src="  data:image/png;base64,'
         'iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/'
         'w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHw'
         'AAAABJRU5ErkJggg=="/></p>'
     )
     expected = [{'children': [' Text '], 'tag': 'p'}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_iframe_text_after(self):
     """Expect an iframe inside <p> to split into an embed figure and a text paragraph."""
     markup = '<p><iframe src="//www.youtube.com/embed/abcdef"></iframe>Text after </p>'
     embed_src = "/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef"
     embed_figure = {"tag": "figure", "children": [{"tag": "iframe", "attrs": {"src": embed_src}}]}
     trailing_paragraph = {"tag": "p", "children": ["Text after "]}
     expected = [embed_figure, trailing_paragraph]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_inside_paragraph_without_figure_para_after(self):
     """Expect an image in mid-paragraph to split the paragraph around a figure.

     The text before and after the image become separate paragraphs, and the
     following <p> is expected unchanged after them.
     """
     markup = '<p> Text 1 <img src="image0.jpg"/>Text after image </p><p>Text 2 </p>'
     expected = [
         {"tag": "p", "children": [" Text 1 "]},
         {"tag": "figure", "children": [{"tag": "img", "attrs": {"src": "image0.jpg"}}]},
         {"tag": "p", "children": ["Text after image "]},
         {"tag": "p", "children": ["Text 2 "]},
     ]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_inside_paragraph_with_text(self):
     """Expect an image nested in a <span> within <p> to split the surrounding text.

     The expected result is paragraph, figure, paragraph; no span node appears.
     """
     markup = '<p> abc <span> <img src="image1.jpg"/>xyz </span> </p>'
     before_image = {"tag": "p", "children": [" abc "]}
     image_figure = {"tag": "figure", "children": [{"tag": "img", "attrs": {"src": "image1.jpg"}}]}
     after_image = {"tag": "p", "children": ["xyz "]}
     expected = [before_image, image_figure, after_image]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_script_telegram(self):
     """Expect a Telegram widget <script> to become a figure with a telegram embed.

     The expected embed URL is built from the ``data-telegram-post`` value of
     the script tag.
     """
     markup = (
         '<script async src="https://telegram.org/js/telegram-widget.js?2" '
         'data-telegram-post="tginfo/1220" data-width="100%"></script>'
     )
     embed_src = '/embed/telegram?url=https%3A%2F%2Ft.me%2Ftginfo%2F1220'
     iframe_node = {'tag': 'iframe', 'attrs': {'src': embed_src}}
     expected = [{'tag': 'figure', 'children': [iframe_node]}]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
 def test_image_inside_paragraph(self):
     """Expect every image inside paragraphs/spans to surface as its own figure.

     The whitespace-only text around the images has no counterpart in the
     expected output.
     """
     markup = (
         '<p> <img src="image0.jpg"/></p>'
         '<p>  <span> <img src="image1.jpg"/>   </span> <img src="image2.jpg"/> </p>'
     )
     # One figure per image, in document order.
     expected = [
         {"children": [{"attrs": {"src": "image0.jpg"}, "tag": "img"}], "tag": "figure"},
         {"children": [{"attrs": {"src": "image1.jpg"}, "tag": "img"}], "tag": "figure"},
         {"children": [{"attrs": {"src": "image2.jpg"}, "tag": "img"}], "tag": "figure"},
     ]
     converted = convert_html_to_telegraph_format(markup, clean_html=True)
     self.assertJson(expected, converted)
    def test_em_with_div_text_after(self):
        html = '''
<div>
    <em> Em text </em>
</div> Some text node after div
        '''
        self.assertJson(
            [
                {"children": [{"tag": "em", "children": [" Em text "]}, ' Some text node after div'], "tag": "p"}
            ],
            convert_html_to_telegraph_format(html, clean_html=True)
        )