Пример #1
0
    def test_figure_inside(self):
        html_figure_inside = '<div><figure>Some figure content</figure><p>paragraph text</p></div>'
        html_figure_inside_with_img = '<div><figure>Some figure content <img src="image.png"/></figure></div>'
        self.assertJson([{
            "children": ["Some figure content"],
            "tag": "figure"
        }, {
            "children": ["paragraph text"],
            "tag": "p"
        }],
                        convert_html_to_telegraph_format(html_figure_inside,
                                                         clean_html=True))

        self.assertJson([{
            "children": [
                "Some figure content ", {
                    "attrs": {
                        "src": "image.png"
                    },
                    "tag": "img"
                }
            ],
            "tag":
            "figure"
        }],
                        convert_html_to_telegraph_format(
                            html_figure_inside_with_img, clean_html=True))
Пример #2
0
 def test_twitter_links(self):
     html = '''
     <blockquote class="twitter-tweet"><p>
     <a href="https://twitter.com/JoshConstine">@JoshConstine</a>
     <a href="https://twitter.com/TechCrunch">@TechCrunch</a> The distribution of games</p>
     <a href="https://twitter.com/durov/status/803680844200210432"></a>
     <a name="no_href"></a></blockquote>
     '''
     duplicated_link_html = '''
     <blockquote class="twitter-tweet"><p>
     <a href="https://twitter.com/JoshConstine">@JoshConstine</a>
     <a href="https://twitter.com/TechCrunch">@TechCrunch</a> The distribution of games</p>
     <a href="https://twitter.com/durov/status/803680844200210432"></a>
     <a href="https://twitter.com/durov/status/803680844200210432"></a>
     <a name="no_href"></a></blockquote>
     '''
     assert_with = [{
         'tag':
         'figure',
         'children': [{
             'tag': 'iframe',
             'attrs': {
                 'src':
                 '/embed/twitter?url=https%3A%2F%2Ftwitter.com%2Fdurov%2Fstatus%2F803680844200210432'
             }
         }]
     }]
     self.assertJson(
         assert_with, convert_html_to_telegraph_format(html,
                                                       clean_html=True))
     self.assertJson(
         assert_with,
         convert_html_to_telegraph_format(duplicated_link_html,
                                          clean_html=True))
Пример #3
0
    def test_em(self):
        html = '<em> Em text </em>'
        # Text node after inline element should be wrapped into single paragraph together with em
        html_text_after = '<em> Em text </em>Text node after'
        # Text node before inline element should be wrapped into separate paragraph
        html_text_before = 'text node before <em> Em text </em>'

        self.assertJson([{
            "children": [{
                "tag": "em",
                "children": [" Em text "]
            }],
            "tag": "p"
        }], convert_html_to_telegraph_format(html, clean_html=True))
        self.assertJson([{
            "children": [{
                "tag": "em",
                "children": [" Em text "]
            }, "Text node after"],
            "tag":
            "p"
        }], convert_html_to_telegraph_format(html_text_after, clean_html=True))
        self.assertJson([{
            'children': ["text node before "],
            "tag": "p"
        }, {
            "children": [{
                "tag": "em",
                "children": [" Em text "]
            }],
            "tag": "p"
        }], convert_html_to_telegraph_format(html_text_before,
                                             clean_html=True))
Пример #4
0
 def test_bad_para(self):
     html = '<aside><p>text inside para</p><p>another para</p></aside>'
     html2 = '<figure><figcaption><p>text inside para</p><p>another para</p></figcaption></figure>'
     html3 = '<blockquote><p></p><p>second para</p></blockquote>'
     html4 = '<blockquote><p>first para</p><strong><em></em></strong><em>em text</em></blockquote>'
     html5 = '<blockquote><p>first para</p><em></em><strong></strong><em></em></blockquote>'
     self.assertJson([{
         'children': ['text inside para\nanother para'],
         'tag': 'aside'
     }], convert_html_to_telegraph_format(html, clean_html=True))
     self.assertJson([{
         'children': [{
             'children': ['text inside para\nanother para'],
             'tag': 'figcaption'
         }],
         'tag':
         'figure'
     }], convert_html_to_telegraph_format(html2, clean_html=True))
     self.assertJson([{
         'children': ['second para'],
         'tag': 'blockquote'
     }], convert_html_to_telegraph_format(html3, clean_html=True))
     self.assertJson([{
         'children':
         [u'first para\n', {
             'children': ['em text'],
             'tag': 'em'
         }],
         'tag':
         'blockquote'
     }], convert_html_to_telegraph_format(html4, clean_html=True))
     self.assertJson([{
         'children': [u'first para'],
         'tag': 'blockquote'
     }], convert_html_to_telegraph_format(html5, clean_html=True))
Пример #5
0
    def test_blockquote(self):
        html = '<blockquote>Text inside blockquote</blockquote>'
        quote_with_para = '<blockquote><p>first para</p><p>second para</p></blockquote>'
        quote_para_strong = '<blockquote><p>first para</p><strong>strong text</strong></blockquote>'
        quote_inside_figure = '<figure><blockquote>Blockquote text inside figure</blockquote></figure>'
        self.assertJson([{
            'children': ['Text inside blockquote'],
            'tag': 'blockquote'
        }], convert_html_to_telegraph_format(html, clean_html=True))

        self.assertJson([{
            'children': ['first para\nsecond para'],
            'tag': 'blockquote'
        }], convert_html_to_telegraph_format(quote_with_para, clean_html=True))
        self.assertJson([{
            'children': [
                'first para\n', {
                    u'children': [u'strong text'],
                    u'tag': u'strong'
                }
            ],
            'tag':
            'blockquote'
        }], convert_html_to_telegraph_format(quote_para_strong,
                                             clean_html=True))
        self.assertJson([{
            'children': ['Blockquote text inside figure'],
            'tag': 'blockquote'
        }],
                        convert_html_to_telegraph_format(quote_inside_figure,
                                                         clean_html=True))
Пример #6
0
    def test_lists(self):
        html = '''
        <ul>
            <li>abc</li>
            <li>def</li>
        </ul>
        <ol>
            <li>first</li>
            <li>second</li>
        </ol>
        '''
        empty_list = '''
        <ul>
            <li></li>
            <li>second</li>
        </ul>
        <ul><li></li>
        </ul>
        <ol></ol>
        <ol>
            <li>first</li>
            <li>    </li>
        </ol>
        '''

        self.assertJson([{
            'tag':
            'ul',
            'children': [{
                'tag': 'li',
                'children': ['abc']
            }, {
                'tag': 'li',
                'children': ['def']
            }]
        }, {
            'tag':
            'ol',
            'children': [{
                'tag': 'li',
                'children': ['first']
            }, {
                'tag': 'li',
                'children': ['second']
            }]
        }], convert_html_to_telegraph_format(html, clean_html=True))
        self.assertJson([{
            'tag': 'ul',
            'children': [{
                'tag': 'li',
                'children': ['second']
            }]
        }, {
            'tag': 'ol',
            'children': [{
                'tag': 'li',
                'children': ['first']
            }]
        }], convert_html_to_telegraph_format(empty_list, clean_html=True))
Пример #7
0
 def test_text_only(self):
     html = 'only plain text'
     html_empty_string = '               '
     self.assertJson([{
         'children': ['only plain text'],
         'tag': 'p'
     }], convert_html_to_telegraph_format(html, clean_html=True))
     self.assertJson([],
                     convert_html_to_telegraph_format(html_empty_string,
                                                      clean_html=True))
Пример #8
0
 def test_br_tags(self):
     # multiple br tags should be replaced with one line break
     html = '<br><br /> <br class="somebrclass">  <div>' \
            '</div> <br id="somebrid"/> <p>text</p> <br>'
     html2 = '<br><br /> <br clear="someoldattribute">  <div>' \
            '</div> <br/> text <br>'
     self.assertJson([{
         'tag': 'p',
         'children': ['text']
     }], convert_html_to_telegraph_format(html, clean_html=True))
     self.assertJson([{
         'children': ['\ntext \n'],
         'tag': 'p'
     }], convert_html_to_telegraph_format(html2, clean_html=True))
Пример #9
0
 def test_remove_head(self):
     text = '<!doctype html>' \
            '<html><head><title>Title text</title></head><body><p>Para text</p></body></html>'
     self.assertJson([{
         'children': ['Para text'],
         'tag': 'p'
     }], convert_html_to_telegraph_format(text, clean_html=True))
Пример #10
0
 def test_header_elements(self):
     html = '<h1> H1 header (h3) </h1>' \
            '<h2> H2 header (h4) </h2>' \
            '<h3> h3 header </h3>' \
            '<h4> h4 header </h4>' \
            '<h5> h5 header (h4) </h5>' \
            '<h6> h6 header (h4) </h6>'
     self.assertJson([{
         "children": [" H1 header (h3) "],
         "tag": "h3"
     }, {
         "children": [" H2 header (h4) "],
         "tag": "h4"
     }, {
         "children": [" h3 header "],
         "tag": "h3"
     }, {
         "children": [" h4 header "],
         "tag": "h4"
     }, {
         "children": [" h5 header (h4) "],
         "tag": "h4"
     }, {
         "children": [" h6 header (h4) "],
         "tag": "h4"
     }], convert_html_to_telegraph_format(html, clean_html=True))
Пример #11
0
 def test_text_after_para(self):
     html = ' <p>text inside para</p> Text after para'
     self.assertEqual(
         '<body><p>text inside para</p><p> Text after para</p></body>',
         convert_html_to_telegraph_format(html,
                                          output_format='html_string',
                                          clean_html=True))
Пример #12
0
 def test_h3_after_text_with_br(self):
     html = '<h3> H3 header</h3> text after h3 header<br/> and new line'
     self.assertJson([{
         "children": [" H3 header"],
         "tag": "h3"
     }, {
         "children": [" text after h3 header\nand new line"],
         "tag": "p"
     }], convert_html_to_telegraph_format(html, clean_html=True))
Пример #13
0
 def test_duplicated_bad_tags(self):
     # paragraph appears twice in bad_tags list
     text = '<aside><figure><figcaption><p>Text</p></figcaption></figure></aside>'
     self.assertJson(
         [{
             'children': [{
                 'children': ['Text'],
                 'tag': 'figcaption'
             }],
             'tag': 'figure'
         }], convert_html_to_telegraph_format(text, clean_html=True))
Пример #14
0
    def test_em_with_div(self):
        html = '''
<div>
    <em> Em text </em>
</div>
        '''
        self.assertJson([{
            "children": [{
                "tag": "em",
                "children": [" Em text "]
            }],
            "tag": "p"
        }], convert_html_to_telegraph_format(html, clean_html=True))
Пример #15
0
    def test_empty_links(self):
        html = '<a href="http://example.com/">   <img src="http://httpbin.org/image/jpeg"/>   </a>'

        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'img',
                'attrs': {
                    'src': 'http://httpbin.org/image/jpeg'
                }
            }]
        }], convert_html_to_telegraph_format(html, clean_html=True))
Пример #16
0
 def test_convert_without_clean(self):
     # multiple br tags should be replaced with one line break
     html = 'Text first line' \
            '<br><br /> <br class="somebrclass">  <div>' \
            '</div> <br id="somebrid"/> <p>text</p> <br>' \
            '<span><em><strong><i></i><u></u></strong></em></span>'
     self.assertJson([{
         'tag': 'p',
         'children': ['Text first line']
     }, {
         'tag': 'br'
     }, {
         'tag': 'br'
     }, {
         'tag': 'br',
         'attrs': {
             'class': 'somebrclass'
         }
     }, {
         'tag': 'div'
     }, {
         'tag': 'br',
         'attrs': {
             'id': 'somebrid'
         }
     }, {
         'tag': 'p',
         'children': ['text']
     }, {
         'tag': 'br'
     }, {
         'tag':
         'span',
         'children': [{
             'tag':
             'em',
             'children': [{
                 'tag': 'strong',
                 'children': [{
                     'tag': 'i'
                 }, {
                     'tag': 'u'
                 }]
             }]
         }]
     }], convert_html_to_telegraph_format(html, clean_html=False))
Пример #17
0
    def test_text_on_top(self):
        html = '''
<div>
    text as first child node
    <h1> Text Header </h1>
    <p> Text Para</p>
</div>
        '''
        self.assertJson([{
            "children": ["text as first child node "],
            "tag": "p"
        }, {
            "children": [" Text Header "],
            "tag": "h3"
        }, {
            "children": [" Text Para"],
            "tag": "p"
        }], convert_html_to_telegraph_format(html, clean_html=True))
Пример #18
0
    def test_iframe(self):
        # multiple br tags should be replaced with one line break
        html = '<iframe src="//www.youtube.com/embed/abcdef">legacy text</iframe>'
        iframe_empty_src = '<iframe src=""></iframe>'
        iframe_no_src = '<iframe></iframe>'
        iframe_child_no_src = '<p><iframe></iframe></p>'
        iframe_text_before = 'text before <iframe></iframe>'
        iframe_text_after = '<p><iframe src="//www.youtube.com/embed/abcdef"></iframe>Text after </p>'
        iframe_not_allowed_src = '<div><iframe src="http://example.com"></iframe></div>'
        iframe_vimeo = '<iframe src="https://player.vimeo.com/video/1185346"></iframe>'
        iframe_telegram = '<iframe src="https://t.me/tginfo/1220?embed=1"></iframe>'
        script_telegram = '<script async src="https://telegram.org/js/telegram-widget.js?2" data-telegram-post="tginfo/1220" data-width="100%"></script>'
        mix = iframe_child_no_src + html + iframe_empty_src + iframe_no_src
        iframe_with_figure = '<figure><iframe src="//www.youtube.com/embed/abcdef"></iframe>Text after </figure>'

        multiple_iframes = '<p>'\
            'Text before'\
            '<a href="/123">link</a><iframe src="//www.youtube.com/embed/abcdef"></iframe> text'\
            '<a href="/246">link2</a> Text after link'\
            '<iframe src="//www.youtube.com/embed/xyzxyzxyz"></iframe>'\
            '</p>'

        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'iframe',
                'attrs': {
                    'src':
                    '/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef'
                }
            }]
        }], convert_html_to_telegraph_format(html, clean_html=True))
        self.assertJson([],
                        convert_html_to_telegraph_format(iframe_empty_src,
                                                         clean_html=True))
        self.assertJson([],
                        convert_html_to_telegraph_format(iframe_no_src,
                                                         clean_html=True))
        self.assertJson([],
                        convert_html_to_telegraph_format(iframe_child_no_src,
                                                         clean_html=True))

        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'iframe',
                'attrs': {
                    'src':
                    '/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef'
                }
            }]
        }], convert_html_to_telegraph_format(mix, clean_html=True))

        self.assertJson([{
            'children': ['text before '],
            'tag': 'p'
        }],
                        convert_html_to_telegraph_format(iframe_text_before,
                                                         clean_html=True))
        self.assertJson([],
                        convert_html_to_telegraph_format(
                            iframe_not_allowed_src, clean_html=True))
        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'iframe',
                'attrs': {
                    'src': '/embed/vimeo?url=https%3A%2F%2Fvimeo.com%2F1185346'
                }
            }]
        }], convert_html_to_telegraph_format(iframe_vimeo, clean_html=True))
        self.assertJson([{
            "tag":
            "figure",
            "children": [{
                "tag": "iframe",
                "attrs": {
                    "src":
                    "/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef"
                }
            }]
        }, {
            "tag": "p",
            "children": ["Text after "]
        }], convert_html_to_telegraph_format(iframe_text_after,
                                             clean_html=True))
        self.assertJson([{
            u'tag':
            u'figure',
            u'children': [{
                u'tag': u'iframe',
                u'attrs': {
                    u'src':
                    u'/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef'
                }
            }, u'Text after ']
        }],
                        convert_html_to_telegraph_format(iframe_with_figure,
                                                         clean_html=True))
        self.assertJson([{
            "tag":
            "p",
            "children": [
                "Text before", {
                    "tag": "a",
                    "attrs": {
                        "href": "/123"
                    },
                    "children": ["link"]
                }
            ]
        }, {
            "tag":
            "figure",
            "children": [{
                "tag": "iframe",
                "attrs": {
                    "src":
                    "/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dabcdef"
                }
            }]
        }, {
            "tag":
            "p",
            "children": [
                " text", {
                    "tag": "a",
                    "attrs": {
                        "href": "/246"
                    },
                    "children": ["link2"]
                }, " Text after link"
            ]
        }, {
            "tag":
            "figure",
            "children": [{
                "tag": "iframe",
                "attrs": {
                    "src":
                    "/embed/youtube?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dxyzxyzxyz"
                }
            }]
        }], convert_html_to_telegraph_format(multiple_iframes,
                                             clean_html=True))

        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'iframe',
                'attrs': {
                    'src':
                    '/embed/telegram?url=https%3A%2F%2Ft.me%2Ftginfo%2F1220%3Fembed%3D1'
                }
            }]
        }], convert_html_to_telegraph_format(iframe_telegram, clean_html=True))

        self.assertJson([{
            'tag':
            'figure',
            'children': [{
                'tag': 'iframe',
                'attrs': {
                    'src':
                    '/embed/telegram?url=https%3A%2F%2Ft.me%2Ftginfo%2F1220'
                }
            }]
        }], convert_html_to_telegraph_format(script_telegram, clean_html=True))
Пример #19
0
    def test_code_block(self):
        html = '''<pre>
        def test_code_block(self):
            html = ''
            print("hello world")
        </pre>'''
        html2 = '''
            <p><pre
            class="code">
                def hello_world():
                    print("hello")
            </pre>
            <pre>print("second pre")</pre>
            </p>
            <p> Text after pre </p>
        '''
        html3 = '''
<pre><code class="python hljs">my_list = [<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">4</span>, <span class="hljs-number">5</span>, <span class="hljs-number">6</span>, <span class="hljs-number">7</span>]
EVEN = slice(<span class="hljs-number">1</span>, <span class="hljs-keyword">None</span>, <span class="hljs-number">2</span>)
print(my_list[EVEN])     <span class="hljs-comment"># [2, 4, 6]</span>
</code></pre>
<p> paragraph splitter</p>
<pre> String anotherCodeBlock = "separated code block"</pre>
<pre>  String anotherCodeBlock2 = "separated code block2"</pre>
<pre>  String anotherCodeBlock3 = "separated code block3"</pre>
<p> paragraph splitter</p>
<pre>  String anotherCodeBlock4 = "separated code block4"</pre>
<pre>  String anotherCodeBlock5 = "separated code block5"</pre>
<p> paragraph splitter</p>
<pre>  String anotherCodeBlock6 = "separated code block6"</pre>
        '''
        html4 = '''
<pre class="code literal-block"><p class="nv">$ </p>mkvirtualenv myvirtualenv --python<p class="o">=</p>/usr/bin/python3.4


Running virtualenv with interpreter /usr/bin/python3.4
Using base prefix <p class="s1">'/usr'</p>
New python executable in myvirtualenv/bin/python3.4
Also creating executable in myvirtualenv/bin/python
Installing setuptools, pip...done.
</pre>
        '''
        html5 = '''
        <p>Text before <code> inline_code = True</code> Text after</p>
        <code> multiline_code = True
        next_line = True
        </code>
        <code></code>empty code
        '''
        self.assertJson([{
            "tag":
            "pre",
            "children": [
                "\n        def test_code_block(self):\n            html = ''\n            print(\"hello world\")\n        "
            ]
        }], convert_html_to_telegraph_format(html, clean_html=True))
        self.assertJson([{
            "tag":
            "pre",
            "children": [
                "\n                def hello_world():\n                    print(\"hello\")\n            \nprint(\"second pre\")"
            ]
        }, {
            "tag": "p",
            "children": [" Text after pre "]
        }], convert_html_to_telegraph_format(html2, clean_html=True))
        self.assertJson([{
            "tag":
            "pre",
            "children": [
                "my_list = [1, 2, 3, 4, 5, 6, 7]\nEVEN = slice(1, None, 2)\nprint(my_list[EVEN])     # [2, 4, 6]\n"
            ]
        }, {
            "tag": "p",
            "children": [" paragraph splitter"]
        }, {
            "tag":
            "pre",
            "children": [
                " String anotherCodeBlock = \"separated code block\"\n  String anotherCodeBlock2 = \"separated code block2\"\n  String anotherCodeBlock3 = \"separated code block3\""
            ]
        }, {
            "tag": "p",
            "children": [" paragraph splitter"]
        }, {
            "tag":
            "pre",
            "children": [
                "  String anotherCodeBlock4 = \"separated code block4\"\n  String anotherCodeBlock5 = \"separated code block5\""
            ]
        }, {
            "tag": "p",
            "children": [" paragraph splitter"]
        }, {
            "tag":
            "pre",
            "children":
            ["  String anotherCodeBlock6 = \"separated code block6\""]
        }], convert_html_to_telegraph_format(html3, clean_html=True))
        self.assertJson([{
            "tag":
            "pre",
            "children": [
                "$ mkvirtualenv myvirtualenv --python=/usr/bin/python3.4\n\n\n"
                "Running virtualenv with interpreter /usr/bin/python3.4\n"
                "Using base prefix '/usr'\n"
                "New python executable in myvirtualenv/bin/python3.4\n"
                "Also creating executable in myvirtualenv/bin/python\n"
                "Installing setuptools, pip...done.\n"
            ]
        }], convert_html_to_telegraph_format(html4, clean_html=True))
        self.assertJson([{
            "tag":
            "p",
            "children": [
                "Text before ", {
                    "tag": "code",
                    "children": [" inline_code = True"]
                }, " Text after"
            ]
        }, {
            "tag":
            "pre",
            "children":
            [" multiline_code = True\n        next_line = True\n        "]
        }, {
            'tag': 'p',
            'children': [{
                'tag': 'code'
            }, 'empty code']
        }], convert_html_to_telegraph_format(html5, clean_html=True))
Пример #20
0
 def test_images(self):
     images_without_src = '<img title="image"/>'
     self.assertJson([],
                     convert_html_to_telegraph_format(images_without_src,
                                                      clean_html=True))
Пример #21
0
    def test_image_inside_paragraph(self):
        html = '<p> <img src="image0.jpg"/></p>' \
               '<p>  <span> <img src="image1.jpg"/>   </span> <img src="image2.jpg"/> </p>'

        para_with_text = '<p> abc <span> <img src="image1.jpg"/>xyz </span> </p>'
        para_with_figure = '<p> <figure> <img src="image0.jpg"/> <figcaption>test</figcaption></figure> </p>'
        para_img1 = '<p>Text 1 <figure> <img src="image0.jpg"/> <figcaption><em>test</em></figcaption></figure> </p><p>Text 2<p>'
        para_img2 = '<p> Text 1 <img src="image0.jpg"/>Text after image </p><p>Text 2 </p>'
        para_img3 = '<p> Text <img src="  data:image/png;base64,' \
                    'iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHw' \
                    'AAAABJRU5ErkJggg=="/></p>'
        self.assertJson(
            [{
                "children": [{
                    "attrs": {
                        "src": "image0.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }, {
                "children": [{
                    "attrs": {
                        "src": "image1.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }, {
                "children": [{
                    "attrs": {
                        "src": "image2.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }], convert_html_to_telegraph_format(html, clean_html=True))

        self.assertJson(
            [{
                "tag": "p",
                "children": [" abc "]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image1.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": ["xyz "]
            }],
            convert_html_to_telegraph_format(para_with_text, clean_html=True))
        self.assertJson([{
            'tag':
            'figure',
            'children': [
                ' ', {
                    'tag': 'img',
                    'attrs': {
                        'src': 'image0.jpg'
                    }
                }, ' ', {
                    'tag': 'figcaption',
                    'children': ['test']
                }
            ]
        }], convert_html_to_telegraph_format(para_with_figure,
                                             clean_html=True))

        self.assertJson([{
            "tag": "p",
            "children": ["Text 1 "]
        }, {
            "tag":
            "figure",
            "children": [
                " ", {
                    "tag": "img",
                    "attrs": {
                        "src": "image0.jpg"
                    }
                }, " ", {
                    "tag": "figcaption",
                    "children": ["test"]
                }
            ]
        }, {
            "tag": "p",
            "children": ["Text 2"]
        }], convert_html_to_telegraph_format(para_img1, clean_html=True))

        self.assertJson(
            [{
                "tag": "p",
                "children": [" Text 1 "]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image0.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": ["Text after image "]
            }, {
                "tag": "p",
                "children": ["Text 2 "]
            }], convert_html_to_telegraph_format(para_img2, clean_html=True))
        self.assertJson([{
            'children': [' Text '],
            'tag': 'p'
        }], convert_html_to_telegraph_format(para_img3, clean_html=True))
Пример #22
0
    def test_image_tag_at_the_top(self):
        html = '<img src="image.jpg" title="image"/>'
        html_with_text_after = '<img src="image1.jpg" title="image"/> Text after'
        html_with_text_before = 'Text before <img src="image0.jpg" title="image"/>'
        html_joined = html_with_text_before + html_with_text_after
        self.assertJson(
            [{
                "children": [{
                    "attrs": {
                        "src": "image.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }], convert_html_to_telegraph_format(html, clean_html=True))

        self.assertJson(
            [{
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image1.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": [" Text after"]
            }],
            convert_html_to_telegraph_format(html_with_text_after,
                                             clean_html=True))

        self.assertJson(
            [{
                "children": ["Text before "],
                "tag": "p"
            }, {
                "children": [{
                    "attrs": {
                        "src": "image0.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }],
            convert_html_to_telegraph_format(html_with_text_before,
                                             clean_html=True))

        self.assertJson(
            [{
                "tag": "p",
                "children": ["Text before "]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image0.jpg"
                    }
                }]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image1.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": [" Text after"]
            }], convert_html_to_telegraph_format(html_joined, clean_html=True))
Пример #23
0
 def test_remove_processing_instructions(self):
     text = '<p>text<?xml version=”1.0″ encoding=”UTF-8″?></p>'
     self.assertJson([{
         'children': ['text'],
         'tag': 'p'
     }], convert_html_to_telegraph_format(text, clean_html=False))
Пример #24
0
    def test_image_inside_paragraph(self):
        html = '<p> <img src="image0.jpg"/></p>' \
               '<p>  <span> <img src="image1.jpg"/>   </span> <img src="image2.jpg"/> </p>'

        para_with_text = '<p> abc <span> <img src="image1.jpg"/>xyz </span> </p>'
        para_with_figure = '<p> <figure> <img src="image0.jpg"/> <figcaption>test</figcaption></figure> </p>'
        para_img1 = '<p>Text 1 <figure> <img src="image0.jpg"/> <figcaption><em>test</em></figcaption></figure> </p><p>Text 2<p>'
        para_img2 = '<p> Text 1 <img src="image0.jpg"/>Text after image </p><p>Text 2 </p>'
        self.assertJson(
            [{
                "children": [{
                    "attrs": {
                        "src": "image0.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }, {
                "children": [{
                    "attrs": {
                        "src": "image1.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }, {
                "children": [{
                    "attrs": {
                        "src": "image2.jpg"
                    },
                    "tag": "img"
                }],
                "tag": "figure"
            }], convert_html_to_telegraph_format(html, clean_html=True))

        self.assertJson(
            [{
                "tag": "p",
                "children": [" abc "]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image1.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": ["xyz "]
            }],
            convert_html_to_telegraph_format(para_with_text, clean_html=True))
        self.assertJson([{
            'tag':
            'figure',
            'children': [
                ' ', {
                    'tag': 'img',
                    'attrs': {
                        'src': 'image0.jpg'
                    }
                }, ' ', {
                    'tag': 'figcaption',
                    'children': ['test']
                }
            ]
        }], convert_html_to_telegraph_format(para_with_figure,
                                             clean_html=True))

        self.assertJson([{
            "tag": "p",
            "children": ["Text 1 "]
        }, {
            "tag":
            "figure",
            "children": [
                " ", {
                    "tag": "img",
                    "attrs": {
                        "src": "image0.jpg"
                    }
                }, " ", {
                    "tag": "figcaption",
                    "children": ["test"]
                }
            ]
        }, {
            "tag": "p",
            "children": ["Text 2"]
        }], convert_html_to_telegraph_format(para_img1, clean_html=True))

        self.assertJson(
            [{
                "tag": "p",
                "children": [" Text 1 "]
            }, {
                "tag": "figure",
                "children": [{
                    "tag": "img",
                    "attrs": {
                        "src": "image0.jpg"
                    }
                }]
            }, {
                "tag": "p",
                "children": ["Text after image "]
            }, {
                "tag": "p",
                "children": ["Text 2 "]
            }], convert_html_to_telegraph_format(para_img2, clean_html=True))