Exemplo n.º 1
0
    def parse(self, md_file):
        """Build a post object from a markdown file.

        The front matter and the markdown body are separated with
        ``frontmatter.parse``, the markdown is rendered to HTML, and the
        renderer's statistics, table of contents and element info are
        attached to the resulting object.

        :param md_file: markdown input, passed straight to
            ``frontmatter.parse``.
        :return: object exposing ``meta``, ``md``, ``html``, ``text`` and
            ``elements``; ``meta`` additionally receives ``statistics``
            and ``toc``.
        """
        # The Jinja2 environment is created lazily, on the first parsed
        # post, so that plugins still get a chance to modify the templates
        # before they are compiled.
        if not self.jinja2:
            template_loader = jinja2.DictLoader(self.templates)
            self.jinja2 = jinja2.Environment(loader=template_loader)

        post = utils.dict_to_objdict()

        # Separate the front matter (metadata) from the markdown body.
        front_matter, markdown_body = frontmatter.parse(md_file)
        post.meta = front_matter
        post.md = markdown_body

        # NOTE: the renderer must be re-initialised before every parse.
        self.renderer.init(
            self.jinja2,
            self.code_formatter,
            self.site,
            post.meta,
        )

        # Render the markdown, then collect what the renderer gathered
        # while doing so.
        post.html = self.md_parser.parse(post.md)
        post.text = html2text(post.html)  # plain text, used by NLP
        post.meta.statistics = self.renderer.get_stats()
        post.meta.toc = self.renderer.get_json_toc()
        post.elements = self.renderer.get_info()

        return post
Exemplo n.º 2
0
def test_broken_html():
    """Closing tags without a matching opener still yield just the text."""
    markup = '''
        </td>test</span>
    '''
    extracted = html2text(markup)
    assert extracted == 'test'
Exemplo n.º 3
0
def test_empty():
    """An empty string converts to an empty string."""
    extracted = html2text('')
    assert extracted == ''
Exemplo n.º 4
0
def test_dangling_tags():
    """Opening tags that are never closed still yield just the text."""
    markup = '''
        <table><td>
            <span>test</span>
    '''
    extracted = html2text(markup)
    assert extracted == 'test'
Exemplo n.º 5
0
def test_upercase_code():
    """A <code> block closed with a mixed-case tag is left out of the text."""
    markup = '''
        <code>alert(1)</CoDe>
        <span>test</span>
    '''
    extracted = html2text(markup)
    assert extracted == 'test'
Exemplo n.º 6
0
def test_none():
    """Passing None converts to an empty string."""
    extracted = html2text(None)
    assert extracted == ''
Exemplo n.º 7
0
def test_pre():
    """The content of a <pre> block is left out of the text."""
    markup = '''
        <pre>alert(1)</pre>
        <span>test</span>
    '''
    extracted = html2text(markup)
    assert extracted == 'test'
Exemplo n.º 8
0
def test_basic():
    """A single well-formed element converts to its inner text."""
    extracted = html2text('<span>test</span>')
    assert extracted == 'test'