Пример #1
0
    def process_markdown(self,
                         markdown_string: str,
                         paragraph_style: str = "p"):
        """Convert Markdown into ``Paragraph`` objects appended to ``self.parts``.

        The CommonMark AST of ``markdown_string`` is walked event by event.
        Inline nodes (text, soft/hard breaks, links, emphasis, strong) are
        translated into inline markup accumulated in a buffer. Each time a
        paragraph node is exited, the buffer is wrapped in a ``Paragraph``,
        appended to ``self.parts`` and reset.

        Args:
            markdown_string: Markdown source to convert.
            paragraph_style: Key into ``PDF_STYLES`` used for paragraphs that
                are not inside a list item. Paragraphs whose parent is a list
                item use ``"ul_li"`` for bullet lists and ``"ol_li"``
                otherwise.

        Returns:
            None. Output is delivered through ``self.parts``.
        """
        walker = Parser().parse(markdown_string).walker()
        event = walker.nxt()
        buf = ""

        while event is not None:
            node, entering = event["node"], event["entering"]
            node_type = node.t
            if node_type == "text":
                # Literal text must be escaped just like the link destination
                # below; otherwise a stray "<" or "&" in the Markdown would be
                # read as markup by Paragraph. str() keeps the buffer a plain
                # string even if escape() returns a str subclass.
                buf += str(escape(node.literal))
            elif node_type == "softbreak":
                buf += " "
            elif node_type == "linebreak":
                buf += "<br />"
            elif node_type == "link":
                buf += f'<a href="{escape(node.destination)}">' if entering else "</a>"
            elif node_type == "emph":
                buf += "<em>" if entering else "</em>"
            elif node_type == "strong":
                buf += "<strong>" if entering else "</strong>"
            elif node_type == "paragraph" and not entering:
                style = paragraph_style
                if node.parent.t == "item":
                    # The list node is the grandparent of the paragraph.
                    list_type = node.parent.parent.list_data["type"]
                    style = "ul_li" if list_type == "bullet" else "ol_li"
                self.parts.append(Paragraph(buf, PDF_STYLES[style]))
                buf = ""
            # NOTE(review): text outside paragraphs (e.g. headings) stays in
            # the buffer and is prepended to the next paragraph, as before.
            event = walker.nxt()
 def parse(self, inputstring, document):
     """Parse ``inputstring`` and pass the resulting AST to ``convert_ast``.

     ``document`` is stored as both ``self.document`` and
     ``self.current_node``. The input is parsed with a fresh ``Parser``
     after a newline has been appended to it, between the
     ``setup_parse``/``setup_sections`` and ``finish_parse`` calls.
     """
     self.document = self.current_node = document
     self.setup_parse(inputstring, document)
     self.setup_sections()
     tree = Parser().parse(inputstring + '\n')
     self.convert_ast(tree)
     self.finish_parse()
Пример #3
0
def truncate_md(markdown_string: str, *, limit: int = 200) -> str:
    """Return the first text node of the Markdown, shortened to ``limit`` chars.

    The parsed document is walked until the first node of type ``"text"`` is
    found; its literal is the only content used. If that literal is longer
    than ``limit`` it is cut to ``limit`` characters and ``"..."`` is
    appended. An empty string is returned when no text node exists.
    """
    walker = Parser().parse(markdown_string).walker()
    first_text = ""

    # iter(callable, sentinel) keeps calling walker.nxt() until it yields None.
    for step in iter(walker.nxt, None):
        current = step["node"]
        if current.t != "text":
            continue
        first_text += current.literal
        # Stop at the first text node, so at most the first paragraph
        # contributes to the result.
        break

    if len(first_text) <= limit:
        return first_text
    return f"{first_text[:limit]}..."
Пример #4
0
 def parse(self, inputstring, document):
     """Parse ``inputstring`` and pass the resulting AST to ``convert_ast``.

     ``self.config`` starts as a copy of ``self.default_config`` and is
     updated with ``document.settings.env.config.recommonmark_config``
     when that attribute chain can be resolved. The input is then parsed
     with a fresh ``Parser`` after a newline has been appended to it.
     """
     self.document = self.current_node = document
     self.config = self.default_config.copy()
     try:
         overrides = self.document.settings.env.config.recommonmark_config
         self.config.update(overrides)
     except AttributeError:
         # Overrides are optional: keep the defaults when they are missing.
         pass
     self.setup_parse(inputstring, document)
     self.setup_sections()
     tree = Parser().parse(inputstring + '\n')
     self.convert_ast(tree)
     self.finish_parse()
Пример #5
0
def parseMarkDownBlock(text):
    """
    Parse a block of Markdown text and wrap the parsed tree in ``MarkDown``.

    The text is parsed with ``Parser``, ``nestSections`` is applied to the
    parsed tree, and the tree is then handed to ``MarkDown``, whose result
    is returned (the original documentation described it as a list of
    docutils nodes; confirm against ``MarkDown``).
    """
    tree = Parser().parse(text)
    # CommonMark can't nest sections, so do it manually
    nestSections(tree)
    return MarkDown(tree)
Пример #6
0
    def _process_markup(self,
                        context,
                        source_path,
                        source_line,
                        caller,
                        content_path=None):
        """Render Markdown to HTML from a file or from the ``caller`` body.

        When ``content_path`` is truthy the Markdown is read from that file
        (opened with ``newline=''``, so line endings are not translated);
        otherwise it is ``str(caller())``. ``context``, ``source_path`` and
        ``source_line`` are accepted but not used here.

        Returns:
            The output of ``HtmlRenderer().render`` for the parsed text.
        """
        if not content_path:
            markup = str(caller())
        else:
            with open(content_path, 'r', newline='') as handle:
                markup = handle.read()

        document = Parser().parse(markup)
        return HtmlRenderer().render(document)
Пример #7
0
def 从md转html再提取出链接(字符串):
    """Parse the Markdown string, render it to HTML and print the HTML.

    Nothing is returned, and no link extraction is performed here.
    """
    tree = Parser().parse(字符串)
    print(HtmlRenderer().render(tree))
Пример #8
0
def markdown_to_notion(markdown: str) -> list:
    """
    Convert Markdown formatted string to Notion.

    Before parsing, ``~~`` pairs are rewritten to ``<s>``/``</s>`` tags and
    every dash is swapped for a placeholder character (presumably restored
    by ``_cleanup_dashes``; verify against that helper).


    Arguments
    ---------
    markdown : str
        Text to convert.


    Returns
    -------
    list
        The result of ``_cleanup_dashes`` applied to the consolidated
        entries. Each entry built here is ``[text]`` or
        ``[text, formats]``, where ``formats`` is the sorted list of
        currently active formats.
    """

    # commonmark doesn't support strikethrough,
    # so we need to handle it ourselves
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # we don't want to touch dashes, so temporarily replace them here
    markdown = markdown.replace("-", "⸻")

    parser = Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect (named to avoid shadowing builtin format())
    active_formats = set()

    notion = []

    for section in ast:

        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format and ended_format in active_formats:
            active_formats.remove(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):

            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard() rather than remove(): a closing tag without a
                # matching opening one must not raise KeyError, in line
                # with the guarded removal of ended formats above.
                active_formats.discard(("s", ))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                notion.append(
                    [literal, [list(f) for f in sorted(active_formats)]]
                    if active_formats else [literal])

            # in the ast format, code blocks are meant
            # to be immediately self-closing
            if ("c", ) in active_formats:
                active_formats.remove(("c", ))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        if consolidated and _get_format(
                consolidated[-1], as_set=True) == _get_format(item,
                                                              as_set=True):
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return _cleanup_dashes(consolidated)
Пример #9
0
import os
import re
import sys
import urllib.request
from pprint import pprint
from commonmark import Parser
from commonmark.node import NodeWalker
from yarl import URL

from recipemd._vendor.commonmark_extensions.plaintext import CommonMarkToCommonMarkRenderer
from recipemd.data import RecipeParser, RecipeSerializer
from unidecode import unidecode

# Canonical path of the current working directory when this code runs.
root_path = os.path.realpath('.')

# Shared CommonMark parser and CommonMark-to-CommonMark renderer instances.
commonmark_parser = Parser()
commonmark_renderer = CommonMarkToCommonMarkRenderer()

# Shared recipe parser and serializer instances.
recipe_parser = RecipeParser()
recipe_serializer = RecipeSerializer()


def urlopen_user_agent(url: str):
    """Open ``url`` with ``urllib`` while sending a fixed browser User-Agent.

    Returns whatever ``urllib.request.urlopen`` returns for the request.
    """
    browser_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7',
    }
    prepared = urllib.request.Request(url, data=None, headers=browser_headers)
    return urllib.request.urlopen(prepared)

Пример #10
0
from commonmark import Parser
from omnidoc.markdown import markdown_to_tree, _md_ast_children

# Markdown fixture: a level-1 heading, a paragraph, a level-2 heading and a
# paragraph containing emphasis.
example_md = """
# Heading 1

That is paragraph text.


## Subsection

Also awesome *text*
"""
# Parse the fixture once at module level so the tests share one AST.
parser = Parser()
example_md_ast = parser.parse(example_md)


def test_md_ast_get_children():
    """``_md_ast_children`` yields the fixture's four top-level blocks in order."""
    expected_types = ['heading', 'paragraph', 'heading', 'paragraph']
    found_types = [child.t for child in _md_ast_children(example_md_ast)]
    assert found_types == expected_types


def test_markdown_to_tree():
    """Build the tree for the fixture AST and print its pretty form."""
    print(markdown_to_tree(example_md_ast).pretty())
    # TODO: write test


# When run as a script, execute only test_markdown_to_tree (it prints the tree).
if __name__ == "__main__":
    test_markdown_to_tree()
Пример #11
0
    def add_jobs(self, *, queryset) -> None:
        """Append a ``job-opportunity`` element to ``self.root`` per exported job.

        Only jobs in ``queryset`` with ``add_to_euraxess=True`` are exported.
        Each element receives ``description``, ``additional-information``,
        ``eu-funding``, ``work-location`` and ``hiring-org-inst`` children,
        plus ``application-details`` when the job has a link of type
        ``"website"`` or an e-mail address.

        Args:
            queryset: Collection of jobs supporting ``filter``,
                ``select_related`` and ``prefetch_related`` (presumably a
                Django QuerySet; confirm against the caller).

        Raises:
            KeyError: If ``job.institution.type`` is not one of the
                ``Institution`` types in the mapping below.
        """
        metadata = MetadataListFieldWithEuraxess()
        md_parser = Parser()
        md_renderer = HtmlRenderer()

        for job in (queryset.filter(add_to_euraxess=True).select_related(
                "institution").prefetch_related("links", "institution__links",
                                                "project__programme")):
            el = etree.SubElement(self.root, "job-opportunity")
            el.set("organisationIDKey", self.organisation_id_key)
            el.set("lastmodifieddate", date_filter(job.updated_at, "c"))
            etree.SubElement(el, "job-id").text = str(job.id)

            # description

            desc = etree.SubElement(el, "description")
            etree.SubElement(desc, "job-title").text = job.title
            # The job description is parsed by md_parser and rendered by
            # md_renderer before being stored as the element text.
            etree.SubElement(desc,
                             "job-description").text = md_renderer.render(
                                 md_parser.parse(job.description))

            job_topics = self.parse_topics(
                metadata.to_representation(job.topics))
            # With no topics, emit a single research field with sub-field
            # "Other"; otherwise emit one research field per topic.
            if len(job_topics) == 0:
                field = etree.SubElement(desc, "research-field")
                etree.SubElement(
                    field, "main-research-field").text = "Computer science"
                etree.SubElement(field, "sub-research-field").text = "Other"
            else:
                for topic in job_topics:
                    field = etree.SubElement(desc, "research-field")
                    etree.SubElement(
                        field, "main-research-field").text = "Computer science"
                    etree.SubElement(field, "sub-research-field").text = topic

            researcher_profiles = self.parse_topics(
                metadata.to_representation(job.career_levels))
            if len(researcher_profiles) == 0:
                etree.SubElement(
                    desc, "researcher-profile"
                ).text = "Established Researcher (R3)"  # TODO: check default
            else:
                for profile in researcher_profiles:
                    etree.SubElement(desc, "researcher-profile").text = profile

            etree.SubElement(desc, "type-of-contract").text = "To be defined"
            etree.SubElement(desc, "job-status").text = "Negotiable"
            # The deadline is combined with midnight (datetime.min.time())
            # before formatting.
            etree.SubElement(desc, "application-deadline").text = date_filter(
                datetime.combine(job.deadline, datetime.min.time()), "c")

            # additional-information

            extra_info = etree.SubElement(el, "additional-information")
            etree.SubElement(
                extra_info, "info-website"
            ).text = f"https://www.hipeac.net{job.get_absolute_url()}"

            # eu-funding

            eu_funding = etree.SubElement(el, "eu-funding")
            if job.project and job.project.programme:
                etree.SubElement(eu_funding, "framework-programme"
                                 ).text = job.project.programme.euraxess_value
            else:
                etree.SubElement(eu_funding, "framework-programme").text = "No"

            # work-location

            location = etree.SubElement(el, "work-location")
            etree.SubElement(location,
                             "nr-job-positions").text = str(job.positions)
            etree.SubElement(
                location,
                "job-organisation-institute").text = job.institution.name
            etree.SubElement(location, "job-country").text = job.country.name
            etree.SubElement(location, "job-city").text = job.location

            # hiring-org-inst

            # Map the institution type to the label written to the XML; an
            # unmapped type raises KeyError.
            organisation_type = {
                Institution.UNIVERSITY: "Higher Education Institute",
                Institution.LAB: "Research Laboratory",
                Institution.INNOVATION: "Public Research Institution",
                Institution.INDUSTRY: "Large Company",
                Institution.SME: "Small Medium Enterprise, Start-up",
                Institution.OTHER: "Other",
            }[job.institution.type]

            institution = etree.SubElement(el, "hiring-org-inst")
            etree.SubElement(
                institution,
                "organisation-institute").text = job.institution.name
            etree.SubElement(
                institution,
                "organisation-institute-type").text = organisation_type
            etree.SubElement(institution,
                             "country").text = job.institution.country.name
            if job.institution.location:
                etree.SubElement(institution,
                                 "city").text = job.institution.location
            if job.institution.recruitment_email:
                etree.SubElement(
                    institution,
                    "e-mail").text = job.institution.recruitment_email
            for link in job.institution.links.all():
                etree.SubElement(institution, "website").text = link.url

            # application-details

            application_website = None

            # If several links have type "website", the last one wins.
            for link in job.links.all():
                if link.type == "website":
                    application_website = link.url

            # A website link takes precedence over the job e-mail address.
            if application_website:
                application_details = etree.SubElement(el,
                                                       "application-details")
                etree.SubElement(application_details,
                                 "how-to-apply").text = "website"
                etree.SubElement(
                    application_details,
                    "application-website").text = application_website
            elif job.email:
                application_details = etree.SubElement(el,
                                                       "application-details")
                etree.SubElement(application_details,
                                 "how-to-apply").text = "e-mail"
                etree.SubElement(application_details,
                                 "application-email").text = job.email