示例#1
0
def markdown_to_notion(markdown):
    """
    Convert Markdown text into a list of Notion-style rich-text items.

    Each item of the returned list is either ``[text]`` (no formatting) or
    ``[text, [[fmt, ...], ...]]``, where the second element lists the formats
    active for that text in sorted order. Adjacent items with identical
    formatting are merged.

    Non-string input is converted with ``str()`` before parsing.
    """

    if not isinstance(markdown, str):
        markdown = str(markdown)

    # commonmark doesn't support strikethrough, so each pair of "~~" markers
    # is rewritten into inline <s>...</s> tags; an unpaired "~~" is left as-is
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    parser = commonmark.Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, each stored as a tuple such as ("s",)
    active_formats = set()

    notion = []

    for section in ast:

        # a section may report a format that ends with it; drop it if active
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):

            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" without a matching
                # "<s>" in the input must not raise KeyError
                active_formats.discard(("s",))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    notion.append(
                        [literal, [list(f) for f in sorted(active_formats)]]
                    )
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant to be immediately
            # self-closing
            active_formats.discard(("c",))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        same_style = consolidated and _get_format(
            consolidated[-1], as_set=True
        ) == _get_format(item, as_set=True)
        if same_style:
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return consolidated
示例#2
0
文件: markdown.py 项目: say/notion-py
def markdown_to_notion(markdown):
    """
    Convert Markdown text into Notion-style rich-text items.

    Strikethrough (``~~text~~``) and ``$$...$$`` spans are not handled by
    commonmark, so they are rewritten into inline ``<s>`` / ``<latex>`` tags
    before parsing. Dashes are temporarily swapped for a placeholder
    character so the parser leaves them alone.

    Each item built here is either ``[text]`` or
    ``[text, [[fmt, ...], ...]]`` (formats in sorted order); adjacent items
    with identical formatting are merged. The merged list is passed through
    ``cleanup_dashes`` and that result is returned.

    Non-string input is converted with ``str()`` before parsing.
    """

    if not isinstance(markdown, str):
        markdown = str(markdown)

    # commonmark doesn't support strikethrough, so each pair of "~~" markers
    # is rewritten into inline <s>...</s> tags; an unpaired "~~" is left as-is
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # commonmark doesn't support latex blocks, so we need to handle it ourselves
    def handle_latex(match):
        # the matched text minus its first and last two characters goes,
        # HTML-escaped, into the tag's "equation" attribute
        return f'<latex equation="{html.escape(match.group(0)[2:-2])}">\u204d</latex>'

    markdown = re.sub(
        r'(?<!\\\\|\$\$)(?:\\\\)*((\$\$)+)(?!(\$\$))(.+?)(?<!(\$\$))\1(?!(\$\$))',
        handle_latex, markdown)

    # we don't want to touch dashes, so temporarily replace them here
    markdown = markdown.replace("-", "⸻")

    parser = commonmark.Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, each stored as a tuple such as ("s",)
    active_formats = set()

    notion = []

    for section in ast:

        # a section may report a format that ends with it; drop it if active
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):

            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" without a matching
                # "<s>" in the input must not raise KeyError
                active_formats.discard(("s", ))
                literal = ""

            if item["type"] == "html_inline" and literal == "</latex>":
                # close one active format tagged "e" (presumably the one
                # opened by the matching <latex> tag -- confirm against
                # _extract_text_and_format_from_ast)
                equation_format = next(
                    (f for f in active_formats if f[0] == 'e'), None)
                if equation_format is not None:
                    active_formats.discard(equation_format)
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    notion.append(
                        [literal, [list(f) for f in sorted(active_formats)]])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant to be immediately
            # self-closing
            active_formats.discard(("c", ))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        same_style = consolidated and _get_format(
            consolidated[-1], as_set=True) == _get_format(item, as_set=True)
        if same_style:
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return cleanup_dashes(consolidated)
示例#3
0
def markdown_to_notion(markdown: str) -> list:
    """
    Convert Markdown formatted string to Notion.

    Strikethrough (``~~text~~``) is rewritten into inline ``<s>`` tags
    before parsing, and dashes are temporarily swapped for a placeholder
    character so the parser leaves them alone.


    Arguments
    ---------
    markdown : str
        Text to convert.


    Returns
    -------
    list
        Result of ``_cleanup_dashes`` applied to the consolidated items.
        Each item built here is either ``[text]`` or
        ``[text, [[fmt, ...], ...]]`` with the formats in sorted order;
        adjacent items with identical formatting are merged.
    """

    # commonmark doesn't support strikethrough, so each pair of "~~"
    # markers is rewritten into inline <s>...</s> tags;
    # an unpaired "~~" is left as-is
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # we don't want to touch dashes, so temporarily replace them here
    markdown = markdown.replace("-", "⸻")

    parser = Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, each stored as a tuple such as ("s",)
    active_formats = set()

    notion = []

    for section in ast:

        # a section may report a format that ends with it; drop it if active
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):

            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" without a matching
                # "<s>" in the input must not raise KeyError
                active_formats.discard(("s", ))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    notion.append(
                        [literal, [list(f) for f in sorted(active_formats)]])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant
            # to be immediately self-closing
            active_formats.discard(("c", ))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        same_style = consolidated and _get_format(
            consolidated[-1], as_set=True) == _get_format(item, as_set=True)
        if same_style:
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return _cleanup_dashes(consolidated)