def markdown_to_notion(markdown):
    """
    Convert Markdown text into a list of Notion-style text segments.

    Parameters
    ----------
    markdown : str
        Text to convert. Non-string values are coerced with ``str()``.

    Returns
    -------
    list
        A list of segments. Each segment is ``[text]`` when no formatting
        is active, or ``[text, [[fmt, ...], ...]]`` where the second element
        lists the active formats in sorted order. Adjacent segments with the
        same format set are merged and empty segments are dropped.
    """
    if not isinstance(markdown, str):
        markdown = str(markdown)

    # commonmark doesn't support strikethrough, so we handle it ourselves:
    # each pair of "~~" markers becomes an inline <s> ... </s> tag pair
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    parser = commonmark.Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, stored as tuples such as ("s",) or ("c",)
    active_formats = set()
    notion = []

    for section in ast:
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        # every paragraph starts with a separator; leading/trailing ones are
        # dropped or stripped further down
        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):
            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" with no open "<s>"
                # must not raise KeyError
                active_formats.discard(("s",))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    styles = [list(f) for f in sorted(active_formats)]
                    notion.append([literal, styles])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant to be immediately
            # self-closing
            active_formats.discard(("c",))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        if consolidated and _get_format(
            consolidated[-1], as_set=True
        ) == _get_format(item, as_set=True):
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return consolidated
def markdown_to_notion(markdown):
    """
    Convert Markdown text into a list of Notion-style text segments.

    Strikethrough (``~~``) and ``$$``-delimited latex spans are rewritten to
    inline HTML tags before parsing, because commonmark does not support
    them. Dashes are temporarily replaced before parsing and the result is
    passed through ``cleanup_dashes``.

    Parameters
    ----------
    markdown : str
        Text to convert. Non-string values are coerced with ``str()``.

    Returns
    -------
    list
        The value returned by ``cleanup_dashes`` for the consolidated
        segments. Each segment built here is ``[text]`` when no formatting
        is active, or ``[text, [[fmt, ...], ...]]`` with the active formats
        in sorted order. Adjacent segments with the same format set are
        merged and empty segments are dropped.
    """
    if not isinstance(markdown, str):
        markdown = str(markdown)

    # commonmark doesn't support strikethrough, so we handle it ourselves:
    # each pair of "~~" markers becomes an inline <s> ... </s> tag pair
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # commonmark doesn't support latex blocks, so we need to handle it
    # ourselves
    def handle_latex(match):
        # drop the first and last two characters of the match, then escape
        # the rest so it is safe inside the attribute value
        return f'<latex equation="{html.escape(match.group(0)[2:-2])}">\u204d</latex>'

    markdown = re.sub(
        r'(?<!\\\\|\$\$)(?:\\\\)*((\$\$)+)(?!(\$\$))(.+?)(?<!(\$\$))\1(?!(\$\$))',
        handle_latex,
        markdown,
    )

    # we don't want to touch dashes, so temporarily replace them here
    # NOTE(review): presumably cleanup_dashes restores them - confirm
    markdown = markdown.replace("-", "⸻")

    parser = commonmark.Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, stored as tuples such as ("s",) or ("c",)
    active_formats = set()
    notion = []

    for section in ast:
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        # every paragraph starts with a separator; leading/trailing ones are
        # dropped or stripped further down
        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):
            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" with no open "<s>"
                # must not raise KeyError
                active_formats.discard(("s",))
                literal = ""

            if item["type"] == "html_inline" and literal == "</latex>":
                # close at most one active format whose first element is "e"
                equation_format = next(
                    (f for f in active_formats if f[0] == "e"), None
                )
                if equation_format is not None:
                    active_formats.discard(equation_format)
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    styles = [list(f) for f in sorted(active_formats)]
                    notion.append([literal, styles])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant to be immediately
            # self-closing
            active_formats.discard(("c",))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        if consolidated and _get_format(
            consolidated[-1], as_set=True
        ) == _get_format(item, as_set=True):
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return cleanup_dashes(consolidated)
def markdown_to_notion(markdown: str) -> list:
    """
    Convert Markdown formatted string to Notion.

    Strikethrough (``~~``) is rewritten to inline ``<s>`` tags before
    parsing, because commonmark does not support it. Dashes are temporarily
    replaced before parsing and the result is passed through
    ``_cleanup_dashes``.

    Parameters
    ----------
    markdown : str
        Text to convert.

    Returns
    -------
    list
        The value returned by ``_cleanup_dashes`` for the consolidated
        segments. Each segment built here is ``[text]`` when no formatting
        is active, or ``[text, [[fmt, ...], ...]]`` with the active formats
        in sorted order. Adjacent segments with the same format set are
        merged and empty segments are dropped.
    """
    # commonmark doesn't support strikethrough,
    # so we need to handle it ourselves:
    # each pair of "~~" markers becomes an inline <s> ... </s> tag pair
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # we don't want to touch dashes, so temporarily replace them here
    # NOTE(review): presumably _cleanup_dashes restores them - confirm
    markdown = markdown.replace("-", "⸻")

    parser = Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, stored as tuples such as ("s",) or ("c",)
    active_formats = set()
    notion = []

    for section in ast:
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        # every paragraph starts with a separator; leading/trailing ones are
        # dropped or stripped further down
        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):
            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard (not remove): a stray "</s>" with no open "<s>"
                # must not raise KeyError
                active_formats.discard(("s",))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    styles = [list(f) for f in sorted(active_formats)]
                    notion.append([literal, styles])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant
            # to be immediately self-closing
            active_formats.discard(("c",))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        if consolidated and _get_format(
            consolidated[-1], as_set=True
        ) == _get_format(item, as_set=True):
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return _cleanup_dashes(consolidated)