Exemplo n.º 1
0
def colorize(doc):
    """Return a copy of ``doc`` with slide background colors applied.

    Works on a deep copy, so the caller's document is left untouched.
    Every horizontal rule is replaced by an empty level-2 header, then each
    top-level level-2 header receives a ``data-background-color`` attribute.
    The color is taken from ``COLOR_THEME`` when one of its keys occurs in
    the header title; otherwise the most recently selected color is reused
    (no attribute is added until a first color has been selected).
    """
    doc = copy.deepcopy(doc)

    # Locate the rules first and substitute afterwards, so that the tree is
    # not modified while it is being traversed.
    rule_slots = []
    for elt, path in pandoc.iter(doc, path=True):
        if not isinstance(elt, HorizontalRule):
            continue
        holder, index = path[-1]
        rule_slots.append((holder, index))
    for holder, index in reversed(rule_slots):
        holder[index] = Header(2, ("", [], []), [])

    # The current color is sticky: it carries over to the following level-2
    # headers until another themed title is met.
    color = None
    for block in doc[1]:
        if not isinstance(block, Header):
            continue
        lvl, attr, inlines = block[:]
        if lvl != 2:
            continue
        _, _, kvs = attr
        title = pandoc.write(inlines)
        themed = next((emoji for emoji in COLOR_THEME if emoji in title), None)
        if themed is not None:
            color = f"{COLOR_THEME[themed]}"
        if color is not None:
            kvs.append(("data-background-color", color))

    return doc
Exemplo n.º 2
0
def notebookify(doc):
    """Build a notebook from the top-level blocks of a pandoc document.

    Each ``CodeBlock`` becomes a code cell numbered with a running execution
    count (starting at 1); every other block is written back to markdown and
    stored in a markdown cell of its own.

    Returns the populated ``Notebook``.
    """
    from pandoc.types import Pandoc, Meta, CodeBlock

    notebook = Notebook()
    cells = notebook["cells"]
    # print(doc[1])
    next_execution_count = 1
    # Merging consecutive markdown cells is currently switched off.
    merge_markdown = False

    for block in doc[1]:
        if isinstance(block, CodeBlock):
            cell = CodeCell()
            cell["source"] = block[1]
            cell["execution_count"] = next_execution_count
            next_execution_count += 1
            cells.append(cell)
            continue

        # The "smart" extension is disabled on output: Jupyter cells are not
        # expected to be smart, and with it an en-dash '–' would be written
        # as '--'.
        markdown = pandoc.write(
            Pandoc(Meta({}), [block]),
            options=["-t", "markdown-smart"],
        )

        extend_previous = (
            merge_markdown
            and len(cells) >= 1
            and cells[-1]["cell_type"] == "markdown"
        )
        if extend_previous:
            cells[-1]["source"] += "\n" + markdown
        else:
            cell = MarkdownCell()
            cell["source"] = markdown
            cells.append(cell)
    return notebook
Exemplo n.º 3
0
def print_sections(doc):
    """Print an indented outline of every header found in ``doc``.

    Each line shows the nesting depth (the number of ``Div`` holders on the
    path to the header), then the title indented by four spaces per level.
    """
    for elt, path in pandoc.iter(doc, path=True):
        if not isinstance(elt, Header):
            continue
        _, _, inlines = elt[:]
        # Render the title inlines through a throwaway one-block document.
        title = pandoc.write(Pandoc(Meta({}), [Plain(inlines)]))
        depth = sum(1 for holder, _ in path if isinstance(holder, Div))
        indent = " " * (4 * depth)
        # The rendered title is printed as-is; no extra newline is appended.
        print(f"{depth}> {indent}{title}", end="")
Exemplo n.º 4
0
 def round_trip_check(self, json_doc):
     """Check that ``json_doc`` survives a pandoc read/write round trip.

     Returns ``True`` when writing back the document read from ``json_doc``
     yields a value equal to ``json_doc``. Otherwise returns the pair
     ``(json_doc, result)``, where ``result`` is the round-tripped value or,
     if reading or writing raised, the exception that was caught.
     """
     try:
         doc = pandoc.read(json_doc)
         json_doc_2 = pandoc.write(doc)
     except Exception as error:
         # Hand the failure back to the caller instead of dropping it (the
         # previous code assigned it to a misspelled, unused variable).
         json_doc_2 = error
     if json_doc == json_doc_2:
         return True
     return json_doc, json_doc_2
Exemplo n.º 5
0
 def to_pandoc(self):
     """Return the rich text as a raw HTML block wrapped in a blockquote.

     The rich text is converted to a pandoc tree, rendered to HTML, stripped
     of trailing whitespace and placed between ``<blockquote><p>`` and
     ``</p></blockquote>`` lines.
     """
     html_body = pandoc.write(self.rich_text.to_pandoc(), format='html')
     # TODO: handle citations
     html_lines = (
         '<blockquote><p>',
         html_body.rstrip(),
         '</p></blockquote>',
         '',  # makes the joined text end with a newline
     )
     return RawBlock(Format('html'), "\n".join(html_lines))
Exemplo n.º 6
0
def handle_typed_sections(doc):
    """Label typed section headers and register them in the LaTeX TOC.

    A header is "typed" when one of its classes is a key of ``types``
    (theorem, definition, ...). For each such header, in place:

    - the classes ``unnumbered`` and ``unlisted`` are added;
    - the title is prefixed with the French label of its first matching
      type, followed by an en-dash;
    - a raw LaTeX ``\\addcontentsline`` paragraph carrying the original
      (unprefixed) title is inserted right after the header.

    ``doc`` is modified in place; nothing is returned.
    """
    types = {
        "theorem": "Théorème",
        "definition": "Définition",
        "lemma": "Lemme",
        "proposition": "Proposition"
    }
    levels = {
        1: "section",
        2: "subsection",
        3: "subsubsection",
        4: "paragraph",
        5: "subparagraph"
    }
    todos = []
    for elt, path in pandoc.iter(doc, path=True):
        if not isinstance(elt, Header):
            continue
        header = elt
        level, attr, inlines = header
        id_, classes, kvs = attr
        shared = [type_ for type_ in classes if type_ in types]
        if not shared:
            continue
        classes.extend(["unnumbered", "unlisted"])
        holder, index = path[-1]
        # Render the title to LaTeX *before* the label is prepended.
        # NOTE(review): `map` is presumably the pandoc.types map type rather
        # than the builtin (which cannot be called without arguments).
        minidoc = Pandoc(Meta(map()), [Para(inlines)])
        latex_title = pandoc.write(minidoc, format="latex").strip()
        todos.append((holder, index, level, latex_title))
        label = types[shared[0]]
        header[2] = [Str(label), Space(), Str("–"), Space()] + inlines

    # Insert from the last header back to the first: every insertion shifts
    # the blocks that follow it in the same holder, so going forward would
    # invalidate the indices recorded for the later headers.
    for holder, index, level, latex_title in reversed(todos):
        latex_code = (r"\addcontentsline{toc}{" + levels[level] + "}{" +
                      latex_title + "}")
        holder.insert(index + 1, Para([RawInline("latex", latex_code)]))

    # NOTE(review): `found` is not defined anywhere in the code visible here;
    # this loop looks like it belongs to another routine. Confirm against
    # the full file before relying on it.
    for elt, path in pandoc.iter(doc, path=True):
        if isinstance(elt, (RawInline, RawBlock)):
            format_ = elt[0]
            if format_[0] == "html":
                holder, i = path[-1]
                found.insert(0, (holder, i))
Exemplo n.º 7
0
doc = transform(doc)

# Code and Doctest
code = generate_code(doc)
if code is not None:
    #print("code:\n\n", code)
    with open(doc_py, "w") as py_file:
        py_file.write(code)
    python("-m", "doctest", doc_py)
    # Best-effort cleanup (otherwise, top build has issues). Filesystem
    # errors such as a missing directory are ignored, but unrelated failures
    # are no longer swallowed as they were by the former bare `except`.
    shutil.rmtree(output / "__pycache__", ignore_errors=True)

# PDF Output
pandoc.write(doc, file=doc_pdf, options=PDF_options)

# PDF Output (Print)
pandoc.write(doc, file=doc_pdf_print, options=PDF_PRINT_options)

# LaTeX Output
pandoc.write(doc, file=doc_tex, options=TEX_options)


def gl(pattern):
    """Return the files of the images directory matching ``pattern``."""
    return list(images.glob(pattern))


# Ship the images next to the LaTeX source, then replace the LaTeX output
# directory by a zip archive of its contents.
image_filenames = gl("*.pdf") + gl("*.png") + gl("*.jpg")
for image in image_filenames:
    shutil.copy(image, output_latex_images)
shutil.make_archive(str(output_latex), "zip", str(output_latex))
shutil.rmtree(str(output_latex))
#pandoc.write(doc, format="html5", file=doc_html, options=HTML_options)
#pandoc.write(doc, format="odt", file=doc_odt, options=ODT_options)
Exemplo n.º 8
0
    for action, index, contents in reversed(divs):
        del root[index]
        if action == "unpack":
            root[index:index] = contents

    return doc


slides_doc = make_slides_doc(doc)

# Command-line options handed to pandoc for the reveal.js export.
options = [
    "--standalone",
    "-V",
    "theme:white",
    "--mathjax",
    "-V",
    "slideNumber:true",
]

# The slides are written next to the source, as <doc_name>.html.
pandoc.write(
    slides_doc,
    file=f"{doc_name}.html",
    format="revealjs",
    options=options,
)

# Notebook Generation
# ------------------------------------------------------------------------------

# Two issues here. First, some Header elements flagged 'slides' should be
# removed from the notebook output, but currently are not.
# Second, a deeper issue: Headers are NOT the holders of the elements
# that follow them, so the algorithm needs to be smarter and identify the
# corresponding content.
# For now (short term): sidestep both issues by using only divs in the
# document to deal with conditional content.
# Caveat: the reveal target does not handle divs well (overlapping content
# and/or no new page). Can we solve this by unpacking the slides divs?
Exemplo n.º 9
0
Arquivo: build.py Projeto: giliam/CDIS
# Options shared by every output format.
options.extend(["-V", "lang=fr"])
options.append("--table-of-contents")
if bibliography.exists():
    options.extend(
        ["--bibliography=bibliography.json", "-M", "link-citations=true"]
    )

# Each target gets its own copy so that it can be tuned independently.
TEX_options = list(options)
PDF_options = list(options)

# To use package titlesec, see <https://stackoverflow.com/questions/42916124/not-able-to-use-titlesec-with-markdown-and-pandoc>
# Update: titlesec is off limits with pandoc anyway,
# as it is not compatible with hyperref
# PDF_options += ["--variable", "subparagraph"]
ODT_options = list(options)
HTML_options = options + ["--mathjax"]

# PDF Output
doc = transform(pandoc.read(file=doc_md))
pandoc.write(doc, file=doc_pdf, options=PDF_options)

# LaTeX Output: the images are copied next to the LaTeX source, then the
# LaTeX output directory is replaced by a zip archive of its contents.
pandoc.write(doc, file=doc_tex, options=TEX_options)
gl = lambda pattern: list(images.glob(pattern))
image_filenames = [
    filename
    for pattern in ("*.pdf", "*.png", "*.jpg")
    for filename in gl(pattern)
]
for image in image_filenames:
    shutil.copy(image, output_latex_images)
latex_dir = str(output_latex)
shutil.make_archive(latex_dir, "zip", latex_dir)
shutil.rmtree(latex_dir)
#pandoc.write(doc, format="html5", file=doc_html, options=HTML_options)
#pandoc.write(doc, format="odt", file=doc_odt, options=ODT_options)
Exemplo n.º 10
0
    error = "cannot identify the main document "
    error += f"(found {len(docs)} markdown files)"
    raise RuntimeError(error)
# Name of the main document; all input and output paths derive from it.
doc = _docs[0]
doc_md = f"{doc}.md"
doc_pdf = str(output / f"{doc}.pdf")
doc_odt = str(output / f"{doc}.odt")
doc_html = str(output / f"{doc}.html")
doc_md_md = str(output / f"{doc}.md")

# Doctest
python("-m", "doctest", doc_md)

# Pandoc Options
options = ["--standalone", "-V", "lang=fr", "--table-of-contents"]
if bibliography.exists():
    options.extend(
        ["--bibliography=bibliography.json", "-M", "link-citations=true"]
    )
# Each target gets its own copy of the shared options.
PDF_options = list(options)
ODT_options = list(options)
HTML_options = options + ["--mathjax"]

# PDF, HTML and ODT Outputs
# From here on `doc` holds the document tree, not the document name.
doc = transform(pandoc.read(file=doc_md))
pandoc.write(doc, file=doc_pdf, options=PDF_options)
pandoc.write(doc, format="html5", file=doc_html, options=HTML_options)
pandoc.write(doc, format="odt", file=doc_odt, options=ODT_options)
Exemplo n.º 11
0
def notebookify(doc):
    """Build a notebook from a pandoc document.

    The notebook starts with a markdown cell holding a fixed "Control
    Engineering with Python" heading, the document title and its author, all
    read from the document metadata. Then each top-level ``CodeBlock``
    becomes a code cell (with no execution count) and every other block is
    written back to markdown in a markdown cell of its own.

    Returns the populated ``Notebook``.
    """
    from pandoc.types import Pandoc, Meta, CodeBlock, Header, Para, Str, Space

    notebook = Notebook()
    cells = notebook["cells"]

    # Fixed series heading: the words below, separated by spaces.
    hero_title = []
    for word in ("Control", "Engineering", "with", "Python"):
        if hero_title:
            hero_title.append(Space())
        hero_title.append(Str(word))

    # NOTE(review): the indexing below assumes a particular layout of the
    # title and author metadata values - confirm against the source document.
    metamap = doc[0][0]
    title = metamap["title"][0]
    author = metamap["author"][0][0][0]

    front_matter = Pandoc(
        Meta({}),
        [
            Header(1, ("", [], []), hero_title),
            Header(1, ("", [], []), title),
            Para(author),
        ],
    )
    front_cell = MarkdownCell()
    front_cell["source"] = pandoc.write(front_matter)
    cells.append(front_cell)

    # Merging consecutive markdown cells is currently switched off.
    merge_markdown = False

    for block in doc[1]:
        if isinstance(block, CodeBlock):
            cell = CodeCell()
            cell["source"] = block[1]
            cell["execution_count"] = None  # cells are not numbered
            cells.append(cell)
            continue

        # Markdown variant used for the cells:
        # - "smart" is disabled: Jupyter cells are not expected to be smart,
        #   and with it an en-dash '–' would be written as '--' (this does
        #   not seem to work in metadata);
        # - "raw_attribute" is disabled so that raw HTML is emitted as plain
        #   HTML rather than with the `<p>Hello</p>`{=html} syntax, which
        #   Jupyter notebooks do not understand;
        # - "simple_tables" is disabled to get tables that render properly
        #   in notebooks. Switching to GitHub-flavored markdown was
        #   considered for that purpose but it would break the math.
        markdown = pandoc.write(
            Pandoc(Meta({}), [block]),
            options=["-t", "markdown-smart-raw_attribute-simple_tables"],
        )

        extend_previous = (
            merge_markdown
            and len(cells) >= 1
            and cells[-1]["cell_type"] == "markdown"
        )
        if extend_previous:
            cells[-1]["source"] += "\n" + markdown
        else:
            cell = MarkdownCell()
            cell["source"] = markdown
            cells.append(cell)
    return notebook
Exemplo n.º 12
0
from ics import *
import pandoc

# ------------------------------------------------------------------------------

url = "https://calendar.google.com/calendar/ical/o1rahvtc75kj2qcc5tmsrfr6e0%40group.calendar.google.com/public/basic.ics"

# Download and parse the public calendar.
calendar = Calendar(urlopen(url).read().decode("utf-8"))

# assert len(calendar.events) == 60
# relax this; we have more slots with the exam of EC2 now.

events = sorted(calendar.events)

# One entry per event: number and name, then the date and time slot
# (Paris time, French date), then the description converted to markdown.
for i, event in enumerate(events):
    print(f"{i+1:2d}) {event.name}")
    # Every event is expected to start and end on the same day.
    assert event.begin.date() == event.end.date()
    #assert event.duration.seconds == 1.5 * 3600
    begin = event.begin.to("Europe/Paris")
    end = event.end.to("Europe/Paris")
    print(
        "    " + begin.format("dddd DD MMMM YYYY", locale="fr_FR"),
        begin.format("HH:mm") + "-" + end.format("HH:mm") + ".",
        sep=", ",
    )
    description = event.description
    if description:
        # The description is read as HTML and printed as markdown.
        doc = pandoc.read(description, format="html")
        print(pandoc.write(doc, format="markdown"))
        #description = "\n".join(["    " + line for line in description.splitlines()])
        #description = "    " + description.strip()
        #print(description)
    print()
Exemplo n.º 13
0
def handle_typed_sections(doc):
    """Label typed section headers and register them in the LaTeX TOC.

    A header is "typed" when one of its classes is a key of ``types``
    (theorem, exercise, question, ...). For each such header, in place:

    - the classes ``unnumbered`` and ``unlisted`` are added;
    - when its first matching type has a label, the title is prefixed with
      that French label followed by an en-dash;
    - exercises and questions get a parenthesized math marker appended for
      each difficulty class they carry (``zero`` to ``four``);
    - a raw LaTeX ``\\addcontentsline`` paragraph carrying the original
      title is inserted right after the header.

    ``doc`` is modified in place; nothing is returned.
    """
    types = {
        "theorem": "Théorème",
        "corollary": "Corollaire",
        "definition": "Définition",
        "lemma": "Lemme",
        "proposition": "Proposition",
        "remark": "Remarque",
        "exercise": "Exercice",
        "example": "Exemple",
        "question": None,
        "answer": None,
    }
    levels = {
        1: "section",
        2: "subsection",
        3: "subsubsection",
        4: "paragraph",
        5: "subparagraph",
    }
    # Difficulty class -> LaTeX math marker, in the order they are appended.
    # (Unused candidate for a further level: r"\mathord{\pmb{\infty}}")
    difficulty_marks = (
        ("zero", r"\mathord{\boldsymbol{\circ}}"),
        ("one", r"\mathord{\bullet}"),
        ("two", r"\mathord{\bullet}" * 2),
        ("three", r"\mathord{\bullet}" * 3),
        ("four", r"\mathord{\bullet}" * 4),
    )
    todos = []
    for elt, path in pandoc.iter(doc, path=True):
        if not isinstance(elt, Header):
            continue
        header = elt
        level, attr, inlines = header
        id_, classes, kvs = attr
        shared = [type_ for type_ in classes if type_ in types]
        if not shared:
            continue
        classes.extend(["unnumbered", "unlisted"])
        holder, index = path[-1]
        # Render the title to LaTeX *before* it is decorated below.
        # NOTE(review): `map` is presumably the pandoc.types map type rather
        # than the builtin (which cannot be called without arguments).
        minidoc = Pandoc(Meta(map()), [Para(inlines)])
        latex_title = pandoc.write(minidoc, format="latex").strip()
        todos.append((holder, index, level, latex_title))
        label = types[shared[0]]
        if label:
            inlines = [Str(label), Space(), Str("–"), Space()] + inlines
        if "exercise" in classes or "question" in classes:
            for class_name, mark in difficulty_marks:
                if class_name in classes:
                    inlines += [
                        Space(),
                        Str("("),
                        Math(InlineMath(), mark),
                        Str(")"),
                    ]
        header[2] = inlines

    # Insert from the last header back to the first: every insertion shifts
    # the blocks that follow it in the same holder, so going forward would
    # invalidate the indices recorded for the later headers.
    for holder, index, level, latex_title in reversed(todos):
        latex_code = (r"\addcontentsline{toc}{" + levels[level] + "}{" +
                      latex_title + "}")
        holder.insert(index + 1, Para([RawInline("latex", latex_code)]))

    # NOTE(review): `found` is not defined anywhere in the code visible here;
    # this loop looks like it belongs to another routine. Confirm against
    # the full file before relying on it.
    for elt, path in pandoc.iter(doc, path=True):
        if isinstance(elt, (RawInline, RawBlock)):
            format_ = elt[0]
            if format_[0] == "html":
                holder, i = path[-1]
                found.insert(0, (holder, i))
Exemplo n.º 14
0
doc = transform(doc)

# Code and Doctest
code = generate_code(doc)
if code is not None:
    # print("code:\n\n", code)
    with open(doc_py, "w") as py_file:
        py_file.write(code)
    python("-m", "doctest", doc_py)
    # Best-effort cleanup (otherwise, top build has issues). Filesystem
    # errors such as a missing directory are ignored, but unrelated failures
    # are no longer swallowed as they were by the former bare `except`.
    shutil.rmtree(output / "__pycache__", ignore_errors=True)

# PDF and Markdown Outputs (both are written with the PDF options)
pandoc.write(doc, file=doc_pdf, options=PDF_options)
pandoc.write(doc, file=doc_md_md, options=PDF_options)

# PDF Output (Print)
pandoc.write(doc, file=doc_pdf_print, options=PDF_PRINT_options)

# LaTeX Output
pandoc.write(doc, file=doc_tex, options=TEX_options)


def gl(pattern):
    """Return the files of the images directory matching ``pattern``."""
    return list(images.glob(pattern))


# Ship the images next to the LaTeX source, then replace the LaTeX output
# directory by a zip archive of its contents.
image_filenames = gl("*.pdf") + gl("*.png") + gl("*.jpg")
for image in image_filenames:
    shutil.copy(image, output_latex_images)
shutil.make_archive(str(output_latex), "zip", str(output_latex))
shutil.rmtree(str(output_latex))
# pandoc.write(doc, format="html5", file=doc_html, options=HTML_options)
# pandoc.write(doc, format="odt", file=doc_odt, options=ODT_options)
Exemplo n.º 15
0
def notebookify(doc):
    """Build a notebook from a pandoc document.

    The first cell is a markdown cell with a fixed "Control Engineering with
    Python" heading followed by the title and the author read from the
    document metadata. After it, top-level ``CodeBlock`` elements are turned
    into code cells (without execution count) and all other blocks into
    individual markdown cells.

    Returns the populated ``Notebook``.
    """
    from pandoc.types import Pandoc, Meta, CodeBlock, Header, Para, Str, Space

    def no_attributes():
        # A fresh empty attribute triple (identifier, classes, key-values).
        return ("", [], [])

    notebook = Notebook()
    cells = notebook["cells"]

    # NOTE(review): the indexing below assumes a particular layout of the
    # title and author metadata values - confirm against the source document.
    metamap = doc[0][0]
    hero_title = [
        Str("Control"), Space(),
        Str("Engineering"), Space(),
        Str("with"), Space(),
        Str("Python"),
    ]
    title = metamap["title"][0]
    author = metamap["author"][0][0][0]

    front_blocks = [
        Header(1, no_attributes(), hero_title),
        Header(1, no_attributes(), title),
        Para(author),
    ]
    front_cell = MarkdownCell()
    front_cell["source"] = pandoc.write(Pandoc(Meta({}), front_blocks))
    cells.append(front_cell)

    # Merging consecutive markdown cells is currently switched off.
    merge_markdown = False

    for block in doc[1]:
        if not isinstance(block, CodeBlock):
            # Markdown variant used for the cells:
            # - "smart" is disabled: Jupyter cells are not expected to be
            #   smart, and with it an en-dash '–' would be written as '--';
            # - "raw_attribute" is disabled so that raw HTML is emitted as
            #   plain HTML rather than with the `<p>Hello</p>`{=html}
            #   syntax, which Jupyter notebooks do not understand.
            markdown = pandoc.write(
                Pandoc(Meta({}), [block]),
                options=["-t", "markdown-smart-raw_attribute"],
            )
            if (merge_markdown and len(cells) >= 1
                    and cells[-1]["cell_type"] == "markdown"):
                cells[-1]["source"] += "\n" + markdown
            else:
                text_cell = MarkdownCell()
                text_cell["source"] = markdown
                cells.append(text_cell)
        else:
            code_cell = CodeCell()
            code_cell["source"] = block[1]
            code_cell["execution_count"] = None  # cells are not numbered
            cells.append(code_cell)
    return notebook
Exemplo n.º 16
0
    "Remove everything before the first real paragraph"
    blocks = doc[1]
    for i, block in enumerate(blocks):
        if isinstance(block, Para):
            para = block
            inlines = para[0]
            if len(inlines) > 0 and isinstance(inlines[0], Str):
                break
    doc[1] = blocks[i:]
    return doc


# Simplify
# ------------------------------------------------------------------------------
def simplify(doc):
    """Return ``doc`` after unpacking its divs and removing its preamble."""
    # `unpack_divs_2` is an alternative unpacking step, currently disabled.
    return remove_preamble(unpack_divs(doc))


# Entry Point
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    # Demo: fetch the pandoc "Getting started" page, simplify it and print
    # the result as standalone markdown.
    url = 'https://pandoc.org/getting-started.html'
    # The context manager closes the HTTP response once the body is read;
    # the connection was previously left open.
    with urllib.request.urlopen(url) as response:
        src = response.read()
    doc = pandoc.read(src, format="html")
    doc = simplify(doc)
    print(pandoc.write(doc, format="markdown", options=["-s"]))