def get_meta(meta, name):
    """Retrieves the metadata variable `name` from the `meta` dict.

    Returns raw values for MetaString/MetaBool, booleans or stringified
    text for MetaInlines, lists (of dicts or strings) for MetaList, and a
    dict of stringified values for MetaMap.
    """
    # Callers are expected to have checked presence beforehand.
    assert name in meta
    data = meta[name]
    if data['t'] in ['MetaString', 'MetaBool']:
        # Plain scalar metadata: return the raw value unchanged.
        return data['c']
    if data['t'] == 'MetaInlines':
        # Handle bug in pandoc 2.2.3 and 2.2.3.1: Return boolean value rather
        # than strings, as appropriate.
        if len(data['c']) == 1 and data['c'][0]['t'] == 'Str':
            if data['c'][0]['c'] in ['true', 'True', 'TRUE']:
                return True
            if data['c'][0]['c'] in ['false', 'False', 'FALSE']:
                return False
        return stringify(data['c'])
    if data['t'] == 'MetaList':
        try:
            # Process MetaList of MetaMaps
            ret = []
            for v in data['c']:
                assert v['t'] == 'MetaMap'
                entry = {}
                for key in v['c']:
                    entry[key] = stringify(v['c'][key])
                ret.append(entry)
            return ret
        except AssertionError:
            # Not a homogeneous list of MetaMaps; fall back to stringifying
            # each element instead.
            pass
        return [stringify(v['c']) for v in data['c']]
    if data['t'] == 'MetaMap':
        # Map of key -> stringified metadata value.
        ret = {}
        for key in data['c']:
            ret[key] = stringify(data['c'][key])
        return ret
    raise RuntimeError("Could not understand metadata variable '%s'." % name)
def url_filter(key, value, format_, meta):
    '''
    Filter special links.

    If a link is of the form '!STRING', use the bang-expression to search
    DuckDuckGo. So for instance (with markdown) '[Fishmans](!w)' would
    search Wikipedia for "Fishmans". If a link is empty, like
    '[About me]()', then automatically link to the slug-form of the text;
    in this case, the link would be transformed to '[About me](./about-me)'
    (or whatever equivalent in the output format).
    '''
    if key == 'Link':
        # Pandoc >= 1.16 Link shape: [attr, inlines, [url, title]].
        attr, txt, urllst = value
        url = urllst[0]
        # For debugging
        #with open('log.txt', 'w') as f:
        #    f.write(str(value) + "\n")
        #    f.write("txt: " + str(txt) + "\n")
        #    f.write("url: " + str(url) + "\n")
        #    f.write("attr: " + str(attr) + "\n")
        if url == "!w":
            # Bare '!w': search English Wikipedia for the link text itself.
            url = "https://en.wikipedia.org/wiki/" + stringify(txt)
        elif url.startswith("!w%20"):
            # '!w TERM' (URL-encoded space): use the explicit search term.
            url = "https://en.wikipedia.org/wiki/" + url[len("!w%20"):]
        elif url == "!wja":
            url = "https://ja.wikipedia.org/wiki/" + stringify(txt)
        elif url.startswith("!wja%20"):
            url = "https://ja.wikipedia.org/wiki/" + url[len("!wja%20"):]
        elif url.startswith("!"):
            # Any other bang is forwarded to DuckDuckGo, which expands it.
            url = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
        elif url == '':
            # So we want to internally link txt
            url = slug(stringify(txt))
            url = "./" + url
        urllst = [url, urllst[1]]
        return Link(attr, txt, urllst)
def structure_header(v, f, m):
    """Track the most recently seen section/subsection titles in globals."""
    global lastsection, lastsubsection
    level = v[0]
    if level == 1:
        # A new top-level section resets the subsection tracker.
        lastsection = pf.stringify(v[2])
        lastsubsection = ''
    elif level == 2:
        lastsubsection = pf.stringify(v[2])
def get_runlist(metadata, kind, options):
    """ return run list for kind from metadata

    Each entry is a dict with 'kind', 'command' (resolved path), 'status'
    (initially QUEUED) and 'arguments' (list of strings).
    """
    runlist = list()
    # - return empty list unless entries of kind are in metadata
    try:
        metadata_list = get_content(metadata, kind, 'MetaList')
    except (error.WrongType, error.MissingField) as err:
        info.log('WARNING', 'panzer', err)
        return runlist
    for item in metadata_list:
        check_c_and_t_exist(item)
        item_content = item[const.C]
        # - create new entry
        entry = dict()
        entry['kind'] = kind
        entry['command'] = str()
        entry['status'] = const.QUEUED
        # - get entry command
        command_raw = get_content(item_content, 'run', 'MetaInlines')
        command_str = pandocfilters.stringify(command_raw)
        entry['command'] = util.resolve_path(command_str, kind, options)
        # - get entry arguments
        entry['arguments'] = list()
        if 'args' in item_content:
            if get_type(item_content, 'args') == 'MetaInlines':
                # - arguments raw string: split like a shell command line
                arguments_raw = get_content(item_content, 'args',
                                            'MetaInlines')
                arguments_str = pandocfilters.stringify(arguments_raw)
                entry['arguments'] = shlex.split(arguments_str)
            elif get_type(item_content, 'args') == 'MetaList':
                # - arguments MetaList: delegate per-item parsing
                arguments_list = get_content(item_content, 'args', 'MetaList')
                entry['arguments'] = get_runlist_args(arguments_list)
        runlist.append(entry)
    return runlist
def url_filter(key, value, format_, meta):
    '''
    Filter special links.

    If a link is of the form '!STRING', use the bang-expression to search
    DuckDuckGo. So for instance (with markdown) '[Fishmans](!w)' would
    search Wikipedia for "Fishmans". If a link is empty, like
    '[About me]()', then automatically link to the slug-form of the text;
    in this case, the link would be transformed to '[About me](./about-me)'
    (or whatever equivalent in the output format).
    '''
    if key == 'Link':
        # Pre-1.16 pandoc Link shape: [inlines, [url, title]].
        [txt, [url, attr]] = value
        if url == "!w":
            # Bare '!w': search English Wikipedia for the link text itself.
            url = "https://en.wikipedia.org/wiki/" + stringify(txt)
        elif url.startswith("!w%20"):
            # '!w TERM' (URL-encoded space): use the explicit search term.
            url = "https://en.wikipedia.org/wiki/" + url[len("!w%20"):]
        elif url == "!wja":
            url = "https://ja.wikipedia.org/wiki/" + stringify(txt)
        elif url.startswith("!wja%20"):
            url = "https://ja.wikipedia.org/wiki/" + url[len("!wja%20"):]
        elif url.startswith("!"):
            # Any other bang is forwarded to DuckDuckGo, which expands it.
            url = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
        elif url == '':
            # So we want to internally link txt
            url = slugify_unicode(stringify(txt), to_lower=True)
            url = "./" + url
        return Link(txt, [url, attr])
def bidding_divs(key, value, fmt, meta):
    """Wrap bidding bullet lists in a Div with class 'bids'.

    Uses the global `prev_key` to avoid re-processing the same list.
    """
    global prev_key
    if key != 'BulletList':
        return None
    first_item = pf.stringify(value[0])
    if prev_key == first_item:
        return None
    prev_key = first_item
    if is_bidding(value):
        # attr = (id, classes, key/value pairs)
        return pf.Div(('', ['bids'], []), [pf.BulletList(value)])
    return None
def _para_attr(self, para): 'extract and return attributes from para (if any)' # An attribute Para starts with a known attribute: keyword # - parameter para is the 'c'-val from {'t': 'Para', 'c': [...]} # - list of words may span multiple lines # - explanation: is the only attr whose value is markdown text. attrs = {} # attrs{attr} -> sub-ast # check if para starts with an attribute keyword, if not return {} try: if para[0]['t'] != 'Str': return attrs attr = para[0]['c'].lower() if attr not in self.ATTR_KEYWORDS: return attrs except Exception as e: log.debug('cannot retrieve attributes from para: %s', repr(e)) return attrs # collect subast per attribute in ATTR_KEYWORDS ptr = attrs.setdefault(attr, []) for key, val in PandocAst(para).tokens: if key != 'Str': ptr.append(as_block(key, val)) # append non-Str to ptr continue attr = val.lower() if attr in self.ATTR_KEYWORDS: ptr = attrs.setdefault(attr, []) # keyword: starts new ptr continue ptr.append(as_block(key, val)) # otherwise, append to ptr # process known attributes for attr, subast in attrs.items(): if attr not in self.ATTR_KEYWORDS: continue if attr == 'explanation:': xpl = PandocAst(as_ast('Para', subast)).convert('markdown') attrs[attr] = xpl.strip() elif attr == 'tags:': # list of words, possibly separated by spaces and/or comma's words = pf.stringify(subast).replace(',', ' ').lower() attrs[attr] = sorted(set(words.split())) elif attr == 'answer:': # list of letters or numbers, possibly separated by non-alnum's answers = pf.stringify(subast).lower() attrs[attr] = sorted(set(x for x in answers if x.isalnum())) elif attr == 'section:': # section: keyword for this question words = pf.stringify(subast).replace(',', ' ').lower() attrs[attr] = words.split()[0] # keep only 1st word # log any attributes found log.debug('found %d attributes: %s', len(attrs), attrs.keys()) return attrs
def header2box(key, value, format, meta):
    """Render level-1/2 headers via the LaTeX box templates for
    latex/pdf/native output; everything else is left untouched."""
    if format not in ["latex", "pdf", "native"]:
        return
    if key != 'Header':
        return
    level = value[0]
    if level == 1:
        template = template1
    elif level == 2:
        template = template2
    else:
        return
    return RawBlock('latex', template % stringify(value))
def main():
    """Deploy the PDF built by panzer to the path given by the 'metapub'
    entry in the metapub_file YAML database.

    Returns 0 on success, 1 on any error (missing metadata, missing entry,
    missing source/destination).
    """
    message_in = read_message()
    # read input ast
    meta = message_in[0]['metadata']
    if 'metapub' not in meta:
        log('ERROR', 'No "metapub" metadata field in input document')
        return 1
    # read metapub_file
    if 'metapub_file' not in meta:
        log('ERROR', 'No "metapub_file" metadata field in input document')
        return 1
    metapub_file = stringify(meta['metapub_file'])
    log('DEBUG', 'reading from: ' + metapub_file)
    pubs = read_yaml(metapub_file)
    # find entry in metapub_file (matched by uuid or slug)
    ident = stringify(meta['metapub'])
    log('DEBUG', 'looking for: ' + ident)
    entries = [e for e in pubs if ('uuid' in e and e['uuid'] == ident) \
               or ('slug' in e and e['slug'] == ident)]
    if not entries:
        log(
            'ERROR', 'Publication with id "%s" not found in "%s"' %
            (ident, metapub_file))
        return 1
    if len(entries) > 1:
        # Ambiguous id: warn but proceed with the first match.
        log(
            'WARNING', 'More than 1 publication with id "%s" found in "%s"' %
            (ident, metapub_file))
    entry = entries[0]
    dest = str()
    try:
        dest = entry['deploy']['path']
    except KeyError:
        log('ERROR', '"path" field inside "deploy" field not found in metadata')
        return 1
    if not dest:
        log('WARNING', '"path" field inside "deploy" empty')
        return 1
    # copy the file: panzer's output with extension swapped to .pdf
    source = message_in[0]['options']['pandoc']['output']
    if source == '-':
        log('ERROR', 'no output filename for panzer given')
        return 1
    source = os.path.splitext(source)[0] + '.pdf'
    log('INFO', 'source file: "%s"' % source)
    log('INFO', 'deploying to location "%s"' % dest)
    if not os.path.exists(source):
        log('ERROR', 'source file "%s" does not exist' % source)
        return 1
    shutil.copyfile(source, dest)
    if os.path.exists(dest):
        open_pdf(dest)
    return 0
def crossreference(key, value, fmt, meta):
    """Rewrite Images and Tables as ConTeXt float placements.

    A caption may start with '{options}' in braces; those become the
    placement arguments, the remainder becomes the caption text.
    """
    if key == 'Image' and fmt == 'context':
        [caption, fig] = value
        # Group 1: brace options (may be empty); group 2: caption text.
        strCap = re.findall(r'(?:\{(.*)\})? *(.*$)', stringify(caption))
        caption = [
            RawInline('context', re.sub(r'\{.*\} *', "", stringify(caption)))
        ]
        return [
            context("\placefigure[here][%s]{%s}{\externalfigure[%s]}" %
                    (strCap[0][0], strCap[0][1], fig[0]))
        ]
    if key == 'Table' and fmt == 'context':
        warning("start")
        warning(value)
        # Table value: [caption, alignments, widths, header row, body rows]
        [caption, align, width, head, text] = value
        strCap = re.findall(r'(?:\{(.*)\})? *(.*$)', stringify(caption))
        # todo width
        out = []
        # Per-column alignment setup.
        for i, col in enumerate(align):
            out.append(
                block("\\setupTABLE[column][%s][align={%s,lohi}]" %
                      (i + 1, alignDict(col))))
        out.append(
            block("\placetable[here][%s]{%s}{" % (strCap[0][0], strCap[0][1])))
        out.append(block("\\bTABLE"))
        if not empty(head):
            # Header row gets a gray background and emphasized style.
            out.append(
                block(
                    "\setupTABLE[r][1][background=color,backgroundcolor=gray,style={\\tfa}]"
                ))
            out.append(block("\\bTR"))
            for h in head:
                out.append(block("\\bTH %s \\eTH" % (stringify(h))))
            out.append(block("\\eTR"))
        else:
            out.append(
                block(
                    "\setupTABLE[r][1][background=color,backgroundcolor=white,style={\\tfx}]"
                ))
        # Emit the table body row by row.
        for row in text:
            out.append(block("\\bTR"))
            for cell in row:
                out.append(block("\\bTD %s \\eTD" % stringify(cell)))
            out.append(block("\\eTR"))
        out.append(block("\\eTABLE}"))
        return out
def figure_replacement(self, key, value, format, metadata):
    """Replace figures with appropriate representation and append info to
    the refdict.

    Emits a format-specific figure (markdown/html/html5/latex) numbered by
    self.figure_count and registers its label in self.refdict.
    """
    image = value[0]
    # Everything after the image is the attribute string, e.g. '{#id .cls}'.
    attr_string = pf.stringify(value[1:])
    filename = image['c'][1][0]
    raw_caption = pf.stringify(image['c'][0])
    attrs = attr_parser.parse(attr_string)
    label = attrs['id']
    classes = attrs['classes']
    # Remaining key/value attributes (excluding id and classes).
    keys = [(k, v) for k, v in attrs.items() if k not in ('id', 'classes')]
    class_str = 'class="{}"'.format(' '.join(classes)) if classes else ''
    key_str = ' '.join('{}={}'.format(k, v) for k, v in keys)
    # Register the figure so internal references can resolve its number.
    self.refdict[label] = {'type': 'figure', 'id': self.figure_count}
    caption = 'Figure {n}: {caption}'.format(n=self.figure_count,
                                             caption=raw_caption)
    self.figure_count += 1
    if format == 'markdown':
        figure = markdown_figure.format(id=label, caption=caption,
                                        filename=filename)
        return pf.Para([rawmarkdown(figure)])
    elif format == 'html':
        figure = html_figure.format(id=label, classes=class_str,
                                    keys=key_str, filename=filename,
                                    alt=caption, caption=caption)
        return pf.Para([rawhtml(figure)])
    elif format == 'html5':
        figure = html5_figure.format(id=label, classes=class_str,
                                     keys=key_str, filename=filename,
                                     alt=caption, caption=caption)
        return pf.Para([rawhtml(figure)])
    elif format == 'latex':
        # LaTeX numbers figures itself, so pass the raw caption.
        figure = latex_figure.format(filename=filename, caption=raw_caption,
                                     label=label)
        return pf.Para([rawlatex(figure)])
def convert_internal_refs(self, key, value, format, metadata):
    """Convert all internal links from '#blah' into format specified in
    self.replacements.
    """
    if key != 'Cite':
        return None
    citations, inlines = value
    if len(citations) > 1:
        '''
        Note: Need to check that *all* of the citations in a multicitation
        are in the reference list. If not, the citation is bibliographic,
        and we want LaTeX to handle it, so just return unmodified.
        '''
        for citation in citations:
            if citation['citationId'] not in self.references:
                return
        return self.convert_multiref(key, value, format, metadata)
    else:
        citation = citations[0]
        prefix = pf.stringify(citation['citationPrefix'])
        suffix = pf.stringify(citation['citationSuffix'])
        if prefix:
            # Keep a space between the prefix text and the reference.
            prefix += ' '
        label = citation['citationId']
        if label not in self.references:
            # Unknown label: bibliographic citation, leave for LaTeX.
            return
        rtype = self.references[label]['type']
        n = self.references[label]['id']
        text = self.replacements[rtype].format(n)
        if format == 'latex' and self.autoref:
            link = u'{pre}\\autoref{{{label}}}{post}'.format(pre=prefix,
                                                             label=label,
                                                             post=suffix)
            return pf.RawInline('latex', link)
        elif format == 'latex' and not self.autoref:
            link = u'{pre}\\ref{{{label}}}{post}'.format(pre=prefix,
                                                         label=label,
                                                         post=suffix)
            return pf.RawInline('latex', link)
        else:
            # Non-LaTeX output: plain intra-document link.
            link_text = '{}{}{}'.format(prefix, text, suffix)
            link = pf.Link([pf.Str(link_text)], ('#' + label, ''))
            return link
def convert_internal_refs(self, key, value, format, metadata):
    """Convert all internal links from '#blah' into format specified in
    self.replacements.
    """
    if key != 'Cite':
        return None
    citations, inlines = value
    if len(citations) > 1:
        '''
        Note: Need to check that *all* of the citations in a multicitation
        are in the reference list. If not, the citation is bibliographic,
        and we want LaTeX to handle it, so just return unmodified.
        '''
        for citation in citations:
            if citation['citationId'] not in self.references:
                return
        return self.convert_multiref(key, value, format, metadata)
    else:
        citation = citations[0]
        prefix = pf.stringify(citation['citationPrefix'])
        suffix = pf.stringify(citation['citationSuffix'])
        if prefix:
            # Keep a space between the prefix text and the reference.
            prefix += ' '
        label = citation['citationId']
        if label not in self.references:
            # Unknown label: bibliographic citation, leave for LaTeX.
            return
        rtype = self.references[label]['type']
        n = self.references[label]['id']
        text = self.replacements[rtype].format(n)
        if format == 'latex' and self.autoref:
            link = u'{pre}\\autoref{{{label}}}{post}'.format(pre=prefix,
                                                             label=label,
                                                             post=suffix)
            return pf.RawInline('latex', link)
        elif format == 'latex' and not self.autoref:
            link = u'{pre}\\ref{{{label}}}{post}'.format(pre=prefix,
                                                         label=label,
                                                         post=suffix)
            return pf.RawInline('latex', link)
        else:
            # Non-LaTeX output: plain intra-document link.
            link_text = u'{}{}{}'.format(prefix, text, suffix)
            link = pf.Link([pf.Str(link_text)], ('#' + label, ''))
            return link
def structure_header(v, f, m):
    """Map header levels for poster output: level 1 opens a block, level 2
    is removed, level 3 becomes a LaTeX structure line; other levels are
    left untouched (None)."""
    global block
    level = v[0]
    if level == 1:
        out = []
        block.begin(out, pf.stringify(v[2]))
        return out
    if level == 2:
        # second level ignored and removed on posters
        return []
    if level == 3:
        return [lb(r'\structure{%s}' % pf.stringify(v[2]))]
def main(): """docstring for main""" # read input ast ast = read_ast() meta = ast['meta'] if 'metapub' not in meta: write_ast(ast) return # read metapub_file if 'metapub_file' not in meta: log('ERROR', 'No "metapub_file" metadata field in input document') write_ast(ast) return metapub_file = stringify(meta['metapub_file']) log('DEBUG', 'reading from: ' + metapub_file) pubs = read_yaml(metapub_file) # find entry in metapub_file ident = stringify(meta['metapub']) log('DEBUG', 'looking for: ' + ident) entries = [e for e in pubs if ('uuid' in e and e['uuid'] == ident) \ or ('slug' in e and e['slug'] == ident)] if not entries: log( 'ERROR', 'Publication with id "%s" not found in "%s"' % (ident, metapub_file)) write_ast(ast) return if len(entries) > 1: log( 'WARNING', 'More than 1 publication with id "%s" found in "%s"' % (ident, metapub_file)) entry = entries[0] # build new metadata from entry new = dict() add_title(new, entry) add_author(new, entry) add_date_updated(new, entry) add_disclaimer(new, entry) add_publication(new, entry) add_abstract(new, entry) add_keywords(new, entry) add_note(new, entry) add_review(new, entry) # convert new metadata to pandoc's ast metadata format log('DEBUG', 'new metadata: ' + str(new)) incoming = generate_meta(new) # update using new metadata meta.update(incoming) # write output ast ast['meta'] = meta write_ast(ast)
def get_meta(meta, name):
    """Retrieve the metadata variable `name` from the `meta` dict.

    Returns the raw value for MetaString/MetaBool, the stringified text
    for MetaInlines, and a list of stringified elements for MetaList.

    Raises:
        RuntimeError: if the metadata type is not recognised.
    """
    assert name in meta
    data = meta[name]
    if data['t'] in ['MetaString', 'MetaBool']:
        # Plain scalar metadata: return the raw value unchanged.
        return data['c']
    elif data['t'] == 'MetaInlines':
        # Bug fix: previously only single-element MetaInlines were handled,
        # so multi-word values fell through and silently returned None.
        return stringify(data['c'])
    elif data['t'] == 'MetaList':
        return [stringify(v['c']) for v in data['c']]
    else:
        raise RuntimeError("Could not understand metadata variable '%s'." %
                           name)
def get_list_or_inline(metadata, field):
    """ return content of MetaList or MetaInlines item coerced as list """
    field_type = get_type(metadata, field)
    if field_type == 'MetaInlines':
        # Single inline value: wrap its stringified form in a list.
        content_raw = get_content(metadata, field, 'MetaInlines')
        return [pandocfilters.stringify(content_raw)]
    elif field_type == 'MetaList':
        # Stringify each list element.
        return [pandocfilters.stringify(item)
                for item in get_content(metadata, field, 'MetaList')]
    else:
        # Bug fix: the implicitly-concatenated literals lacked a separating
        # space, producing the garbled message '..."MetaInlines"or...'.
        raise error.WrongType('"%s" value must be of type "MetaInlines" '
                              'or "MetaList"' % field)
def main(): """docstring for main""" # read input ast ast = read_ast() meta = ast['meta'] if 'metapub' not in meta: write_ast(ast) return # read metapub_file if 'metapub_file' not in meta: log('ERROR', 'No "metapub_file" metadata field in input document') write_ast(ast) return metapub_file = stringify(meta['metapub_file']) log('DEBUG', 'reading from: ' + metapub_file) pubs = read_yaml(metapub_file) # find entry in metapub_file ident = stringify(meta['metapub']) log('DEBUG', 'looking for: ' + ident) entries = [e for e in pubs if ('uuid' in e and e['uuid'] == ident) \ or ('slug' in e and e['slug'] == ident)] if not entries: log('ERROR', 'Publication with id "%s" not found in "%s"' % (ident, metapub_file)) write_ast(ast) return if len(entries) > 1: log('WARNING', 'More than 1 publication with id "%s" found in "%s"' % (ident, metapub_file)) entry = entries[0] # build new metadata from entry new = dict() add_title(new, entry) add_author(new, entry) add_date_updated(new, entry) add_disclaimer(new, entry) add_publication(new, entry) add_abstract(new, entry) add_keywords(new, entry) add_note(new, entry) add_review(new, entry) # convert new metadata to pandoc's ast metadata format log('DEBUG', 'new metadata: ' + str(new)) incoming = generate_meta(new) # update using new metadata meta.update(incoming) # write output ast ast['meta'] = meta write_ast(ast)
def mk_columns(k, v, f, m):
    """Turn '.name[' / '.]' marker paragraphs into opening/closing divs."""
    if k != "Para":
        return
    text = stringify(v)
    if text == ".]":
        return html(r'</div>')
    if text.startswith('.') and text.endswith('['):
        # The text between '.' and '[' becomes the div's class name.
        return html(r'<div class="%s">' % text[1:-1])
def get_runlist_args(arguments_list):
    """ return list of arguments from 'args' MetaList

    Each item must be a MetaMap with exactly one field.  MetaBool fields
    become bare '--name' flags; MetaInlines become '--name="value"'.
    Malformed items are reported and skipped.
    """
    arguments = list()
    for item in arguments_list:
        if item[const.T] != 'MetaMap':
            info.log('ERROR', 'panzer',
                     '"args" list should have fields of type "MetaMap"')
            continue
        fields = item[const.C]
        if len(fields) != 1:
            info.log('ERROR', 'panzer',
                     '"args" list should have exactly one field per item')
            continue
        field_name = "".join(fields.keys())
        field_type = get_type(fields, field_name)
        field_value = get_content(fields, field_name, field_type)
        if field_type == 'MetaBool':
            # Boolean options become bare flags: --name
            arguments.append('--' + field_name)
        elif field_type == 'MetaInlines':
            value_str = pandocfilters.stringify(field_value)
            arguments.append('--%s="%s"' % (field_name, value_str))
        else:
            # Bug fix: missing space between the implicitly-concatenated
            # literals previously produced the message "...notsupported...".
            info.log('ERROR', 'panzer',
                     'arguments of type "%s" not '
                     'supported---"%s" ignored' % (field_type, field_name))
    return arguments
def wrapfig(key, val, fmt, meta):
    """Wrap images whose caption ends with a FLAG_PAT tag in a LaTeX
    wrapfigure environment; other output formats get the tag stripped."""
    if key == 'Image':
        attrs, caption, target = val
        if FLAG_PAT.match(stringify(caption)):  # Strip tag
            # Flag groups: 1=placement, 2=width, 4=overhang, 7=line count
            # -- assumed from usage; TODO confirm against FLAG_PAT.
            where = FLAG_PAT.match(caption[-1]['c']).group(1)
            overhang = FLAG_PAT.match(caption[-1]['c']).group(4)
            overhang = overhang if not overhang else '[%s]' % overhang
            size = FLAG_PAT.match(caption[-1]['c']).group(2)
            lines = FLAG_PAT.match(caption[-1]['c']).group(7)
            # Drop the trailing Space + flag Str from the caption inlines.
            stripped_caption = caption[:-2]
            if fmt == 'latex':
                if len(lines) > 0:
                    # Explicit number of narrowed lines requested.
                    latex_begin = r'\begin{wrapfigure}[' + lines \
                        + ']{%s}%s{' % (where, overhang) + size + '}'
                else:
                    latex_begin = \
                        r'\begin{wrapfigure}{%s}%s{' % (where, overhang) \
                        + size + '}'
                if len(stripped_caption) > 0:
                    # Keep the caption inlines between raw LaTeX fragments.
                    latex_fig = r'\centering\includegraphics{' + target[0] \
                        + '}\caption{'
                    latex_end = r'}\end{wrapfigure}'
                    return [RawInline(fmt, latex_begin + latex_fig)] \
                        + stripped_caption + [RawInline(fmt, latex_end)]
                else:
                    latex_fig = r'\centering\includegraphics{' + target[0] \
                        + '}'
                    latex_end = r'\end{wrapfigure}'
                    return [RawInline(fmt, latex_begin + latex_fig)] \
                        + [RawInline(fmt, latex_end)]
            else:
                # Non-LaTeX output: just remove the flag from the caption.
                return Image(attrs, stripped_caption, target)
def blockquote2div(key, value, format, meta):
    """Convert a blockquote into a div if it begins with a header that has
    attributes containing a single class that is in the allowed classes.

    This function can be passed directly to toJSONFilter from
    pandocfilters.
    """
    if key == 'BlockQuote':
        blockquote = value
        header = find_header(blockquote)
        if not header:
            return
        else:
            level, attr, inlines = header
        id, classes, kvs = attr
        ltitle = pf.stringify(inlines).lower()
        if ltitle in SPECIAL_TITLES:
            # Header title (e.g. "note") implies its class directly.
            classes.append(SPECIAL_TITLES[ltitle])
            return pf.Div(attr, blockquote)
        elif len(classes) == 1 and classes[0] in SPECIAL_CLASSES:
            remove_attributes(blockquote)
            # a blockquote is just a list of blocks, so it can be
            # passed directly to Div, which expects Div(attr, blocks)
            return pf.Div(attr, blockquote)
def transPara(key, value, format, meta):
    """Flatten Spans to plain Str text and unwrap Divs, dropping elements
    whose attributes match dropHeaderFooter (header/footer boilerplate).

    Returns [] to delete the element, a replacement node/list, or None to
    leave it unchanged.
    """
    if key == 'Span':
        # value = [attr, inlines]; attr[2] is the key/value attribute list.
        if dropHeaderFooter(value[0][2]):
            return []
        return Str(stringify(value[1]))
    elif key == 'Div':
        if dropHeaderFooter(value[0][2]):
            return []
        # Bug fix: removed the dead local assignment `trStr = ""` that was
        # never read.  Divs with no visible text are deleted outright.
        if not stringify(value[1]):
            return []
        return value[1]
    else:
        return None
def keyword2html(keyword_node):
    """Wrap a keyword node in a Span whose id is the normalized keyword."""
    span_id = normalize_keyword(pf.stringify(keyword_node))
    # Span attr = [id, classes, key/value pairs].
    attrs = [span_id, [], []]
    return [{"t": "Span", "c": [attrs, keyword_node]}]
def mk_columns(k, v, f, m):
    # Translate '[leftcol N]'/'[rightcol N]'/'[endcol]' marker paragraphs
    # into a floated two-column HTML layout; N is a percentage width and
    # defaults to 50% when omitted.
    if k == "Para":
        value = pf.stringify(v).strip()
        if value.startswith('[') and value.endswith(']'):
            content = value[1:-1]
            if content.startswith("leftcol"):
                # Whatever follows the keyword is the width in percent.
                width = content.replace("leftcol", '').strip()
                if width != "":
                    width += "%"
                else:
                    width = "50%"
                return html("""
<div id="col-wrapper">
<div id="col">
<div style="width:%(width)s; float: left;">""" % {'width': width})
            elif content.startswith("rightcol"):
                width = content.replace("rightcol", '').strip()
                if width != "":
                    width += "%"
                else:
                    width = "50%"
                return html("""
</div>
<div style="width:%(width)s; float:right;">""" % {'width': width})
            elif content.startswith("endcol"):
                # Close the right column, the #col div and the wrapper.
                return html("""
</div>
</div>
</div>""")
def taskToProjectJson(block):
    """Convert a task list item into a [parent, name, "task", done] row."""
    m = taskRegex.match(pandocfilters.stringify(block))
    # group(1) holds the checkbox character: a space means still open.
    done = m.group(1) != " "
    return ["", m.group(2), "task", done]
def id4glossary(key, value, format, meta):
    """Add id to keywords at glossary (only when subtitle is 'Reference')."""
    if "subtitle" not in meta:
        return
    if pf.stringify(meta['subtitle']) != 'Reference':
        return
    if key == "DefinitionList":
        # Replace each defined term with a Span carrying a normalized id.
        for item in value:
            item[0] = keyword2html(item[0])
    return {"t": key, "c": value}
def replaceQuote(key, value, format, meta):
    """Rewrite block quotes starting with ':tag' as a custom LaTeX
    environment, replacing the tag marker with a bold prefix label."""
    if key == 'BlockQuote' and format == 'latex':
        if stringify(value).startswith(":" + tag):
            value[0]['c'][0] = Strong([Str(prefix)
                                       ])  # remove the ":note" prefix
            # Surround the quote's blocks with the environment delimiters.
            return [latex("\\begin{" + blockName + "}")
                    ] + value + [latex("\\end{" + blockName + "}")]
def mk_columns(k, v, f, m):
    """Translate '[columns...]'/'[column...]' marker paragraphs into divs.

    '[columns...]' opens the outer wrapper, '[column=N...]' opens a column
    with width N, '[/column]' and '[/columns]' close a div.  Text after
    the first comma inside the brackets is passed through as raw div
    attributes.
    """
    if k == "Para":
        value = stringify(v)
        if value.startswith('[') and value.endswith(']'):
            if "[columns" in value:
                div_args = ""
                if "," in value:
                    # Everything after the first comma becomes attributes.
                    div_args += value[value.find(",") + 1:-1]
                return html(r'<div %s>' % div_args)
            elif value == "[/columns]" or value == "[/column]":
                return html(r'</div>')
            elif value.startswith("[column=") or value.startswith("[column,"):
                # Bug fix: the pattern was a non-raw string, so '\d' was an
                # invalid escape sequence (SyntaxWarning on modern Python).
                regex_result = re.search(r"column=(\d+)", value)
                if regex_result and regex_result.groups():
                    div_args = r'<div width="%s" ' % regex_result.groups()[0]
                else:
                    div_args = r'<div '
                if "," in value:
                    div_args += value[value.find(",") + 1:-1]
                div_args += ">"
                return html(div_args)
def create_figures(key, value, format, metadata):
    """Convert Images with attributes to Figures.

    Images are [caption, (filename, title)].

    Figures are [caption, (filename, title), attrs].

    This isn't a supported pandoc type, we just use it internally.
    """
    if isattrfigure(key, value):
        image = value[0]
        # Inlines after the image hold the attribute string, e.g. '{#id}'.
        attr = PandocAttributes(pf.stringify(value[1:]), 'markdown')
        caption, target = image['c']
        return Figure(caption, target, attr.to_pandoc())
    elif isdivfigure(key, value):
        # use the first image inside
        attr, blocks = value
        images = [b['c'][0] for b in blocks if b['c'][0]['t'] == 'Image']
        image = images[0]
        caption, target = image['c']
        return Figure(caption, target, attr)
    else:
        return None
def wrapfig(key, val, fmt, meta):
    """Wrap images whose caption ends with a FLAG_PAT tag in a LaTeX
    wrapfigure (left-placed); other output formats get the tag stripped."""
    if key == 'Image':
        attrs, caption, target = val
        if FLAG_PAT.match(stringify(caption)):  # Strip tag
            # Flag groups: 1=width, 3=line count -- assumed from usage;
            # TODO confirm against FLAG_PAT.
            size = FLAG_PAT.match(caption[-1]['c']).group(1)
            lines = FLAG_PAT.match(caption[-1]['c']).group(3)
            # Drop the trailing Space + flag Str from the caption inlines.
            stripped_caption = caption[:-2]
            if fmt == 'latex':
                if len(lines) > 0:
                    # Explicit number of narrowed lines requested.
                    latex_begin = r'\begin{wrapfigure}[' + lines + ']{l}{' + size + '}'
                else:
                    latex_begin = r'\begin{wrapfigure}{l}{' + size + '}'
                if len(stripped_caption) > 0:
                    # Keep the caption inlines between raw LaTeX fragments.
                    latex_fig = r'\centering\includegraphics{' + target[0] \
                        + '}\caption{'
                    latex_end = r'}\end{wrapfigure}'
                    return [RawInline(fmt, latex_begin + latex_fig)] \
                        + stripped_caption + [RawInline(fmt, latex_end)]
                else:
                    latex_fig = r'\centering\includegraphics{' + target[0] \
                        + '}'
                    latex_end = r'\end{wrapfigure}'
                    return [RawInline(fmt, latex_begin + latex_fig)] \
                        + [RawInline(fmt, latex_end)]
            else:
                # Non-LaTeX output: just remove the flag from the caption.
                return Image(attrs, stripped_caption, target)
def mk_terminal(key, value, format, meta):
    # Convert '[terminal]' / '[/terminal]' marker paragraphs into LaTeX or
    # beamer listing environments styled as a terminal window.
    if key == "Para":
        val = pf.stringify(value)
        if val.startswith('[') and val.endswith(']'):
            content = val[1:-1]
            if content == "terminal":
                if (format == "beamer"):
                    # Beamer: recolor the example block and open a bash
                    # lstlisting inside it.
                    return latex(
                        '\setbeamercolor*{block title example}{fg=darkgray!95!white,bg=darkgray!50!white}'
                        + '\n' +
                        '\setbeamercolor*{block body example}{fg=green!75!black,bg=black!80}'
                        + '\n' +
                        '\\begin{exampleblock}{\centering {Terminal}}' +
                        '\n' + ' \\vspace{-0.3cm}' + '\n' +
                        ' \\begin{lstlisting}[style=bash, frame=none, numbers=none, xleftmargin=0pt, framexleftmargin=0pt]'
                    )
                elif (format == "latex"):
                    return latex(
                        '\\begin{terminalbox}{}{' + '\n' +
                        ' \\begin{lstlisting}[style=bash, frame=none, numbers=none, xleftmargin=0pt, framexleftmargin=0pt]'
                    )
            elif content == "/terminal":
                if (format == "beamer"):
                    # Close the listing and the example block.
                    return latex(' \end{lstlisting}' + '\n' +
                                 ' \\vspace{-0.3cm}' + '\n' +
                                 '\end{exampleblock}')
                elif (format == "latex"):
                    return latex(' \end{lstlisting}' + '\n' +
                                 '}\end{terminalbox}')
def get_runlist_args(arguments_list):
    """ return list of arguments from 'args' MetaList

    Each item must be a MetaMap with exactly one field.  MetaBool fields
    become bare '--name' flags; MetaInlines become '--name="value"'.
    Malformed items are reported and skipped.
    """
    arguments = list()
    for item in arguments_list:
        if item[const.T] != 'MetaMap':
            info.log('ERROR', 'panzer',
                     '"args" list should have fields of type "MetaMap"')
            continue
        fields = item[const.C]
        if len(fields) != 1:
            info.log('ERROR', 'panzer',
                     '"args" list should have exactly one field per item')
            continue
        field_name = "".join(fields.keys())
        field_type = get_type(fields, field_name)
        field_value = get_content(fields, field_name, field_type)
        if field_type == 'MetaBool':
            # Boolean options become bare flags: --name
            arguments.append('--' + field_name)
        elif field_type == 'MetaInlines':
            value_str = pandocfilters.stringify(field_value)
            arguments.append('--%s="%s"' % (field_name, value_str))
        else:
            # Bug fix: missing space between the implicitly-concatenated
            # literals previously produced the message "...notsupported...".
            info.log(
                'ERROR', 'panzer', 'arguments of type "%s" not '
                'supported---"%s" ignored' % (field_type, field_name))
    return arguments
def page_number_extractor(key, value, fmt, meta):
    """Scan all paragraphs for those starting with || to parse it for
    page numbering information."""
    if not (fmt == "html" or fmt == "html5" or fmt == "epub"):
        return
    if key == "Para" and value:
        # find first obj with content (line breaks don't have this)
        text = None
        for obj in value:
            if "c" in obj:
                text = obj["c"]
                break
        if text is None:
            return  # no content in Paragraph - ignore
        # first chunk of paragraph must be str and contain '||'
        if isinstance(text, str) and text.startswith("||"):
            text = pandocfilters.stringify(value)  # get whole text of page number
            pnum = config.PAGENUMBERING_PATTERN.search(text)
            if pnum:
                # strip the first ||
                text = text[2:].lstrip().rstrip()
                if fmt == "epub":
                    # EPUB needs a dedicated pagebreak class for readers.
                    return html(
                        '<p class="pagebreak"><span id="p{0}">{1}</span></p>'.format(
                            pnum.groups()[1], text
                        )
                    )
                return html(
                    '<p><span id="p{0}">{1}</span></p>'.format(pnum.groups()[1], text)
                )
def process_simg(k, v, f, m):
    """Expand '[simg ... +++ ...]' marker paragraphs into LaTeX image code."""
    if k != "Para":
        return
    text = pf.stringify(v)
    if not (text.startswith('[simg') and text.endswith(']')):
        return
    debug(text)
    # Split the bracket contents on '+++' separators into image specs.
    images = re.split(r'\s+\+\+\+\s*', text[1:-1])
    return latex(generate_image_code(images))
def transPara(key, value, format, meta):
    """Localize cross-reference link text to Chinese and strip attributes
    from code blocks and images.

    Links: 'listing'/'figure'/'table' become 清单/图/表 and dots become
    dashes; chapter references gain a leading 第.  CodeBlock and Image
    nodes are rebuilt with empty attributes.
    """
    if key == "Link":
        # print "Link"
        if debug == 1:
            fh.write("Link\n")
        hstr = stringify(value[1])
        hstr = re.sub(u"listing", u"清单", hstr)
        hstr = re.sub(u"figure", u"图", hstr)
        hstr = re.sub(u"table", u"表", hstr)
        # Bug fix: pattern was the non-raw string "\." (invalid escape).
        hstr = re.sub(u"\\.", u"-", hstr)
        if len(hstr) < 1:
            # Bug fix: removed dead local `trStr = ""` (never read).
            return []
        # NOTE(review): the alternation binds looser than it looks --
        # u"图|表|清单[\\d\\-]+" also matches a bare 图/表; the intent may
        # have been u"(图|表|清单)[\\d\\-]+".  Preserved as-is to avoid
        # changing output.
        elif re.match(u"图|表|清单[\\d\\-]+", hstr):
            return Str(hstr)
        elif re.match(u"[\\d\\-]+章", hstr):
            return Str(u"第" + hstr)
        elif re.match(u"第[\\d\\-]+章", hstr):
            return Str(hstr)
    elif key == "CodeBlock":
        # Rebuild with empty attributes: ("" id, [] classes, [] key/values).
        v1 = ("", [], [])
        return CodeBlock(v1, value[1])
    elif key == "Image":
        v1 = ("", [], [])
        return Image(v1, value[1], value[2])
    else:
        return None
def ascii2svg(key, value, format, meta):
    """Render 'a2s' code blocks to SVG images via the a2s command-line tool.

    The block's source is written to <hash>.a2s and converted once (the
    output file is cached); the code block is replaced by a Para/Image
    pointing at the generated SVG.
    """
    if key == 'CodeBlock':
        [[ident, classes, keyvals], code] = value
        if "a2s" in classes:
            caption, typef, keyvals = get_caption(keyvals)
            filename = get_filename4code("a2s", code)
            typea2s = "svg"
            src = filename + '.a2s'
            desta2s = filename + '.' + typea2s
            # Optional font taken from the document's `monofont` metadata.
            fontName = ""
            metaMonoFont = meta.get('monofont', None)
            if metaMonoFont:
                fontName = stringify(metaMonoFont['c'])
            if not os.path.isfile(desta2s):
                # NOTE(review): this encode/decode round-trip only works
                # when the filesystem encoding is UTF-8 compatible --
                # confirm intent.
                txt = code.encode(sys.getfilesystemencoding())
                txt = txt.decode('utf-8')
                with open(src, "w") as f:
                    f.write(txt)
                # Security fix: invoke a2s without a shell so that
                # metadata-derived values (e.g. fontName) cannot inject
                # shell commands.  The previous shell=True form also
                # relied on the shell stripping the quotes around -f.
                call(['a2s', '-f%s' % fontName, '-i%s' % src,
                      '-o%s' % desta2s])
                sys.stderr.write('Created image ' + desta2s + '\n')
            return Para([Image([ident, [], keyvals], caption,
                               [desta2s, typef])])
def is_attrtable(key, value):
    """True if this is an attributed table; False otherwise."""
    try:
        s = stringify(value[0]).strip()
        return key == 'Table' and ATTR_PATTERN.match(s)
    except Exception:
        # Bug fix: narrowed the bare `except:` (which also swallowed
        # SystemExit/KeyboardInterrupt).  Malformed values still simply
        # report "not an attributed table".
        return False
def mk_columns(k, v, f, m):
    """Convert '[notes...]' / '[/notes]' marker paragraphs into note divs."""
    if k != "Para":
        return
    text = stringify(v)
    if not (text.startswith('[') and text.endswith(']')):
        return
    if "[notes" in text:
        return html(r'<div class="notes">')
    if text == "[/notes]":
        return html(r'</div>')
def transform(self):
    """ transform `self` by applying styles listed in `self.stylefull`

    Steps: (1) fold style definitions and in-document metadata into a new
    metadata dict, (2) apply kill rules to the run lists, (3) resolve the
    template, (4) install the new metadata on the document.
    """
    writer = self.options['pandoc']['write']
    info.log('INFO', 'panzer', 'writer:')
    info.log('INFO', 'panzer', ' %s' % writer)
    # 1. Do transform
    # - start with blank metadata
    new_metadata = dict()
    # - apply styles, first to last
    for style in self.stylefull:
        # 'all' section applies to every writer, then the writer-specific
        # section overrides it.
        all_s = meta.get_nested_content(self.styledef, [style, 'all'],
                                        'MetaMap')
        new_metadata = meta.update_metadata(new_metadata, all_s)
        self.apply_commandline(all_s)
        cur_s = meta.get_nested_content(self.styledef, [style, writer],
                                        'MetaMap')
        new_metadata = meta.update_metadata(new_metadata, cur_s)
        self.apply_commandline(cur_s)
    # - add in document metadata in document
    indoc_data = self.get_metadata()
    # -- add items from additive fields in indoc metadata
    new_metadata = meta.update_additive_lists(new_metadata, indoc_data)
    # -- add all other (non-additive) fields in
    new_metadata.update(indoc_data)
    # -- apply items from indoc `commandline` field
    self.apply_commandline(indoc_data)
    # 2. Apply kill rules to trim run lists
    for field in const.RUNLIST_KIND:
        try:
            original_list = meta.get_content(new_metadata, field, 'MetaList')
            trimmed_list = meta.apply_kill_rules(original_list)
            if trimmed_list:
                meta.set_content(new_metadata, field, trimmed_list,
                                 'MetaList')
            else:
                # if all items killed, delete field
                del new_metadata[field]
        except error.MissingField:
            continue
        except error.WrongType as err:
            info.log('WARNING', 'panzer', err)
            continue
    # 3. Set template
    try:
        if meta.get_type(new_metadata, 'template') == 'MetaInlines':
            template_raw = meta.get_content(new_metadata, 'template',
                                            'MetaInlines')
            template_str = pandocfilters.stringify(template_raw)
        elif meta.get_type(new_metadata, 'template') == 'MetaString':
            template_str = meta.get_content(new_metadata, 'template',
                                            'MetaString')
            if template_str == '':
                # Empty string is treated the same as no template at all.
                raise error.MissingField
        else:
            raise error.WrongType
        self.template = util.resolve_path(template_str, 'template',
                                          self.options)
    except (error.MissingField, error.WrongType) as err:
        # No usable template: leave self.template as it was.
        info.log('DEBUG', 'panzer', err)
    if self.template:
        info.log('INFO', 'panzer', info.pretty_title('template'))
        info.log('INFO', 'panzer', ' %s' % info.pretty_path(self.template))
    # 4. Update document's metadata
    self.set_metadata(new_metadata)
def _process_refs(x, pattern, labels): """Searches the element list `x` for the first Cite element with an id that either matches the compiled regular expression `pattern` or is found in the `labels` list. Strips surrounding curly braces and adds modifiers to the attributes of the Cite element. Repeats processing (via decorator) until all matching Cite elements in `x` are processed.""" # Scan the element list x for Cite elements with known labels try: for i, v in enumerate(x): if v['t'] == 'Cite' and len(v['c']) == 2: label = v['c'][-2][0]['citationId'] if not label in labels and ':' in label: testlabel = label.split(':')[-1] if testlabel in labels: label = testlabel if (pattern and pattern.match(label)) or label in labels: # A new reference was found; create some empty attrs for it attrs = PandocAttributes() # Extract the modifiers. 'attrs' is updated in place. # Element deletion could change the index of the Cite being # processed. i = _extract_modifier(x, i, attrs) # Remove surrounding brackets i = _remove_brackets(x, i) # Get the reference attributes. Attributes must immediately # follow the label. if not v['c'][0][0]['citationSuffix'] and \ not stringify(v['c'][-1]).endswith(']'): try: a = extract_attrs(x, i + 1) attrs.id = a.id attrs.classes.extend(a.classes) attrs.kvs.update(a.kvs) except (ValueError, IndexError): pass # None given # Attach the attributes v['c'].insert(0, attrs.list) # The element list may be changed if label in labels: return None # Forces processing to repeat via @_repeat if _WARNINGLEVEL and pattern and \ pattern.match(label) and label not in badlabels: badlabels.append(label) msg = "\n%s: Bad reference: @%s.\n" % (_FILTERNAME, label) STDERR.write(msg) STDERR.flush() except: pass return True # Terminates processing in _repeat decorator
def url_filter(key, value, format_, meta):
    """Rewrite Link targets using interwiki bang-expressions.

    If a link target starts with "!PREFIX" for a prefix found in the
    module-level `interwiki_maps`, the target becomes that map's query URL
    plus the stringified link text.  Otherwise, an empty target or one
    without a "://" scheme is treated as a gwern.net-relative link.

    Parameters follow the pandocfilters action signature
    (key, value, format, meta); non-Link elements are left unchanged
    (returns None).
    """
    if key == 'Link':
        [txt, [url, attr]] = value
        caught = False
        # NOTE: loop variable renamed from 'key' so it no longer shadows
        # the pandoc element-type parameter.
        for prefix, query in interwiki_maps:
            if url.startswith("!" + prefix):
                url = query + stringify(txt)
                caught = True
                break
        if not caught and url == '':
            # Empty target: internally link the text itself
            url = "http://gwern.net/" + stringify(txt)
        elif not caught and "://" not in url:
            # Scheme-less target: treat as site-relative
            url = "http://gwern.net/" + url
        return Link(txt, [url, attr])
def get_list(meta):
    """Return the (cached) list of small-caps phrases from pandoc metadata.

    On the first call (``get_list.checked`` false), stringify each entry of
    the 'smallcaps' MetaList in `meta` and cache the result on the function
    object; later calls return the cached list.  A missing 'smallcaps' key
    leaves the previously cached ``get_list.hitlist`` untouched.
    """
    if not get_list.checked:  # idiomatic truthiness instead of '== True'
        try:
            get_list.checked = True
            get_list.hitlist = [stringify(x)
                                for x in meta.get('smallcaps', {})['c']]
        except KeyError:
            # no 'smallcaps' metadata: keep the existing cache
            pass
    return get_list.hitlist
def walk_metadata(x):
    '''
    x is a JSON dictionary of pandoc metadata

    Walks down a JSON dictionary in the pandoc metadata, returning a more
    manageable representation: MetaBool -> bool, MetaInlines/MetaString ->
    str, MetaList -> list, MetaMap -> dict (recursing into containers).

    FIXME: Maybe formatting for e.g. math should be retained instead of
    converting to a string?
    '''
    tag = x['t']
    if tag == 'MetaBool':
        return x['c']
    elif tag in ('MetaInlines', 'MetaString'):
        return str(pandocfilters.stringify(x))
    elif tag == 'MetaList':
        return [walk_metadata(item) for item in x['c']]
    elif tag == 'MetaMap':
        # Previously unhandled (fell through to implicit None); recurse so
        # nested mappings are converted too.
        return {key: walk_metadata(val) for key, val in x['c'].items()}
def mk_center(key, value, *args):
    """Make LaTeX centering."""
    if key != "Para":
        return None
    text = pf.stringify(value)
    if not (text.startswith('[') and text.endswith(']')):
        return None
    marker = text[1:-1]
    if marker == "center":
        return latex(r'\begin{center}')
    if marker == "/center":
        return latex(r'\end{center}')
def duck(key, value, format_, meta):
    '''
    If a link is of the form "!STRING", use the !-expression to search
    DuckDuckGo. So for instance [Fishmans](!w) would search Wikipedia for
    "Fishmans".
    '''
    if key != 'Link':
        return None
    txt, (url, attr) = value
    if url.startswith("!"):
        url = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
    return Link(txt, [url, attr])
def get_list(meta):
    """Return the (cached) list of small-caps phrases from pandoc metadata.

    On the first call (``get_list.checked`` false), stringify each entry of
    the 'smallcaps' MetaList in `meta`, log the result via panzertools, and
    cache it on the function object; later calls return the cached list.
    A missing 'smallcaps' key leaves the cached ``get_list.hitlist`` as-is.
    """
    if not get_list.checked:  # idiomatic truthiness instead of '== True'
        try:
            get_list.checked = True
            get_list.hitlist = [stringify(x)
                                for x in meta.get('smallcaps', {})['c']]
            panzertools.log('INFO', 'small caps: ' + repr(get_list.hitlist))
        except KeyError:
            # no 'smallcaps' metadata: keep the existing cache
            pass
    return get_list.hitlist
def mk_columns(k, v, f, m):
    """Turn [columns]/[column=WIDTH]/[/columns] marker paragraphs into raw
    LaTeX beamer column commands."""
    if k != "Para":
        return None
    text = pf.stringify(v)
    if not (text.startswith('[') and text.endswith(']')):
        return None
    marker = text[1:-1]
    if marker == "columns":
        return latex(r'\begin{columns}[T]')
    if marker == "/columns":
        return latex(r'\end{columns}')
    if marker.startswith("column="):
        return latex(r'\column{%s\textwidth}' % marker[7:])
def mk_columns(key, value, *args):
    """Make LaTeX columns."""
    if key != "Para":
        return None
    text = pf.stringify(value)
    if not (text.startswith('[') and text.endswith(']')):
        return None
    marker = text[1:-1]
    if marker == "columns":
        return latex(r'\begin{columns}')
    if marker == "/columns":
        return latex(r'\end{columns}')
    if marker.startswith("column="):
        return latex(r'\column{%s\textwidth}' % marker[7:])
def parse_attrimage(value):
    """Parses an attributed image.

    `value` is the content of an Image-bearing element; `value[0]['c']` has
    two fields (caption, target) for old pandoc < 1.16 or three
    (attrs, caption, target) for pandoc >= 1.16.

    Returns (attrs, caption, target, label), where `attrs` is None for old
    pandoc.  An empty 'fig:' label is replaced with a unique made-up one.
    """
    if len(value[0]['c']) == 2:  # Old pandoc < 1.16
        attrs, (caption, target) = None, value[0]['c']
        s = stringify(value[1:]).strip()  # The attribute string
        # Extract label from attributes (label, classes, kvs)
        label = PandocAttributes(s, 'markdown').to_pandoc()[0]
    else:  # New pandoc >= 1.16
        assert len(value[0]['c']) == 3
        attrs, caption, target = value[0]['c']
        # Extract label from attributes (dead stringify of the attribute
        # string removed; it was unused in this branch)
        label = attrs[0]
    # Shared 'fig:' handling, previously duplicated in both branches
    if label == 'fig:':  # Make up a unique description
        label = label + '__' + str(hash(target[0])) + '__'
    return attrs, caption, target, label
def get_meta(meta, name):
    """Retrieves the metadata variable 'name' from the 'meta' dict.

    MetaString/MetaBool return their raw content; MetaInlines returns a
    string (or a bool for pandoc 2.2.3/2.2.3.1, which emit booleans as
    inline strings); MetaList returns a list of strings; MetaMap returns a
    dict of strings.  Raises RuntimeError for any other metadata type.
    """
    assert name in meta
    data = meta[name]
    if data['t'] in ['MetaString', 'MetaBool']:
        return data['c']
    elif data['t'] == 'MetaInlines':
        # Handle bug in pandoc 2.2.3 and 2.2.3.1: Return boolean value rather
        # than strings, as appropriate.
        if len(data['c']) == 1 and data['c'][0]['t'] == 'Str':
            if data['c'][0]['c'] in ['true', 'True', 'TRUE']:
                return True
            elif data['c'][0]['c'] in ['false', 'False', 'FALSE']:
                return False
        return stringify(data['c'])
    elif data['t'] == 'MetaList':
        return [stringify(v['c']) for v in data['c']]
    elif data['t'] == 'MetaMap':
        # Handle MetaMap for consistency with the richer get_meta variant in
        # this codebase (previously raised RuntimeError here).
        return {key: stringify(data['c'][key]) for key in data['c']}
    else:
        raise RuntimeError("Could not understand metadata variable '%s'." % name)