def preprocess(key, value, format, meta):
    """Prefix header anchors and rewrite links to match the prefixed anchors.

    The prefix is ``normalize(meta["file"]["c"])``.

    * ``Header``: the anchor becomes ``<prefix>-<anchor>`` and an explicit
      attribute list carrying that anchor is appended as raw HTML.
    * ``Link`` starting with ``#``: the target becomes ``<prefix>-<rest>``.
    * ``Link`` containing ``#`` elsewhere: the target becomes
      ``"#" + normalize(link)``.

    Returns the replacement element, or None to leave the element unchanged.
    """
    prefix = normalize(meta["file"]["c"])

    # Header - set anchor name explicitly
    if key == "Header":
        [level, [anchor, t1, t2], header] = value
        anchor = prefix + "-" + anchor
        # Append attribute list with prefixed anchor
        header.append(RawInline("html", " {{ #{} }}".format(anchor)))
        return Header(level, [anchor, t1, t2], header)

    # Link - correct links
    if key == "Link":
        [t1, text, [link, t4]] = value
        # This document.  startswith() instead of link[0] so that an empty
        # target does not raise IndexError.
        if link.startswith("#"):
            ref = prefix + "-" + link[1:]
            return Link(t1, text, [ref, t4])
        # That document
        elif "#" in link:
            anchor = "#" + normalize(link)
            return Link(t1, text, [anchor, t4])

    return None
def process_link(
        self,
        value: Tuple[str, List[Dict[str, str]], List[str]],
        output_format: str,
        meta: Dict[str, Dict[str, Any]]) -> Any:
    # pylint: disable=unused-argument
    """Substitute metadata variables inside a link element.

    ``Str`` inlines of the link text are passed through
    ``self.replace_metavar`` with ``self._metavar_pattern_text``; every entry
    of the link target list is passed through it with
    ``self._metavar_pattern_link``.

    Returns a new ``Link`` when at least one substitution happened,
    otherwise None.
    """
    attributes, inlines, targets = value
    substituted = False

    rewritten_inlines = []
    for inline in inlines:
        replacement = None
        if inline.get('t') == 'Str':
            replacement = self.replace_metavar(
                inline['c'], meta, self._metavar_pattern_text)
        if replacement is None:
            rewritten_inlines.append(inline)
        else:
            rewritten_inlines.append({'t': 'Str', 'c': replacement})
            substituted = True

    rewritten_targets = []
    for target in targets:
        replacement = self.replace_metavar(
            target, meta, self._metavar_pattern_link)
        if replacement is None:
            rewritten_targets.append(target)
        else:
            rewritten_targets.append(replacement)
            substituted = True

    if not substituted:
        return None
    return Link(attributes, rewritten_inlines, rewritten_targets)
def rewrite_target(key, val, fmt, meta):
    """Rewrite local link and image targets relative to ``meta['root']['c']``.

    * ``Link``: a local target ``page#frag`` becomes
      ``<root>page<ext>#frag``, where ``<ext>`` comes from the
      ``FILTER_URL_EXTENSION`` environment variable (default ``.html``).
      Targets with no page part (same-page anchors such as ``#frag``, or an
      empty target) are left as they are instead of being turned into
      ``<root><ext>#frag``.
    * ``Image``: a local path is prefixed with the root.

    Targets starting with ``http://``, ``https://`` or ``mailto:`` are never
    modified.  Returns the rebuilt element, or None for other element types.
    """
    # Prefixes that mark a target as external.
    external = ('http://', 'https://', 'mailto:')

    # when we are at a link node
    if key == 'Link':
        attr, inline, target = val
        url = target[0]
        if not url.startswith(external):
            # get potentially different url extension
            ext = os.getenv('FILTER_URL_EXTENSION', '.html')
            page, sep, fragment = url.partition('#')
            if page:
                url = meta['root']['c'] + page + ext + sep + fragment
        return Link(attr, inline, [url, target[1]])

    # when we are at an image node
    if key == 'Image':
        attr, inline, target = val
        url = target[0]
        if not url.startswith(external):
            url = meta['root']['c'] + url
        return Image(attr, inline, [url, target[1]])

    return None
def wrap_urls_in_anchors(key, val, fmt, meta):
    """Turn the first URL found in each ``Str`` of a paragraph into a link.

    For every ``Str`` child matched by ``URL_REGEX`` the string is split
    around the first match into optional leading text, a ``Link`` whose text
    is the matched group and optional trailing text.  ``http://`` is
    prepended to the link target when the match does not start with ``http``.

    Returns a new ``Para``; elements other than ``Para`` yield None.
    """
    if key != 'Para':
        return None

    rebuilt = []
    for node in val:
        found = URL_REGEX.search(node['c']) if node['t'] == 'Str' else None
        if not found:
            rebuilt.append(node)
            continue

        shown = found.group(1)
        href = shown if shown.startswith('http') else 'http://' + shown
        anchor = Link(['', [], []], [Str(shown)], [href, ''])

        # The pattern's capture group ends up as the middle element of split().
        leading, _, trailing = URL_REGEX.split(node['c'], 1)
        if leading:
            rebuilt.append(Str(leading))
        rebuilt.append(anchor)
        if trailing:
            rebuilt.append(Str(trailing))

    return Para(rebuilt)
def add_http_to_links(key, val, fmt, meta):
    """Prepend ``http://`` to link targets that look like bare host names.

    Targets are left untouched (None is returned) when they are empty, are
    same-page anchors (``#...``), are ``mailto:`` links, or already carry a
    ``scheme://`` prefix.  Previously every target not starting with
    ``http://``/``https://`` was prefixed, producing targets such as
    ``http://mailto:...`` or ``http://#anchor``.

    Returns a new ``Link`` when the target was changed, otherwise None.
    """
    if key != 'Link':
        return None

    attr, inlines, target = val
    url = target[0]

    # A leading "scheme://" only counts when the part before it looks like a
    # scheme name; this keeps "host/path?u=http://x" classified as bare.
    scheme, sep, _ = url.partition('://')
    has_scheme = bool(sep) and (
        scheme.replace('+', '').replace('-', '').replace('.', '').isalnum())

    if not url or has_scheme or url.startswith(('#', 'mailto:')):
        return None

    return Link(attr, inlines, ["http://" + url] + list(target[1:]))
def parse_site(k, v, fmt, meta):
    """Emit menu/tag data once and rebase local link and image targets.

    On the first calls the ``menu`` and ``tags`` metadata are handed to
    ``parse_menu`` / ``parse_tags``; ``meta_tags_written`` records that this
    has happened.

    * ``Link``: a local target ending in ``.md`` becomes
      ``<base_url><target without .md>.html``.
    * ``Image``: a local target is prefixed with ``base_url``.

    Targets that are already absolute (``http://``, ``https://``, ``//``,
    ``mailto:``, ``data:``) are no longer prefixed with ``base_url``.
    """
    base_url = config["GENERAL"]["baseurl"]

    if "MENU" not in meta_tags_written:
        active_site = get_meta_string(meta, "active-site", None)
        menu = get_meta_list(meta, "menu")
        parse_menu(menu, base_url, active_site)
        meta_tags_written.append("MENU")

    if "TAGS" not in meta_tags_written:
        tags = get_meta_list(meta, "tags", None)
        parse_tags(tags, base_url, config)
        meta_tags_written.append("TAGS")

    # Prefixes that mark a target as absolute; those must not be rebased.
    external = ("http://", "https://", "//", "mailto:", "data:")

    if k == "Link":
        title, text, link = v[0], v[1], v[2]
        if link[0].endswith(".md") and not link[0].startswith(external):
            link[0] = base_url + link[0][:-3] + ".html"
        return Link(title, text, link)
    elif k == "Image":
        title, text, link = v[0], v[1], v[2]
        if not link[0].startswith(external):
            link[0] = base_url + link[0]
        return Image(title, text, link)

    return None
def linkify_tests(key, value, format, meta):
    """Replace ``test:`` images with a linked thumbnail of the test result.

    The image URL path is either an exact test name or, when it starts with
    ``~``, a regular expression that must match exactly one entry of
    ``TEST_NAMES``.  On success the image is rewritten to
    ``testinfo/<name>.png`` and wrapped in a link to ``testinfo/<name>.html``.
    When the test cannot be resolved an error is logged and a bold ``[???]``
    placeholder is returned.

    Images with another URL scheme and non-image elements yield None.
    """
    if key != 'Image':
        return None

    attr, content, (urlStr, title) = value
    url = urllib.parse.urlparse(urlStr)
    if url.scheme != "test":
        return None

    name = None
    if url.path.startswith("~"):
        pattern = url.path[1:]
        matches = sorted({n for n in TEST_NAMES if re.search(pattern, n)})
        if len(matches) == 1:
            [name] = matches
        else:
            logging.error(f"pattern {pattern!r} does not uniquely identify a test; matches = {matches}")
    else:
        candidate = TestName(url.path)
        if candidate in TEST_NAMES:
            name = candidate
        else:
            # Log the requested name; it used to be cleared before logging,
            # so the message always showed None.
            logging.error(f"test name {candidate!r} does not seem to exist!")

    if name is None:
        return Strong([Str("[???]")])

    img = Image(attr, content, (f"testinfo/{name}.png", title))
    return Link(attr, [img], (f"testinfo/{name}.html", title))
def internallinks(key, value, format, meta):
    """Reduce local links that carry a fragment to the bare ``#fragment``.

    A link target without scheme and without network location, but with a
    fragment, is replaced by ``'#' + fragment``.  Every other element is
    left unchanged (None is returned).
    """
    if key != 'Link':
        return None

    attrs, contents, target = value
    url, title = target
    parsed = urlparse(url)

    is_local = not (parsed.scheme or parsed.netloc)
    if is_local and parsed.fragment:
        return Link(attrs, contents, ('#' + parsed.fragment, title))
    return None
def filter(key, value, format, meta):
    """Replace internal ``#name`` link targets by their mangled version.

    When the target starts with ``#`` and the remainder is a key of the
    module-level ``links`` mapping, a new ``Link`` pointing at
    ``'#' + links[name]`` is returned.  Everything else yields None.

    An empty target is skipped; indexing ``link[0]`` used to raise
    IndexError for it.
    """
    if key != 'Link':
        return None

    attr, inlines, target = value
    url = target[0]
    if url.startswith('#') and url[1:] in links:
        # internal link, replace by mangled version
        return Link(attr, inlines, ['#' + links[url[1:]]] + list(target[1:]))
    return None
def relink(key, value, format, meta):
    """Point links to ``.md`` / ``.rst`` files at the matching ``.pdf``.

    Only the trailing extension is swapped.  ``str.replace`` used to rewrite
    every occurrence of the extension in the target, so ``a.md.d/b.md``
    became ``a.pdf.d/b.pdf``.

    Returns a new ``Link`` when the target ends with one of the extensions,
    otherwise None (element left unchanged).
    """
    if key != "Link":
        return None

    attr, inlines, target = value
    link_url = target[0]
    for ending in ('.md', '.rst'):
        if link_url.endswith(ending):
            new_url = link_url[:-len(ending)] + '.pdf'
            return Link(attr, inlines, [new_url] + list(target[1:]))
    return None
def crossrefs(key, value, fmt, meta):
    """Sanitize the target of cross-reference links.

    When ``is_cross`` accepts the link target, a new ``Link`` is returned
    whose target has been passed through ``sanitize``; the title is kept.
    Other elements yield None.
    """
    if key != 'Link':
        return None

    attr, inlines, (ref, title) = value
    if not is_cross(ref):
        return None
    return Link(attr, inlines, [sanitize(ref), title])
def transformLink(key, value, _, meta):
    """Point local ``.html`` links at the corresponding ``.ipynb`` file.

    The target is split at the first ``#`` so that an anchor is preserved.
    Targets starting with ``http://``, ``https://`` or ``ftp://`` are kept.
    Only the trailing ``.html`` is swapped; ``str.replace`` used to rewrite
    every ``.html`` occurrence in the path.

    Returns a rebuilt ``Link`` for link elements, None otherwise.
    """
    if key != 'Link':
        return None

    [ident, classes, keyvals], alttext, [dest, typef] = value
    link, sep, rest = dest.partition("#")  # for anchors
    # TODO better checks? use urllib?
    remote = link.startswith(("http://", "https://", "ftp://"))
    if not remote and link.endswith(".html"):
        link = link[:-len(".html")] + ".ipynb"
    return Link([ident, classes, keyvals], alttext, [link + sep + rest, typef])
def interpretManLinks(key, value, fmt, meta):
    """Rewrite man-page style link targets to ``<name>.<section>.html``.

    The target is matched against ``_man_link_re``; groups 1 and 2 of the
    match form the new file name.  Links that do not match, and non-link
    elements, yield None.

    The link value is unpacked as ``(text, (url, title))``, i.e. the
    two-field Link layout.
    """
    if key != 'Link':
        return None

    text, link = value
    url, title = link
    match = _man_link_re.match(url)
    if match is None:
        return None

    page, section = match.group(1), match.group(2)
    return Link(text, ("%s.%s.html" % (page, section), title))
def duck(key, value, format_, meta):
    '''
    Turn links whose target is a "!bang" into DuckDuckGo searches.

    The search query is the bang followed by the link text, so
    [Fishmans](!w) searches Wikipedia for "Fishmans".  Links with any
    other target are left unchanged.
    '''
    if key != 'Link':
        return None

    txt, target = value
    url, attr = target
    if not url.startswith("!"):
        return None

    # NOTE(review): the query is inserted without URL-encoding (it contains
    # a space and "!") -- confirm that downstream writers cope with that.
    query = url + " " + stringify(txt)
    return Link(txt, ["http://duckduckgo.com/?q=" + query, attr])
def my_filter(key, value, format, _):
    """Strip the file part from local links that carry an anchor.

    Example link value:
    ['', [], []] | [{'c': 'link', 't': 'Str'}] | ['index.html#some-header', 'Alt-Title']

    A target without ``://`` that contains ``#`` is reduced to the part
    starting at the first ``#``.  Everything else yields None.
    """
    if key != 'Link':
        return None

    attr, inline, (target, title) = value
    if "://" in target:
        return None

    fragment = re.search("(#.*$)", target)
    if fragment is None:
        return None
    return Link(attr, inline, [fragment.group(0), title])
def replace_cite_references(key, val, fmt, meta):
    """Replace a ``Cite`` element by a link to its bibliography entry.

    The citation id of the first citation is looked up in ``label_map``.
    A known label yields a one-element list holding a ``Link`` with class
    ``engrafo-cite`` that points at ``#<ref_id>``; an unknown or empty label
    yields a ``Span`` with a ``[?]`` placeholder.  Other elements yield None.
    """
    if key != 'Cite':
        return None

    label = val[0][0]['citationId']
    if not (label and label in label_map):
        # TODO: below doesn't work yet
        return Span(['', ['engrafo-cite', 'engrafo-missing-cite'], []],
                    [Str('[?]')])

    ref_string, ref_id, _prev_strings = label_map[label]
    citation_link = Link(['', ['engrafo-cite'], []],
                         [Str(ref_string)],
                         ['#%s' % ref_id, ''])
    return [citation_link]
def _cite_replacement(key, value, fmt, meta):
    """Build the content that replaces a Cite element for output ``fmt``.

    For ``latex`` a single ``RawInline`` with a ``\\cref``/``\\Cref``,
    ``\\eqref`` or ``\\ref`` macro is returned.  For every other format a
    list of inlines is returned: an optional reference name followed by a
    link to ``#<label>`` whose text is the stored reference value.
    """
    assert key == 'Cite'

    raw_attrs = value[0]
    label = _get_label(key, value)
    attrs = PandocAttributes(raw_attrs, 'pandoc')

    assert label in references

    # Get the replacement value
    text = str(references[label])

    # Choose between \Cref, \cref and \ref
    if 'modifier' in attrs.kvs:
        modifier = attrs['modifier']
        use_cleveref = modifier in ['*', '+']
        plus = modifier == '+'
    else:
        use_cleveref = use_cleveref_default
        plus = use_cleveref_default
    name = plusname[0] if plus else starname[0]  # Name used by cref

    # The replacement depends on the output format
    if fmt == 'latex':
        if use_cleveref:
            # Renew commands needed for cleveref fakery
            fake_wanted = ('xnos-cleveref-fake' not in meta
                           or get_meta(meta, 'xnos-cleveref-fake'))
            if fake_wanted:
                command = r'\xrefname' if plus else r'\Xrefname'
                faketex = command + '{%s}' % name
            else:
                faketex = ''
            macro = r'\cref' if plus else r'\Cref'
            return RawInline('tex', r'%s%s{%s}' % (faketex, macro, label))
        if use_eqref:
            return RawInline('tex', r'\eqref{%s}' % label)
        return RawInline('tex', r'\ref{%s}' % label)

    if use_eqref:
        text = '(' + text + ')'

    if text.startswith('$') and text.endswith('$'):
        linktext = [Math({"t": "InlineMath", "c": []}, text[1:-1])]
    else:
        linktext = [Str(text)]

    # NOTE(review): this is a lexicographic string comparison of version
    # numbers -- confirm it behaves as intended for all supported versions.
    if _PANDOCVERSION < '1.16':
        link = elt('Link', 2)(linktext, ['#%s' % label, ''])
    else:
        link = Link(['', [], []], linktext, ['#%s' % label, ''])

    leading = [Str(name), Space()] if use_cleveref else []
    return leading + [link]
def m2html_filter(key, value, form, meta):
    """Point local links at the ``.html`` version of the referenced file.

    The extension of the path is replaced by ``.html``; parameters, query
    and fragment are kept, and a ``#`` is only appended when a fragment is
    actually present.

    Left unchanged (None is returned): non-link elements, targets with a
    scheme or network location, and targets without a path such as pure
    ``#fragment`` links.  Previously external links lost their scheme and
    host, and every rewritten target ended in ``#``.
    """
    if key != 'Link':
        return None

    attr, inlines, target = value
    scheme, netloc, path, params, query, fragment = urlparse(target[0])
    if scheme or netloc:
        return None

    root, _ext = os.path.splitext(path)
    if not root:
        return None

    new_url = root + ".html"
    if params:
        new_url += ";" + params
    if query:
        new_url += "?" + query
    if fragment:
        new_url += "#" + fragment
    return Link(attr, inlines, [new_url] + list(target[1:]))
def transform_url(key, value, format, meta):
    """Rewrite a link URL through ``convert_url`` and log the conversion.

    The link value has the form
    [[{'t': 'Str', 'c': 'Contributing'}], ['docs/contributing.md', '']].

    When ``convert_url`` returns None the link is left unchanged (None is
    returned).  Otherwise the URL is replaced in place inside ``value`` and
    a ``Link`` built from ``value`` is returned.
    """
    if key != 'Link':
        return None

    target = value[1]
    url = target[0]

    new_url = convert_url(url)
    if new_url is None:
        return None

    log.info("converting URL:\n %s\n-->%s" % (url, new_url))
    target[0] = new_url
    return Link(*value)
def change_markdown_links_to_html_links(key, value, format, meta):
    """Point internal links to ``.md`` files at the matching ``.html`` file.

    A link value has three parts (https://pandoc.org/lua-filters.html#type-link):
    attr, content and target, where target is the pair ``[url, title]``.

    Only a ``.md`` extension at the end of the path is replaced; a trailing
    ``?query`` or ``#fragment`` is preserved.  ``str.replace`` used to
    rewrite every ``.md`` occurrence, turning ``notes.mdx`` into
    ``notes.htmlx``.  Targets starting with ``http://`` or ``https://`` are
    treated as external; the old ``startswith('http')`` test also skipped
    local files such as ``http-guide.md``.

    Returns a new ``Link`` when the target was rewritten, otherwise None.
    """
    if key != 'Link':
        return None

    attr, content, target = value
    url = target[0]
    if url.startswith(('http://', 'https://')):
        return None

    # Separate the path from an optional ?query / #fragment tail.
    cut = len(url)
    for mark in ('?', '#'):
        pos = url.find(mark)
        if pos != -1:
            cut = min(cut, pos)
    path, tail = url[:cut], url[cut:]

    if not path.endswith('.md'):
        return None

    new_url = path[:-len('.md')] + '.html' + tail
    return Link(attr, content, [new_url] + list(target[1:]))
def caps(key, value, format, meta):
    """Collapse relative ``file#anchor`` links to ``#anchor`` and warn.

    Example of a link value:
    [['', [], []], [{'c': 'here', 't': 'Str'}],
     ['http://docs.mathjax.org/en/latest/tex.html#defining-tex-macros', '']]

    A target that contains ``#`` and does not start with ``http://``,
    ``https://``, ``/`` or ``#`` is replaced in place by ``#`` plus the text
    after the last ``#``; the change is reported on stderr.  A warning is
    written to stderr for every target that is then neither absolute
    http(s) nor an anchor.

    Messages are written with ``sys.stderr.write`` so the filter runs on
    both Python 2 and 3; the former ``print >> sys.stderr`` statement raises
    TypeError on Python 3.
    """
    if key != 'Link':
        return None

    dest = value[2][0]
    remote = dest.startswith(('http://', 'https://'))
    if '#' in dest and not remote and not dest.startswith(('/', '#')):
        value[2][0] = "#{}".format(dest.rpartition('#')[2])
        sys.stderr.write("[INFO] {} -> {}\n".format(dest, value[2][0]))

    dest = value[2][0]
    if not dest.startswith(('http://', 'https://', '#')):
        sys.stderr.write(
            "[WARNING!] '{}' might not work\n".format(value[2][0]))

    return Link(*value)
def add_gitlab_ref(k, v, fmt, meta):
    """Turn repository-relative links into GitLab URLs for LaTeX output.

    Only ``Link`` elements are handled, only when ``CI_PROJECT_URL`` is set
    and only for the ``latex`` and ``beamer`` formats.  The link target is
    matched against ``LINK_REF_TO_REPLACE_RE``; its ``rel_link`` group is
    appended to ``CI_PROJECT_URL`` -- preceded by
    ``tree/<CI_COMMIT_REF_NAME>`` when the ``absolute`` group did not match.
    Matches whose ``rel_link`` starts with ``http`` are left alone.

    The URL is assembled with ``/`` explicitly.  ``os.path.join`` discards
    the base when a component starts with ``/`` and uses the platform's
    path separator, neither of which is right for URLs.

    Returns the rewritten ``Link`` or None when nothing was changed.
    """
    if k != 'Link' or CI_PROJECT_URL is None:
        return None
    if fmt not in ('latex', 'beamer'):
        return None

    link_contents = v[-1]
    link_match = LINK_REF_TO_REPLACE_RE.search(link_contents[0])
    if not link_match:
        return None

    rel_link = link_match.group('rel_link')
    if rel_link.startswith('http'):
        return None

    link_spec = [CI_PROJECT_URL.rstrip('/')]
    if link_match.group('absolute') is None:
        link_spec.extend(['tree', CI_COMMIT_REF_NAME])
    link_spec.append(rel_link.lstrip('/'))

    link_contents[0] = '/'.join(link_spec)
    return Link(v[0], v[1], link_contents)
def remove_caption_filter(key, value, format_, meta):
    """Drop image captions and turn local ``.md`` links into rst ``:doc:`` roles.

    Image
        markdown: ![alt](path "title")
        json: [['', [], []], [{'c': 'alt', 't': 'Str'}], ['path', 'fig:title']]
        The inline content is emptied; when the first inline had a value it
        is stored as ``fig:<alt>`` in the title slot.

    Link
        markdown: [node](node_api.md)
        json: [['', [], []], [{'c': 'node', 't': 'Str'}], ['node_api.md', '']]
        A target without network location whose extension is ``.md``
        becomes ``:doc:`text <name>``` (or ``:doc:`name``` when no plain
        text is available).  Other links are rebuilt unchanged.

    The link text is read defensively: an empty text, or a first inline that
    is not a plain string element, used to raise IndexError/KeyError.
    """
    if key == 'Image':
        try:
            alt = value[1][0].get("c")
        except IndexError:
            alt = None
        value[1] = []
        if alt:
            value[2][1] = u"fig:{}".format(alt)
        return Image(*value)

    elif key == "Link":
        link_path = value[2][0]
        if not bool(urlparse.urlparse(link_path).netloc):
            name, ext = os.path.splitext(link_path)
            if ext and ext.lower() == ".md":
                inlines = value[1]
                first = inlines[0] if inlines else None
                link_name = first.get("c") if isinstance(first, dict) else None
                # type(u"") is the text type on both Python 2 and 3.
                if isinstance(link_name, type(u"")) and link_name:
                    return RawInline(
                        u'rst', u':doc:`{} <{}>`'.format(link_name, name))
                return RawInline(u'rst', u':doc:`{}`'.format(name))
        return Link(*value)

    return None
def listof(key, value, format, meta):
    """Track header numbering and expand ``{tag}`` paragraphs into lists.

    * ``Header``: unless the header has the ``unnumbered`` class, the counter
      for its level in ``headers2`` is incremented and the counters of the
      deeper levels are reset.
    * ``Para`` holding a single ``Str`` of the form ``{name}``,
      ``{name:1.2}`` or ``{name:#.#}``: when the resolved name is a key of
      ``collections`` the paragraph is replaced by a ``\\@starttoc`` raw
      block (LaTeX) or by a bullet list of links to the collected items.
    * ``Para`` holding ``{{name}``: the escape is undone by dropping the
      first brace, in place.

    Returns the replacement block or None.
    """
    global headers2

    # Is it a header?
    if key == 'Header':
        [level, [ident, classes, attributes], content] = value
        if 'unnumbered' not in classes:
            headers2[level - 1] = headers2[level - 1] + 1
            for index in range(level, 6):
                headers2[index] = 0

    # Is it a paragraph with only one string?
    if key == 'Para' and len(value) == 1 and value[0]['t'] == 'Str':

        # Is it {tag}?
        result = re.match(
            r'^{(?P<name>(?P<prefix>[a-zA-Z][\w.-]*)(?P<section>\:((?P<sharp>#(\.#)*)|(\d+(\.\d+)*)))?)}$',
            value[0]['c'])
        if result:
            prefix = result.group('prefix')
            sharp = result.group('sharp')

            # Get the collection name
            if sharp is None:
                name = result.group('name')
            else:
                # "#", "#.#", ... -> number of levels requested
                level = (len(sharp) - 1) // 2 + 1
                name = prefix + ':' + '.'.join(map(str, headers2[:level]))

            # Is it an existing collection
            if name in collections:
                if format == 'latex':
                    # Special case for LaTeX output
                    if 'toccolor' in meta:
                        linkcolor = ('\\hypersetup{linkcolor='
                                     + stringify(meta['toccolor']['c'], format)
                                     + '}')
                    else:
                        linkcolor = '\\hypersetup{linkcolor=black}'
                    suffix = '' if sharp is None else '_'
                    return Para([RawInline(
                        'tex',
                        linkcolor + '\\makeatletter\\@starttoc{' + name
                        + suffix + '}\\makeatother')])

                # Prepare the list; "item" avoids shadowing the parameter.
                elements = []
                for item in collections[name]:
                    target = ['#' + prefix + ':' + item['identifier'], '']
                    # NOTE(review): string comparison of version numbers --
                    # confirm it is adequate for the supported versions.
                    if pandocVersion() < '1.16':
                        # pandoc 1.15
                        link = Link([Str(item['text'])], target)
                    else:
                        # pandoc 1.16
                        link = Link(['', [], []], [Str(item['text'])], target)
                    elements.append([Plain([link])])

                # Return a bullet list
                return BulletList(elements)

        # Special case where the paragraph starts with '{{...'
        elif re.match(r'^{{[a-zA-Z][\w.-]*}$', value[0]['c']):
            value[0]['c'] = value[0]['c'][1:]

    return None
def link(href, label):
    """Build a reference ``Link`` to ``href`` whose text is ``label``.

    The link carries the key/value attributes ``reference-type=ref`` and
    ``reference=<href>``; its title is empty.
    """
    attributes = ["", [], [("reference-type", "ref"), ("reference", href)]]
    inlines = [Str(label)]
    target = [href, ""]
    return Link(attributes, inlines, target)
def replace_references(key, val, fmt, meta):
    '''
    Replace

    [Str("Foo"), Space(), RawInline("latex", "figref")]

    with

    [Str("Foo"), Space(), Link([Str("Figure"), Space(), Str("7")])]

    and

    [Str("Figure"), Space(), RawInline("latex", "figref")]

    with

    [Link([Str("Figure"), Space(), Str("7")])]

    also works with abbreviations.

    Only elements whose value is a list are processed; for those a new
    element dict ``{'t': key, 'c': <new children>}`` is returned.  Labels
    missing from ``label_map`` are rendered as a space plus a ``?`` span
    with class ``engrafo-missing-ref``.
    '''
    if not isinstance(val, list):
        return None

    altered = []
    for i, obj in enumerate(val):
        new_objs = [obj]
        if (isinstance(obj, dict) and obj['t'] == 'RawInline'
                and obj['c'][0] == 'latex'):
            label = match_ref(obj['c'][1])
            if not label:
                # NOTE(review): raw LaTeX that is not a reference is dropped
                # from the output here (kept as in the original) -- confirm
                # that this is intended.
                continue

            if label in label_map:
                ref_string, ref_id, prev_strings = label_map[label]
                prev = val[i - 1] if i > 0 else None
                prevprev = val[i - 2] if i > 1 else None
                new_objs = []

                # handle "Table ", "(Table" etc.
                if (prev_strings and prevprev and prev['t'] == 'Space'
                        and 'c' in prevprev and prevprev['t'] == 'Str'):
                    prevprev_lower = prevprev['c'].lower()
                    for needle in prev_strings:
                        if prevprev_lower.endswith(needle):
                            altered = altered[:-2]
                            prefix = prevprev_lower[:-len(needle)]
                            if prefix:
                                new_objs.append(Str(prefix))
                            # Stop at the first match; continuing would strip
                            # further, unrelated elements from ``altered``.
                            break

                # hack around bug in pandoc where non-breaking space
                # doesn't tokenize properly.  ``prev`` is None when the
                # reference is the first inline, so guard before indexing.
                if (prev_strings and prev is not None
                        and prev['t'] == 'Str'
                        and prev['c'].replace(u'\xa0', ' ').strip().lower()
                        in prev_strings):
                    altered = altered[:-1]

                link_content = [Str(ref_string)]
                new_objs += [
                    Link(['', [], []], link_content, ['#%s' % ref_id, ''])
                ]
            else:
                new_objs += [
                    Space(),
                    Span(['', ['engrafo-missing-ref'], []], [Str('?')])
                ]

        altered += new_objs

    return {'t': key, 'c': altered}
def img_to_zoomable_link(key, value, format, meta):
    """Wrap every image in a link that expands it on click.

    ``key`` names the kind of element being processed and ``value`` is its
    JSON payload.  Images are replaced by a ``Link`` to the image path that
    carries an ``onclick`` attribute and contains a 400px-wide copy of the
    image.  All other elements are left alone (None is returned).

    Diagnostics go to stderr: stdout carries the document stream between
    pandoc and the filter, so printing there would corrupt it.
    """
    if key != 'Image':
        return None  # no processing required

    sys.stderr.write("Running img-to-zoomable-link.py\n")

    # Only the image path is used; the remaining fields are unpacked so that
    # a malformed value still fails loudly.
    attributes, _alt_inlines, target = value
    _identifier, _classes, _img_attributes = attributes
    img_path, _img_caption = target

    # The result must be an inline element: an Image may not be wrapped in a
    # block such as Para here.
    thumbnail = Image(("", [], [("width", "400px")]),
                      [],
                      [img_path, "fig:blah"])
    link_attributes = ["", [], [("onclick", "return hs.expand(this)")]]
    return Link(link_attributes, [thumbnail], [img_path, ""])
#!/usr/bin/env python3
import io
import json
import sys

from pandocfilters import walk, Header, Link, Para, Str

# record how many headers deep we are
depth = 0

# create node that is a block paragraph with a link that says 'Jump to Top' and hrefs '#top'
jump = Para([Link(['', [], []], [Str('Jump to Top')], ('#top', 'top'))])


# add jumps before headers of the document
def add_to_headers(key, val, fmt, meta):
    """Track the current header depth while walking the document.

    A header deeper than the recorded depth increases ``depth`` by one; a
    shallower header resets ``depth`` to its level.

    NOTE(review): the visible part of this function never returns ``jump``;
    the body appears to be cut off here -- confirm against the full file.
    """
    global depth

    # only header nodes are of interest
    if key != 'Header':
        return

    # get details of header
    lvl, attr, inline = val

    if lvl > depth:
        # first header of a larger section: record the depth, place no jump
        depth += 1
        return
    if lvl < depth:
        # bring depth down to level
        depth = lvl
def _cite_replacement(key, value, fmt, meta):
    """Build the content that replaces a Cite element for output ``fmt``.

    The citation id is looked up in ``references`` (optionally falling back
    to the part after the last ``:``).  For ``latex`` a ``RawInline`` with a
    ``\\cref``/``\\Cref``, ``\\eqref`` or ``\\ref`` macro is produced; for
    other formats a list of inlines with the reference number, linked to the
    target unless ``nolink`` is set or the target is unknown (``??``).

    When the Cite was written in square brackets the result is wrapped,
    together with the citation prefix and suffix, in a ``Span``.
    """
    assert key == 'Cite'

    # Extract the attributes
    attrs = PandocAttributes(value[0], 'pandoc')

    # Check if the nolink attribute is set
    if 'nolink' in attrs:
        suppress_link = attrs['nolink'].capitalize() == 'True'
    else:
        suppress_link = False

    # Extract the label
    label = value[-2][0]['citationId']
    if allow_implicit_refs and label not in references and ':' in label:
        candidate = label.split(':')[-1]
        if candidate in references:
            label = candidate

    # Get the target metadata; typecast it as a Target for easier access
    target = references[label] if label in references else None
    if target and not isinstance(target, Target):
        target = Target(*target)

    # Issue a warning for duplicate targets
    if _WARNINGLEVEL and target and target.has_duplicate:
        msg = textwrap.dedent(""" %s: Referenced label has duplicate: %s """ % (_FILTERNAME, label))
        STDERR.write(msg)
        STDERR.flush()

    # Get the replacement value
    number_text = str(target.num) if target else '??'

    # Choose between \Cref, \cref and \ref
    if 'modifier' in attrs:
        modifier = attrs['modifier']
        use_cleveref = modifier in ['*', '+']
        plus_form = modifier == '+'
    else:
        use_cleveref = use_cleveref_default
        plus_form = use_cleveref_default
    ref_name = plusname[0] if plus_form else starname[0]  # Reference name

    # The replacement content depends on the output format
    if fmt == 'latex':
        if use_cleveref:
            macro = r'\cref' if plus_form else r'\Cref'
            ret = RawInline('tex', r'%s{%s}' % (macro, label))
        elif use_eqref:
            ret = RawInline('tex', r'\eqref{%s}' % label)
        else:
            ret = RawInline('tex', r'\ref{%s}' % label)
        if suppress_link:
            # https://tex.stackexchange.com/a/323919
            ret['c'][1] = \
                r'{\protect\NoHyper' + ret['c'][1] + r'\protect\endNoHyper}'
    else:
        if use_eqref:
            number_text = '(' + number_text + ')'

        if number_text.startswith('$') and number_text.endswith('$'):
            elem = Math({"t": "InlineMath", "c": []}, number_text[1:-1])
        else:
            elem = Str(number_text)

        if not suppress_link and target:
            # epub output is split into per-section files
            if fmt in ['epub', 'epub2', 'epub3'] and target.secno:
                page = 'ch%03d.xhtml' % target.secno
            else:
                page = ''
            href = '%s#%s' % (page, label)
            if version(_PANDOCVERSION) < version('1.16'):
                elem = elt('Link', 2)([elem], [href, ''])
            else:
                elem = Link(['', [], []], [elem], [href, ''])

        leading = [Str(ref_name + NBSP)] if use_cleveref else []
        ret = leading + [elem]

    # If the Cite was square-bracketed then wrap everything in a span
    bracket_probe = stringify(value[-1])

    # pandoc strips off intervening space between the prefix and the Cite;
    # we may have to add it back in
    cite_prefix = value[-2][0]['citationPrefix']
    if cite_prefix and \
            not stringify(cite_prefix).endswith(('{', '+', '*', '!')):
        spacer = [Space()]
    else:
        spacer = []

    if bracket_probe.startswith('[') and bracket_probe.endswith(']'):
        body = [ret] if fmt == 'latex' else ret
        els = value[-2][0]['citationPrefix'] + spacer + body + \
            value[-2][0]['citationSuffix']
        # We don't yet know if there will be attributes, so leave them
        # as None.  This is fixed later when attributes are processed.
        ret = Span(None, els)

    return ret