def finish():
    # Balance the collected apostrophe-style tokens: for each collected
    # style span decide whether it is bold, italic, both, or plain text,
    # based on the run lengths recorded in `counts` (closure variable,
    # parallel to `styles`).
    assert len(counts) == len(styles)
    from mwlib.parser import styleanalyzer
    # compute_path chooses a consistent bold/italic interpretation for
    # the whole sequence of apostrophe counts.
    states = styleanalyzer.compute_path(counts)

    last_apocount = 0
    for i, s in enumerate(states):
        # Apostrophes not consumed by the chosen interpretation are put
        # back as literal text at the start of the span.
        apos = "'" * (s.apocount - last_apocount)
        if apos:
            styles[i].children.insert(0, T(type=T.t_text, text=apos))
        last_apocount = s.apocount

        if s.is_bold and s.is_italic:
            # bold + italic: bold node wrapping an inner italic node
            styles[i].caption = "'''"
            inner = T(type=T.t_complex_style, caption="''",
                      children=styles[i].children)
            styles[i].children = [inner]
        elif s.is_bold:
            styles[i].caption = "'''"
        elif s.is_italic:
            styles[i].caption = "''"
        else:
            # neither bold nor italic: demote to a plain container node
            styles[i].type = T.t_complex_node
def run(self):
    # Fold external-link syntax "[http://... label]" into single
    # t_complex_named_url nodes.
    tokens = self.tokens
    i = 0
    start = None  # index of the pending t_urllink token, if any
    while i < len(tokens):
        t = tokens[i]
        if t.type == T.t_urllink and start is None:
            start = i
            i += 1
        elif t.type == T.t_special and t.text == "]" and start is not None:
            # closing bracket: tokens between start and i form the label;
            # the caption drops the leading "[" of the urllink token.
            sub = self.tokens[start + 1:i]
            self.tokens[start:i + 1] = [
                T(type=T.t_complex_named_url, children=sub,
                  caption=self.tokens[start].text[1:])
            ]
            # rescan from the new node's position
            i = start
            start = None
        elif t.type == T.t_2box_close and start is not None:
            # "]]" closes the named url but one "]" must survive: rewrite
            # this token to a plain "]" special and leave it in the stream
            # (note the slice below excludes position i).
            self.tokens[i].type = T.t_special
            self.tokens[i].text = "]"
            sub = self.tokens[start + 1:i]
            self.tokens[start:i] = [
                T(type=T.t_complex_named_url, children=sub,
                  caption=self.tokens[start].text[1:])
            ]
            i = start
            start = None
        else:
            i += 1
def create_source(self, name, vlist, inner, xopts):
    """Build a t_complex_tag node for a source tag.

    The node is block-level unless the tag's vlist carries
    enclose="none", in which case it renders inline.
    """
    body = [T(type=T.t_text, text=inner)]
    is_block = not (vlist and vlist.get("enclose", "") == "none")
    return T(type=T.t_complex_tag, tagname=name, vlist=vlist,
             children=body, blocknode=is_block)
def fix_break_between_pre(tokens, xopt):
    """Rewrite a whitespace t_break that directly precedes a t_pre token
    as an explicit one-space pre token followed by a newline token.

    Mutates `tokens` in place; `xopt` is unused but kept for the common
    fixer signature.
    """
    pos = 0
    # len(tokens) is re-evaluated each pass: every rewrite grows the
    # list by one token.
    while pos < len(tokens) - 1:
        cur = tokens[pos]
        ws_break = cur.type == T.t_break and cur.text.startswith(" ")
        if ws_break and tokens[pos + 1].type == T.t_pre:
            tokens[pos:pos + 1] = [
                T(type=T.t_pre, text=" "),
                T(type=T.t_newline, text=u"\n"),
            ]
            pos += 2  # skip both freshly inserted tokens
        else:
            pos += 1
def makecell(skip_end=0):
    # Fold the tokens between `start` and `i` (closure variables) into a
    # single t_complex_table_cell token.  `skip_end` drops that many
    # trailing tokens from the cell's children (e.g. a consumed end
    # marker).
    st = tokens[start].text.strip()
    # A bare "|" / "!" start marker flips the sticky header state for
    # the following cells of this row.
    if st == "|":
        self.is_header = False
    elif st == "!":
        self.is_header = True
    is_header = self.is_header
    # An explicit html th/td tag overrides the wiki-syntax header state.
    if tokens[start].rawtagname == "th":
        is_header = True
    elif tokens[start].rawtagname == "td":
        is_header = False

    if is_header:
        tagname = "th"
    else:
        tagname = "td"

    # Only wiki-syntax cell markers may be followed by a modifier
    # ("attrs | content"); html tags already carry theirs in vlist.
    search_modifier = tokens[start].text.strip() in ("|", "!", "||", "!!")
    sub = tokens[start + 1:i - skip_end]
    self.replace_tablecaption(sub)
    tokens[start:i] = [T(type=T.t_complex_table_cell, tagname=tagname,
                         start=tokens[start].start, children=sub,
                         vlist=tokens[start].vlist, is_header=is_header)]
    if search_modifier:
        self.find_modifier(tokens[start])
def close_stack(self, spos, tokens, pos):
    """Close every tag on self.stack from index spos onward.

    Each still-open tag becomes a t_complex_tag wrapping the tokens up
    to `pos`; tags close innermost-first.  Returns the position just
    after the outermost node that was created.
    """
    pending = self.stack[spos:]
    del self.stack[spos:]

    # innermost (most recently opened) tag is closed first
    for i, t in reversed(pending):
        vlist = tokens[i].vlist
        display = vlist.get("style", {}).get("display", "").lower()
        # an explicit css display value overrides the tag's default
        if display == "inline":
            blocknode = False
        elif display == "block":
            blocknode = True
        else:
            blocknode = t.blocknode

        sub = tokens[i + 1:pos]
        tokens[i:pos] = [
            T(type=T.t_complex_tag, children=sub, tagname=t.tagname,
              blocknode=blocknode, vlist=tokens[i].vlist)
        ]
        pos = i + 1
    return pos
def extract_garbage(tokens, is_allowed, is_whitespace=None):
    # Remove runs of disallowed ("garbage") tokens from `tokens` in
    # place and return them wrapped in t_complex_node containers.
    # Whitespace directly preceding a garbage run is pulled into it.
    if is_whitespace is None:
        def is_whitespace(t):
            return t.type in (T.t_newline, T.t_break)
    res = []
    i = 0
    start = None  # start of the current pending-whitespace run
    while i < len(tokens):
        if is_whitespace(tokens[i]):
            if start is None:
                start = i
            i += 1
        elif is_allowed(tokens[i]):
            # allowed token: pending whitespace stays where it is
            start = None
            i += 1
        else:
            # garbage: absorb the preceding whitespace run (if any)
            if start is None:
                start = i
            i += 1
            # find end of garbage
            while i < len(tokens):
                if is_allowed(tokens[i]):
                    break
                i += 1
            garbage = tokens[start:i]
            del tokens[start:i]
            # resume scanning at the removal point (start == i here, so
            # leaving `start` set is harmless)
            i = start
            res.append(T(type=T.t_complex_node, children=garbage))
    return res
def create_gallery(self, name, vlist, inner, xopts):
    """Parse the body of a gallery tag and wrap the result in a
    block-level t_complex_tag node."""
    children = _parse_gallery_txt(inner, xopts)
    node = T(type=T.t_complex_tag, tagname="gallery", vlist=vlist,
             children=children, blocknode=True)
    return node
def run(self):
    # Combine each t_pre token plus the text up to and including the
    # following newline into a t_complex_preformatted block, merging
    # consecutive blocks into one.
    tokens = self.tokens
    i = 0
    start = None  # index of the current t_pre token, if any
    while i < len(tokens):
        t = tokens[i]
        if t.type == T.t_pre:
            assert start is None
            start = i
            i += 1
        elif t.type == T.t_newline and start is not None:
            # include the newline itself in the children
            sub = tokens[start + 1:i + 1]
            if start > 0 and tokens[start - 1].type == T.t_complex_preformatted:
                # directly follows an existing preformatted block: merge
                del tokens[start:i + 1]
                tokens[start - 1].children.extend(sub)
                i = start
            else:
                tokens[start:i + 1] = [
                    T(type=T.t_complex_preformatted, children=sub,
                      blocknode=True)
                ]
                i = start + 1
            start = None
        elif t.blocknode or (t.type == T.t_complex_tag
                             and t.tagname in ("blockquote", "table",
                                               "timeline", "div")):
            # block-level content interrupts the preformatted line
            start = None
            i += 1
        else:
            i += 1
def replace_tablecaption(self, children):
    """Rewrite every t_tablecaption token in `children` as a literal
    "|" special token followed by a "+" text token (in place)."""
    idx = 0
    while idx < len(children):
        node = children[idx]
        if node.type == T.t_tablecaption:
            node.type = T.t_special
            node.text = u"|"
            children.insert(idx + 1, T(type=T.t_text, text="+"))
        idx += 1
def splitdl(self, item):
    """Split a definition-list item at its first ':' special token.

    The tokens after the colon become a new ':' t_complex_style node;
    the colon and that tail are removed from `item`.  Returns the new
    node, or None when the item contains no ':' special token.
    """
    for pos, child in enumerate(item.children):
        if child.type == T.t_special and child.text == ':':
            tail = item.children[pos + 1:]
            node = T(type=T.t_complex_style, caption=':', children=tail)
            del item.children[pos:]
            return node
def _create_generic(self, name, vlist, inner, xopts):
    """Create a node for a generic tag.

    Tags registered in self.tagextensions are delegated to their
    handler and wrapped as t_complex_compat (or None when the handler
    yields nothing); any other tag becomes a plain t_complex_tag
    holding the raw inner text.
    """
    vlist = vlist or {}
    if name in self.tagextensions:
        node = self.tagextensions[name](inner, vlist)
        if node is None:
            return None
        return T(type=T.t_complex_compat, compatnode=node)

    text_child = T(type=T.t_text, text=inner)
    return T(type=T.t_complex_tag, tagname=name, vlist=vlist,
             children=[text_child])
def create(delta=1):
    # Wrap the tokens collected since `first` (closure variable) into a
    # block-level paragraph node.  `delta` extends the replaced slice
    # past `i`, consuming that many extra tokens (e.g. the paragraph
    # break itself) without keeping them as children.  An empty range
    # creates nothing.
    sub = tokens[first:i]
    if sub:
        tokens[first:i + delta] = [
            T(type=T.t_complex_tag, tagname='p', children=sub,
              blocknode=True)
        ]
def create():
    # Wrap tokens[start:i] in nested t_complex_tag nodes, one per tag
    # still open in `state` (closure variable); each iteration wraps the
    # previous result, so the last entry ends up outermost.  Returns
    # True when a node was created, False for an empty range or when no
    # tags are open.
    if not state or i <= start:
        return False
    children = tokens[start:i]
    for tag, tok in state.items():
        outer = T(type=T.t_complex_tag, tagname=tag, children=children,
                  vlist=tok.vlist)
        children = [outer]
    tokens[start:i] = [outer]
    return True
def create_imagemap(self, name, vlist, inner, xopts):
    """Create a t_complex_tag node for an imagemap tag.

    The body is parsed with mwlib.imgmap; when it references an image,
    the corresponding "[[...]]" link is parsed from wiki text and the
    resulting image-link node (ns == 6) is stored on
    node.imagemap.imagelink (None when parsing yields no image link).
    """
    from mwlib import imgmap
    node = T(type=T.t_complex_tag, tagname="imagemap", vlist=vlist)
    node.imagemap = imgmap.ImageMapFromString(inner)
    if node.imagemap.image:
        node.imagemap.imagelink = None
        link_markup = u"[[" + node.imagemap.image + u"]]"
        parsed = parse_txt(link_markup, xopts)
        if parsed and parsed[0].type == T.t_complex_link and parsed[0].ns == 6:
            node.imagemap.imagelink = parsed[0]
    return node
def create():
    # Build a t_complex_section from the tokens between the opening
    # "=..." token (current.start) and position i, and hook it into the
    # section stack.  Returns False when either heading boundary is
    # missing.
    if current.start is None or current.endtitle is None:
        return False
    # heading level is the smaller "=" count of the two markers; surplus
    # "=" characters are kept as literal text inside the caption
    l1 = tokens[current.start].text.count("=")
    l2 = tokens[current.endtitle].text.count("=")
    level = min(l1, l2)

    # FIXME: make this a caption
    caption = T(type=T.t_complex_node,
                children=tokens[current.start + 1:current.endtitle])
    if l2 > l1:
        caption.children.append(T(type=T.t_text, text=u"=" * (l2 - l1)))
    elif l1 > l2:
        caption.children.insert(
            0, T(type=T.t_text, text=u"=" * (l1 - l2)))

    body = T(type=T.t_complex_node,
             children=tokens[current.endtitle + 1:i])
    sect = T(type=T.t_complex_section, tagname="@section",
             children=[caption, body], level=level, blocknode=True)
    tokens[current.start:i] = [sect]

    # pop sections this heading closes (same or shallower level)
    while sections and level <= sections[-1].level:
        sections.pop()
    if sections:
        # nested section: move it into its parent instead of leaving it
        # in the token stream
        sections[-1].children.append(tokens[current.start])
        del tokens[current.start]
        current.start -= 1
    sections.append(sect)
    return True
def fixlitags(tokens, xopts):
    """Wrap stray "li" tokens in "ul" lists.

    Walks the whole token tree; inside any parent that is not already
    an ol/ul list, each run of consecutive "li" tokens is replaced by a
    single "ul" t_complex_tag containing them, so every list item ends
    up with a proper list parent.  Mutates the token lists in place;
    `xopts` is unused but kept for the common fixer signature.
    """
    root = T(type=T.t_complex_tag, tagname="div")
    todo = [(root, tokens)]
    while todo:
        parent, tokens = todo.pop()
        if parent.tagname not in ("ol", "ul"):
            idx = 0
            while idx < len(tokens):
                start = idx
                while idx < len(tokens) and tokens[idx].tagname == "li":
                    idx += 1
                if idx > start:
                    lst = T(type=T.t_complex_tag, tagname="ul",
                            children=tokens[start:idx])
                    # BUGFIX: replace exactly the li run.  The previous
                    # slice end of idx + 1 also deleted the first non-li
                    # token after the run even though it was not part of
                    # the new node's children, silently losing content.
                    tokens[start:idx] = [lst]
                    idx = start + 1
                else:
                    idx += 1
        # recurse into every child list
        for t in tokens:
            if t.children:
                todo.append((t, t.children))
def appendline():
    # Move lines[startpos] into item.children (closure variables).  When
    # `endtag` is set and the line contains the matching html end tag,
    # the line is split there: the part before the end tag goes into the
    # item, the end tag itself is dropped, and the remainder stays in
    # `lines` as a fresh paragraph line.
    line = lines[startpos]
    if endtag:
        for i, x in enumerate(line.children):
            if x.rawtagname == endtag and x.type == T.t_html_tag_end:
                after = line.children[i + 1:]
                # delete the end tag together with the tail
                del line.children[i:]
                item.children.append(line)
                lines[startpos] = T(type=T.t_complex_line, tagname="p",
                                    lineprefix=None, children=after)
                return
    item.children.append(lines[startpos])
    del lines[startpos]
def maketable():
    # Fold the tokens between the table start marker (top of `stack`)
    # and position `i` (closure variables) into a single t_complex_table
    # node, then parse captions, modifiers and rows inside it.  Returns
    # the index of the new table token.
    start = stack.pop()
    starttoken = tokens[start]
    sub = tokens[start + 1:i]

    # extract <caption> html tags before the rows are handled
    from mwlib.refine import core
    tp = core.tagparser()
    tp.add("caption", 5)
    tp(sub, self.xopts)

    tokens[start:i + 1] = [T(type=T.t_complex_table, tagname="table",
                             start=tokens[start].start, children=sub,
                             vlist=starttoken.vlist, blocknode=True)]
    # a bare "{|" marker may be followed by table attributes
    if starttoken.text.strip() == "{|":
        self.find_modifier(tokens[start])
    self.handle_rows(sub)
    self.find_caption(tokens[start])
    return start
def _parse_gallery_txt(txt, xopts):
    """Parse the body of a gallery tag.

    Each non-empty line is template-expanded and parsed as a "[[...]]"
    link; lines that yield an image node (ns == NS_IMAGE) keep that
    node, all other lines are kept as plain text tokens.  Requires
    xopts.expander to be set.
    """
    sub = []
    for raw_line in txt.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        assert xopts.expander is not None, "no expander in _parse_gallery_txt"
        expanded = xopts.expander.parseAndExpand(line, keep_uniq=True)
        parsed = parse_txt(u'[[' + expanded + ']]', xopts)
        if parsed:
            first = parsed[0]
            if first.ns == nshandling.NS_IMAGE:
                sub.append(first)
                continue
        sub.append(T(type=T.t_text, text=expanded))
    return sub
def create_ref(self, name, vlist, inner, xopts):
    # Build a t_complex_tag node for a <ref> tag.  The body is
    # template-expanded, then parsed with a "<br />" prepended so a body
    # starting with list syntax is not treated as a list item; the br
    # node is stripped out of the parse result again below.
    expander = xopts.expander
    if expander is not None and inner:
        inner = expander.parseAndExpand(inner, True)
    if inner:
        # <ref>* not an item</ref>
        children = parse_txt("<br />" + inner, xopts)
        if children[0].children:
            # paragraph had been created...
            # the br is the paragraph's first child: drop just that
            del children[0].children[0]
        else:
            # the br is the first top-level node: drop it entirely
            del children[0]
    else:
        children = []
    return T(type=T.t_complex_tag, tagname="ref", vlist=vlist,
             children=children)
def create_poem(self, name, vlist, inner, xopts):
    """Create a t_complex_tag node for a poem tag.

    Each non-blank line of the (template-expanded) body is given a ":"
    indent prefix; a leading space on the original line is preserved as
    one space.  The rewritten text is parsed into the node's children.
    """
    expander = xopts.expander
    if expander is not None and inner:
        inner = expander.parseAndExpand(inner, True)

    parts = [u"\n"]
    for line in inner.split("\n"):
        if line.strip():
            parts.append(":")
        if line.startswith(" "):
            parts.append(u" ")
        parts.append(line.strip())
        parts.append(u"\n")
    parts.append(u"\n")

    children = parse_txt(u"".join(parts), xopts)
    return T(type=T.t_complex_tag, tagname="poem", vlist=vlist,
             children=children)
def find_caption(self, table):
    # Locate a t_tablecaption token at the start of the table (only
    # blank text tokens may precede it) and replace the caption tokens
    # with a single t_complex_caption node.
    children = table.children
    start = None
    i = 0
    while i < len(children):
        t = children[i]
        if t.type == T.t_tablecaption:
            start = i
            i += 1
            break
        # any non-blank text before a caption marker: nothing to do
        if t.text is None or t.text.strip():
            return
        i += 1

    # `modifier` marks the "|" separating caption attributes from the
    # caption text (e.g. "|+ align=left | text")
    modifier = None
    while i < len(children):
        t = children[i]
        if t.tagname not in ("ref", ) and (t.text is None
                                           or t.text.startswith("\n")):
            # end of the caption line: build the caption node
            if modifier:
                mod = T.join_as_text(children[start:modifier])
                vlist = util.parseParams(mod)
                sub = children[modifier + 1:i]
            else:
                sub = children[start + 1:i]
                vlist = {}
            caption = T(type=T.t_complex_caption, children=sub, vlist=vlist)
            children[start:i] = [caption]
            return
        elif t.text == "|" and modifier is None:
            modifier = i
        elif t.type == T.t_2box_open and modifier is None:
            # NOTE(review): a link open sets modifier to the falsy value
            # 0, which blocks later "|" tokens from being taken as the
            # attribute separator while `if modifier:` above still treats
            # it as absent — presumably deliberate, since "|" inside
            # "[[...]]" is not an attribute separator; confirm.
            modifier = 0
        i += 1
def create_pages(self, name, vlist, inner, xopts):
    # Handle a pages tag with from=/to= attributes: transclude the
    # "Page" namespace pages in the given range as the node's children.
    expander = xopts.expander
    if not vlist:
        vlist = {}
    s = vlist.get("from")
    e = vlist.get("to")
    children = []
    if s and e and expander:
        nshandler = expander.nshandler
        page_ns = nshandler._find_namespace("Page")[1]
        try:
            si = int(s)
            ei = int(e)
        except ValueError:
            # non-numeric bounds: treat them as page names and let the
            # wikidb enumerate the pages in between
            s = nshandler.get_fqname(s, page_ns)
            e = nshandler.get_fqname(e, page_ns)
            pages = expander.db.select(s, e)
        else:
            # numeric bounds: subpages "<index>/<n>" for n in [si, ei]
            base = vlist.get("index", "")
            base = nshandler.get_fqname(base, page_ns)
            pages = [u"%s/%s" % (base, i) for i in range(si, ei + 1)]

        # expand one "{{page}}" transclusion per page with a fresh
        # expander so the uniquifier matches the generated text
        rawtext = u"".join(u"{{%s}}\n" % x for x in pages)
        te = expander.__class__(rawtext, pagename=expander.pagename,
                                wikidb=expander.db)
        children = parse_txt(te.expandTemplates(True),
                             xopts=XBunch(**xopts.__dict__),
                             expander=te, uniquifier=te.uniquifier)
    return T(type=T.t_complex_tag, tagname=name, vlist=vlist,
             children=children)
def create_timeline(self, name, vlist, inner, xopts):
    """Create a block-level t_complex_tag node for a timeline tag.

    The raw body is not parsed; it is stored as-is on the node's
    `timeline` attribute.
    """
    node = T(type=T.t_complex_tag, tagname="timeline", vlist=vlist,
             timeline=inner, blocknode=True)
    return node
def run(self):
    # Group t_item / t_colon list markers and the text up to the next
    # newline into t_complex_line tokens, then hand each batch of
    # consecutive lines to self.analyze() and splice the result back
    # into the token stream.
    tokens = self.tokens
    i = 0
    lines = []        # t_complex_line tokens of the current batch
    startline = None  # index of the current line's marker token
    firsttoken = None # index where the current batch started

    def getlineprefix():
        # the marker's text (e.g. "*#;:") identifies the list nesting
        return (tokens[startline].text or "").strip()

    while i < len(self.tokens):
        t = tokens[i]
        if t.type in (T.t_item, T.t_colon):
            if firsttoken is None:
                firsttoken = i
            startline = i
            i += 1
        elif t.type == T.t_newline and startline is not None:
            # line complete: the newline itself is kept in the children
            sub = self.tokens[startline + 1:i + 1]
            lines.append(
                T(type=T.t_complex_line, start=tokens[startline].start,
                  len=0, children=sub, lineprefix=getlineprefix()))
            startline = None
            i += 1
        elif t.type == T.t_break:
            # a blank line ends both the current line and the batch
            if startline is not None:
                sub = self.tokens[startline + 1:i]
                lines.append(
                    T(type=T.t_complex_line,
                      start=tokens[startline].start, len=0,
                      children=sub, lineprefix=getlineprefix()))
                startline = None
            if lines:
                self.analyze(lines)
                self.tokens[firsttoken:i] = lines
                i = firsttoken
                firsttoken = None
                lines = []
                continue
            firsttoken = None
            lines = []
            i += 1
        else:
            if startline is None and lines:
                # non-list content after a finished batch: flush it
                self.analyze(lines)
                self.tokens[firsttoken:i] = lines
                i = firsttoken
                lines = []
                firsttoken = None
            else:
                i += 1

    # flush an unterminated final line and the last batch
    if startline is not None:
        sub = self.tokens[startline + 1:]
        lines.append(
            T(type=T.t_complex_line, start=tokens[startline].start,
              children=sub, lineprefix=getlineprefix()))
    if lines:
        self.analyze(lines)
        self.tokens[firsttoken:] = lines
def run(self):
    # Group the tokens of a table into t_complex_table_row ("tr") nodes
    # and parse the cells inside each finished row.
    tokens = self.tokens
    i = 0
    start = None        # index where the current row begins
    remove_start = 1    # 1: drop the row-start marker from children
    rowbegintoken = None  # the token that opened the current row, if any

    def should_find_modifier():
        # only wiki-syntax row starts ("|-") can carry a modifier;
        # html <tr> tags already have their attributes in vlist
        if rowbegintoken is None:
            return False
        if rowbegintoken.rawtagname:
            return False
        return True

    def args():
        if rowbegintoken is None:
            return {}
        return dict(vlist=rowbegintoken.vlist)

    while i < len(tokens):
        if start is None and self.is_table_cell_start(tokens[i]):
            # implicit row: cells appear before any row marker
            rowbegintoken = None
            start = i
            remove_start = 0  # no marker token to drop
            i += 1
        elif self.is_table_row_start(tokens[i]):
            if start is not None:
                # a new row marker closes the current row
                children = tokens[start + remove_start:i]
                tokens[start:i] = [T(type=T.t_complex_table_row,
                                     tagname="tr",
                                     start=tokens[start].start,
                                     children=children, **args())]
                if should_find_modifier():
                    self.find_modifier(tokens[start])
                parse_table_cells(children, self.xopts)
                start += 1  # we didn't remove the start symbol above
                rowbegintoken = tokens[start]
                remove_start = 1
                i = start + 1
            else:
                rowbegintoken = tokens[i]
                remove_start = 1
                start = i
                i += 1
        elif self.is_table_row_end(tokens[i]):
            if start is not None:
                # explicit row end: the end marker itself is consumed
                sub = tokens[start + remove_start:i]
                tokens[start:i + 1] = [T(type=T.t_complex_table_row,
                                         tagname="tr",
                                         start=tokens[start].start,
                                         children=sub, **args())]
                if should_find_modifier():
                    self.find_modifier(tokens[start])
                parse_table_cells(sub, self.xopts)
                i = start + 1
                start = None
                rowbegintoken = None
            else:
                i += 1
        else:
            i += 1

    # close a row left open at the end of the stream
    if start is not None:
        sub = tokens[start + remove_start:]
        tokens[start:] = [T(type=T.t_complex_table_row, tagname="tr",
                            start=tokens[start].start, children=sub,
                            **args())]
        if should_find_modifier():
            self.find_modifier(tokens[start])
        parse_table_cells(sub, self.xopts)
def create_nowiki(self, name, vlist, inner, xopts):
    """Create a plain text token for a nowiki tag.

    Html entities in the body are decoded; everything else is kept
    literally (no wiki markup parsing).
    """
    decoded = util.replace_html_entities(inner)
    return T(type=T.t_text, text=decoded)
def run(self):
    # Fold "[[...]]" wiki links into t_complex_link nodes.  `marks`
    # records the indices of the current link's structural tokens
    # (open marker and every "|"); `stack` saves the marks of enclosing
    # links while a nested "[[" is parsed.
    tokens = self.tokens
    i = 0
    marks = []
    stack = []
    while i < len(self.tokens):
        t = tokens[i]
        if t.type == T.t_2box_open:
            # nested open: park the enclosing link's marks
            if len(marks) > 1:
                stack.append(marks)
            marks = [i]
            i += 1
        elif t.type == T.t_newline and len(marks) < 2:
            # a newline before any "|" aborts the current link
            if stack:
                marks = stack.pop()
            else:
                marks = []
            i += 1
        elif t.type == T.t_special and t.text == "|":
            marks.append(i)
            i += 1
        elif t.type == T.t_2box_close and marks:
            marks.append(i)
            start = marks[0]
            # link target = text between "[[" and the first "|" (or "]]")
            target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
            # strip left-to-right / right-to-left marks
            target = target.strip(u"\u200e\u200f")
            if target.startswith(":"):
                target = target[1:]
                colon = True
            else:
                colon = False
            ilink = self.nshandler.resolve_interwiki(target)
            if ilink:
                url = ilink.url
                ns = None
                partial = ilink.partial
                langlink = ilink.language
                interwiki = ilink.prefix
                full = None
            else:
                if target.startswith('/') and self.xopts.title:
                    # relative subpage link: resolve against the page title
                    ns, partial, full = self.nshandler.splitname(
                        self.xopts.title + target)
                    if full.endswith('/'):
                        # trailing "/": drop it and display the bare name
                        full = full[:-1]
                        target = target[1:-1]
                else:
                    ns, partial, full = self.nshandler.splitname(target)
                if self.xopts.wikidb is not None:
                    url = self.xopts.wikidb.getURL(full)
                else:
                    url = None
                langlink = None
                interwiki = None

            if not ilink and not partial:
                # empty/unresolvable target: not a link, leave tokens alone
                i += 1
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                continue

            node = T(type=T.t_complex_link, children=[], ns=ns,
                     colon=colon, lang=self.lang,
                     nshandler=self.nshandler, url=url)
            if langlink:
                node.langlink = langlink
            if interwiki:
                node.interwiki = interwiki

            sub = None
            if ns == nshandling.NS_IMAGE:
                # image links get their modifiers (thumb|left|...) parsed
                sub = self.extract_image_modifiers(marks, node)
            elif len(marks) > 2:
                # display text = everything after the first "|"
                sub = tokens[marks[1] + 1:marks[-1]]
            if sub is None:
                sub = []
            node.children = sub
            tokens[start:i + 1] = [node]
            node.target = target
            node.full_target = full
            if stack:
                marks = stack.pop()
            else:
                marks = []
            i = start + 1
        else:
            i += 1
def create_math(self, name, vlist, inner, xopts):
    """Create a t_complex_tag node for a math tag.

    The raw body is not parsed; it is kept verbatim on the node's
    `math` attribute.
    """
    node = T(type=T.t_complex_tag, tagname="math", vlist=vlist,
             math=inner)
    return node