def parse_inputbox(tokens, xopts):
    """Parse ``inputbox`` tags and flatten their content to plain text.

    The recursive tag parser for "inputbox" is run over ``tokens`` first.
    Afterwards, every token found directly in ``tokens`` whose ``tagname``
    is 'inputbox' receives an ``inputbox`` attribute holding its children
    joined as text, and its child list is emptied in place.
    """
    parse = get_recursive_tag_parser("inputbox")
    parse(tokens, xopts)

    boxes = (tok for tok in tokens if tok.tagname == 'inputbox')
    for box in boxes:
        box.inputbox = T.join_as_text(box.children)
        # clear in place so other references to the list see it emptied
        del box.children[:]
def extract_image_modifiers(self, marks, node):
    """Apply image modifiers found between marks and return the caption.

    ``marks`` holds token indices; the token runs between each pair of
    neighbouring marks, starting from the second mark, are examined in
    order.  Each run is joined to text and offered to
    ``self.handle_image_modifier`` together with ``node``.  The last run
    for which the handler returns a false value is returned (as a list
    of tokens); ``None`` is returned when there is no such run.
    """
    caption = None
    # pairs (marks[1], marks[2]), (marks[2], marks[3]), ... up to the last mark
    for left, right in zip(marks[1:-1], marks[2:]):
        segment = self.tokens[left + 1:right]
        text = T.join_as_text(segment)
        handled = self.handle_image_modifier(text, node)
        if not handled:
            caption = segment
    return caption
def find_modifier(self, row):
    """Split the leading modifier text off ``row`` and parse it.

    The children of ``row`` are scanned for the first newline or break
    token.  Everything before that token is joined to text, parsed with
    ``util.parseParams`` and stored as ``row.vlist``; those leading
    children are then removed in place (the newline/break token itself
    is kept).  If no such token exists, ``row`` is left untouched.
    """
    nodes = row.children
    for pos, node in enumerate(nodes):
        if node.type not in (T.t_newline, T.t_break):
            continue

        modifier_text = T.join_as_text(nodes[:pos])
        row.vlist = util.parseParams(modifier_text)
        del nodes[:pos]
        return
def find_modifier(self, cell):
    """Split the leading modifier text off ``cell`` and parse it.

    The children of ``cell`` are scanned for the first special token
    with text "|".  The children before it are joined to text, parsed
    with ``util.parseParams`` and stored as ``cell.vlist``; they are
    then removed in place together with the "|" token.  The scan gives
    up as soon as a ``T.t_2box_open`` token is met, and nothing happens
    when the cell has no children.
    """
    nodes = cell.children
    if not nodes:
        return

    for pos, node in enumerate(nodes):
        if node.type == T.t_2box_open:
            # stop looking: a "|" after this point is not treated as a separator
            return

        if node.type == T.t_special and node.text == "|":
            modifier_text = T.join_as_text(nodes[:pos])
            cell.vlist = util.parseParams(modifier_text)
            del nodes[:pos + 1]
            return
def find_caption(self, table):
    """Wrap the caption tokens of ``table`` in one complex-caption node.

    Phase one looks for a ``T.t_tablecaption`` token among the children
    of ``table``.  Only tokens whose text is whitespace may precede it;
    any other token (text is ``None`` or non-blank) aborts the search.

    Phase two scans forward from the caption token to the first token
    that is not a "ref" tag and whose text is ``None`` or starts with a
    newline.  The tokens from the caption token up to (excluding) that
    end token are replaced in place by a single ``T.t_complex_caption``
    node.  If a "|" was seen first, the tokens before it are parsed with
    ``util.parseParams`` into the node's ``vlist`` and the tokens after
    it become its children; otherwise ``vlist`` is empty and everything
    after the caption token becomes the children.

    Nothing is changed when no caption token or no end token is found.
    """
    nodes = table.children

    # Phase 1: locate the caption token.
    start = None
    for pos, tok in enumerate(nodes):
        if tok.type == T.t_tablecaption:
            start = pos
            break
        if tok.text is None or tok.text.strip():
            return
    if start is None:
        return

    # Phase 2: find the end of the caption and an optional "|" separator.
    # modifier is None -> nothing decided yet
    # modifier == 0    -> a T.t_2box_open came first; never treat "|" as separator
    # modifier > 0     -> index of the separating "|"
    modifier = None
    for pos in range(start + 1, len(nodes)):
        tok = nodes[pos]

        ends_caption = tok.tagname not in ("ref", ) and (tok.text is None or tok.text.startswith("\n"))
        if ends_caption:
            if modifier:
                vlist = util.parseParams(T.join_as_text(nodes[start:modifier]))
                body = nodes[modifier + 1:pos]
            else:
                body = nodes[start + 1:pos]
                vlist = {}

            nodes[start:pos] = [T(type=T.t_complex_caption, children=body, vlist=vlist)]
            return

        if modifier is None:
            if tok.text == "|":
                modifier = pos
            elif tok.type == T.t_2box_open:
                modifier = 0
def find_caption(self, table):
    """Collapse a table caption into a single ``T.t_complex_caption`` node.

    The children of ``table`` are searched for a ``T.t_tablecaption``
    token; it may only be preceded by tokens whose text is blank.  From
    there the caption runs up to the first token that is not a "ref" tag
    and whose text is ``None`` or begins with a newline.  That span
    (caption token included, end token excluded) is replaced in place by
    one caption node.

    A "|" inside the span separates attribute text (parsed with
    ``util.parseParams`` into ``vlist``) from the caption content, unless
    a ``T.t_2box_open`` token appeared before it.  Without a separator
    ``vlist`` is an empty dict.  If either the caption token or the end
    token is missing, the children are left unchanged.
    """
    children = table.children
    count = len(children)

    # Step 1: find where the caption starts; bail out on any real content.
    caption_at = None
    for idx in range(count):
        current = children[idx]
        if current.type == T.t_tablecaption:
            caption_at = idx
            break
        if current.text is None or current.text.strip():
            return
    if caption_at is None:
        return

    # Step 2: walk the caption.  ``separator`` stays None until decided;
    # 0 means "a T.t_2box_open came first, ignore any later '|'".
    separator = None
    for idx in range(caption_at + 1, count):
        current = children[idx]

        if current.tagname not in ("ref",) and (current.text is None or current.text.startswith("\n")):
            if separator:
                attr_text = T.join_as_text(children[caption_at:separator])
                vlist = util.parseParams(attr_text)
                content = children[separator + 1:idx]
            else:
                content = children[caption_at + 1:idx]
                vlist = {}

            node = T(type=T.t_complex_caption, children=content, vlist=vlist)
            children[caption_at:idx] = [node]
            return

        if separator is not None:
            continue
        if current.text == "|":
            separator = idx
        elif current.type == T.t_2box_open:
            separator = 0
def compute_mod():
    """Parse the first ``i`` children into ``table.vlist`` and drop them.

    NOTE(review): ``children``, ``i`` and ``table`` are not defined in
    this function; presumably it is a nested helper reading them from
    the enclosing scope -- confirm against the caller.
    """
    leading = children[:i]
    table.vlist = util.parseParams(T.join_as_text(leading))
    # remove in place so the enclosing scope sees the shortened list
    del children[:i]
def test_ebad_in_text():
    """Parsing text containing U+EBAD must yield text without it."""
    parsed = core.parse_txt(u"foo\uebadbar")
    joined = T.join_as_text(parsed)
    assert joined == "foobar", "\uebad should be stripped"
def run(self):
    """Replace bracketed link token runs in ``self.tokens`` with link nodes.

    The token list is scanned left to right and rewritten in place.
    ``marks`` collects, for the candidate currently being read, the index
    of its ``T.t_2box_open`` token followed by the index of every "|"
    special token seen since; candidates that are interrupted by a nested
    opening token are parked on a stack and resumed afterwards.

    When a ``T.t_2box_close`` token is reached, the text between the
    opening token and the next mark is taken as the link target.  The
    target is resolved through ``self.nshandler`` (interwiki first, then
    ``splitname``; a target starting with "/" is resolved relative to
    ``self.xopts.title`` when that is set).  If it resolves, the whole
    run from the opening to the closing token is replaced by one
    ``T.t_complex_link`` node; otherwise the tokens are left as they are.
    """
    tokens = self.tokens
    pos = 0
    marks = []
    outer = []  # marks of enclosing candidates, resumed when the inner one ends

    while pos < len(self.tokens):
        tok = tokens[pos]

        if tok.type == T.t_2box_open:
            # only candidates that already saw a "|" are worth resuming later
            if len(marks) > 1:
                outer.append(marks)
            marks = [pos]
            pos += 1
            continue

        if tok.type == T.t_newline and len(marks) < 2:
            # a newline before any "|" abandons the current candidate
            marks = outer.pop() if outer else []
            pos += 1
            continue

        if tok.type == T.t_special and tok.text == "|":
            marks.append(pos)
            pos += 1
            continue

        if not (tok.type == T.t_2box_close and marks):
            pos += 1
            continue

        # --- closing token of the current candidate ---
        marks.append(pos)
        start = marks[0]

        target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
        target = target.strip(u"\u200e\u200f")
        colon = target.startswith(":")
        if colon:
            target = target[1:]

        ilink = self.nshandler.resolve_interwiki(target)
        if ilink:
            ns = None
            full = None
            url = ilink.url
            partial = ilink.partial
            langlink = ilink.language
            interwiki = ilink.prefix
        else:
            langlink = None
            interwiki = None
            if target.startswith('/') and self.xopts.title:
                ns, partial, full = self.nshandler.splitname(self.xopts.title + target)
                if full.endswith('/'):
                    full = full[:-1]
                    target = target[1:-1]
            else:
                ns, partial, full = self.nshandler.splitname(target)

            if self.xopts.wikidb is not None:
                url = self.xopts.wikidb.getURL(full)
            else:
                url = None

        if not ilink and not partial:
            # nothing to link to: keep the raw tokens and move on
            pos += 1
            marks = outer.pop() if outer else []
            continue

        node = T(type=T.t_complex_link, children=[], ns=ns, colon=colon, lang=self.lang, nshandler=self.nshandler, url=url)
        if langlink:
            node.langlink = langlink
        if interwiki:
            node.interwiki = interwiki

        if ns == nshandling.NS_IMAGE:
            sub = self.extract_image_modifiers(marks, node)
        elif len(marks) > 2:
            sub = tokens[marks[1] + 1:marks[-1]]
        else:
            sub = None
        if sub is None:
            sub = []

        node.children = sub
        tokens[start:pos + 1] = [node]
        node.target = target
        node.full_target = full

        marks = outer.pop() if outer else []
        # continue right after the freshly inserted node
        pos = start + 1
def test_ebad_in_text():
    """U+EBAD embedded in the input must be absent from the joined text."""
    source = u"foo\uebadbar"
    text = T.join_as_text(core.parse_txt(source))
    assert text == "foobar", "\uebad should be stripped"
def run(self):
    """Turn bracketed link token runs in ``self.tokens`` into link nodes.

    Works in place on ``self.tokens``.  For the candidate being read,
    ``marks`` records the index of its ``T.t_2box_open`` token and then
    the index of each "|" special token; when a nested opening token
    appears, a candidate that already has a "|" is pushed on ``parked``
    and picked up again once the inner one is finished.

    At a ``T.t_2box_close`` token the target text (between the opening
    token and the following mark) is cleaned up and resolved via
    ``self.nshandler``: first as an interwiki link, otherwise with
    ``splitname`` (relative to ``self.xopts.title`` for targets starting
    with "/").  A resolvable candidate is replaced, from opening to
    closing token, by one ``T.t_complex_link`` node; an unresolvable one
    is left untouched.
    """
    tokens = self.tokens
    idx = 0
    marks = []
    parked = []

    while idx < len(self.tokens):
        current = tokens[idx]

        if current.type == T.t_2box_open:
            if len(marks) > 1:
                parked.append(marks)
            marks = [idx]
            idx += 1
            continue

        if current.type == T.t_newline and len(marks) < 2:
            # candidate without any "|" does not survive a newline
            marks = parked.pop() if parked else []
            idx += 1
            continue

        if current.type == T.t_special and current.text == "|":
            marks.append(idx)
            idx += 1
            continue

        is_close = current.type == T.t_2box_close and marks
        if not is_close:
            idx += 1
            continue

        marks.append(idx)
        start = marks[0]

        raw_target = T.join_as_text(tokens[start + 1:marks[1]])
        target = raw_target.strip().strip(u"\u200e\u200f")
        if target.startswith(":"):
            colon = True
            target = target[1:]
        else:
            colon = False

        ilink = self.nshandler.resolve_interwiki(target)
        if ilink:
            url = ilink.url
            partial = ilink.partial
            langlink = ilink.language
            interwiki = ilink.prefix
            ns = full = None
        else:
            if target.startswith('/') and self.xopts.title:
                ns, partial, full = self.nshandler.splitname(
                    self.xopts.title + target)
                if full.endswith('/'):
                    full = full[:-1]
                    target = target[1:-1]
            else:
                ns, partial, full = self.nshandler.splitname(target)

            url = None
            if self.xopts.wikidb is not None:
                url = self.xopts.wikidb.getURL(full)
            langlink = interwiki = None

        if not ilink and not partial:
            # unresolvable: leave the tokens alone, resume any parked candidate
            idx += 1
            marks = parked.pop() if parked else []
            continue

        node = T(type=T.t_complex_link, children=[], ns=ns, colon=colon, lang=self.lang, nshandler=self.nshandler, url=url)
        if langlink:
            node.langlink = langlink
        if interwiki:
            node.interwiki = interwiki

        sub = None
        if ns == nshandling.NS_IMAGE:
            sub = self.extract_image_modifiers(marks, node)
        elif len(marks) > 2:
            sub = tokens[marks[1] + 1:marks[-1]]
        node.children = [] if sub is None else sub

        tokens[start:idx + 1] = [node]
        node.target = target
        node.full_target = full

        marks = parked.pop() if parked else []
        # the run collapsed to one node at ``start``; go on with what follows it
        idx = start + 1