def wiki2table(content):
    """
    Parse the first wiki table ("{| ... |}") found in content.

    Returns a pair (header, table):
        header -- tuple of (opening "{|" line of the table,
                  list of stripped "!" header cells)
        table  -- list of rows; each row is a list of stripped "|" cells

    Raises libexception.TableError when looking up the table raises
    AttributeError (presumably "no table in the text" -- depends on
    re2.find), or when a row's cell count differs from the header's.

    Known issue: the meaning of '||' at the beginning of a line is still
    an open question; such lines get no special handling here.
    """
    try:
        table_text = re2.find(u"(?ms)^\\{\\|.*?^\\|\\}", content)
    except AttributeError:
        raise libexception.TableError

    # Put each cell on its own line: inline "!!" / "||" separators become
    # a line break followed by a single "!" / "|".
    table_text = re2.subr(u"(?m)(^\\!.*?)\\!\\!", u"\\1\n!", table_text)
    table_text = re2.subr(u"(?m)(^\\|.*?)\\|\\|", u"\\1\n|", table_text)
    source_lines = table_text.split(u"\n")

    titles = [src[1:].strip() for src in source_lines if src.startswith(u"!")]

    rows = []
    cells = []
    for src in source_lines:
        if src.startswith((u"|-", u"|}")):
            # Row separator or end of table: flush the cells collected so far.
            if cells:
                rows.append(cells)
            cells = []
        elif src.startswith(u"|"):
            cells.append(src[1:].strip())

    # Every data row must have exactly one cell per header cell.
    expected = len(titles)
    for row in rows:
        if len(row) != expected:
            raise libexception.TableError

    opening_line = re2.find(u"(?m)^\\{\\|.*?$", table_text)
    return (opening_line, titles), rows
def extract(key, text):
    """
    Extract key from text.

    Note that both key and text must be enunicode strings.

    With a non-empty key: look for a line of the form "* <key>: <value>"
    in text and return the value passed through preload.enunicode.
    Returns None when the lookup raises AttributeError (presumably "no
    such line" -- depends on re2.find).

    With an empty key: parse every non-blank line of text as
    "name: value" and return a dict of all pairs.  Only the first ": " on
    a line separates the name from the value, so values may themselves
    contain ": ".  Empty text yields an empty dict.

    Raises ValueError if a non-blank line has no ": " separator.
    """
    if key:
        try:
            pattern = u"(?m)^\\* " + re2.escape(key) + u": (.*?)$"
            return preload.enunicode(re2.find(pattern, text, 1))
        except AttributeError:
            return None

    dat = {}
    for line in text.strip().split(u"\n"):
        if not line.strip():
            # Blank lines (including wholly empty text) carry no pair.
            continue
        # maxsplit=1: keep any further ": " as part of the value.
        name, value = line.split(u": ", 1)
        dat[name] = value
    return dat