def getXHTML(wikitext):
    """Parse *wikitext* and return the MWXHTMLWriter output for it.

    The text is parsed with ``parseString`` against a fresh ``DummyDB``
    under the fixed title "test", run through ``preprocess``, passed to
    ``show`` together with ``sys.stdout``, and then written as a book by
    an ``MWXHTMLWriter``.

    Returns whatever ``MWXHTMLWriter.asstring()`` yields.
    """
    tree = parseString(title="test", raw=wikitext, wikidb=DummyDB())
    preprocess(tree)
    show(sys.stdout, tree)

    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return writer.asstring()
def simpleparse(raw):
    """Parse UTF-8 encoded wikitext into an advanced tree and show it.

    !!! USE FOR DEBUGGING ONLY !!!

    *raw* is decoded as UTF-8 and parsed with ``parseString`` (title
    "title", fresh ``DummyDB``).  The result is passed to
    ``buildAdvancedTree`` and then to ``parser.show`` with ``sys.stdout``.

    Returns the object produced by ``parseString``.
    """
    import sys
    from mwlib import dummydb, parser
    from mwlib.uparser import parseString

    text = raw.decode('utf8')
    tree = parseString(title="title", raw=text, wikidb=dummydb.DummyDB())
    buildAdvancedTree(tree)
    parser.show(sys.stdout, tree, 0)
    return tree
def simpleparse(raw):
    """Tokenize and parse *raw*, show the result on stdout and return it.

    !!! USE FOR DEBUGGING ONLY !!! does not use post processors

    The tokens from ``scanner.tokenize`` are fed to ``parser.Parser`` with
    the name "unknown"; the parse result is passed to ``parser.show``
    together with ``sys.stdout`` and then returned.
    """
    import sys
    from mwlib import dummydb

    # NOTE(review): this DummyDB instance is never read afterwards; the call
    # is kept so behaviour is unchanged -- confirm it can be dropped.
    db = dummydb.DummyDB()

    parsed = parser.Parser(scanner.tokenize(raw), "unknown").parse()
    parser.show(sys.stdout, parsed, 0)
    return parsed
def simpleparse(raw):
    """Run the bare scanner/parser over *raw* and show the parse result.

    !!! USE FOR DEBUGGING ONLY !!! does not use post processors

    Returns the value of ``parser.Parser(tokens, "unknown").parse()`` after
    passing it to ``parser.show`` with ``sys.stdout``.
    """
    import sys
    from mwlib import dummydb

    # NOTE(review): the DummyDB created here is not used by anything below;
    # it is still constructed to keep behaviour identical -- verify whether
    # it is needed at all.
    db = dummydb.DummyDB()

    token_list = scanner.tokenize(raw)
    node = parser.Parser(token_list, "unknown").parse()
    parser.show(sys.stdout, node, 0)
    return node
def getXML(wikitext):
    """Parse *wikitext* and return the DocBookWriter output for it.

    The text is parsed with ``parseString`` (title "test", fresh
    ``DummyDB``).  The tree is passed to ``show`` with ``sys.stdout`` both
    before and after ``preprocess``, each time preceded by a marker line,
    and is then written through ``DocBookWriter.dbwriteArticle``.

    Returns whatever ``DocBookWriter.asstring()`` yields.
    """
    tree = parseString(title="test", raw=wikitext, wikidb=DummyDB())

    # A parenthesised single string prints the same text as the bare
    # print statement.
    print("before preprocess")
    show(sys.stdout, tree)

    preprocess(tree)

    print("after preprocess")
    show(sys.stdout, tree)

    writer = DocBookWriter()
    writer.dbwriteArticle(tree)
    return writer.asstring()
def main():
    """Convert every file named on the command line to XHTML.

    Each path in ``sys.argv[1:]`` is read as UTF-8 wikitext and parsed with
    ``parseString`` (title = the path, fresh ``DummyDB``).  The tree is
    passed to ``parser.show`` with ``sys.stdout`` before and after
    ``preprocess``, written as a book by ``MWXHTMLWriter``, and the writer's
    string output is stored in ``<path>.html``.
    """
    # Still imported lazily, but once per run instead of once per file.
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString

    for fn in sys.argv[1:]:
        # Close the input handle deterministically instead of leaking it.
        with open(fn) as src:
            text = unicode(src.read(), 'utf8')

        tree = parseString(title=fn, raw=text, wikidb=DummyDB())
        parser.show(sys.stdout, tree)
        preprocess(tree)
        parser.show(sys.stdout, tree)

        writer = MWXHTMLWriter()
        writer.writeBook(tree)

        # The context manager guarantees the output is flushed and closed
        # even if write() raises.
        with open("%s.html" % fn, "w") as out:
            out.write(writer.asstring())
def xwriteGenericElement(self, t):
    """Build an ElementTree element for the generic node *t*.

    Nodes without a ``starttext`` attribute are mapped through ``_tag``:
    a new element with that tag is created, passed to ``setVList``
    together with the node, and returned.  If ``_tag`` is missing as well,
    the node is skipped.

    Nodes with ``starttext`` carry raw markup: ``starttext + endtext`` is
    parsed with ``ET.fromstring`` and the resulting element is returned.

    Returns the element, or ``None`` when the node is skipped or its
    markup cannot be parsed.
    """
    if not hasattr(t, "starttext"):
        if not hasattr(t, "_tag"):
            return None
        elem = ET.Element(t._tag)
        setVList(elem, t)
        return elem

    # parse html and return ET elements
    markup = t.starttext + t.endtext
    try:
        if not t.endtext and "/" not in t.starttext:
            # No end text and no "/" in the start text: replace the last
            # character of starttext with "/>" so the fragment is
            # self-closed before parsing.
            markup = t.starttext[:-1] + "/>"
        # The parsed element used to be stored and then discarded; return it
        # so the markup branch yields a result like the _tag branch does.
        return ET.fromstring(markup)
    except Exception:
        # Deliberately not re-raised: the node is passed to parser.show with
        # sys.stdout and the caller gets None.
        parser.show(sys.stdout, t)
        return None
def test_definitiondescription():
    """Check the DefinitionDescription nodes built from nested ":" lines.

    Parses a heading followed by four lines indented with one to four
    colons, builds the advanced tree, and asserts that four
    ``DefinitionDescription`` nodes are found, each with ``indentlevel``
    equal to 1.
    """
    # NOTE(review): the fixture had all markers on a single line; the line
    # breaks are restored here because ":" indentation markup has to start
    # a line.  The exact blank lines of the original fixture are not
    # recoverable -- confirm against history.
    raw = u"""
== test ==
:One
::Two
:::Three
::::Four
"""
    db = DummyDB()
    r = parseString(title="t", raw=raw, wikidb=db)
    parser.show(sys.stdout, r)

    buildAdvancedTree(r)
    dd = r.getChildNodesByClass(DefinitionDescription)
    # Same output as the former ``print "DD:", dd`` statement.
    print("DD: %s" % (dd,))

    for c in dd:
        assert c.indentlevel == 1
    assert len(dd) == 4
def main():
    """Run every file named on the command line through the ODF writer.

    Each path in ``sys.argv[1:]`` is read as UTF-8 wikitext and parsed with
    ``parseString`` (title = the path, fresh ``DummyDB``).  The tree is
    preprocessed, passed to ``parser.show`` with ``sys.stdout``, and
    written with ``ODFWriter.writeTest``.  The document from
    ``ODFWriter.getDoc()`` is then saved with ``doc.save(fn, True)``.
    """
    # Still imported lazily, but once per run instead of once per file.
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString

    for fn in sys.argv[1:]:
        # Close the input handle deterministically instead of leaking it.
        with open(fn) as src:
            text = unicode(src.read(), 'utf8')

        tree = parseString(title=fn, raw=text, wikidb=DummyDB())
        preprocess(tree)
        parser.show(sys.stdout, tree)

        odf = ODFWriter()
        odf.writeTest(tree)
        doc = odf.getDoc()
        # NOTE(review): the second argument presumably asks save() to add
        # the file suffix to fn -- confirm against the document class.
        doc.save(fn, True)
def main():
    """Write an ODF document for each wikitext file given on the command line.

    For every path in ``sys.argv[1:]``: the file content is decoded as
    UTF-8, parsed with ``parseString`` (title = the path, fresh
    ``DummyDB``), preprocessed and passed to ``parser.show`` with
    ``sys.stdout``.  ``ODFWriter.writeTest`` then processes the tree and the
    document from ``ODFWriter.getDoc()`` is saved via ``doc.save(fn, True)``.
    """
    # Lazy imports are kept, but hoisted out of the per-file loop.
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString

    for fn in sys.argv[1:]:
        # The with-block closes the input file as soon as it has been read.
        with open(fn) as infile:
            wikitext = unicode(infile.read(), 'utf8')

        tree = parseString(title=fn, raw=wikitext, wikidb=DummyDB())
        preprocess(tree)
        parser.show(sys.stdout, tree)

        writer = ODFWriter()
        writer.writeTest(tree)
        document = writer.getDoc()
        # NOTE(review): True is presumably an "add suffix" flag for save()
        # -- verify against the document class.
        document.save(fn, True)
def xwriteGenericElement(self, t):
    """Build an ElementTree element for the generic node *t*.

    Nodes without a ``starttext`` attribute are mapped through ``_tag``:
    a new element with that tag is created, passed to ``setVList``
    together with the node, and returned.  If ``_tag`` is missing as well,
    the node is logged as skipped.

    Nodes with ``starttext`` carry raw markup: ``starttext + endtext`` is
    parsed with ``ET.fromstring`` and the resulting element is returned.

    Returns the element, or ``None`` when the node is skipped or its
    markup cannot be parsed.
    """
    if not hasattr(t, "starttext"):
        if not hasattr(t, "_tag"):
            log("skipping %r" % t)
            return None
        elem = ET.Element(t._tag)
        setVList(elem, t)
        return elem

    # parse html and return ET elements
    markup = t.starttext + t.endtext
    try:
        if not t.endtext and "/" not in t.starttext:
            # No end text and no "/" in the start text: replace the last
            # character of starttext with "/>" so the fragment is
            # self-closed before parsing.
            markup = t.starttext[:-1] + "/>"
        # The parsed element used to be stored and then discarded; return it
        # so the markup branch yields a result like the _tag branch does.
        return ET.fromstring(markup)
    except Exception:
        # Deliberately not re-raised: the failure is logged, the node is
        # passed to parser.show with sys.stdout, and the caller gets None.
        log("failed to parse %r \n" % t)
        parser.show(sys.stdout, t)
        return None
import codecs
import sys
import wcb

if __name__ == "__main__":
    # Prints the syntax tree of an article, taken either from the configured
    # wiki or (with --file) from a local UTF-8 file.
    # NOTE(review): argparse, wiki, uparser, advtree and parser are used
    # below but not imported in the visible part of this file -- presumably
    # they are imported elsewhere; confirm.
    argparser = argparse.ArgumentParser(description='Prints the syntax tree of an article')
    argparser.add_argument('--advanced', '-a', action='store_true', help="Convert to advtree")
    argparser.add_argument('article')
    argparser.add_argument('--file', '-f', action='store_true', help="read article from FILE")
    args = argparser.parse_args()

    env = wiki.makewiki(wcb.paths["wikiconf"])

    if args.file:
        try:
            # The with-block closes the file handle, which was previously
            # left open.
            with codecs.open(args.article, encoding='utf-8') as f:
                raw = f.read()
        except ValueError as excp:
            # Exit with an ASCII-safe rendering of the error message.
            sys.exit(unicode(excp).encode("ascii", "backslashreplace") + "\n")
        tree = uparser.parseString(title='Nameless', raw=raw, wikidb=env.wiki,
                                   lang=env.wiki.siteinfo["general"]["lang"])
    else:
        tree = env.wiki.getParsedArticle(args.article)

    if tree:
        if args.advanced:
            advtree.buildAdvancedTree(tree)
        parser.show(sys.stdout, tree, 0)
    else:
        # A parenthesised single string prints the same text as the bare
        # print statement.
        print('Could not find article "' + args.article + '"')
def showTree(tree):
    """Pass *tree* to ``parser.show`` with ``sys.stdout`` and a third argument of 0."""
    stream = sys.stdout
    parser.show(stream, tree, 0)
def show(tree):
    """Pass *tree* to ``parser.show`` with ``sys.stdout`` as the first argument."""
    stream = sys.stdout
    parser.show(stream, tree)
def writeparsetree(self, tree):
    """Append the ``parser.show`` output for *tree* to ``self.root`` as a comment.

    The output is captured in an in-memory buffer, every "--" in it is
    rewritten to " - - " (presumably because a double hyphen is not allowed
    inside an XML comment), and the result is wrapped in ``ET.Comment``.
    """
    buf = StringIO.StringIO()
    parser.show(buf, tree)
    dump = buf.getvalue().replace("--", " - - ")
    self.root.append(ET.Comment(dump))