def show(): parser = optparse.OptionParser() parser.add_option("-c", "--config", help="configuration file/URL/shortcut") parser.add_option("-e", "--expand", action="store_true", help="expand templates") parser.add_option("-t", "--template", action="store_true", help="show template") parser.add_option("-f", help='read input from file. implies -e') options, args = parser.parse_args() if not args and not options.f: parser.error("missing ARTICLE argument") articles = [unicode(x, 'utf-8') for x in args] conf = options.config if not conf: parser.error("missing --config argument") from mwlib import wiki, expander db = wiki.makewiki(conf).wiki for a in articles: if options.template: defaultns = 10 else: defaultns = 0 page = db.normalize_and_get_page(a, defaultns) if page: raw = page.rawtext else: raw = None if raw: if options.expand: te = expander.Expander(raw, pagename=a, wikidb=db) raw = te.expandTemplates() print raw.encode("utf-8") if options.f: raw = unicode(open(options.f).read(), 'utf-8') te = expander.Expander(raw, pagename='test', wikidb=db) raw = te.expandTemplates() print raw.encode("utf-8")
def expandArticle(self, article_text, title):
    """Return ``article_text`` with its templates expanded.

    The expander is created with ``title`` as the page name, this object as
    the wiki database, and this object's ``lang``, ``templateprefix`` and
    ``templateblacklist`` attributes.
    """
    te = expander.Expander(
        article_text,
        pagename=title,
        wikidb=self,
        lang=self.lang,
        templateprefix=self.templateprefix,
        templateblacklist=self.templateblacklist,
    )
    return te.expandTemplates()
def test_undefined_variable(): db = DictDB(Art="{{Pipe}}", Pipe="{{{undefined_variable}}}") te = expander.Expander(db.normalize_and_get_page( "Art", 0).rawtext, pagename="thispage", wikidb=db) res = te.expandTemplates() print "EXPANDED:", repr(res) assert u"{{{undefined_variable}}}" in res, "wrong expansion for undefined variable"
def test_alfred(): """I start to hate that Alfred_Gusenbauer""" db = DictDB(a="{{ibox2|birth_date=1960}}", ibox2="{{{birth{{#if:{{{birthdate|}}}||_}}date}}}") te = expander.Expander(db.normalize_and_get_page("a", 0).rawtext, pagename="thispage", wikidb=db) res = te.expandTemplates() print "EXPANDED:", repr(res) assert "1960" in res
def test_preprocess_uniq_after_comment(): s = u""" <!-- these <ref> tags should be ignored: <ref> --> foo was missing<ref>bar</ref> <!-- some comment--> baz <references /> """ e = expander.Expander(s, pagename="test", wikidb=DictDB()) raw = e.expandTemplates() print repr(raw) assert u"foo was missing" in raw, "text is missing"
def test_pipe_table(): db = DictDB(Foo=""" bla {{{ {{Pipe}}} blubb """, Pipe="|") te = expander.Expander(db.normalize_and_get_page("Foo", 0).rawtext, pagename="thispage", wikidb=db) res = te.expandTemplates() print "EXPANDED:", repr(res) assert "bla" in res assert "blubb" in res
def main():
    """Expand, tokenize, parse and dump every file named on the command line.

    Each path in ``sys.argv[1:]`` is read and decoded as UTF-8, passed through
    the template expander with a ``DummyDB`` instance as wiki database,
    tokenized and parsed; the parse result is written to stdout via ``show``.
    """
    # Import once up front rather than on every loop iteration.
    from mwlib import expander
    from mwlib.dummydb import DummyDB

    db = DummyDB()

    for path in sys.argv[1:]:
        # Close each input file explicitly instead of leaking the handle.
        # try/finally (rather than `with`) keeps the code valid on older
        # Python 2 interpreters.
        infile = open(path)
        try:
            data = infile.read()
        finally:
            infile.close()

        # Named `text` rather than `input` so the builtin is not shadowed.
        text = unicode(data, 'utf8')
        te = expander.Expander(text, pagename=path, wikidb=db)
        expanded = te.expandTemplates()

        tokens = tokenize(expanded, path)
        p = Parser(tokens, os.path.basename(path))
        r = p.parse()

        show(sys.stdout, r, 0)
def extract_metadata(raw, fields, template_name="saved_book"):
    """Collect template argument values from ``raw``.

    A synthetic template called ``template_name`` is registered in a
    ``DictDB``.  For every name in ``fields`` it emits the ``uniq`` marker,
    the field name, a line break and a ``{{{name|}}}`` parameter reference.
    ``raw`` is expanded against that database and the result is cut apart at
    the markers.

    Returns a ``defaultdict(unicode)`` mapping each field name to its
    stripped value.
    """
    # The extra empty field emits one more marker after the last real field;
    # the chunk it starts is discarded by the [1:-1] slice below.
    names = list(fields) + [""]
    pieces = [u"%s%s\n{{{%s|}}}\n" % (uniq, name, name) for name in names]
    wikidb = expander.DictDB({template_name: "".join(pieces)})

    template_expander = expander.Expander(raw, pagename="", wikidb=wikidb)
    expanded = template_expander.expandTemplates()

    result = defaultdict(unicode)
    for chunk in expanded.split(uniq)[1:-1]:
        key, value = chunk.split("\n", 1)
        result[key] = value.strip()
    return result
def test_switch_default(): db = DictDB( Bonn="""{{Infobox |Bundesland = Nordrhein-Westfalen }} """, Infobox="""{{#switch: {{{Bundesland}}} | Bremen = [[Bremen (Land)|Bremen]] | #default = [[{{{Bundesland|Bayern}}}]] }} """) te = expander.Expander(db.normalize_and_get_page("Bonn", 0).rawtext, pagename="thispage", wikidb=db) res = te.expandTemplates() print "EXPANDED:", repr(res) assert "Nordrhein-Westfalen" in res
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Parse the MediaWiki text of article ``title`` and return the parse result.

    When ``raw`` is None the text is fetched with
    ``wikidb.getRawArticle(title, revision=revision)``.  If ``wikidb`` is
    truthy, templates are expanded before tokenizing; otherwise the raw text
    is tokenized as is.  The result gets ``caption`` set to ``title`` and is
    passed to each callable in ``postprocessors``.

    Raises AssertionError when ``title`` is None or the article text cannot
    be fetched.
    """
    assert title is not None

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    # Without a wiki database there is nothing to resolve templates against.
    text = raw
    if wikidb:
        text = expander.Expander(raw, pagename=title, wikidb=wikidb).expandTemplates()

    token_list = scanner.tokenize(text, title)
    article = parser.Parser(token_list, title).parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article)

    return article
def show(): parser = optparse.OptionParser( usage="%prog [-e|--expand] --conf CONF ARTICLE [...]") parser.add_option("-c", "--conf", help="config file") parser.add_option("-e", "--expand", action="store_true", help="expand templates") parser.add_option("-t", "--template", action="store_true", help="show template") options, args = parser.parse_args() if not args: parser.error("missing ARTICLE argument") articles = [unicode(x, 'utf-8') for x in args] conf = options.conf if not conf: parser.error("missing --conf argument") from mwlib import wiki, expander db = wiki.makewiki(conf)['wiki'] for a in articles: if options.template: raw = db.getTemplate(a) else: raw = db.getRawArticle(a) if raw: if options.expand: te = expander.Expander(raw, pagename=a, wikidb=db) raw = te.expandTemplates() print raw.encode("utf-8")
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Parse the MediaWiki text of article ``title`` and return the parse result.

    When ``raw`` is None the text is fetched with
    ``wikidb.getRawArticle(title, revision=revision)``.  If ``wikidb`` is
    truthy, templates are expanded, and ``lang`` / ``interwikimap`` are looked
    up through ``wikidb.getSource`` / ``wikidb.getInterwikiMap`` when they
    were not supplied and the database offers those methods.  The result gets
    ``caption`` set to ``title`` and is handed to every callable in
    ``postprocessors``.

    Raises AssertionError when ``title`` is None or the article text cannot
    be fetched.
    """
    assert title is not None, 'no title given'

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    # Without a wiki database the raw text is parsed unexpanded.
    text = raw
    if wikidb:
        text = expander.Expander(raw, pagename=title, wikidb=wikidb).expandTemplates()

        if lang is None and hasattr(wikidb, 'getSource'):
            source = wikidb.getSource(title, revision=revision)
            if source:
                lang = source.get('language')

        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)

    token_list = scanner.tokenize(text, title)
    article_parser = parser.Parser(token_list, title, lang=lang, interwikimap=interwikimap)
    article = article_parser.parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision, wikidb=wikidb, lang=lang)

    return article
def parseString(title=None, raw=None, wikidb=None, revision=None, lang=None,
                magicwords=None, expandTemplates=True):
    """Parse the MediaWiki text of article ``title`` and return the parse result.

    When ``raw`` is None the text is taken from the ``rawtext`` of
    ``wikidb.normalize_and_get_page(title, 0)``.

    If ``wikidb`` is truthy:
      * templates are expanded unless ``expandTemplates`` is false;
      * site info is read from ``wikidb.get_siteinfo()`` when available;
      * a source object is read from ``wikidb.getSource`` when available,
        falling back to ``metabook.source()``;
      * ``lang`` and ``magicwords`` are filled in from site info / the source
        object when they were not supplied.

    The result's ``caption`` is ``title``, or the expander's
    ``magic_displaytitle`` when that is set.  Every callable in
    ``mwlib.old_uparser.postprocessors`` is then applied to it.

    Raises AssertionError when ``title`` is None, when the article text
    cannot be fetched, or when ``getSource`` returns a dict.
    """
    assert title is not None, 'no title given'

    if raw is None:
        page = wikidb.normalize_and_get_page(title, 0)
        if page:
            raw = page.rawtext
        assert raw is not None, "cannot get article %r" % (title, )

    text = raw
    te = None
    uniquifier = None
    siteinfo = None

    if wikidb:
        if expandTemplates:
            te = expander.Expander(raw, pagename=title, wikidb=wikidb)
            text = te.expandTemplates(True)
            uniquifier = te.uniquifier

        if hasattr(wikidb, 'get_siteinfo'):
            siteinfo = wikidb.get_siteinfo()

        source = None
        if hasattr(wikidb, 'getSource'):
            source = wikidb.getSource(title, revision=revision)
            assert not isinstance(source, dict)
        source = source or metabook.source()

        if lang is None:
            lang = source.language

        if magicwords is None:
            # Site info takes precedence over the source object.
            if siteinfo is not None and 'magicwords' in siteinfo:
                magicwords = siteinfo['magicwords']
            else:
                magicwords = source.get('magicwords')

    if siteinfo is not None:
        nshandler = nshandling.nshandler(siteinfo)
    else:
        nshandler = nshandling.get_nshandler_for_lang(lang)

    article = compat.parse_txt(
        text,
        title=title,
        wikidb=wikidb,
        nshandler=nshandler,
        lang=lang,
        magicwords=magicwords,
        uniquifier=uniquifier,
        expander=te,
    )

    article.caption = title
    if te and te.magic_displaytitle:
        article.caption = te.magic_displaytitle

    from mwlib.old_uparser import postprocessors
    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision, wikidb=wikidb, lang=lang)

    return article
def test_resolve_magic_alias():
    """With ``DummyDB("nl")``, ``#als`` resolves to ``#if`` and ``#foobar`` to None."""
    wikidb = DummyDB("nl")
    template_expander = expander.Expander(u"{{#als: 1 | yes | no}}", wikidb=wikidb)

    known = template_expander.resolve_magic_alias(u"#als")
    assert known == u"#if"

    unknown = template_expander.resolve_magic_alias(u"#foobar")
    assert unknown is None
def test_localized_expr():
    """With ``DummyDB("nl")``, ``{{#expressie: 1+2*3}}`` expands to ``7``."""
    wikidb = DummyDB("nl")
    template_expander = expander.Expander(u"{{#expressie: 1+2*3}}", wikidb=wikidb)

    expanded = template_expander.expandTemplates()

    assert expanded == "7"
def test_localized_switch_default():
    """With ``DummyDB("nl")``, ``#standaard`` acts as the ``#switch`` default branch."""
    wikidb = DummyDB("nl")
    template_expander = expander.Expander(u"{{#switch: 1 | #standaard=foobar}}", wikidb=wikidb)

    expanded = template_expander.expandTemplates()

    assert expanded == "foobar"
def test_localized_expander():
    """With ``DummyDB("nl")``, ``{{#als: 1 | yes | no}}`` expands to ``yes``."""
    wikidb = DummyDB("nl")
    template_expander = expander.Expander(u"{{#als: 1 | yes | no}}", wikidb=wikidb)

    expanded = template_expander.expandTemplates()

    assert expanded == "yes"
|  [[Digital object identifier|DOI]]:[http://dx.doi.org/{{{doi|{{{doilabel|}}}}}} {{{doi}}}]. }}{{#if: {{{accessdate|}}} |  Retrieved on [[{{{accessdate}}}]]{{#if: {{{accessyear|}}} | , [[{{{accessyear}}}]] }}. }}{{#if: {{{accessmonthday|}}} |  Retrieved on {{{accessmonthday}}}{{#if: {{{accessyear|}}} | , {{{accessyear}}} }}. }}{{#if: {{{accessdaymonth|}}} |  Retrieved on {{{accessdaymonth}}}{{#if: {{{accessyear|}}} |  {{{accessyear}}} }}. }}{{#if: {{{quote|}}} | “{{{quote}}}” }}</includeonly><noinclude> {{pp-template|small=yes}} {{Documentation}} <!-- PLEASE ADD CATEGORIES AND INTERWIKIS TO THE /doc SUBPAGE, THANKS --> </noinclude> """ import time from mwlib import expander snippet = """ {{citeweb|url=http://www.webbyawards.com/webbys/winners-2004.php|title=Webby Awards 2004|publisher=The International Academy of Digital Arts and Sciences|date=2004|accessdate=2007-06-19}} """ db = expander.DictDB(citeweb=citeweb) e = expander.Expander(snippet * 1000, pagename='test', wikidb=db) stime = time.time() e.expandTemplates() print time.time() - stime