Exemplo n.º 1
0
def show():
    parser = optparse.OptionParser()
    parser.add_option("-c", "--config", help="configuration file/URL/shortcut")
    parser.add_option("-e",
                      "--expand",
                      action="store_true",
                      help="expand templates")
    parser.add_option("-t",
                      "--template",
                      action="store_true",
                      help="show template")
    parser.add_option("-f", help='read input from file. implies -e')

    options, args = parser.parse_args()

    if not args and not options.f:
        parser.error("missing ARTICLE argument")

    articles = [unicode(x, 'utf-8') for x in args]

    conf = options.config
    if not conf:
        parser.error("missing --config argument")

    from mwlib import wiki, expander

    db = wiki.makewiki(conf).wiki

    for a in articles:
        if options.template:
            defaultns = 10
        else:
            defaultns = 0

        page = db.normalize_and_get_page(a, defaultns)
        if page:
            raw = page.rawtext
        else:
            raw = None

        if raw:
            if options.expand:
                te = expander.Expander(raw, pagename=a, wikidb=db)
                raw = te.expandTemplates()

            print raw.encode("utf-8")
    if options.f:
        raw = unicode(open(options.f).read(), 'utf-8')
        te = expander.Expander(raw, pagename='test', wikidb=db)
        raw = te.expandTemplates()
        print raw.encode("utf-8")
Exemplo n.º 2
0
    def expandArticle(self, article_text, title):
        """Return ``article_text`` with its templates expanded.

        The expander is created with ``title`` as the page name, this object
        as the wiki database, and this object's ``lang``, ``templateprefix``
        and ``templateblacklist`` settings.
        """
        return expander.Expander(
            article_text,
            pagename=title,
            wikidb=self,
            lang=self.lang,
            templateprefix=self.templateprefix,
            templateblacklist=self.templateblacklist,
        ).expandTemplates()
Exemplo n.º 3
0
def test_undefined_variable():
    db = DictDB(Art="{{Pipe}}",
                Pipe="{{{undefined_variable}}}")

    te = expander.Expander(db.normalize_and_get_page(
        "Art", 0).rawtext, pagename="thispage", wikidb=db)
    res = te.expandTemplates()
    print "EXPANDED:", repr(res)
    assert u"{{{undefined_variable}}}" in res, "wrong expansion for undefined variable"
Exemplo n.º 4
0
def test_alfred():
    """I start to hate that Alfred_Gusenbauer"""
    db = DictDB(a="{{ibox2|birth_date=1960}}",
                ibox2="{{{birth{{#if:{{{birthdate|}}}||_}}date}}}")
    te = expander.Expander(db.normalize_and_get_page("a", 0).rawtext,
                           pagename="thispage",
                           wikidb=db)
    res = te.expandTemplates()
    print "EXPANDED:", repr(res)
    assert "1960" in res
Exemplo n.º 5
0
def test_preprocess_uniq_after_comment():
    s = u"""
<!--
these <ref> tags should be ignored: <ref>
-->

foo was missing<ref>bar</ref> <!-- some comment--> baz


<references />
"""
    e = expander.Expander(s, pagename="test", wikidb=DictDB())
    raw = e.expandTemplates()
    print repr(raw)
    assert u"foo was missing" in raw, "text is missing"
Exemplo n.º 6
0
def test_pipe_table():

    db = DictDB(Foo="""
bla
{{{ {{Pipe}}}
blubb
""",
                   Pipe="|")

    te = expander.Expander(db.normalize_and_get_page("Foo", 0).rawtext, pagename="thispage", wikidb=db)
    res = te.expandTemplates()

    print "EXPANDED:", repr(res)
    assert "bla" in res
    assert "blubb" in res
Exemplo n.º 7
0
def main():
    """Expand, tokenize, parse and dump every file named on the command line.

    Each path in ``sys.argv[1:]`` is read and decoded as UTF-8, its templates
    are expanded against an empty ``DummyDB``, the result is tokenized and
    parsed (with the file's basename as the parser's name), and the parse
    result is handed to ``show`` for output on stdout.
    """
    from mwlib import expander
    from mwlib.dummydb import DummyDB
    db = DummyDB()

    for path in sys.argv[1:]:
        # Close the file right after reading instead of leaking the handle.
        with open(path) as infile:
            text = unicode(infile.read(), 'utf8')

        te = expander.Expander(text, pagename=path, wikidb=db)
        expanded = te.expandTemplates()

        tokens = tokenize(expanded, path)

        p = Parser(tokens, os.path.basename(path))
        r = p.parse()

        show(sys.stdout, r, 0)
Exemplo n.º 8
0
def extract_metadata(raw, fields, template_name="saved_book"):
    """Extract the values passed to a template invocation inside ``raw``.

    A synthetic template called ``template_name`` is built that emits, for
    every requested field, the module-level ``uniq`` marker followed by the
    field name, a newline and the field's value.  ``raw`` is expanded
    against a database holding only that template and the output is split
    on the marker again.

    An extra empty field is appended so that the value of the last real
    field is terminated by a marker; the chunk belonging to that sentinel
    and everything before the first marker are discarded.

    Returns a ``defaultdict(unicode)`` mapping field names to their
    whitespace-stripped values.
    """
    names = list(fields) + [""]
    pieces = [u"%s%s\n{{{%s|}}}\n" % (uniq, name, name) for name in names]
    db = expander.DictDB({template_name: "".join(pieces)})

    expanded = expander.Expander(raw, pagename="", wikidb=db).expandTemplates()

    result = defaultdict(unicode)
    for chunk in expanded.split(uniq)[1:-1]:
        name, value = chunk.split("\n", 1)
        result[name] = value.strip()

    return result
Exemplo n.º 9
0
def test_switch_default():
    db = DictDB(
        Bonn="""{{Infobox
|Bundesland         = Nordrhein-Westfalen
}}
""",
        Infobox="""{{#switch: {{{Bundesland}}}
        | Bremen = [[Bremen (Land)|Bremen]]
        | #default = [[{{{Bundesland|Bayern}}}]]
}}
""")

    te = expander.Expander(db.normalize_and_get_page("Bonn", 0).rawtext, pagename="thispage", wikidb=db)
    res = te.expandTemplates()

    print "EXPANDED:", repr(res)
    assert "Nordrhein-Westfalen" in res
Exemplo n.º 10
0
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Parse raw MediaWiki text into an article tree.

    ``title`` is mandatory.  When ``raw`` is None the text is fetched from
    ``wikidb`` via ``getRawArticle(title, revision=revision)``.  When a
    ``wikidb`` is given, templates are expanded before tokenizing;
    otherwise the raw text is tokenized as is.

    Returns the parsed article with ``caption`` set to ``title`` after all
    ``postprocessors`` have been applied to it.
    """
    assert title is not None

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    text = raw
    if wikidb:
        text = expander.Expander(
            raw, pagename=title, wikidb=wikidb).expandTemplates()

    article = parser.Parser(scanner.tokenize(text, title), title).parse()
    article.caption = title
    for postprocess in postprocessors:
        postprocess(article)
    return article
Exemplo n.º 11
0
def show():
    parser = optparse.OptionParser(
        usage="%prog [-e|--expand] --conf CONF ARTICLE [...]")
    parser.add_option("-c", "--conf", help="config file")
    parser.add_option("-e",
                      "--expand",
                      action="store_true",
                      help="expand templates")
    parser.add_option("-t",
                      "--template",
                      action="store_true",
                      help="show template")

    options, args = parser.parse_args()

    if not args:
        parser.error("missing ARTICLE argument")

    articles = [unicode(x, 'utf-8') for x in args]

    conf = options.conf
    if not conf:
        parser.error("missing --conf argument")

    from mwlib import wiki, expander

    db = wiki.makewiki(conf)['wiki']

    for a in articles:
        if options.template:
            raw = db.getTemplate(a)
        else:
            raw = db.getRawArticle(a)

        if raw:
            if options.expand:
                te = expander.Expander(raw, pagename=a, wikidb=db)
                raw = te.expandTemplates()

            print raw.encode("utf-8")
Exemplo n.º 12
0
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Parse raw MediaWiki text into an article tree.

    ``title`` is mandatory.  When ``raw`` is None the text is fetched from
    ``wikidb`` via ``getRawArticle(title, revision=revision)``.

    When a ``wikidb`` is given, templates are expanded first and missing
    settings are filled in from it where it offers them: ``lang`` from the
    ``'language'`` entry of ``getSource(...)`` and ``interwikimap`` from
    ``getInterwikiMap(...)``.  Without a ``wikidb`` the raw text is
    tokenized unchanged.

    Returns the parsed article with ``caption`` set to ``title`` after all
    ``postprocessors`` have been applied to it.
    """
    assert title is not None, 'no title given'

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    text = raw
    if wikidb:
        text = expander.Expander(
            raw, pagename=title, wikidb=wikidb).expandTemplates()

        if lang is None and hasattr(wikidb, 'getSource'):
            source = wikidb.getSource(title, revision=revision)
            if source:
                lang = source.get('language')

        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)

    tokens = scanner.tokenize(text, title)
    article_parser = parser.Parser(
        tokens, title, lang=lang, interwikimap=interwikimap)

    article = article_parser.parse()
    article.caption = title
    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision,
                    wikidb=wikidb, lang=lang)

    return article
Exemplo n.º 13
0
def parseString(title=None,
                raw=None,
                wikidb=None,
                revision=None,
                lang=None,
                magicwords=None,
                expandTemplates=True):
    """Parse raw MediaWiki text into an article tree.

    ``title`` is mandatory.  When ``raw`` is None the text is taken from
    ``wikidb.normalize_and_get_page(title, 0)``.

    When a ``wikidb`` is given:
      * templates are expanded unless ``expandTemplates`` is false, and the
        expander and its ``uniquifier`` are passed on to the parser;
      * ``siteinfo`` is read via ``get_siteinfo()`` if the db offers it;
      * a source object comes from ``getSource(...)`` if offered, falling
        back to ``metabook.source()``;
      * a missing ``lang`` is taken from the source, and missing
        ``magicwords`` from siteinfo, or else from the source.

    The namespace handler is built from siteinfo when available, otherwise
    from ``lang``.

    Returns the parsed article.  Its ``caption`` is the expander's
    ``magic_displaytitle`` when that is set, else ``title``.  All
    postprocessors from ``mwlib.old_uparser`` are applied before returning.
    """
    assert title is not None, 'no title given'

    uniquifier = None
    siteinfo = None
    te = None

    if raw is None:
        page = wikidb.normalize_and_get_page(title, 0)
        raw = page.rawtext if page else None
        assert raw is not None, "cannot get article %r" % (title, )

    text = raw
    if wikidb:
        if expandTemplates:
            te = expander.Expander(raw, pagename=title, wikidb=wikidb)
            text = te.expandTemplates(True)
            uniquifier = te.uniquifier

        if hasattr(wikidb, 'get_siteinfo'):
            siteinfo = wikidb.get_siteinfo()

        source = None
        if hasattr(wikidb, 'getSource'):
            source = wikidb.getSource(title, revision=revision)
            assert not isinstance(source, dict)
        if not source:
            source = metabook.source()

        if lang is None:
            lang = source.language

        if magicwords is None:
            has_site_magicwords = (siteinfo is not None
                                   and 'magicwords' in siteinfo)
            if has_site_magicwords:
                magicwords = siteinfo['magicwords']
            else:
                magicwords = source.get('magicwords')

    if siteinfo is not None:
        nshandler = nshandling.nshandler(siteinfo)
    else:
        nshandler = nshandling.get_nshandler_for_lang(lang)

    article = compat.parse_txt(text,
                               title=title,
                               wikidb=wikidb,
                               nshandler=nshandler,
                               lang=lang,
                               magicwords=magicwords,
                               uniquifier=uniquifier,
                               expander=te)

    # A display title set by the expander overrides the plain title.
    article.caption = title
    if te and te.magic_displaytitle:
        article.caption = te.magic_displaytitle

    from mwlib.old_uparser import postprocessors
    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision,
                    wikidb=wikidb, lang=lang)

    return article
Exemplo n.º 14
0
def test_resolve_magic_alias():
    """``resolve_magic_alias`` maps a localized name to its canonical one.

    With a ``DummyDB("nl")`` the alias ``#als`` has to resolve to ``#if``,
    and an unknown name has to resolve to None.
    """
    template_expander = expander.Expander(
        u"{{#als: 1 | yes | no}}", wikidb=DummyDB("nl"))

    assert template_expander.resolve_magic_alias(u"#als") == u"#if"
    assert template_expander.resolve_magic_alias(u"#foobar") is None
Exemplo n.º 15
0
def test_localized_expr():
    """The localized ``#expressie`` function must evaluate its expression.

    With a ``DummyDB("nl")``, ``1+2*3`` has to expand to ``7``.
    """
    template_expander = expander.Expander(
        u"{{#expressie: 1+2*3}}", wikidb=DummyDB("nl"))
    expanded = template_expander.expandTemplates()
    assert expanded == "7"
Exemplo n.º 16
0
def test_localized_switch_default():
    """The localized ``#standaard`` key must act as the ``#switch`` default.

    With a ``DummyDB("nl")``, a switch on ``1`` that only has a
    ``#standaard`` branch has to expand to that branch's value.
    """
    template_expander = expander.Expander(
        u"{{#switch: 1 | #standaard=foobar}}", wikidb=DummyDB("nl"))
    expanded = template_expander.expandTemplates()
    assert expanded == "foobar"
Exemplo n.º 17
0
def test_localized_expander():
    """The localized ``#als`` function must behave like a conditional.

    With a ``DummyDB("nl")``, ``{{#als: 1 | yes | no}}`` has to expand to
    ``yes``.
    """
    template_expander = expander.Expander(
        u"{{#als: 1 | yes | no}}", wikidb=DummyDB("nl"))
    expanded = template_expander.expandTemplates()
    assert expanded == "yes"
Exemplo n.º 18
0
  | &#32;[[Digital object identifier|DOI]]:[http://dx.doi.org/{{{doi|{{{doilabel|}}}}}} {{{doi}}}].
}}{{#if: {{{accessdate|}}}
  | &#32;Retrieved on [[{{{accessdate}}}]]{{#if: {{{accessyear|}}} | , [[{{{accessyear}}}]] }}.
}}{{#if: {{{accessmonthday|}}}
  | &#32;Retrieved on {{{accessmonthday}}}{{#if: {{{accessyear|}}} | , {{{accessyear}}} }}.
}}{{#if: {{{accessdaymonth|}}}
  | &#32;Retrieved on {{{accessdaymonth}}}{{#if: {{{accessyear|}}} | &#32;{{{accessyear}}} }}.
}}{{#if: {{{quote|}}}
  | &nbsp;“{{{quote}}}”
}}</includeonly><noinclude>

{{pp-template|small=yes}}
{{Documentation}}
<!-- PLEASE ADD CATEGORIES AND INTERWIKIS TO THE /doc SUBPAGE, THANKS -->
</noinclude>
"""


import time
from mwlib import expander

# Benchmark input: one invocation of the "citeweb" template with several
# named arguments.
snippet = """
{{citeweb|url=http://www.webbyawards.com/webbys/winners-2004.php|title=Webby Awards 2004|publisher=The International Academy of Digital Arts and Sciences|date=2004|accessdate=2007-06-19}}
"""

# Database holding a single template named "citeweb".  The `citeweb` value is
# defined earlier in the file -- presumably the template source whose string
# literal ends just above; confirm there.
db = expander.DictDB(citeweb=citeweb)
# The snippet is repeated 1000 times to get a measurable amount of work.
e = expander.Expander(snippet * 1000, pagename='test', wikidb=db)
# Only expandTemplates() is timed; constructing the Expander is excluded.
stime = time.time()
e.expandTemplates()
# Elapsed wall-clock time in seconds.
print time.time() - stime