Пример #1
0
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Parse the mediawiki markup of the article *title* into a tree.

    When *raw* is omitted, the text is fetched with
    ``wikidb.getRawArticle(title, revision=revision)``.  If a truthy *wikidb*
    is given, templates are expanded before tokenizing, and a missing *lang*
    or *interwikimap* is looked up on the database when it provides
    ``getSource`` / ``getInterwikiMap``.  Each callable in ``postprocessors``
    is applied to the parsed article, which is then returned with its
    ``caption`` set to *title*.
    """
    assert title is not None, 'no title given'

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title,)

    if not wikidb:
        # No (truthy) wikidb: the raw text is tokenized as-is.
        markup = raw
    else:
        template_expander = expander.Expander(raw, pagename=title, wikidb=wikidb)
        markup = template_expander.expandTemplates()

        # Only consult the database for values the caller did not supply.
        if lang is None and hasattr(wikidb, 'getSource'):
            source_info = wikidb.getSource(title, revision=revision)
            if source_info:
                lang = source_info.get('language')

        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)

    token_stream = scanner.tokenize(markup, title)

    article_parser = parser.Parser(
        token_stream, title, lang=lang, interwikimap=interwikimap)
    article = article_parser.parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision,
                    wikidb=wikidb, lang=lang)

    return article
Пример #2
0
def _parseAtomFromString(s):
    """Tokenize *s* and parse a single atom from the resulting tokens.

    Returns the result of ``Parser.parseAtom()``.  If ``parseAtom`` raises,
    the exception is logged together with the offending input and ``None``
    is returned instead.  Errors raised while tokenizing or while
    constructing the parser are not caught.
    """
    from mwlib import scanner

    tokens = scanner.tokenize(s)
    p = Parser(tokens)
    try:
        return p.parseAtom()
    except Exception as err:
        # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
        # old comma form, which is a syntax error on Python 3.
        log.error("exception while parsing %r: %r" % (s, err))
        return None
Пример #3
0
def _parseAtomFromString(s, lang=None, interwikimap=None):
    """Tokenize *s* and parse a single atom from the resulting tokens.

    *lang* and *interwikimap* are passed through unchanged to ``Parser``.

    Returns the result of ``Parser.parseAtom()``.  If ``parseAtom`` raises,
    the exception is logged together with the offending input and ``None``
    is returned instead.  Errors raised while tokenizing or while
    constructing the parser are not caught.
    """
    from mwlib import scanner

    tokens = scanner.tokenize(s)
    p = Parser(tokens, lang=lang, interwikimap=interwikimap)
    try:
        return p.parseAtom()
    except Exception as err:
        # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
        # old comma form, which is a syntax error on Python 3.
        log.error("exception while parsing %r: %r" % (s, err))
        return None
Пример #4
0
def _parseAtomFromString(s):
    """Parse one atom out of the string *s*.

    The string is tokenized with ``scanner.tokenize`` and handed to a fresh
    ``Parser``.  The value of ``parseAtom()`` is returned; when that call
    raises, the error is logged along with *s* and ``None`` is returned.
    Failures in tokenizing or in building the parser propagate to the caller.
    """
    from mwlib import scanner

    tokens = scanner.tokenize(s)
    p = Parser(tokens)
    try:
        return p.parseAtom()
    except Exception as err:
        # The "as" spelling parses on both Python 2.6+ and Python 3; the
        # former "except Exception, err" only parses on Python 2.
        log.error("exception while parsing %r: %r" % (s, err))
        return None
Пример #5
0
def _parseAtomFromString(s, lang=None, interwikimap=None):
    """Parse one atom out of the string *s*.

    The string is tokenized with ``scanner.tokenize`` and handed to a fresh
    ``Parser`` together with *lang* and *interwikimap*.  The value of
    ``parseAtom()`` is returned; when that call raises, the error is logged
    along with *s* and ``None`` is returned.  Failures in tokenizing or in
    building the parser propagate to the caller.
    """
    from mwlib import scanner

    tokens = scanner.tokenize(s)
    p = Parser(tokens, lang=lang, interwikimap=interwikimap)
    try:
        return p.parseAtom()
    except Exception as err:
        # The "as" spelling parses on both Python 2.6+ and Python 3; the
        # former "except Exception, err" only parses on Python 2.
        log.error("exception while parsing %r: %r" % (s, err))
        return None
def simpleparse(raw):
    """Tokenize and parse *raw*, dump the tree to stdout and return it.

    !!! USE FOR DEBUGGING ONLY !!!  The postprocessors are not applied to
    the result, and the article title passed to the parser is always the
    placeholder "unknown".
    """
    import sys

    # The original built a DummyDB here that was never used; it has been
    # dropped because nothing in this function reads it.
    tokens = scanner.tokenize(raw)
    r = parser.Parser(tokens, "unknown").parse()
    parser.show(sys.stdout, r, 0)
    return r
Пример #7
0
def simpleparse(raw):
    """Parse *raw* without postprocessing and print the resulting tree.

    !!! USE FOR DEBUGGING ONLY !!!  No postprocessors run on the parsed
    article.  The text is tokenized, parsed under the placeholder title
    "unknown", shown on ``sys.stdout`` and returned.
    """
    import sys

    # An unused DummyDB instance used to be created here; it is removed
    # because the function never referenced it.
    tokens = scanner.tokenize(raw)
    r = parser.Parser(tokens, "unknown").parse()
    parser.show(sys.stdout, r, 0)
    return r
Пример #8
0
    def write_wiki_html(self, htmlout, title, article_text):
        """Parse *article_text* and write it to *htmlout* through WPHTMLWriter.

        The parsed article gets *title* as its caption.  After writing, the
        writer's ``links_list`` is stored in ``self.links_cache`` under
        *title*.

        Returns the writer's ``math_processed`` attribute.
        """
        token_stream = scanner.tokenize(article_text, title)

        article = parser.Parser(token_stream, title).parse()
        article.caption = title

        # Images are looked up below the instance's base path.
        image_store = WPImageDB(self.base_path + "/images/")
        html_writer = WPHTMLWriter(
            self.wikidb.dataretriever,
            htmlout,
            images=image_store,
            lang=self.lang)
        html_writer.write(article)

        self.links_cache[title] = html_writer.links_list
        return html_writer.math_processed
Пример #9
0
    def write_wiki_html(self, htmlout, title, article_text):
        """Render the wiki markup in *article_text* to *htmlout*.

        Tokenizes and parses the text, sets the article caption to *title*,
        and writes it with a WPHTMLWriter.  The links collected by the writer
        (``links_list``) are stored in ``self.links_cache[title]``.

        Returns the writer's ``math_processed`` attribute.
        """
        wiki_tokens = scanner.tokenize(article_text, title)

        parsed_article = parser.Parser(wiki_tokens, title).parse()
        parsed_article.caption = title

        images = WPImageDB(self.base_path + '/images/')
        html_writer = WPHTMLWriter(
            self.wikidb.dataretriever, htmlout, images=images, lang=self.lang)
        html_writer.write(parsed_article)

        # Keep the link list so it can be looked up by title later.
        self.links_cache[title] = html_writer.links_list
        return html_writer.math_processed
Пример #10
0
    def write_wiki_html(self, htmlout, title, article_text):
        """Render the wiki markup in *article_text* to *htmlout*.

        Tokenizes and parses the text, sets the article caption to *title*,
        and writes it with a WPHTMLWriter built from ``self.index``, an image
        database under ``self.base_path`` and ``self.lang``.

        Returns the writer's ``math_processed`` attribute.
        """
        wiki_tokens = scanner.tokenize(article_text, title)

        parsed_article = parser.Parser(wiki_tokens, title).parse()
        parsed_article.caption = title

        images = WPImageDB(self.base_path + '/images/')
        html_writer = WPHTMLWriter(
            self.index, htmlout, images=images, lang=self.lang)
        html_writer.write(parsed_article)

        return html_writer.math_processed
Пример #11
0
def main():
    """Parse each file named on the command line and print its parse tree.

    Every argument in ``sys.argv[1:]`` is read and decoded as UTF-8, has its
    templates expanded against a ``DummyDB`` instance, and is then tokenized
    and parsed.  The parser is given the file's base name as the title, and
    the resulting tree is written to ``sys.stdout`` with ``show``.
    """
    # Imported once up front instead of on every loop iteration.
    from mwlib import expander
    from mwlib.dummydb import DummyDB

    db = DummyDB()

    for x in sys.argv[1:]:
        # Close the file deterministically instead of leaking the handle
        # until garbage collection.
        with open(x) as f:
            raw = unicode(f.read(), 'utf8')

        te = expander.Expander(raw, pagename=x, wikidb=db)
        expanded = te.expandTemplates()

        tokens = tokenize(expanded, x)

        p = Parser(tokens, os.path.basename(x))
        r = p.parse()

        show(sys.stdout, r, 0)
Пример #12
0
def main():
    """Dump the parse tree of every file given on the command line.

    For each path in ``sys.argv[1:]`` the file content is decoded as UTF-8,
    templates are expanded using a ``DummyDB`` instance as the wiki database,
    and the expanded text is tokenized and parsed with the file's base name
    as the title.  The parsed tree is printed to ``sys.stdout`` via ``show``.
    """
    # Hoisted out of the loop: the import does not depend on the file.
    from mwlib import expander
    from mwlib.dummydb import DummyDB

    db = DummyDB()

    for x in sys.argv[1:]:
        # Use a context manager so the file handle is released as soon as
        # the content has been read.
        with open(x) as f:
            raw = unicode(f.read(), 'utf8')

        te = expander.Expander(raw, pagename=x, wikidb=db)
        expanded = te.expandTemplates()

        tokens = tokenize(expanded, x)

        p = Parser(tokens, os.path.basename(x))
        r = p.parse()

        show(sys.stdout, r, 0)
Пример #13
0
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Parse the mediawiki markup of the article *title* into a tree.

    When *raw* is omitted, the text is fetched with
    ``wikidb.getRawArticle(title, revision=revision)``.  If a truthy *wikidb*
    is given, templates are expanded before tokenizing.  Each callable in
    ``postprocessors`` is applied to the parsed article, which is returned
    with its ``caption`` set to *title*.
    """
    assert title is not None

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title,)

    if not wikidb:
        # No (truthy) wikidb: the raw text is tokenized as-is.
        markup = raw
    else:
        template_expander = expander.Expander(raw, pagename=title, wikidb=wikidb)
        markup = template_expander.expandTemplates()

    token_stream = scanner.tokenize(markup, title)

    article = parser.Parser(token_stream, title).parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article)

    return article
Пример #14
0
def _parse(txt):
    """Parse *txt* and try to return a more specific inner node.

    The parsed article is returned (re-classed as ``parser.Node``) unless it
    has exactly one child.  In that case the child is used instead; a child
    whose class is ``parser.Paragraph`` is re-classed as ``parser.Node``.  If
    that child in turn has exactly one child, the grandchild is returned,
    otherwise the child itself.
    """
    from mwlib import scanner, parser

    token_stream = scanner.tokenize(txt)
    article = parser.Parser(token_stream, "unknown").parse()

    # The parse result is a parser.Article.
    if len(article.children) == 1:
        inner = article.children[0]
        if inner.__class__ == parser.Paragraph:
            inner.__class__ = parser.Node

        if len(inner.children) == 1:
            return inner.children[0]
        return inner

    article.__class__ = parser.Node
    return article
Пример #15
0
def _parse(txt):
    """Parse *txt* and hand back the innermost unambiguous node.

    An article without exactly one child is returned as a whole, with its
    class switched to ``parser.Node``.  Otherwise the single child is
    examined: if its class is ``parser.Paragraph`` it is switched to
    ``parser.Node``, and if it has exactly one child of its own, that
    grandchild is returned instead of the child.
    """
    from mwlib import scanner, parser

    parsed = parser.Parser(scanner.tokenize(txt), "unknown").parse()

    # parsed is a parser.Article.
    if len(parsed.children) == 1:
        only_child = parsed.children[0]
        if only_child.__class__ == parser.Paragraph:
            only_child.__class__ = parser.Node

        if len(only_child.children) != 1:
            return only_child
        return only_child.children[0]

    parsed.__class__ = parser.Node
    return parsed
Пример #16
0
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Turn the mediawiki text of article *title* into a parsed article.

    *raw* is loaded through ``wikidb.getRawArticle`` when it is not passed
    in.  With a truthy *wikidb* the templates in the text are expanded before
    tokenizing; without one the text is used unchanged.  The parsed article
    gets *title* as its caption and is run through every entry of
    ``postprocessors`` before being returned.
    """
    assert title is not None

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    if wikidb:
        text = expander.Expander(
            raw, pagename=title, wikidb=wikidb).expandTemplates()
    else:
        text = raw

    token_stream = scanner.tokenize(text, title)

    article = parser.Parser(token_stream, title).parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article)

    return article
Пример #17
0
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Turn the mediawiki text of article *title* into a parsed article.

    *raw* is loaded through ``wikidb.getRawArticle`` when it is not passed
    in.  With a truthy *wikidb* the templates in the text are expanded, and
    *lang* / *interwikimap* are filled in from ``wikidb.getSource`` /
    ``wikidb.getInterwikiMap`` when the caller left them as ``None`` and the
    database has those methods.  The parsed article gets *title* as its
    caption and is run through every entry of ``postprocessors`` before
    being returned.
    """
    assert title is not None, 'no title given'

    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    if not wikidb:
        # No (truthy) wikidb: the text is tokenized unchanged.
        text = raw
    else:
        template_expander = expander.Expander(raw, pagename=title, wikidb=wikidb)
        text = template_expander.expandTemplates()

        # Explicit arguments win; the database is only a fallback.
        if lang is None and hasattr(wikidb, 'getSource'):
            source_info = wikidb.getSource(title, revision=revision)
            if source_info:
                lang = source_info.get('language')

        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)

    token_stream = scanner.tokenize(text, title)

    article_parser = parser.Parser(
        token_stream, title, lang=lang, interwikimap=interwikimap)
    article = article_parser.parse()
    article.caption = title

    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision,
                    wikidb=wikidb, lang=lang)

    return article