def get_template_job(name):
    """Fetch one template through a fresh RecordDB and report progress.

    ``self`` and ``wikidb`` are not parameters of this function; they are
    read from the enclosing scope. The value returned by ``getTemplate`` is
    discarded (presumably RecordDB records the template as a side effect --
    confirm against RecordDB).

    @param name: name of the template to fetch
    """
    recorddb = RecordDB(wikidb, self.articles, self.templates, self.sources)
    try:
        recorddb.getTemplate(name)
    finally:
        # Runs whether or not getTemplate() raised, so failed fetches are
        # counted towards the progress figure as well.
        self.template_count += 1
        # Only compute a percentage when there is a non-zero total: a
        # ZeroDivisionError raised inside this ``finally`` would replace any
        # exception that is propagating from getTemplate().
        if self.fetchtemplate_status and self.num_templates:
            self.fetchtemplate_status(
                progress=self.template_count*100/self.num_templates
            )
Пример #2
0
    def addArticle(self, title, revision=None, wikidb=None, imagedb=None):
        """Add article with given title and revision to ZIP file, together
        with the templates and images it references.

        Nothing happens when ``title`` is already a key of ``self.articles``.
        If the fetched text matches ``self.redirect_rex``, the redirect is
        followed once; the redirect target is fetched without a revision.
        When either fetch yields ``None`` a warning is logged and the method
        returns before parsing and before the article counter is advanced.

        @param title: article title
        @type title: unicode

        @param revision: article revision (optional)
        @type revision: int

        @param wikidb: WikiDB to use

        @param imagedb: ImageDB to use (optional)
        """
        if title in self.articles:
            return

        # Register the title before fetching, so a second call for the same
        # title hits the early return above.
        self.articles[title] = {}
        self.status(article=title)

        db = RecordDB(wikidb, self.articles, self.templates, self.sources)
        wikitext = db.getRawArticle(title, revision=revision)
        if wikitext is None:
            log.warn('Could not get article %r' % title)
            return

        redirect = self.redirect_rex.search(wikitext)
        if redirect:
            target = redirect.group('redirect')
            wikitext = db.getRawArticle(target)
            if wikitext is None:
                log.warn('Could not get redirected article %r (from %r)' %
                         (target, title))
                return

        # The article is parsed under its original title and revision, even
        # when the text came from a redirect target.
        self.parseArticle(title, revision=revision, raw=wikitext,
                          wikidb=wikidb, imagedb=imagedb)

        self.article_count += 1
        if not self.num_articles:
            return
        self.status(progress=self.article_count * 100 // self.num_articles)
Пример #3
0
 def fetch_article_job(job_id):
     """Fetch the raw text of ``title`` and, if it is a redirect, of its target.

     ``self``, ``wikidb``, ``title`` and ``revision`` are read from the
     enclosing scope; ``job_id`` is not used in the body. The fetched text is
     not returned -- the function always returns ``None`` and only logs a
     warning when a fetch yields ``None``.

     @param job_id: identifier supplied by the caller (unused here)
     """
     db = RecordDB(wikidb, self.articles, self.templates, self.sources)

     text = db.getRawArticle(title, revision=revision)
     if text is None:
         log.warn('Could not get article %r' % title)
         return

     match = self.redirect_rex.search(text)
     if not match:
         return

     # Follow the redirect once; the target is fetched without a revision.
     target = match.group('redirect')
     if db.getRawArticle(target) is None:
         log.warn('Could not get redirected article %r (from %r)' %
                  (target, title))
Пример #4
0
 def fetch_article_job(job_id):
     """Pull the wikitext for ``title`` through a RecordDB, following one redirect.

     Relies on ``self``, ``wikidb``, ``title`` and ``revision`` from the
     enclosing scope. ``job_id`` is accepted but never read. Nothing is
     returned; a failed fetch (``None`` result) is reported via ``log.warn``.

     @param job_id: identifier supplied by the caller (unused here)
     """
     fetcher = RecordDB(
         wikidb,
         self.articles,
         self.templates,
         self.sources,
     )
     article_text = fetcher.getRawArticle(title, revision=revision)
     if article_text is None:
         log.warn('Could not get article %r' % title)
         return

     redirect_match = self.redirect_rex.search(article_text)
     if redirect_match:
         # The redirect target is requested without a revision.
         redirect_title = redirect_match.group('redirect')
         article_text = fetcher.getRawArticle(redirect_title)
         if article_text is None:
             names = (redirect_title, title)
             log.warn('Could not get redirected article %r (from %r)' % names)
             return
Пример #5
0
 def addArticle(
     self,
     title,
     revision=None,
     wikidb=None,
     imagedb=None,
 ):
     """Add article with given title and revision to ZIP file; referenced
     templates and images are added as well.

     A title already present in ``self.articles`` is skipped. Text matching
     ``self.redirect_rex`` is treated as a redirect and followed once (the
     target is fetched without a revision). If a fetch returns ``None`` a
     warning is logged and the method returns without parsing or counting
     the article.

     @param title: article title
     @type title: unicode

     @param revision: article revision (optional)
     @type revision: int

     @param wikidb: WikiDB to use

     @param imagedb: ImageDB to use (optional)
     """
     if title in self.articles:
         return

     # The entry is created up front, before any fetch is attempted.
     self.articles[title] = {}
     self.status(article=title)

     source = RecordDB(wikidb, self.articles, self.templates, self.sources)
     text = source.getRawArticle(title, revision=revision)
     if text is None:
         log.warn('Could not get article %r' % title)
         return

     hit = self.redirect_rex.search(text)
     if hit:
         redirect_title = hit.group('redirect')
         text = source.getRawArticle(redirect_title)
         if text is None:
             log.warn('Could not get redirected article %r (from %r)' % (
                 redirect_title, title
             ))
             return

     # Parsing uses the requested title/revision, not the redirect target's.
     parse_kwargs = dict(
         revision=revision,
         raw=text,
         wikidb=wikidb,
         imagedb=imagedb,
     )
     self.parseArticle(title, **parse_kwargs)

     self.article_count += 1
     if self.num_articles:
         percent = self.article_count*100//self.num_articles
         self.status(progress=percent)
 def fetch_article_job(job_id):
     """Fetch ``title`` (following one redirect) and report status/progress.

     ``self``, ``wikidb``, ``title`` and ``revision`` come from the enclosing
     scope; ``job_id`` is not used in the body. The fetched text is not
     returned. The article counter is advanced whether or not the fetch
     succeeded; failures are only logged.

     @param job_id: identifier supplied by the caller (unused here)
     """
     if self.fetcharticle_status:
         self.fetcharticle_status(article=title)
     recorddb = RecordDB(wikidb, self.articles, self.templates, self.sources)
     raw = recorddb.getRawArticle(title, revision=revision)
     if raw is None:
         log.warn('Could not get article %r' % title)
     else:
         mo = self.redirect_rex.search(raw)
         if mo:
             # The redirect target is fetched without a revision.
             raw = recorddb.getRawArticle(mo.group('redirect'))
             if raw is None:
                 log.warn('Could not get redirected article %r (from %r)' % (
                     mo.group('redirect'), title
                 ))
     self.article_count += 1
     # Skip the percentage when the total is zero/unset: dividing by it would
     # raise ZeroDivisionError. addArticle() guards num_articles the same way.
     if self.fetcharticle_status and self.num_articles:
         self.fetcharticle_status(progress=self.article_count*100/self.num_articles)
Пример #7
0
    def parseArticle(self, title, revision=None, raw=None, wikidb=None,
                     imagedb=None):
        """Parse article with given title, revision and raw wikitext, adding
        all referenced templates and images, but not the article itself.

        The text is parsed with ``uparser.parseString`` using a RecordDB that
        wraps ``wikidb`` (presumably this is how referenced templates get
        recorded -- confirm against RecordDB). Parsing always runs; when
        ``imagedb`` is ``None`` the method returns right afterwards and no
        image is added.

        @param title: title of article
        @type title: unicode

        @param revision: revision of article (optional)
        @type revision: int

        @param raw: wikitext of article
        @type raw: unicode

        @param wikidb: WikiDB to use

        @param imagedb: ImageDB to use (optional)
        """
        recorder = RecordDB(wikidb, self.articles, self.templates,
                            self.sources)
        tree = uparser.parseString(title, revision=revision, raw=raw,
                                   wikidb=recorder)
        if imagedb is None:
            return

        for child in tree.allchildren():
            if isinstance(child, parser.ImageLink):
                target = child.target
            elif (isinstance(child, parser.TagNode)
                  and child.caption == 'imagemap'):
                # An imagemap tag only yields an image when both its
                # ``imagemap`` attribute and that object's ``imagelink``
                # are present and not None.
                image_map = getattr(child, 'imagemap', None)
                if image_map is None:
                    continue
                link = getattr(image_map, 'imagelink', None)
                if link is None:
                    continue
                target = link.target
            else:
                continue
            # Images are added with the plain wikidb, not the RecordDB.
            self.addImage(target, imagedb=imagedb, wikidb=wikidb)
Пример #8
0
 def get_template_job(name):
     """Request template ``name`` through a freshly built RecordDB.

     ``self`` and ``wikidb`` are taken from the enclosing scope. The result
     of ``getTemplate`` is discarded and the function returns ``None``.

     @param name: name of the template to fetch
     """
     RecordDB(
         wikidb,
         self.articles,
         self.templates,
         self.sources,
     ).getTemplate(name)
Пример #9
0
 def get_template_job(name):
     """Look up template ``name`` via a new RecordDB, ignoring the result.

     Uses ``self`` and ``wikidb`` from the enclosing scope; always returns
     ``None``.

     @param name: name of the template to fetch
     """
     db_args = (wikidb, self.articles, self.templates, self.sources)
     template_db = RecordDB(*db_args)
     template_db.getTemplate(name)