def getTemplate(self, title, followRedirects=True):
     """Return the template text for *title*, preferring ``self.docs``.

     The title is split with ``namespace.NS_TEMPLATE`` as the default
     namespace and the resulting ``(ns, partial)`` pair is looked up in
     ``self.docs``.  On a miss the request is forwarded unchanged to
     ``self.db.getTemplate``.
     """
     ns, partial, _full = namespace.splitname(title, defaultns=namespace.NS_TEMPLATE)
     key = (ns, partial)
     try:
         local_text = self.docs[key]
     except KeyError:
         # Not overridden locally; fall through to the wrapped database.
         pass
     else:
         return local_text
     return self.db.getTemplate(title, followRedirects=followRedirects)
 def getRawArticle(self, title, revision=None):
     """Return the article text for *title*, preferring ``self.docs``.

     The title is split into ``(ns, partial, full)`` and the
     ``(ns, partial)`` pair is looked up in ``self.docs``.  On a miss the
     request (including *revision*) is forwarded to
     ``self.db.getRawArticle``.
     """
     ns, partial, _full = namespace.splitname(title)
     key = (ns, partial)
     try:
         local_text = self.docs[key]
     except KeyError:
         # Not overridden locally; fall through to the wrapped database.
         pass
     else:
         return local_text
     return self.db.getRawArticle(title, revision=revision)
 def getRawArticle(self, name, revision=None):
     """Fetch an article from ``self.db`` and record what was fetched.

     Returns whatever ``self.db.getRawArticle`` returned.  ``None`` results
     are returned immediately and nothing is recorded.  Pages in the
     template namespace are recorded in ``self.templates`` under their
     partial name; everything else is recorded in ``self.articles`` under
     *name*, together with its URL and authors.  If the wrapped database
     offers ``getSource`` and the source has a ``'url'``, the article entry
     gets a ``'source-url'`` and the source is added to ``self.sources``
     unless that URL is already present.
     """
     raw = self.db.getRawArticle(name, revision=revision)
     if raw is None:
         return None

     ns, partial, _full = namespace.splitname(name)
     if ns == namespace.NS_TEMPLATE:
         self.templates[partial] = {'content-type': 'text/x-wiki', 'content': raw}
         return raw

     entry = {
         'revision': revision,
         'content-type': 'text/x-wiki',
         'content': raw,
         'url': self.db.getURL(name, revision=revision),
         'authors': self.db.getAuthors(name, revision=revision),
     }
     self.articles[name] = entry

     # Source information is optional: not every database provides it.
     if not hasattr(self.db, 'getSource'):
         return raw
     source = self.db.getSource(name, revision=revision)
     if not source or 'url' not in source:
         return raw

     self.articles[name]['source-url'] = source['url']
     if source['url'] not in self.sources:
         self.sources[source['url']] = source
     return raw
 def getRawArticle(self, title, revision=None):
     """Return the stored text of *title*, or None if it is not available.

     Titles in the template namespace are delegated to ``self.getTemplate``
     using their partial name.  Otherwise the article record is obtained
     from ``self._getArticle`` and its ``'content'`` value is returned; a
     falsy record yields ``None``.
     """
     ns, partial, _full = namespace.splitname(title)
     if ns == namespace.NS_TEMPLATE:
         return self.getTemplate(partial)

     article = self._getArticle(title, revision=revision)
     if not article:
         return None

     content = article['content']
     # fix bug in some simplejson version w/ Python 2.4: byte strings are
     # decoded as UTF-8 before being handed back.
     if isinstance(content, str):
         return unicode(content, 'utf-8')
     return content
    def __init__(self, db, fn):
        """Load overlay documents from file *fn* on top of database *db*.

        The file is read as UTF-8 and cut into blocks.  The first line of
        each non-empty block is a page title and the remainder is its text.
        Each text is stored in ``self.docs`` under the ``(ns, partial)``
        pair produced by ``namespace.splitname`` for the title.

        Raises ``ValueError`` (from tuple unpacking) if a non-empty block
        contains no newline.
        """
        self.fn = fn
        self.db = db

        self.docs = {}

        # Close the file explicitly instead of relying on garbage
        # collection to release the handle.
        f = open(fn, "rb")
        try:
            data = unicode(f.read(), 'utf-8')
        finally:
            f.close()

        # NOTE(review): the block delimiter is a single space as written
        # here -- confirm this is the intended separator, since any title or
        # text containing a space would be split as well.
        for block in data.split(" "):
            if not block:
                continue
            title, txt = block.split("\n", 1)

            ns, partial, full = namespace.splitname(title)
            self.docs[(ns, partial)] = txt
 def getTemplate(self, name, followRedirects=True):
     """Return the stored text of template *name*, or None if unknown.

     *name* is split with ``namespace.NS_TEMPLATE`` as the default
     namespace.  If it resolves to a different namespace the lookup is
     delegated to ``self.getRawArticle`` with the full title.  Otherwise the
     ``'content'`` of the matching ``self.templates`` entry is returned.
     ``followRedirects`` is accepted but not consulted here.
     """
     ns, name, full = namespace.splitname(name, namespace.NS_TEMPLATE)
     if ns != namespace.NS_TEMPLATE:
         return self.getRawArticle(full)

     try:
         content = self.templates[name]['content']
     except KeyError:
         return None

     # fix bug in some simplejson version w/ Python 2.4: byte strings are
     # decoded as UTF-8 before being handed back.
     if isinstance(content, str):
         return unicode(content, 'utf-8')
     return content
    def getTemplate(self, name, followRedirects=True):
        """Return the raw text of template *name*, or None.

        Note: *Not* following redirects is unsupported! The
        ``followRedirects`` argument is accepted but not consulted here.

        Lookup order:

        1. If *name* resolves to a namespace other than the template
           namespace, delegate to ``self.getRawArticle`` with the full title.
        2. Blacklisted names (compared lower-cased, with underscores replaced
           by spaces) yield ``None``.
        3. A hit in ``self.template_cache`` is returned as is.
        4. Otherwise the candidate titles are fetched in order: the print
           variant built from ``self.print_template_pattern`` (if set), then
           the plain ``Template:<name>``.  A candidate is skipped when it
           cannot be fetched, when its categories cannot be queried, or when
           it belongs to ``self.template_exclusion_category``.

        The result, including a ``None`` for "not found", is stored in
        ``self.template_cache``.
        """

        ns, name, full = namespace.splitname(name, namespace.NS_TEMPLATE)
        if ns!=namespace.NS_TEMPLATE:
            return self.getRawArticle(full)
        
        if name.replace('_', ' ').lower() in self.template_blacklist:
            # Format the name into the message, like the other log calls in
            # this method, instead of passing it as a stray positional
            # argument with no matching placeholder.
            log.info('ignoring blacklisted template: %r' % name)
            return None
        
        try:
            return self.template_cache[name]
        except KeyError:
            pass
        
        titles = [u'Template:%s' % name]
        if self.print_template_pattern:
            # The print variant takes precedence over the regular template.
            titles.insert(0, u'Template:%s' % (self.print_template_pattern.replace(u'$1', name),))
        for title in titles:
            raw = self.getRawArticle(title)
            if raw is None:
                continue
            
            if self.template_exclusion_category:
                page = self.api_helper.page_query(
                    titles=title,
                    redirects=1,
                    prop='categories',
                )
                if page is None:
                    log.warn('Could not get categories for template %r' % title)
                    continue
                if 'categories' in page:
                    # Strip the namespace prefix from each category title.
                    categories = [
                        c.get('title', '').split(':', 1)[-1]
                        for c in page['categories']
                    ]
                    if self.template_exclusion_category in categories:
                        log.info('Skipping excluded template %r' % title)
                        continue
            
            self.template_cache[name] = raw
            return raw
        
        log.warn('Could not fetch template %r' % name)
        # Remember the failure so the template is not fetched again.
        self.template_cache[name] = None
        return None
 def getTemplate(self, name, followRedirects=False):
     """Return template *name*, recording it in ``self.templates``.

     *name* is split with ``namespace.NS_TEMPLATE`` as the default
     namespace.  If it resolves to a different namespace the lookup is
     delegated to ``self.getRawArticle`` with the full title.  An already
     recorded template is answered from ``self.templates``; otherwise the
     result of ``self.db.getTemplate`` is recorded (whatever its value) and
     returned.
     """
     ns, name, full = namespace.splitname(name, namespace.NS_TEMPLATE)
     if ns != namespace.NS_TEMPLATE:
         return self.getRawArticle(full)

     try:
         recorded = self.templates[name]['content']
     except KeyError:
         # Not recorded yet; ask the wrapped database below.
         pass
     else:
         return recorded

     fetched = self.db.getTemplate(name, followRedirects=followRedirects)
     self.templates[name] = {'content-type': 'text/x-wiki', 'content': fetched}
     return fetched
 def getTemplatesForArticle(self, title, revision=None):
     """Return a dictionary with all templates used in article with given
     title and revision.

     The article is identified by *revision* when given, otherwise by
     *title*.  The returned dictionary maps template names to dicts with
     the keys ``'content'`` and ``'content-type'``.  ``None`` is returned
     when the API query yields nothing or its result has no ``'pages'``.

     Pages outside the template namespace, pages without revision content
     and templates whose text is empty are left out.  Templates whose text
     matches ``self.redirect_rex`` are resolved through
     ``self.getTemplate``.  The text of every returned template is also
     stored in ``self.template_cache``.
     """
     
     kwargs = {
         'generator': 'templates',
         'gtllimit': 500,
         'gtlnamespace': 10,
         'prop': 'revisions',
         'rvprop': 'content',
     }
     if revision is None:
         kwargs['titles'] = title
     else:
         kwargs['revids'] = revision
     result = self.api_helper.query(**kwargs)
     if not result:
         return None
     result = result['query']
     if 'pages' not in result:
         return None
     title2raw = {}
     # Only the page info is needed; the keys of 'pages' are not used.
     for info in result['pages'].values():
         ns, name, full = namespace.splitname(info['title'], namespace.NS_TEMPLATE)
         if ns != namespace.NS_TEMPLATE:
             continue
         try:
             raw = info['revisions'][0]['*']
             if self.redirect_rex.search(raw):
                 raw = self.getTemplate(name)
             if raw:
                 title2raw[name] = {
                     'content': raw,
                     'content-type': 'text/x-wiki',
                 }
                 # Cache the plain text, not the wrapping dict: the
                 # getTemplate that shares this cache stores and returns
                 # cache entries verbatim as the template text.
                 self.template_cache[name] = raw
         except (KeyError, IndexError):
             continue
     return title2raw