def retrievePageExportXml(self, title):
    """Fetch the XML export of a wiki page and parse it.

    Issues an ``action=query`` request with ``export`` enabled for
    ``title`` against ``self.wiki``.

    Returns ``None`` when the response lists the page under the id
    ``'-1'`` (the page does not exist yet); otherwise returns the element
    produced by parsing the exported XML text.
    """
    query_args = {'action': 'query', 'titles': title, 'export': '1'}
    response = APIRequest(self.wiki, query_args).query()['query']
    if '-1' in response['pages']:
        # page does not exist yet
        return None
    # The export arrives as text; the parser is handed UTF-8 bytes.
    exported_text = response['export']['*']
    return xml.etree.ElementTree.XML(exported_text.encode('utf-8'))
def retrieveImageInfo(self, title):
    """Return the API's page entry holding image info for ``title``.

    Issues an ``action=query`` / ``prop=imageinfo`` request against
    ``self.wiki`` asking for the ``timestamp``, ``url``, ``sha1`` and
    ``comment`` image properties, and returns the first entry of the
    ``pages`` mapping in the response.

    Raises ``IndexError`` if the response contains no pages.
    """
    params = {
        'action': 'query',
        'prop': 'imageinfo',
        'iiprop': 'timestamp|url|sha1|comment',
        'titles': title,
    }
    request = APIRequest(self.wiki, params)
    pages = request.query()['query']['pages']
    # The key of the single entry is not known in advance (presumably the
    # page id), so take the first value. Materialising the values as a
    # list keeps this working on Python 3, where dict views cannot be
    # indexed, and still raises IndexError on an empty mapping.
    return list(pages.values())[0]
problems.append(i) if char == pair[1]: try: problems.pop() except IndexError: return [i] return problems params = { 'action': 'query', 'list': 'allpages', 'apfilterredir': 'nonredirects', 'aplimit': '500', } titles = set() req = APIRequest(wiki, params) for result in req.queryGen(): for article in result['query']['allpages']: titles.add(article['title']) titles = list(titles) titles.sort() print 'Found', len(titles), 'pages' for title in titles: page = Page(wiki, title) page.getWikiText() text = page.getWikiText().lower() printed_link = False for pair in pairs: if text.count(pair[0]) != text.count(pair[1]): if not printed_link:
def retrieveCategoryMemberList(self, categoryname):
    """Return the ``categorymembers`` list the API reports for a category.

    The query parameters come from
    ``self._buildCategoryMemberListQuery(categoryname)``; the request is
    sent to ``self.wiki`` and the ``query -> categorymembers`` part of the
    response is returned as-is.
    """
    query_args = self._buildCategoryMemberListQuery(categoryname)
    response = APIRequest(self.wiki, query_args).query()
    return response['query']['categorymembers']
if char == pair[1]: try: problems.pop() except IndexError: return [i] return problems params = { 'action': 'query', 'list': 'allpages', 'apfilterredir': 'nonredirects', 'aplimit': '500', } titles = set() req = APIRequest(wiki, params) for result in req.queryGen(): for article in result['query']['allpages']: titles.add(article['title']) titles = list(titles) titles.sort() print 'Found', len(titles), 'pages' for title in titles: page = Page(wiki, title) page.getWikiText() text = page.getWikiText().lower() printed_link = False for pair in pairs: if text.count(pair[0]) != text.count(pair[1]): if not printed_link: