def get():
    """Yield the name of every sysop (UTF-8 encoded), then 'Legoktm'."""
    # Equivalent API request:
    # action=query&list=allusers&augroup=sysop&aulimit=max&format=jsonfm
    sysops = api.ListGenerator('allusers', site=site, augroup='sysop')
    for user in sysops:
        yield user['name'].encode('utf-8')
    # Extra hard-coded entry appended after the listing.
    yield 'Legoktm'
def InterestingGeographsByNumber(**kwargs): site = kwargs['site'] # Fetch starting ID from a special page. startpage = pywikibot.Page(site, 'User:Geograph Update Bot/last ID') start = int(startpage.text) startsortkeyprefix = " %08d" % (start, ) n = 0 g0 = api.ListGenerator( "categorymembers", parameters=dict( cmtitle="Category:Images from Geograph Britain and Ireland", cmprop="title|sortkeyprefix", cmtype="file", cmstartsortkeyprefix=startsortkeyprefix), **kwargs) g1 = api.QueryGenerator(parameters=dict( generator="categorymembers", gcmtitle="Category:Images from Geograph Britain and Ireland", gcmtype="file", gcmstartsortkeyprefix=startsortkeyprefix, prop="imageinfo", iiprop="size"), **kwargs) for page in InterestingGeographGenerator(site, g0, g1): yield page n = n + 1 if (n % 50 == 0): # Write a checkpoint every fifty yielded items startpage.text = str(page.gridimage_id) startpage.save("Checkpoint: up to %d" % (page.gridimage_id, ))
def find_duplicates():
    """Report Geograph IDs that are used by more than one Commons file.

    Walks the Geograph category in sortkey order (the sortkeyprefix is
    the gridimage ID), detects runs of equal IDs, and saves a wikitext
    report to 'User:Geograph Update Bot/duplicate Geograph IDs/data'.
    """
    last_id = dup_id = -1
    last_title = None
    outfile = StringIO()
    site = pywikibot.Site()
    for item in api.ListGenerator(
            "categorymembers", site=site,
            cmtitle="Category:Images from Geograph Britain and Ireland",
            cmprop="title|sortkeyprefix", cmtype="file"):
        try:
            # sortkeyprefix holds the gridimage ID for these files; skip
            # entries where it is missing or not numeric.  (Previously a
            # blanket 'except Exception: pass' around the whole body,
            # which also hid report-writing bugs.)
            gridimage_id = int(item['sortkeyprefix'])
        except (KeyError, ValueError):
            continue
        print(gridimage_id, end="\r")  # progress indicator on the console
        if gridimage_id == last_id:
            if dup_id != last_id:
                # First duplicate for this ID: emit the header plus the
                # previously-seen title.
                print("* [https://www.geograph.org.uk/photo/%d %d]"
                      % (gridimage_id, gridimage_id), file=outfile)
                print("** [[:%s]]" % (last_title, ), file=outfile)
                dup_id = last_id
            print("** [[:%s]]" % (item['title'], ), file=outfile,
                  flush=True)
        last_id = gridimage_id
        last_title = item['title']
    reportpage = pywikibot.Page(
        site, "User:Geograph Update Bot/duplicate Geograph IDs/data")
    reportpage.text = (
        "<!-- This page will be overwritten by Geograph Update Bot -->")
    reportpage.text += outfile.getvalue()
    reportpage.save("New list of duplicate IDs")
def fetch_whitelist():
    """Yield the names of all sysops, autopatrolled users and bots."""
    # list=allusers&augroup=sysop|autopatrolled&format=jsonfm&aulimit=max
    users = api.ListGenerator('allusers', site=site,
                              augroup='sysop|autopatrolled|bot')
    for user in users:
        yield user['name']
def setUp(self):
    """Set up test case."""
    super().setUp()
    site = self.get_site()
    # Inject fake paraminfo so the dry site can answer limit and
    # namespace queries for the 'allpages' list module.
    fake_paraminfo = {
        'prefix': 'ap',
        'limit': {'max': 10},
        'namespace': {'multi': True},
    }
    site._paraminfo['query+allpages'] = fake_paraminfo
    self.gen = api.ListGenerator(listaction='allpages', site=site)
def fetch(user):
    """Yield (rcid, patroltoken) for the user's unpatrolled recent changes.

    Uses the module-level 'site' object; requests a patrol token per
    change so the caller can mark the change patrolled.
    """
    params = {
        'rcuser': user,
        'rclimit': 'max',
        'rcshow': '!patrolled',
        'rctoken': 'patrol',
    }
    gen = api.ListGenerator('recentchanges', site=site, **params)
    for change in gen:
        # Debug output.  Was the Python-2 statement 'print change',
        # which is a syntax error under Python 3.
        print(change)
        yield change['rcid'], change['patroltoken']
def pendingchangesGenerator():
    """Yield up to 5 main-namespace pages with pending (old reviewed) changes."""
    site = pywikibot.Site()
    listing = api.ListGenerator(listaction="oldreviewedpages", site=site,
                                orlimit=5, ornamespace=0)
    for record in listing:
        yield pywikibot.Page(site, record["title"])
def setUp(self):
    """Set up test case."""
    # Zero-argument super() (Python 3) for consistency with the other
    # setUp in this file; set literal replaces set([...]).
    super().setUp()
    mysite = self.get_site()
    # Fake paraminfo so the dry generator can resolve limits and
    # namespace support for the 'allpages' list module.
    mysite._paraminfo['query+allpages'] = {
        'prefix': 'ap',
        'limit': {'max': 10},
        'namespace': {'multi': True}
    }
    mysite._paraminfo.query_modules_with_limits = {'allpages'}
    self.gen = api.ListGenerator(listaction="allpages", site=mysite)
def unreviewdpagesGenerator():
    """Yield up to 5 unreviewed, non-redirect pages from the main namespace."""
    site = pywikibot.Site()
    listing = api.ListGenerator(listaction="unreviewedpages", site=site,
                                urlimit=5, urnamespace=0,
                                urfilterredir="nonredirects")
    for record in listing:
        yield pywikibot.Page(site, record["title"])
def find_rejected():
    """Report category members whose Geograph ID is missing from the DB.

    Walks the Geograph category, checks each gridimage ID (taken from
    the category sortkeyprefix) against the local gridimage_base table,
    and for unknown IDs probes geograph.org.uk to see where the photo
    page redirects.  Saves the result as a wikitext report page.

    Relies on module-level 'geodb' (a DB-API connection) and 'requests'.
    """
    outfile = StringIO()
    site = pywikibot.Site()
    c = geodb.cursor()
    # Highest ID known to the local Geograph database; anything above
    # this cannot be judged yet.
    c.execute("""
        SELECT MAX(gridimage_id) FROM gridimage_base
        ORDER BY gridimage_id desc limit 1""")
    row = c.fetchone()
    maxid = row[0]
    titles_by_id = {}
    for item in api.ListGenerator(
            "categorymembers", site=site,
            cmtitle="Category:Images from Geograph Britain and Ireland",
            cmprop="title|sortkeyprefix", cmtype="file"):
        try:
            # sortkeyprefix holds the gridimage ID for these files.
            gridimage_id = int(item['sortkeyprefix'])
            titles_by_id[gridimage_id] = item['title']
            if gridimage_id > maxid:
                # Beyond the local DB's coverage: skip.
                continue
            print(gridimage_id, end="\r")  # progress indicator
            c = geodb.cursor()
            c.execute("""
                SELECT gridimage_id FROM gridimage_base
                WHERE gridimage_id = ?
                """, (gridimage_id, ))
            if c.fetchone() == None:
                # ID not in the DB: report it, then follow redirects to
                # find a possible replacement photo.
                print("* [https://www.geograph.org.uk/photo/%d %d]: [[:%s]]"
                      % (gridimage_id, gridimage_id, item['title']),
                      file=outfile, flush=True)
                r = requests.head('https://www.geograph.org.uk/photo/%d'
                                  % (gridimage_id, ), allow_redirects=True)
                if r.status_code == 200:
                    # Final path component of the redirected URL is the
                    # destination photo's ID.
                    destid = int(urlsplit(r.url).path.rpartition('/')[2])
                    # NOTE(review): raises KeyError (silently swallowed
                    # by the except below) when destid has not been seen
                    # in the category walk yet; and when it has, both
                    # report lines are printed — looks like a missing
                    # 'else'.  Confirm intent before changing.
                    if titles_by_id[destid]:
                        print("** → [%s %d]: [[:%s]]"
                              % (r.url, destid, titles_by_id[destid]),
                              file=outfile, flush=True)
                    print("** → [%s %d]" % (r.url, destid),
                          file=outfile, flush=True)
        except Exception:
            # Best-effort: malformed sortkeys, DB or network errors just
            # skip this item.
            pass
    reportpage = pywikibot.Page(
        site, "User:Geograph Update Bot/images rejected from Geograph/data")
    reportpage.text = (
        "<!-- This page will be overwritten by Geograph Update Bot -->")
    reportpage.text += outfile.getvalue()
    reportpage.save("New list of rejected IDs")
def InterestingGeographsByDate(**kwargs):
    """Yield interesting Geograph files, most recent category entries first.

    kwargs must contain 'site'; all of kwargs is forwarded to the
    underlying API generators.
    """
    site = kwargs['site']
    # Plain category listing with titles and sortkeyprefixes.
    titles_gen = api.ListGenerator(
        "categorymembers",
        parameters=dict(
            cmtitle="Category:Images from Geograph Britain and Ireland",
            cmprop="title|sortkeyprefix",
            cmtype="file",
            cmsort="timestamp",
            cmdir="older",
        ),
        **kwargs)
    # Same walk, but with image size info attached to each page.
    info_gen = api.QueryGenerator(
        parameters=dict(
            generator="categorymembers",
            gcmtitle="Category:Images from Geograph Britain and Ireland",
            gcmtype="file",
            gcmsort="timestamp",
            gcmdir="older",
            prop="imageinfo",
            iiprop="size"),
        **kwargs)
    yield from InterestingGeographGenerator(site, titles_gen, info_gen)
def fetchWatchlist(self):
    """Run self.do_page() over recently-changed watchlisted pages.

    Command-line options (from self.args):
      --days=N   how far back to look, in days (default 1)
      --page=T   process only page T instead of the watchlist

    Each distinct title is turned into a pywikibot.Page and handed to
    self.do_page() together with a shared DB cursor.
    """
    days = 1
    page = None
    for arg in self.args:
        if arg.startswith('--days'):
            try:
                days = int(arg[7:])  # text after '--days='
            except ValueError:
                pass  # malformed value: keep the default
        if arg.startswith('--page'):
            try:
                page = pywikibot.Page(self.site, arg[7:])
            except Exception:
                # Was a bare 'except:': at least don't swallow
                # SystemExit/KeyboardInterrupt.
                pass
    dayago = datetime.datetime.utcnow() - datetime.timedelta(days=days)
    dayago = dayago.strftime('%Y-%m-%dT%H:00:00Z')
    # Odd numbers are talk namespaces; 101/109 are wiki-specific —
    # confirm against the target wiki's namespace table.
    namespaces = [1, 3, 5, 7, 9, 11, 13, 15, 101, 109]
    if page:
        q = [page]
    else:
        q = api.ListGenerator(listaction='watchlist', wlstart=dayago)
        q.set_namespace(namespaces)
    queue = list()
    seen_titles = set()
    for item in q:
        if page:
            title = page.title()
        else:
            title = item['title']
        # BUG FIX: the old dedup test was 'title in queue', comparing a
        # str against Page objects, so duplicates were never filtered.
        if title not in seen_titles:
            seen_titles.add(title)
            queue.append(pywikibot.Page(self.site, title))
    cur = self.conn.cursor()
    for page in queue:
        try:
            self.do_page(page, cur)
        except Exception:
            # Best-effort: one failing page must not abort the run.
            pass
def test_namespace_resolve_failed(self):
    """Test ListGenerator set_namespace when resolve fails."""
    gen = api.ListGenerator(listaction='allpages', site=self.site)
    self.gen = gen
    # 'allpages' supports namespaces, but 10000 is not a known one.
    self.assertTrue(gen.support_namespace())
    with self.assertRaises(KeyError):
        gen.set_namespace(10000)
def test_namespace_multi(self):
    """Test ListGenerator set_namespace when multi."""
    gen = api.ListGenerator(listaction='allpages', site=self.site)
    self.gen = gen
    # 'allpages' accepts multiple namespaces at once.
    self.assertTrue(gen.support_namespace())
    self.assertIsNone(gen.set_namespace([0, 1]))
def test_namespace_non_multi(self):
    """Test ListGenerator set_namespace when non multi."""
    gen = api.ListGenerator(listaction='alllinks', site=self.site)
    self.gen = gen
    # 'alllinks' takes a single namespace only: a list must fail,
    # a scalar must succeed.
    with self.assertRaises(TypeError):
        gen.set_namespace([0, 1])
    self.assertIsNone(gen.set_namespace(0))
def test_namespace_none(self):
    """Test ListGenerator set_namespace with None."""
    gen = api.ListGenerator(listaction='alllinks', site=self.site)
    self.gen = gen
    # None is not a valid namespace value for a non-multi module.
    with self.assertRaises(TypeError):
        gen.set_namespace(None)
def test_namespace_param_is_not_settable(self):
    """Test ListGenerator support_namespace."""
    gen = api.ListGenerator(listaction='querypage', site=self.site)
    self.gen = gen
    # 'querypage' has no namespace parameter at all, so setting one is
    # reported as unsupported rather than raising.
    self.assertFalse(gen.support_namespace())
    self.assertFalse(gen.set_namespace([0, 1]))
def get_autoreviewedusers(self):
    """Return {user name: user record} for every user holding the
    autoreview or autopatrol right."""
    listing = api.ListGenerator(listaction="allusers",
                                site=pywikibot.Site(),
                                aurights='autoreview|autopatrol')
    userlist = {}
    for user in listing:
        userlist[user['name']] = user
    return userlist