def handleArgs():
    """
    Picks the generator per argument passed in command line

    Usage:
    gen = pagegen.handleArgs()
    for page in gen:
        ...do something

    The arguments from ``wiki.getArgs()`` are scanned in order and the
    first one that matches decides the result:

    -catlinks[?PAGE]  -> ``catlinks(wiki.Page(PAGE))``
    -cat[?NAME]       -> ``category(wiki.Page(NAME))``; ``Category:`` is
                         prepended unless NAME already contains the word
                         "category" (case-insensitive)
    -file[?PATH]      -> a list of ``wiki.Page`` objects, one for every
                         ``[[...]]`` link found in the file
    -new[?LIMIT]      -> ``recentchanges()``; LIMIT is passed as ``limit``
                         when it parses as an integer

    ``?`` stands for a single separator character, which is skipped without
    being checked. When the value is left out, the user is prompted for it
    (except for ``-new``, which then uses the default limit).

    Returns None when no argument matches. Exits with status 1 if the file
    given to ``-file`` cannot be opened.
    """
    for arg in wiki.getArgs():
        # '-catlinks' must be tested before '-cat', which is a prefix of it.
        if arg.startswith('-catlinks'):
            if len(arg) == 9:
                pg = raw_input('Which page should be operated on? ')
            else:
                # means that page has been supplied
                pg = arg[10:]
            return catlinks(wiki.Page(pg))
        if arg.startswith('-cat'):
            if len(arg) == 4:
                cat = raw_input('Which category should be operated on? ')
            else:
                # means that category has been supplied
                cat = arg[5:]
            if 'category' not in cat.lower():
                cat = 'Category:' + cat
            return category(wiki.Page(cat))
        if arg.startswith('-file'):
            if len(arg) == 5:
                fil = raw_input('Which file should be read? ')
            else:
                # means that file has been supplied
                fil = arg[6:]
            try:
                handle = open(fil, 'r')
            except IOError as err:
                print(err)
                sys.exit(1)
            # Close the handle as soon as the text has been read.
            with handle:
                text = handle.read()
            titles = re.findall(r'\[\[(.*?)\]\]', text)
            return [wiki.Page(title) for title in titles]
        if arg.startswith('-new'):
            if len(arg) == 4:
                return recentchanges()
            try:
                limit = int(arg[5:])
            except ValueError:
                # Not a number: fall back to the default limit.
                return recentchanges()
            return recentchanges(limit=limit)
def checktalk():
    """
    Stop the bot unless its "Stop" user subpage says ``run``.

    The page ``User:<name>/Stop`` is read, where ``<name>`` comes from
    ``usernames[config.wiki]``. The process exits if reading the page fails
    for any reason, or if its text, compared case-insensitively, is not
    exactly ``run``.
    """
    stop_title = 'User:%s/Stop' % (usernames[config.wiki])
    stop_page = wiki.Page(stop_title)
    try:
        content = stop_page.get()
    except:
        # Any failure to read the page is treated as a request to stop.
        sys.exit()
    if content.lower() == 'run':
        return
    sys.exit()
def checktalk():
    """
    Stop the bot unless ``User:Legobot III/Stop`` says ``run``.

    The process exits with the message ``Run page disabled`` when the page
    text, compared case-insensitively, is not exactly ``run``, or when
    reading or checking the page raises anything at all.
    """
    stop_page = wiki.Page('User:Legobot III/Stop')
    try:
        enabled = stop_page.get().lower() == 'run'
    except:
        # Every failure counts as "disabled", matching the exit message below.
        enabled = False
    if not enabled:
        sys.exit('Run page disabled')
def get_pages():
    """
    Rebuild the text of [[Wikipedia:Vital articles]] and save it.

    Every page linked from the list (namespace 0 only) is passed, together
    with the current working text, to ``do_page``, whose return value
    becomes the new working text. The difference between the original and
    the final text is shown with ``wiki.showDiff`` and the result is saved
    with a bot-flagged edit.
    """
    vapage = wiki.Page('Wikipedia:Vital articles')
    gen = pagegen.links(vapage, ns=0)  # we only need mainspace articles
    # state0 keeps the untouched text so the diff can be shown at the end.
    text = state0 = vapage.get()
    for page in gen:
        # Re-checked here although the generator was already asked for ns=0.
        if page.namespace() == 0:
            text = do_page(page, text)
    wiki.showDiff(state0, text)
    vapage.put(text, 'Bot: Updating Vital Articles', bot=True)
def main():
    """
    Walk the category of pages with incorrect protection templates.

    ``checktalk()`` is called before each page. Pages in namespaces 2, 3
    and 10 are skipped; every other page goes to ``dopage``, and the page
    is handed to ``logerror`` when ``dopage`` returns ``'Prot'``.
    """
    category_page = wiki.Page(
        'Category:Wikipedia pages with incorrect protection templates')
    for page in pagegen.category(category_page):
        checktalk()
        print(page.title() + ' is being processed.')
        if page.namespace() in (2, 3, 10):
            print('Skipping userspace/template pages.')
            continue
        outcome = dopage(page)
        if outcome == 'Prot':
            logerror(page)
def docat(cat2):
    """
    Run ``process_article`` on every mainspace page of ``Category:<cat2>``.

    ``cat2`` is the category name without the ``Category:`` prefix. Pages
    outside namespace 0 are reported and skipped. A ``UnicodeEncodeError``
    raised by ``process_article`` is ignored, and ``checktalk()`` is called
    after each mainspace page.
    """
    members = pagegen.category(wiki.Page('Category:' + cat2))
    for page in members:
        if page.namespace() != 0:
            print('Skipping %s because it is not in the mainspace' % (
                page.title()))
            continue
        try:
            process_article(page)
        except UnicodeEncodeError:
            pass
        checktalk()
    print('Done with Category:%s' % (cat2))
def whatlinkshere(page):
    """
    [[Special:WhatLinksHere]]

    Generator: runs a ``list=backlinks`` query for ``page`` against the
    API of the page's site and yields a ``wiki.Page`` for each returned
    title.
    """
    api = wiki.API(wiki=page.getSite())
    query = {
        'action': 'query',
        'list': 'backlinks',
        'bltitle': page.title(),
        'bllimit': 'max',
    }
    backlinks = api.query(query)['query']['backlinks']
    for entry in backlinks:
        yield wiki.Page(entry['title'])
def all(self, recurse=False):
    """
    Returns all articles in the category

    Generator: yields a wiki.Page (on this category's site) for every
    entry of a list=categorymembers query for self.title.

    recurse: when truthy, the subcategories from self.subcats() are
    walked as well. A non-bool value is reduced by one before being
    passed down; a bool is passed down unchanged.
    """
    print 'Getting %s...' % self.page.aslink()
    params = 'action=query&list=categorymembers&cmlimit=max&cmtitle=' + self.title
    res = self.API.query(params)['query']['categorymembers']
    # Only non-bool values are decremented; True stays True.
    if not isinstance(recurse, bool) and recurse:
        recurse = recurse - 1
    for i in res:
        yield wiki.Page(i['title'], wiki=self.page.getSite())
    if recurse:
        for subcat in self.subcats(recurse):
            # NOTE(review): ``memvers`` is not defined anywhere in view and
            # looks like a typo (possibly for ``members`` or ``all``) --
            # confirm the intended method name against the class.
            for mem in subcat.memvers(recurse):
                yield mem
def transclude(page):
    """
    Yields pages that transclude a certain template

    Generator: runs a ``list=embeddedin`` query for ``page`` against the
    API of the page's site and yields a ``wiki.Page`` for each returned
    title.
    """
    api = wiki.API(wiki=page.getSite())
    print('Getting references to [[%s]]...' % (page.title()))
    query = {
        'action': 'query',
        'list': 'embeddedin',
        'eititle': page.title(),
        'eilimit': 'max',
    }
    result = api.query(query)
    for entry in result['query']['embeddedin']:
        yield wiki.Page(entry['title'])
def prefixindex(page):
    """
    Yields pages with prefix of the page ([[Special:PrefixIndex]])

    Generator: runs a ``list=allpages`` query against the API of the
    page's site, using the page's title without namespace as ``apprefix``
    and its namespace as ``apnamespace``, and yields a ``wiki.Page`` for
    each returned title.
    """
    API = wiki.API(wiki=page.getSite())
    ns = page.namespace()
    prefix = page.titlewonamespace()
    params = {
        'action': 'query',
        'list': 'allpages',
        'apprefix': prefix,
        'apnamespace': str(ns),
        'aplimit': 'max',
    }
    res = API.query(params)['query']['allpages']
    for entry in res:
        yield wiki.Page(entry['title'])
def updatelog(self, text):
    """
    Save ``text`` to today's log subpage.

    The log root is looked up in ``logs`` under ``wiki.translate()`` and
    stored on ``self.log``; nothing is written when there is no entry or
    the entry is empty. The target page is ``<log>/<year>/<month>/<day>``
    (local time), with only the month zero-padded to two digits. If the
    page does not exist yet, a wikitable header is put in front of
    ``text``. The page is saved with ``self.logupmess`` as the summary.
    """
    try:
        self.log = logs[wiki.translate()]
    except KeyError:
        # no log on wiki
        return
    if not self.log:
        return

    now = time.localtime(time.time())
    year, month, day = [str(part) for part in now[:3]]
    if len(month) == 1:
        month = u'0' + month

    log = wiki.Page(self.log + '/' + year + '/' + month + '/' + day)
    if not log.exists():
        # A new page starts with the table header.
        header = '{|border="2" cellpadding="4" cellspacing="0" style="margin: 0.5em 0.5em 0.5em 1em; padding: 0.5em; background: #bfcda5; border: 1px #b6fd2c solid; border-collapse: collapse; font-size: 95%;"\n!User\n!Contribs\n'
        text = header + text
    log.put(text, self.logupmess)
def recentchanges(limit=500, nobot=True, onlyanon=False, hidepatrolled=True, nponly=False, wiki=config.wiki):
    """
    Yields pages that were recently changed ([[Special:RecentChanges]])
    If nponly = True, yields only newpages ([[Special:NewPages]])

    limit: value sent as ``rclimit``.
    nobot: when true, ``!bot`` is added to ``rcshow``.
    onlyanon: when true, ``anon`` is added to ``rcshow``.
    hidepatrolled: accepted for compatibility; currently has no effect.
    nponly: when true, ``rctype=new`` is sent.
    wiki: site passed to the API object as its ``wiki`` argument.

    Generator: yields one ``Page`` per entry of the ``recentchanges``
    result. The query is made without continuation (``qcontinue=False``).
    """
    # The ``wiki`` parameter hides the module-level ``wiki`` package inside
    # this function, so ``wiki.API`` / ``wiki.Page`` would be looked up on
    # the site argument. Fetch the module explicitly from the globals.
    # NOTE(review): assumes this file binds the package to the global name
    # ``wiki``, as the sibling generators do -- confirm.
    wiki_module = globals()['wiki']

    rcshow = []
    if nobot:
        rcshow.append('!bot')
    if onlyanon:
        rcshow.append('anon')
    params = {
        'action': 'query',
        'list': 'recentchanges',
        # Pipe-separated filter list; empty string when no filter is set.
        'rcshow': '|'.join(rcshow),
        'rcprop': 'title',
        'rclimit': limit,
    }
    if nponly:
        print('Fetching the %s newest pages' % limit)
        params['rctype'] = 'new'
    else:
        print('Fetching the %s latest edits' % limit)
    API = wiki_module.API(qcontinue=False, wiki=wiki)
    res = API.query(params)['query']['recentchanges']
    for entry in res:
        yield wiki_module.Page(entry['title'])
def run(self): url = wiki.getWiki().getIndex() + '?title=Special:Log/newusers&limit=' + str(self.loadusers) text = wiki.getURL(url) rec = '\<a href="/w/index\.php\?title=User:(.*?)&action=edit&redlink=1"' list = re.findall(rec, text) userlist = [] for i in list: userlist.append(userlib.User(wiki.Page('User:'******'' for user in userlist: logpost += self.doUser(user) time.sleep(2) self.updatelog(logpost) if not self.quitafterrun: print 'Sleeping %s' %self.waittime time.sleep(self.waittime) bot = WelcomeBot() bot.run() else: sys.exit()
def subcats(self, recurse=False):
    """
    Yield the subcategories of this category as ``Category`` objects.

    The first complete run queries ``list=categorymembers`` restricted to
    namespace 14 and stores the direct subcategories in ``self._subcats``.
    Later calls reuse that list and do not query the API again. The list
    is stored only after the whole result has been walked, so a caller
    that stops iterating early cannot leave a truncated cache behind.

    recurse: when truthy, each subcategory's own ``subcats()`` are yielded
    right after it. A non-bool value is reduced by one before being passed
    down; a bool is passed down unchanged.
    """
    # Only non-bool values are decremented; True stays True.
    if not isinstance(recurse, bool) and recurse:
        recurse = recurse - 1

    if hasattr(self, "_subcats"):
        children = self._subcats
    else:
        children = None

    if children is None:
        print('Getting %s...' % self.page.aslink())
        params = 'action=query&list=categorymembers&cmlimit=max&cmnamespace=14&cmtitle=' + self.title
        res = self.API.query(params)['query']['categorymembers']
        found = []
        for i in res:
            subcat = Category(
                wiki.Page(i['title'], wiki=self.page.getSite()))
            found.append(subcat)
            yield subcat
            if recurse:
                for nested in subcat.subcats(recurse):
                    yield nested
        # Publish the cache only once every direct subcategory was seen.
        self._subcats = found
    else:
        for subcat in children:
            yield subcat
            if recurse:
                for nested in subcat.subcats(recurse):
                    yield nested
def main():
    """
    Call ``do_page`` on every page of the category
    "WikiProject Korea articles using the wg parameter".
    """
    category_page = wiki.Page(
        'Category:WikiProject Korea articles using the wg parameter')
    for member in pagegen.category(category_page):
        do_page(member)
#!/usr/bin/python
#
# (C) Legoktm, 2009 MIT License
#
"""
Tags pages for RFD
Usage: python rfd.py
"""
__version__ = '$Id$'

import re
import wiki

# Source list: every [[:M...]] link on this page names one page to tag.
o = wiki.Page('User:Arthur Rubin/Roman redirects')
links = re.findall(r'\[\[:M(.*?)\]\]', o.get())
# The pattern captures the title without its leading 'M'; put it back.
newlist = [wiki.Page('M' + i) for i in links]

append = '{{rfd}}\n'
summary = 'Tagging for [[WP:RFD]] per [[WP:BOTR#Roman_redirects|request]]'
for b in newlist:
    oldtext = b.get(force=True)
    if 'rfd' in oldtext:
        # Text already mentions rfd: leave the page alone.
        print('Skipping: ' + b.title())
    else:
        wiki.showDiff(oldtext, append + oldtext)
        b.put(append + oldtext, summary)
def main():
    """
    Run ``dopage`` on every page of the category
    "Wikipedia pages with broken references", printing each page first
    and sleeping 15 seconds after each one.
    """
    source = wiki.Page('Category:Wikipedia pages with broken references')
    for member in pagegen.category(source):
        print(member)
        dopage(member)
        time.sleep(15)
#!/usr/bin/python
import wiki
from wiki import pagegen, timedate
import re, time, sys
wiki.setUser('Legobot III')
#get the list of protection templates...
fulllist = []
gen = pagegen.category(wiki.Page('Category:Protection templates'))
for page in gen:
    if page.isTemplate():
        name = page.titlewonamespace()
        fulllist.append(name)
moveprot = ['pp-move-dispute','pp-move-vandalism','pp-move-indef','pp-move']
def logerror(page):
    """
    Append the title of ``page`` to ``Errors.txt`` and report it on stdout.

    Each entry is written as a newline followed by the title. The file is
    created if it does not exist.
    """
    # Append mode adds the entry without reading or truncating the log, so
    # earlier entries survive even if writing the title fails.
    with open('Errors.txt', 'a') as log:
        log.write('\n' + page.title())
        print('Logging an error on ' + page.title())
def convertexpiry(ts):
    epochts = int(time.mktime(time.strptime(ts, '%Y-%m-%dT%H:%M:%SZ')))
    st = time.gmtime(epochts)
    year = str(st.tm_year)
    monthname = timedate.monthname(st.tm_month)