def abort(self, status=u'Ok', msg=None):
    log = u'Bot aborted with status: ' + status
    if msg:
        log += u' and message: ' + msg
    self.log(log)
    wikipedia.stopme()
    exit(1)
def checktalk():
    site = wikipedia.getSite()
    path = site.put_address('Non-existing_page')
    text = site.getUrl(path)
    if '<div class="usermessage">' in text:
        wikipedia.output(u'NOTE: You have unread messages on %s' % site)
        wikipedia.stopme()
        sys.exit()
def Import(self, target, project='w', crono='1', namespace='', prompt=True):
    """Import the page from another wiki. Requires administrator status.

    If prompt is True, asks the user whether to import the page.
    """
    # Normalize crono to the '0'/'1' strings the import form expects.
    if crono is True:
        crono = '1'
    elif crono is False:
        crono = '0'
    elif crono not in ('0', '1'):
        wikipedia.output(u'Crono value set wrongly.')
        wikipedia.stopme()
    # Normalize the namespace value.
    if namespace == '0':
        namespace = ''
    answer = 'y'
    if prompt:
        answer = wikipedia.inputChoice(u'Do you want to import %s?' % target,
                                       ['Yes', 'No'], ['y', 'N'], 'N')
    if answer in ['y', 'Y']:
        host = self.site().hostname()
        address = '/w/index.php?title=%s&action=submit' % self.urlname()
        # You need to be a sysop to import.
        self.site().forceLogin(sysop=True)
        # Get the token.
        token = self.site().getToken(self, sysop=True)
        # Define the form data.
        predata = {
            'action': 'submit',
            'source': 'interwiki',
            # The project to import the page from.
            'interwiki': project,
            # The page to import.
            'frompage': target,
            # Import the entire history ('1') or only the last revision ('0').
            'interwikiHistory': crono,
            # The target namespace.
            'namespace': namespace,
        }
        if self.site().hostname() in config.authenticate.keys():
            predata['Content-type'] = 'application/x-www-form-urlencoded'
            predata['User-agent'] = useragent
            data = self.site().urlEncode(predata)
            response = urllib2.urlopen(
                urllib2.Request('http://' + self.site().hostname() + address,
                                data))
            data = u''
        else:
            response, data = self.site().postForm(address, predata, sysop=True)
        if data:
            wikipedia.output(u'Page imported, checking...')
            if wikipedia.Page(self.site(), target).exists():
                wikipedia.output(u'Import success!')
                return True
            else:
                wikipedia.output(u'Import failed!')
                return False
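# A minimal usage sketch for Import() above (not part of the original
# script): it assumes the method is available on a compat-era
# wikipedia.Page object and that the bot account has sysop rights; the
# page title is hypothetical.
page = wikipedia.Page(wikipedia.getSite(), u'Example page')
# Import [[w:Example page]] with its full history, without prompting.
if page.Import(u'Example page', project='w', crono='1', prompt=False):
    wikipedia.output(u'Imported successfully.')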
def main():
    summary_commandline, gen = None, None
    # template must be a list, since templates are appended to it below.
    template = []
    exceptions, PageTitles, namespaces = [], [], []
    cat = ''
    autoText, autoTitle = False, False
    genFactory = pagegenerators.GeneratorFactory()
    # Set to True to skip command-line arguments and prompt for a page instead.
    noargs = False
    if not noargs:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page:'):
                if len(arg) == 6:
                    PageTitles.append(
                        wikipedia.input(u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith('-cat:'):
                if len(arg) == 5:
                    cat = wikipedia.input(
                        u'Which category do you want to change?')
                else:
                    cat = 'Category:' + arg[5:]
            elif arg.startswith('-template:'):
                if len(arg) == 10:
                    template.append(
                        wikipedia.input(u'Which template do you want to change?'))
                else:
                    template.append('Template:' + arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-ns:'):
                namespaces.append(int(arg[4:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]
    if cat != '':
        facatfalist = facatlist(cat)
        if facatfalist is not False:
            run(facatfalist)
    if PageTitles:
        pages = [wikipedia.Page(faSite, PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    # Number of pages to preload at the same time.
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)
def main():
    site = pywikibot.getSite('en', 'wikipedia')
    prefix = 'Uw-'
    ns = 10
    for p in site.prefixindex(prefix, namespace=ns):
        print p.title()
    pywikibot.stopme()
def delTestPage(pagename):
    myuserpage = u"ഉപയോക്താവ്:" + 'Manubot'
    mypage = myuserpage + "/BotLabs/test" + pagename
    # Doing the job
    site = wikipedia.getSite('ml', 'wikipedia')
    page = wikipedia.Page(site, mypage)
    page.delete(reason='Deleting Test pages', prompt=False, throttle=True,
                mark=True)
    wikipedia.stopme()
def shutoffcheck(self):
    return  # Not implemented; the code below is never reached.
    print u'Checking emergency shutoff page %s.' % self.shutoffpage.title(asLink=True)
    self.shutoffpagetext = self.shutoffpage.get()
    if unicode(self.shutoffpagetext.strip()) != u'enable':
        print u'Emergency shutoff enabled; stopping.'
        pywikibot.stopme()
        exit()
    else:
        print u'Emergency shutoff disabled; continuing.'
def udate2wiki(pagename=u'ഉപയോക്താവ്:Manubot/sandbox', towiki=True):
    global gpageData
    if towiki:
        site = wikipedia.getSite('ml', 'wikipedia')
        page = wikipedia.Page(site, pagename)
        page.put(gpageData, u'ബോട്ടിന്റെ കൂന്തി വിളയാട്ടം')
        wikipedia.stopme()
    else:
        f = codecs.open(pagename + u'.txt', encoding='utf-8', mode='w')
        f.write(gpageData)
        f.close()
def shutoffcheck():
    site = wikipedia.getSite()
    pagename = "User:Hazard-Bot/Check/Wikiproject"
    page = wikipedia.Page(site, pagename)
    print "Checking [[" + pagename + "]] for emergency shutoff."
    text = page.get()
    if text.lower() != 'enable':
        print "Emergency shutoff enabled; stopping."
        wikipedia.stopme()
        exit()
    print "Emergency shutoff disabled; continuing."
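# A minimal sketch of how shutoffcheck() above might guard a bot run
# (an assumption, not part of the original script); run_bot is a
# hypothetical stand-in for the script's actual work.
def run_bot():
    pass  # ... edit pages here ...

if __name__ == '__main__':
    try:
        # Halts unless [[User:Hazard-Bot/Check/Wikiproject]] reads 'enable'.
        shutoffcheck()
        run_bot()
    finally:
        wikipedia.stopme()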
def main(*args):
    global bot
    try:
        a = pywikibot.handleArgs(*args)
        if len(a) == 1:
            raise RuntimeError('Unrecognized argument "%s"' % a[0])
        elif a:
            raise RuntimeError("Unrecognized arguments: "
                               + " ".join(('"%s"' % arg) for arg in a))
        bot = CategoryRedirectBot()
        bot.run()
    finally:
        pywikibot.stopme()
def main():
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat = ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = wikipedia.input(
                    u'At which page do you want to start?')
            firstPageTitle = wikipedia.Page(
                fasite, firstPageTitle).title(withNamespace=False)
            gen = pagegenerators.AllpagesPageGenerator(
                firstPageTitle, 0, includeredirects=True)
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
        preloadingGen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    _cache, last_timestamp = get_cache()
    add_text(preloadingGen)
    # Crude day counter: day of month + month * 30 + years since 2000 * 365.
    now = str(datetime.now())
    todaynum = (int(now.split('-')[2].split(' ')[0])
                + int(now.split('-')[1]) * 30
                + (int(now.split('-')[0]) - 2000) * 365)
    if last_timestamp + 3 < todaynum:
        put_cache(_cache, todaynum)
    else:
        put_cache({}, 0)
def getInfobox(film):
    info_box_data = []
    if type(film).__name__ in ('str', 'unicode'):
        site = wikipedia.getSite('en', 'wikipedia')  # Taking the default site
        page = wikipedia.Page(site, film)  # Calling the constructor
        if page.isRedirectPage():
            page = page.getRedirectTarget()
    else:
        page = film
    page_data = page.get()
    #print page_data
    page_data = page_data.split(u'\n')
    info_box = 0
    # Remove the leading "|".
    r = re.compile(r'^\s*\|\s*', re.UNICODE)
    info_re = re.compile(r'\s*{{\s*Infobox\s*film\s*',
                         re.IGNORECASE | re.UNICODE)
    # Remove spaces around "=".
    r1 = re.compile(r'\s*=\s*', re.UNICODE)
    # Remove comments.
    #r2 = re.compile(r'<!--.*-->')
    # Get the infobox data.
    for line in page_data:
        if len(line) == 0:
            continue
        if info_re.search(line) and info_box == 0:
            print 'Found infobox'
            info_box = 1
        elif (line == u'}}' or line == u'|}}') and info_box == 1:
            info_box = 0
            break
        elif info_box == 1:
            # Remove unnecessary data.
            line = r.sub('', line)
            #line = r2.sub('', line)
            line = r1.sub('=', line)
            print line
            info_box_data.append(line)
    # Store each "key=value" pair in the dictionary.
    for i in info_box_data:
        info_box_dict[i.split(u'=', 1)[0].strip()] = i.split(u'=', 1)[1].strip()
    #print info_box_data
    #print info_box_dict
    wikipedia.stopme()
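# A minimal usage sketch for getInfobox() above (an assumption, not part
# of the original script): it relies on info_box_dict being a module-level
# dict, as the function does; the film title and the 'director' key are
# hypothetical examples.
info_box_dict = {}
getInfobox(u'Blade Runner')
print info_box_dict.get(u'director')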
def post(unlock=True):
    """Remove the throttle file.

    Also removes the lockfile unless unlock is set to False.
    """
    if unlock and lockfile:
        try:
            os.remove(lockfile)
        except OSError:
            error(u"Unable to remove lockfile.")
    pywikibot.output(u"The script " + fullname + u" stopped at " + getTime())
    pywikibot.stopme()
    sys.exit()
def udate2wiki(pagename=u'', towiki=True):
    global data
    if towiki:
        myuserpage = u"ഉപയോക്താവ്:" + 'Manubot'
        mypage = myuserpage + "/BotLabs/" + pagename
        # Doing the job
        site = wikipedia.getSite('ml', 'wikipedia')
        page = wikipedia.Page(site, mypage)
        page.put(data, u'ബോട്ടിന്റെ കൂന്തി വിളയാട്ടം')
        wikipedia.stopme()
    else:
        f = codecs.open(pagename + u'.txt', encoding='utf-8', mode='w')
        f.write(data)
        f.close()
def main(*args):
    try:
        genFactory = GeneratorFactory()
        for arg in pywikibot.handleArgs():
            if not genFactory.handleArg(arg):
                pywikibot.showHelp('pagegenerators')
                break
        else:
            gen = genFactory.getCombinedGenerator()
            if gen:
                i = 0
                for page in gen:
                    i += 1
                    pywikibot.output("%4d: %s" % (i, page.title()),
                                     toStdout=True)
            else:
                pywikibot.showHelp('pagegenerators')
    finally:
        pywikibot.stopme()
def main(*args):
    try:
        list_page = pywikibot.Page(pywikibot.getSite(), 'User:Petrb/Weird pages')
        page_text = list_page.get()
        lines = page_text.split('\n')
        list_elems = lines[1:-2]
        summary = (u"[[User:HasteurBot|HasteurBot Task 5]]: Adding maint "
                   u"category to identified page")
        page_match = re.compile('\{\{AFC submission\|')
        page_match2 = re.compile('\{\{AfC submission\|')
        cat_match = re.compile(
            '\[\[Category\:AfC submissions with missing AfC template\]\]')
        limiter = 50 - 14
        for elem in list_elems:
            if excluded_page(elem):
                continue
            ind_page = pywikibot.Page(pywikibot.getSite(),
                                      u'Wikipedia talk:' + elem)
            if not ind_page.exists():
                continue
            if ind_page.isRedirectPage():
                ind_page = ind_page.getRedirectTarget()
            if ind_page.namespace() != 5:
                continue
            page_text = ind_page.get()
            if (page_match.match(page_text) is None
                    and page_match2.match(page_text) is None
                    and 'AfC_submissions_with_missing_AfC_template' not in page_text
                    and 'AfC submissions with missing AfC template' not in page_text):
                limiter = limiter - 1
                print elem
                imp_text = page_text + \
                    '\n[[Category:AfC submissions with missing AfC template]]'
                ind_page.put(imp_text, comment=summary)
            if limiter == 0:
                break
    finally:
        pywikibot.stopme()
def main():
    gen = None
    PageTitles = []
    # Read command-line parameters.
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])

if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
def log(self, msg=u''):
    self.setLogHeader()
    text = None
    try:
        text = self.logPage.get(force=True)
    except wikipedia.NoPage:
        print "ERROR: No log page found."
        wikipedia.stopme()
        exit(1)
    loc = text.find(self.header)
    if loc == -1:
        print "ERROR: No header found on log page."
        wikipedia.stopme()
        exit(1)
    loc += len(self.header) + 1
    log = '\n' + strftime(u'%H:%M:%S - ') + msg + u'<br />'
    text = text[0:loc] + log + text[loc:]
    print "logging: " + log[1:]
    self.logPage.put(text)
def main():
    global CD
    output(u'Running ' + __version__)
    CD = CommonsDelinker()
    output(u'This bot runs from: ' + str(CD.site))
    re._MAXCACHE = 4
    args = wikipedia.handleArgs()
    if '-since' in args:
        # NOTE: Untested
        ts_format = '%Y-%m-%d %H:%M:%S'
        try:
            since = time.strptime(args[args.index('-since') + 1], ts_format)
        except ValueError:
            # The timestamp may have been given in brackets and split
            # into two argv tokens; rejoin them before parsing.
            if args[args.index('-since') + 1][0] == '[' and \
                    len(args) != args.index('-since') + 2:
                since = time.strptime(
                    '%s %s' % (args[args.index('-since') + 1],
                               args[args.index('-since') + 2]),
                    '[%s]' % ts_format)
            else:
                raise ValueError('Incorrect time format!')
        output(u'Reading deletion log since [%s]' %
               time.strftime(ts_format, since))
        CD.last_check = time.mktime(since)
    try:
        try:
            CD.start()
        except Exception, e:
            if type(e) not in (SystemExit, KeyboardInterrupt):
                output('An exception occurred in the main thread!', False)
                traceback.print_exc(file=sys.stderr)
                threadpool.terminate()
    finally:
        output(u'Stopping CommonsDelinker')
        wikipedia.stopme()
        # Flush the standard streams.
        sys.stdout.flush()
        sys.stderr.flush()
def main():
    gen = None
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-newtem'):
            arg = arg.replace(':', '')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(100, False,
                                                             None, 10)
            else:
                # '-newtem' is 7 characters, so after stripping the colon
                # the page count starts at index 7.
                genfa = pagegenerators.NewpagesPageGenerator(int(arg[7:]),
                                                             False, None, 10)
            gen = pagegenerators.PreloadingGenerator(genfa, 60)
        else:
            gen = genFactory.handleArg(arg)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    #preloadingGen = pagegenerators.NamespaceFilterPageGenerator(gen, 10)
    add_nav(preloadingGen)
def main():
    global R
    import sys, traceback
    wikipedia.handleArgs()
    output(u'Running ' + __version__)
    try:
        try:
            # FIXME: Add support for single-process replacer.
            R = Replacer()
            output(u'This bot runs from: ' + str(R.site))
            R.start()
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception, e:
            output('A critical error has occurred! Aborting!')
            traceback.print_exc(file=sys.stderr)
    finally:
        output('Exiting replacer')
        wikipedia.stopme()
def main():
    gen = None
    # What we will add to the beginning of the title.
    pre = ""
    # What we will add to the end of the title.
    post = ""
    # What we will remove from the title.
    rem = []
    # Read command-line parameters.
    for arg in wikipedia.handleArgs():
        if arg.startswith('-append'):
            if len(arg) >= 8:
                post = arg[8:]
            else:
                post = wikipedia.input(u'Please enter the text to append:')
        elif arg.startswith('-file'):
            if len(arg) >= 6:
                textfilename = arg[6:]
                gen = pagegenerators.TextfilePageGenerator(textfilename)
        elif arg.startswith('-prefix'):
            if len(arg) >= 8:
                pre = arg[8:]
            else:
                pre = wikipedia.input(u'Please enter the text to prefix with:')
        elif arg.startswith('-remove'):
            if len(arg) >= 8:
                rem.append(arg[8:])
            else:
                rem.append(wikipedia.input(u'Please enter the text to remove:'))
    if not gen:
        # Syntax error; show the help text from the top of this file.
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        sys.exit()
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20)
    bot = RenameRobot(preloadingGen, pre, post, rem)
    bot.run()
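# Hypothetical invocations of the options parsed above (the script name
# rename.py is an assumption; the option syntax follows the parser):
#
#   python rename.py -file:titles.txt -prefix:"Old "
#   python rename.py -file:titles.txt -append:" (disambiguation)" -remove:"draft "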
def main(*args):
    try:
        genFactory = pagegenerators.GeneratorFactory()
        for arg in pywikibot.handleArgs(*args):
            if not genFactory.handleArg(arg):
                pywikibot.showHelp()
                break
        else:
            gen = genFactory.getCombinedGenerator()
            if gen:
                page_match = re.compile('\{\{AFC submission\|')
                summary = (u"[[User:HasteurBot|HasteurBot Task 3]]: Removing "
                           u"maint category that does not apply")
                disclude_list = [
                    u'Wikipedia talk:WikiProject Articles for creation',
                    u'Wikipedia talk:WikiProject Articles for creation/2013 5',
                    u'Wikipedia talk:WikiProject Articles for creation/2011',
                ]
                for article in gen:
                    if article.title() in disclude_list:
                        continue
                    art_text = article.get()
                    if page_match.match(art_text) is not None:
                        print article
                        art_1 = re.sub(
                            '\\\n\[\[\:Category\:AfC_submissions_with_missing_AfC_template\]\]',
                            '', art_text)
                        art_2 = re.sub(
                            '\\\n\[\[\:Category\:AfC submissions with missing AfC template\]\]',
                            '', art_1)
                        article.put(art_2, comment=summary)
            else:
                pywikibot.showHelp()
    finally:
        pywikibot.stopme()
def pre(name, lock=False):
    """Return the argument list, site object, and configuration of the script.

    This function also handles default arguments, generates a lockfile,
    and halts the script if a lockfile already exists.
    """
    global fullname, lockfile
    pywikibot.handleArgs("-log")
    fullname = name
    pywikibot.output(u"The script " + fullname + u" started at " + getTime())
    if lock:
        lockfile = os.path.abspath("../../tmp/" + basescript + ".py")
        if os.path.exists(lockfile):
            error(u"Lockfile found. Unable to execute the script.")
            pywikibot.stopme()
            sys.exit()
        open(lockfile, 'w').close()
    confpath = os.path.abspath("../../conf/" + basescript + ".py")
    if os.path.exists(confpath):
        module = imp.load_source("conf", confpath)
    else:
        module = None
    return pywikibot.handleArgs(), pywikibot.getSite(), module
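# A minimal sketch of the pre()/post() lifecycle defined in this section
# (an assumption, not part of the original script); "example-bot" is a
# hypothetical script name.
if __name__ == "__main__":
    args, site, conf = pre("example-bot", lock=True)
    try:
        pass  # ... do the actual bot work here ...
    finally:
        # Removes the lockfile, logs the stop time, and calls stopme().
        post()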
            namespaces.append(arg[len('-namespace:'):])
        else:
            argsList.append(arg)
    if operation is None:
        pywikibot.showHelp('templatecount')
    else:
        robot = TemplateCountRobot()
        if not argsList:
            argsList = templates
        choice = ''
        if 'reflist' in argsList:
            pywikibot.output(
                u'NOTE: it will take a long time to count "reflist".')
            choice = pywikibot.inputChoice(
                u'Proceed anyway?',
                ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y')
            if choice == 's':
                argsList.remove('reflist')
        if choice == 'n':
            return
        elif operation == "Count":
            robot.countTemplates(argsList, namespaces)
        elif operation == "List":
            robot.listTemplates(argsList, namespaces)

if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
    wikipedia.output(u'')
    wikipedia.output(u'(For checkboxes: An empty string evaluates to False; all others to True)')
    wikipedia.output(u'')
    while True:
        keys, values = [], []
        while True:
            try:
                keys.append(wikipedia.input(u'Which preference do you wish to set?'))
            except KeyboardInterrupt:
                return
            values.append(wikipedia.input(
                u"To what value do you wish to set '%s'?" % keys[-1]))
            if wikipedia.inputChoice(u"Set more preferences?",
                                     ['no', 'yes'], ['n', 'y'], 'n') == 'n':
                break
        if wikipedia.inputChoice(
                u"Set %s?" % u', '.join(u'%s:%s' % (key, value)
                                        for key, value in zip(keys, values)),
                ['yes', 'no'], ['y', 'n'], 'n') == 'y':
            set_all(keys, values, verbose=True)
            wikipedia.output(u"Preferences have been set on all wikis.")

if __name__ == '__main__':
    import sys
    sys.path.insert(1, '..')
    import wikipedia
    try:
        wikipedia.handleArgs()
        main()
    finally:
        wikipedia.stopme()
# -*- coding: utf-8 -*-
"""Splits an interwiki.log file into chunks of warnings separated by language."""
#
# (C) Rob W.W. Hooft, 2003
# (C) Pywikipedia bot team, 2004-2011
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: splitwarning.py 9482 2011-08-29 16:32:37Z xqt $'
#
import wikipedia as pywikibot
import codecs
import re

pywikibot.stopme()  # No need to have me on the stack - I don't contact the wiki

files = {}
count = {}
# TODO: Variable log filename
fn = pywikibot.config.datafilepath("logs", "interwiki.log")
logFile = codecs.open(fn, 'r', 'utf-8')
rWarning = re.compile('WARNING: (?P<family>.+?): \[\[(?P<code>.+?):.*')
for line in logFile:
    m = rWarning.match(line)
    if m:
        family = m.group('family')
        code = m.group('code')
        if code in pywikibot.getSite().languages():
            if not code in files:
                files[code] = codecs.open(
def __init__(self):
    for arg in pywikibot.handleArgs():
        if arg.startswith('-from'):
            if arg.startswith('-from:'):
                self.apfrom = arg[6:]
            else:
                self.apfrom = pywikibot.input(u'Which page to start from: ')
        elif arg.startswith('-reqsize:'):
            self.aplimit = int(arg[9:])
        elif arg == '-links':
            self.links = True
        elif arg == '-linksonly':
            self.links = True
            self.titles = False
        elif arg == '-replace':
            self.replace = True
        elif arg == '-redir':
            self.filterredir = 'all'
        elif arg == '-redironly':
            self.filterredir = 'redirects'
        elif arg.startswith('-limit:'):
            self.stopAfter = int(arg[7:])
        elif arg == '-autonomous' or arg == '-a':
            self.autonomous = True
        elif arg.startswith('-ns:'):
            self.namespaces.append(int(arg[4:]))
        elif arg.startswith('-wikilog:'):
            self.wikilogfile = arg[9:]
        elif arg.startswith('-failedlog:'):
            self.failedTitles = arg[11:]
        elif arg == '-failed':
            self.doFailed = True
        else:
            pywikibot.output(u'Unknown argument %s.' % arg)
            pywikibot.showHelp()
            pywikibot.stopme()
            sys.exit()
    if self.namespaces == [] and not self.doFailed:
        if self.apfrom == u'':
            # 0 should be after templates ns
            self.namespaces = [14, 10, 12, 0]
        else:
            self.namespaces = [0]
    if self.aplimit is None:
        self.aplimit = 200 if self.links else 'max'
    if not self.doFailed:
        self.queryParams = {
            'action': 'query',
            'generator': 'allpages',
            'gaplimit': self.aplimit,
            'gapfilterredir': self.filterredir,
        }
    else:
        self.queryParams = {'action': 'query'}
        if self.apfrom != u'':
            pywikibot.output(u'Argument "-from" is ignored with "-failed"')
    propParam = 'info'
    if self.links:
        propParam += '|links|categories'
        self.queryParams['pllimit'] = 'max'
        self.queryParams['cllimit'] = 'max'
    self.queryParams['prop'] = propParam
    self.site = pywikibot.getSite()
    if len(self.localSuspects) != len(self.latinSuspects):
        raise ValueError(u'Suspects must be the same size')
    if len(self.localKeyboard) != len(self.latinKeyboard):
        raise ValueError(u'Keyboard info must be the same size')
    if not os.path.isabs(self.wikilogfile):
        self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
    self.wikilog = self.OpenLogFile(self.wikilogfile)
    if not os.path.isabs(self.failedTitles):
        self.failedTitles = pywikibot.config.datafilepath(self.failedTitles)
    if self.doFailed:
        with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
            self.titleList = [self.Page(t) for t in f]
        self.failedTitles += '.failed'
    self.lclToLatDict = dict([(ord(self.localSuspects[i]),
                               self.latinSuspects[i])
                              for i in xrange(len(self.localSuspects))])
    self.latToLclDict = dict([(ord(self.latinSuspects[i]),
                               self.localSuspects[i])
                              for i in xrange(len(self.localSuspects))])
    if self.localKeyboard is not None:
        self.lclToLatKeybDict = dict([
            (ord(self.localKeyboard[i]), self.latinKeyboard[i])
            for i in xrange(len(self.localKeyboard))
        ])
        self.latToLclKeybDict = dict([
            (ord(self.latinKeyboard[i]), self.localKeyboard[i])
            for i in xrange(len(self.localKeyboard))
        ])
    else:
        self.lclToLatKeybDict = {}
        self.latToLclKeybDict = {}
    badPtrnStr = u'([%s][%s]|[%s][%s])' \
                 % (self.latLtr, self.localLtr, self.localLtr, self.latLtr)
    self.badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*'
                                  % (self.latLtr, self.localLtr, badPtrnStr,
                                     self.latLtr, self.localLtr))
    # Get whitelist
    self.knownWords = set()
    self.seenUnresolvedLinks = set()
    # TODO: handle "continue"
    if self.site.lang in self.whitelists:
        wlpage = self.whitelists[self.site.lang]
        pywikibot.output(u'Loading whitelist from %s' % wlpage)
        wlparams = {
            'action': 'query',
            'prop': 'links',
            'titles': wlpage,
            'redirects': '',
            'indexpageids': '',
            'pllimit': 'max',
        }
        data = query.GetData(wlparams)
        if len(data['query']['pageids']) == 1:
            pageid = data['query']['pageids'][0]
            links = data['query']['pages'][pageid]['links']
            allWords = [nn for n in links
                        for nn in self.FindBadWords(n['title'])]
            self.knownWords = set(allWords)
            # kw = set()
            # for w in allWords:
            #     if len(self.ProcessTitle(w)[1]) > 0:
            #         kw.add(w)
            # self.knownWords = kw
        else:
            raise ValueError(u'The number of pageids is not 1')
        pywikibot.output(u'Loaded whitelist with %i items'
                         % len(self.knownWords))
        if pywikibot.verbose and len(self.knownWords) > 0:
            pywikibot.output(u'Whitelist: %s'
                             % u', '.join([self.MakeLink(i, False)
                                           for i in self.knownWords]))
    else:
        pywikibot.output(u'Whitelist is not known for language %s'
                         % self.site.lang)
def doarticle(page):
    key = "4ZK9WURM"
    try:
        wikitext = page.get()
    except wikipedia.IsRedirectPage:
        print 'Skipping %s as it is a redirect' % str(page)
        return
    state0 = wikitext
    import urllib
    m = re.findall(r'\<ref\>ISBN(:| )(.*)\</ref\>', wikitext)
    if len(m) == 0:
        print 'No changes needed in %s' % str(page)
        return
    else:
        print 'Checking %s' % str(page)
        print m
    try:
        m = string.join(m, '')
        m = m[1:]
    except TypeError:
        m = m[0][1]
    isbn = m
    f = urllib.urlopen("http://isbndb.com/api/books.xml?access_key=" + key
                       + "&results=authors&index1=isbn&value1=" + m)
    s = f.read()
    f.close()
    print s
    realisbn = re.findall(r'<BookData (.*) isbn="(.*)" (.*)">', s)
    try:
        realisbn = string.join(realisbn, '')
        realisbn = realisbn[1:]
    except TypeError:
        realisbn = realisbn[0][1]
    print 'The official ISBN is: ' + realisbn
    title = re.findall(r'\<Title\>(.*)\</Title\>', s)
    try:
        title = string.join(title, '')
    except TypeError:
        title = title[0][1]
    print 'Title is: ' + title
    auth = re.findall(r'\<Person person_id="(.*)"\>(.*)\</Person\>', s)
    print auth
    if len(auth) == 1:
        auth = auth[0][1]
    elif len(auth) == 2:
        # Read the second author's name before overwriting the list.
        auth2 = auth[1][1]
        auth = auth[0][1]
    elif len(auth) >= 3:
        print 'Too many authors... skipping'
        return
    authsplit = re.split(',', auth)
    last = authsplit[0]
    first = authsplit[1]
    pubinfo = re.findall(
        r'\<PublisherText publisher_id="(.*)"\>(.*)</PublisherText\>', s)
    print pubinfo[0][0]
    a = urllib.urlopen("http://isbndb.com/api/publishers.xml?access_key=" + key
                       + "&results=details&index1=publisher_id&value1="
                       + pubinfo[0][0])
    a1 = a.read()
    a.close()
    a = a1
    print a
    pub = re.findall(r'\<Name\>(.*)\</Name\>', a)
    print pub[0]
    loc = re.findall(r'\<Details location="(.*)\((.*)\)" /\>', a)
    print loc[0][0]
    temp = (u'<ref name="' + title + '">{{cite book |title= ' + title
            + '|last= ' + last + '|first= ' + first + '|isbn= ' + realisbn
            + '|publisher=' + pub[0] + '|location=' + loc[0][0]
            + '}} <!--ISBN Converted by Bot --></ref>')
    wikitext = re.sub('\<ref\>ISBN(:| )(.*)\</ref\>', temp, wikitext)
    wikipedia.showDiff(state0, wikitext)
    done = False
    try:
        prompt = raw_input('Would you like to accept the changes? [y][n][q] ')
    except KeyboardInterrupt:
        print '\n'
        wikipedia.stopme()
        sys.exit()
    if prompt == 'y':
        page.put(wikitext, 'Fixing raw ISBN')
        done = True
    elif prompt == 'n':
        return False
    elif prompt == 'q':
        wikipedia.stopme()
        sys.exit()
    else:
        print 'Error: Invalid choice, skipping %s' % (str(page))
    return done
        for entry in sqldump.query_find(keyword):
            yield entry
    elif action == 'findr':
        keyword = wikipedia.input(u'Search for:')
        for entry in sqldump.query_findr(keyword):
            yield entry
    elif action == 'unmountedcats':
        for entry in sqldump.query_unmountedcats():
            yield entry
    elif action == 'baddisambiguation':
        for entry in sqldump.entries():
            if (entry.namespace == 0 and entry.title.endswith(')')
                    and entry.text.startswith("''")
                    and not entry.text.startswith("'''")):
                yield entry

if __name__ == "__main__":
    wikipedia.stopme()  # No need to have me on the stack, as I'm not contacting the wiki
    import sys
    action = None
    filename = None
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'sqldump')
        if arg:
            if arg.startswith('-sql'):
                if len(arg) == 4:
                    filename = wikipedia.input(
                        u'Please enter the SQL dump\'s filename: ')
                else:
                    filename = arg[5:]
            else:
                action = arg
    if not filename or not action:
        wikipedia.output(__doc__, 'utf-8')
            '--dry-run', action='store_true', dest='dry', default=False,
            help='do not make any changes')
        optionParser.add_option(
            '-v', '--verbose', action='store_true', dest='verbose',
            default=False, help='be verbose')
        (self.opts, _) = optionParser.parse_args()
        if not self.opts.file:
            optionParser.print_help()
            return
        # Parse the XML file
        (compId, namespace, langs, meanings) = self.parseFile()
        langs = self.dropInactive(langs)
        for meaning in meanings:
            # Update pages in the given cluster
            self.processMeaning(compId, namespace, langs, meaning)

if __name__ == "__main__":
    try:
        iac = InterwikiAnalysisCommit()
        iac.main()
    finally:
        wikipedia.stopme()
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()

if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
# -*- coding: utf-8 -*-
"""Splits an interwiki.log file into chunks of warnings separated by language."""
#
# (C) Rob W.W. Hooft, 2003
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
import wikipedia
import codecs
import re

wikipedia.stopme()  # No need to have me on the stack - I don't contact the wiki

files = {}
count = {}
# TODO: Variable log filename
fn = wikipedia.config.datafilepath("logs", "interwiki.log")
logFile = codecs.open(fn, 'r', 'utf-8')
rWarning = re.compile('WARNING: (?P<family>.+?): \[\[(?P<code>.+?):.*')
for line in logFile:
    m = rWarning.match(line)
    if m:
        family = m.group('family')
        code = m.group('code')
        if code in wikipedia.getSite().languages():
            if not code in files:
                files[code] = codecs.open(
                    wikipedia.config.datafilepath('logs',