def login(self, retry=False):
    """Attempt to log in to the wiki site.

    Prompts for the password if none is stored, then fetches and stores
    the login cookie.

    @param retry: if True, clear the cached password and retry once more
        after an API error (recursive call).
    @type retry: bool
    @return: True on success, False if login failed and no retry is left.
    @rtype: bool
    """
    if not self.password:
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.username, 'site': self.site},
            password=True)
    # self.password = self.password.encode(self.site.encoding())
    pywikibot.output(u"Logging in to %(site)s as %(name)s"
                     % {'name': self.username, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u"Login failed (%s)." % e.code)
        if retry:
            # Drop the cached password so the user is prompted again.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u"Should be logged in now")
    # # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site object after
    # login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot before actually using it!")
    #     return False
    return True
def input_list_choice(self, question, answers, default=None, force=False):
    """Ask the user to select one entry from a list of entries.

    The user may answer either with the 1-based number of an entry or
    with the entry text itself; the prompt repeats until a valid choice
    is made.

    @param question: prompt text shown to the user
    @param answers: sequence of selectable entries
    @param default: default answer passed through to ``self.input``
    @param force: passed through to ``self.input``
    @return: the selected entry from ``answers``
    """
    message = question
    clist = answers
    # Width of the numeric column: enough digits for the largest index.
    line_template = '{{0: >{0}}}: {{1}}'.format(
        int(math.log10(len(clist)) + 1))
    for n, i in enumerate(clist):
        pywikibot.output(line_template.format(n + 1, i))
    while True:
        choice = self.input(message, default=default, force=force)
        try:
            # User typed a choice number (1-based).
            choice = int(choice) - 1
        except ValueError:
            try:
                # User typed the entry text itself.
                choice = clist.index(choice)
            except ValueError:
                # BUG FIX: list.index raises ValueError (not IndexError)
                # when the value is absent; catching the wrong exception
                # let invalid text answers crash the prompt loop.
                choice = -1
        if 0 <= choice < len(clist):
            return clist[choice]
        else:
            pywikibot.error('Invalid response')
def revert(self, item):
    """Revert or roll back the latest edit(s) on the page named in *item*.

    @param item: mapping with at least a 'title' key naming the page
    @return: the edit comment / status message on success, False when
        there is no previous revision or the rollback API call fails.
    """
    # Fetch the two most recent revisions; rev[1] is the one to restore.
    history = pywikibot.Page(self.site, item['title']).fullVersionHistory(
        total=2, rollback=self.rollback)
    if len(history) > 1:
        rev = history[1]
    else:
        # Only one revision: nothing to revert to.
        return False
    # rev tuple layout (inferred from use): 0=revid, 1=timestamp,
    # 2=author, 3=text, 4=rollback token -- TODO confirm against
    # fullVersionHistory's documented return value.
    comment = i18n.twtranslate(pywikibot.Site(), 'revertbot-revert',
                               {'revid': rev[0], 'author': rev[2],
                                'timestamp': rev[1]})
    if self.comment:
        comment += ': ' + self.comment
    page = pywikibot.Page(self.site, item['title'])
    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                     % page.title(asLink=True, forceInterwiki=True,
                                  textlink=True))
    if not self.rollback:
        # Manual revert: save the older revision's text as a new edit.
        old = page.text
        page.text = rev[3]
        pywikibot.showDiff(old, page.text)
        page.save(comment)
        return comment
    # Rollback path: use the API rollback action with the stored token.
    try:
        pywikibot.data.api.Request(action="rollback", title=page.title(),
                                   user=self.user, token=rev[4],
                                   markbot=1).submit()
    except pywikibot.data.api.APIError as e:
        if e.code == 'badtoken':
            pywikibot.error("There was an API token error rollbacking the edit")
        else:
            pywikibot.exception()
        return False
    return u"The edit(s) made in %s by %s was rollbacked" % (page.title(),
                                                             self.user)
def delete_redirect(self, page, summary_key):
    """Delete the redirect page, or tag it for speedy deletion.

    If the bot has sysop rights the page is deleted directly; otherwise
    a speedy-deletion template is prepended (when the wiki provides
    one via i18n).

    @param page: the redirect page to delete
    @param summary_key: i18n key for the deletion reason
    """
    assert page.site == self.site, (
        'target page is on different site {0}'.format(page.site))
    reason = i18n.twtranslate(self.site, summary_key)
    if page.site.logged_in(sysop=True):
        page.delete(reason, prompt=False)
    elif i18n.twhas_key(page.site, 'redirect-broken-redirect-template'):
        pywikibot.output(u"No sysop in user-config.py, "
                         u"put page to speedy deletion.")
        try:
            content = page.get(get_redirect=True)
        except pywikibot.SectionError:
            # The title contained a section anchor; retry without it.
            content_page = pywikibot.Page(page.site,
                                          page.title(withSection=False))
            content = content_page.get(get_redirect=True)
        # TODO: Add bot's signature if needed (Bug: T131517)
        content = i18n.twtranslate(
            page.site, 'redirect-broken-redirect-template') + '\n' + content
        try:
            page.put(content, reason)
        except pywikibot.PageSaveRelatedError as e:
            pywikibot.error(e)
    else:
        pywikibot.output(u'No speedy deletion template available')
def _template_link_target(self, item, link_text):
    """Resolve *link_text* to a Wikidata item suitable for linking.

    Returns None (after logging why) when the text is not a valid
    title, the page is missing, has no Wikidata item, or resolves to
    *item* itself.
    """
    wikilink = pywikibot.Link(link_text)
    try:
        target_page = pywikibot.Page(wikilink)
    except pywikibot.exceptions.InvalidTitle:
        pywikibot.error('%s is not a valid title so it cannot be linked. '
                        'Skipping.' % link_text)
        return

    if not target_page.exists():
        pywikibot.output('%s does not exist so it cannot be linked. '
                         'Skipping.' % (target_page))
        return

    # Follow a redirect to its final target before looking up the item.
    if target_page.isRedirectPage():
        target_page = target_page.getRedirectTarget()

    try:
        target_item = pywikibot.ItemPage.fromPage(target_page)
    except pywikibot.NoPage:
        target_item = None

    if target_item is None or not target_item.exists():
        pywikibot.output('%s does not have a wikidata item to link with. '
                         'Skipping.' % (target_page))
        return

    # Refuse self-links.
    if target_item.title() == item.title():
        pywikibot.output('%s links to itself. Skipping.' % (target_page))
        return

    return target_item
def run(self):
    """Run the daily AdT ('Artikel des Tages') maintenance steps.

    Each maintenance step is wrapped in its own try/except so that a
    failure in one step does not abort the remaining ones.
    """
    # NOTE(review): .decode('utf-8') on the strftime() result only works
    # on Python 2 (bytes); under Python 3 this raises AttributeError.
    pywikibot.output(u'\n\ninit complete: ' +
                     (datetime.datetime.now()
                      .strftime('%d. %B %Y, %H:%M:%S')).decode('utf-8'))
    if self.adtTitle is not None:
        pywikibot.output(u'Heutiger AdT: ' + self.adtTitle)
        try:
            self.addto_verwaltung()
        except Exception as inst:
            pywikibot.output(u'ERROR: ' + str(type(inst)))
            pywikibot.output(inst)
        try:
            self.addto_chron()
        except Exception as inst:
            pywikibot.output(u'ERROR: ' + str(type(inst)))
            pywikibot.output(inst)
        try:
            self.add_template()
        except Exception as inst:
            pywikibot.output(u'ERROR: ' + str(type(inst)))
            pywikibot.output(inst)
        # self.cleanup_templates()
        # Purge yesterdays AdT disc page
        yesterday = self.today - datedelta.relativedelta(days=1)
        self.get_adt(yesterday)
        if self.adtTitle is not None:
            pywikibot.output(u'Purge Disc. von ' + self.adtTitle)
            # ns=1 is the talk namespace of the article.
            page = pywikibot.Page(self.site, self.adtTitle, ns=1)
            page.purge()
    else:
        pywikibot.error(u'Konnte heutigen AdT nicht finden!')
def listchoice(clist, message=None, default=None):
    """Ask the user to select one entry from a list of entries.

    The user may answer with the 1-based entry number or the entry text;
    an empty answer selects *default* when one is given. The prompt
    repeats until a valid choice is made.

    @param clist: sequence of selectable entries
    @param message: prompt text (defaults to "Select")
    @param default: value returned for an empty answer
    @return: the selected entry (or *default*)
    """
    if not message:
        message = u"Select"

    if default:
        message += u" (default: %s)" % default

    message += u": "

    # Width of the numeric column: enough digits for the largest index.
    line_template = u"{{0: >{0}}}: {{1}}".format(int(math.log10(len(clist)) + 1))
    for n, i in enumerate(clist):
        pywikibot.output(line_template.format(n + 1, i))

    while True:
        choice = pywikibot.input(message)

        if choice == '' and default:
            return default
        try:
            # User typed a choice number (1-based).
            choice = int(choice) - 1
        except ValueError:
            try:
                # User typed the entry text itself.
                choice = clist.index(choice)
            except ValueError:
                # BUG FIX: list.index raises ValueError (not IndexError)
                # when the value is absent; the old IndexError handler
                # let invalid text answers escape the prompt loop.
                choice = -1

        if 0 <= choice < len(clist):
            return clist[choice]
        else:
            pywikibot.error("Invalid response")
def treat_page(self):
    """Update the local 'picture of the day' page from Commons.

    Reads today's POTD filename and caption from templates on Commons
    (falling back to the English caption when the local-language one is
    missing) and writes a switch template to the current page.
    """
    commons = pywikibot.Site(code = u'commons', fam = u'commons')
    today = datetime.date.today()
    # fileTemplate = pywikibot.Page(commons, u'Template:Potd filename')
    # captionTemplate = pywikibot.Page(commons, u'Template:Potd description')
    # (Potd page, POTD description)
    filePage = pywikibot.Page(commons, u'Template:Potd/%s' % today.isoformat())
    file = get_template_parameter_value(filePage, u'Potd filename', u'1')
    # TODO: use languages instead of lang
    captionPage = pywikibot.Page(commons, u'Template:Potd/%s (%s)'
                                 % (today.isoformat(),
                                    self.current_page.site.lang))
    if self.current_page.site.lang != u'en' and not captionPage.exists():
        pywikibot.warning(u'%s does not exist'
                          % captionPage.title(asLink=True))
        # try en instead
        captionPage = pywikibot.Page(commons, u'Template:Potd/%s (en)'
                                     % today.isoformat())
    caption = get_template_parameter_value(captionPage,
                                           u'Potd description', u'1')
    # TODO: Complete caption parsing to fix links (if not an interwiki
    # then make it an interwiki to Commons)
    # Force links to start with ':'
    caption = re.sub(r"\[\[([^:])", r"[[:\1", caption, flags=re.UNICODE)
    # Make category links interwiki links
    caption = re.sub(r"\[\[(:Category:)", r"[[:c\1", caption,
                     flags=re.UNICODE | re.IGNORECASE)
    # TODO: Use [[d:Q4608595]] to get the local {{Documentation}}
    doc = u'Documentation'
    if file != u'':
        summary = u'Updating Commons picture of the day'
        if caption != u'':
            summary = summary + u', [[:c:%s|caption attribution]]' % captionPage.title()
        else:
            summary = summary + u', failed to parse caption'
            pywikibot.error(u'Failed to parse parameter 1 from {{Potd description}} on %s'
                            % captionPage.title(asLink=True))
        self.put_current(u'<includeonly>{{#switch:{{{1|}}}|caption=%s|#default=%s}}</includeonly><noinclude>\n{{%s}}</noinclude>'
                         % (caption, file, doc),
                         summary=summary, minor=False)
    else:
        pywikibot.error(u'Failed to parse parameter 1 from {{Potd filename}} on %s'
                        % filePage.title(asLink=True))
def translate(self, string):
    """Translate expiry time string into german."""
    # English token -> German replacement (lookup is case-insensitive,
    # the replacement preserves the token as it appeared in *string*).
    table = {
        'gmt': 'UTC',
        'mon': 'Montag',
        'sat': 'Samstag',
        'sun': 'Sonntag',
        'second': 'Sekunde',
        'seconds': 'Sekunden',
        'min': 'Min.',
        'minute': 'Minute',
        'minutes': 'Minuten',
        'hour': 'Stunde',
        'hours': 'Stunden',
        'day': 'Tag',
        'days': 'Tage',
        'week': 'Woche',
        'weeks': 'Wochen',
        'month': 'Monat',
        'months': 'Monate',
        'year': 'Jahr',
        'years': 'Jahre',
        'infinite': 'unbeschränkt',
        'indefinite': 'unbestimmt',
    }
    # Scan alphabetic tokens and substitute each known one in turn.
    for token in re.findall('([DHIMSWYa-z]+)', string):
        replacement = table.get(token.lower())
        if replacement is None:
            pywikibot.error(token + ' not found.')
        else:
            string = string.replace(token, replacement)
    return string
def _ocr_callback(self, cmd_uri, parser_func=None): """OCR callback function. @return: tuple (error, text [error description in case of error]). """ def id(x): return x if not cmd_uri: raise ValueError('Parameter cmd_uri is mandatory.') if parser_func is None: parser_func = id if not callable(parser_func): raise TypeError('Keyword parser_func must be callable.') # wrong link fail with Exceptions try: response = http.fetch(cmd_uri, charset='utf-8') except Exception as e: pywikibot.error('Querying %s: %s' % (cmd_uri, e)) return (True, e) data = json.loads(response.content) assert 'error' in data, 'Error from phe-tools: %s' % data assert data['error'] in [0, 1], 'Error from phe-tools: %s' % data error = bool(data['error']) if error: pywikibot.error('Querying %s: %s' % (cmd_uri, data['text'])) return (error, data['text']) else: return (error, parser_func(data['text']))
def _call_cmd(args, lib='djvulibre'):
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: sequence or string
    @param lib: library name to be used in error logging messages
    @type lib: string
    @return: returns a tuple (res, stdoutdata), where
        res is True if dp.returncode != 0 else False
    """
    if not isinstance(args, StringTypes):
        # upcast if any param in sequence args is not in StringTypes
        args = [str(a) if not isinstance(a, StringTypes) else a for a in args]
        # Human-readable command line, only used for log messages.
        cmd = ' '.join(args)
    else:
        cmd = args

    dp = subprocess.Popen(args, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()

    if dp.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)

    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))
    return (True, stdoutdata)
def new_from_site(cls, site):
    """Create an instance from the repository-configured page of *site*.

    Returns None (after logging the error) when the site has no data
    repository support.
    """
    try:
        source = site.page_from_repository('Q10784379')
    except (NotImplementedError, UnknownExtension) as err:
        pywikibot.error(err)
        return None
    else:
        return cls.new_from_text(source.text, site.dbName())
def main(*args):
    """Process command line arguments and run the commonscat cleaning bot.

    Options of the form ``-name:value`` become bot options (numeric
    values are converted to int); bare ``-name`` becomes a boolean flag.
    When no page generator is given, the wiki's maintenance category
    (repository item Q11925744) is used instead.
    """
    options = {}
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg.startswith('-'):
            arg, sep, value = arg.partition(':')
            if value != '':
                options[arg[1:]] = value if not value.isdigit() else int(value)
            else:
                options[arg[1:]] = True
    generator = genFactory.getCombinedGenerator(preload=True)
    site = pywikibot.Site()
    if not generator:
        try:
            category = site.page_from_repository('Q11925744')
        except (NotImplementedError, UnknownExtension) as e:
            pywikibot.error(e)
            return
        if not category:
            pywikibot.output("%s doesn't have an appropriate category"
                             % site)
            return
        # Work on both the articles and the subcategories of the category.
        gen_combined = pagegenerators.CombinedPageGenerator(
            [category.articles(namespaces=0), category.subcategories()])
        generator = pagegenerators.WikibaseItemFilterPageGenerator(
            gen_combined)
    bot = CommonscatCleaningBot(generator, site=site, **options)
    bot.run()
def login(self, retry=False, force=False):
    """
    Attempt to log into the server via the OAuth handshake.

    Opens a web browser for the user to authorize the consumer and
    stores the resulting access token.

    @param retry: infinitely retry if exception occurs during
        authentication.
    @type retry: bool
    @param force: force to re-authenticate
    @type force: bool
    """
    if self.access_token is None or force:
        pywikibot.output(
            'Logging in to {site!s} via OAuth consumer {key!s}'.format(
                **{'key': self.consumer_token[0], 'site': self.site}))
        consumer_token = mwoauth.ConsumerToken(self.consumer_token[0],
                                               self.consumer_token[1])
        handshaker = mwoauth.Handshaker(
            self.site.base_url(self.site.path()), consumer_token)
        try:
            # Step 1: get the authorization URL and a request token.
            redirect, request_token = handshaker.initiate()
            pywikibot.stdout('Authenticate via web browser..')
            webbrowser.open(redirect)
            pywikibot.stdout('If your web browser does not open '
                             'automatically, please point it to: %s'
                             % redirect)
            # Step 2: exchange the callback query string for the token.
            request_qs = pywikibot.input('Response query string: ')
            access_token = handshaker.complete(request_token, request_qs)
            self._access_token = (access_token.key, access_token.secret)
        except Exception as e:
            pywikibot.error(e)
            if retry:
                self.login(retry=True, force=force)
    else:
        pywikibot.output(
            'Logged in to {site!s} via consumer {key!s}'.format(
                **{'key': self.consumer_token[0], 'site': self.site}))
def add_mbid_claim_to_item(pid, item, mbid, donefunc, simulate=False):
    """
    Add a claim with property `pid` and value `mbid` to `item` and call
    `donefunc` with `mbid` to signal the completion.

    :type pid: str
    :type mbid: str
    :type item: pywikibot.ItemPage
    :param donefunc: callable invoked with `mbid` after the claim and
        its source have been stored
    :param simulate: when True, only log what would be done
    """
    claim = wp.Claim(const.WIKIDATA, pid)
    claim.setTarget(mbid)
    wp.output(u"Adding property {pid}, value {mbid} to {title}".format
              (pid=pid, mbid=mbid, title=item.title()))
    if simulate:
        wp.output("Simulation, no property has been added")
        return
    try:
        item.addClaim(claim, True)
    except wp.UserBlocked:
        # Unused exception binding removed; being blocked is fatal.
        wp.error("I have been blocked")
        exit(1)
    except wp.Error as e:
        wp.warning(e)
        return
    else:
        wp.output("Adding the source Claim")
        claim.addSource(const.MUSICBRAINZ_CLAIM, bot=True)
        donefunc(mbid)
def _oauth_login(site):
    """Log in to *site* via OAuth and print the user-config token line.

    Validates that the OAuth identity matches the configured username
    before printing the ``authenticate[...]`` line for user-config.py.
    """
    consumer_key, consumer_secret = _get_consumer_token(site)
    login_manager = OauthLoginManager(consumer_secret, False, site,
                                      consumer_key)
    login_manager.login()
    identity = login_manager.identity
    if identity is None:
        pywikibot.error('Invalid OAuth info for %(site)s.' % {'site': site})
    elif site.username() != identity['username']:
        pywikibot.error('Logged in on %(site)s via OAuth as %(wrong)s, '
                        'but expect as %(right)s'
                        % {'site': site,
                           'wrong': identity['username'],
                           'right': site.username()})
    else:
        oauth_token = login_manager.consumer_token + login_manager.access_token
        # BUG FIX: the two adjacent string literals concatenated without
        # a space, producing "...as <user>via OAuth consumer...".
        pywikibot.output('Logged in on %(site)s as %(username)s '
                         'via OAuth consumer %(consumer)s'
                         % {'site': site,
                            'username': site.username(sysop=False),
                            'consumer': consumer_key})
        pywikibot.output('NOTE: To use OAuth, you need to copy the '
                         'following line to your user-config.py:')
        pywikibot.output('authenticate[\'%(hostname)s\'] = %(oauth_token)s'
                         % {'hostname': site.hostname(),
                            'oauth_token': oauth_token})
def upload_image(self, html, data, imgfile): site = self.targetSite # Construct the name commons_filename = "AMH-%s-%s_%s.jpg" % ( data["amh_id"], data["institution_shortcode"].upper(), data["title_en"][:150] ) if self.page_exists(commons_filename): pywikibot.output("%s already exists, skipping" % commons_filename) return imagepage = pywikibot.ImagePage(site, commons_filename) # normalizes filename imagepage.text = html pywikibot.output(u'Uploading file %s to %s via API....' % (commons_filename, site)) try: site.upload(imagepage, source_filename = imgfile) except pywikibot.UploadWarning as warn: pywikibot.output(u"We got a warning message: ", newline=False) pywikibot.output(str(warn)) except Exception as e: pywikibot.error("Upload error: ", exc_info=True) else: # No warning, upload complete. pywikibot.output(u"Upload successful.")
def cleanup_templates(self):
    """Remove {{AdT-Vorschlag Hinweis}} from settled AdT proposal pages.

    Iterates over finished proposals (self.erl_props), skipping ones
    that are proposed again (self.props), and strips the notice
    template from each talk page.
    """
    # NOTE(review): uses the Python 2 `unicode` builtin; this method
    # will not run unchanged on Python 3.
    for adt in self.erl_props:
        if adt in self.props:
            # proposed for AdT more than once; keep the notice
            continue
        page = pywikibot.Page(self.site, adt, ns=1)
        if not page.exists():
            pywikibot.error(u'ERROR: disc for AdT-Vorschlag ' + adt +
                            u' does not exist!')
            return
        oldtext = page.text
        code = mwparser.parse(page.text)
        for template in code.filter_templates(recursive=False):
            if template.name.matches("AdT-Vorschlag Hinweis"):
                code.remove(template)
                pywikibot.output(adt + u': {{AdT-Vorschlag Hinweis}} '
                                 u'gefunden, entfernt')
        page.text = unicode(code)
        if page.text == oldtext:
            # Nothing was removed; no edit needed.
            continue
        page.text = page.text.lstrip(u'\n')
        pywikibot.showDiff(oldtext, page.text)
        comment = u'Bot: [[Vorlage:AdT-Vorschlag Hinweis]] entfernt'
        if not self.dry:
            page.save(comment=comment, botflag=True, minor=True)
def main():
    """Process command line arguments and process the API cache entries.

    Supported arguments: ``-delete`` (delete matched entries),
    ``-password`` (select entries containing a password), ``-c EXPR``
    (a Python expression evaluated per entry as a filter); any other
    argument is treated as a cache directory path.
    """
    local_args = pywikibot.handleArgs()
    cache_paths = None
    delete = False
    command = None

    for arg in local_args:
        if command == '':
            # Previous argument was '-c'; this one is the expression.
            command = arg
        elif arg == '-delete':
            delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        else:
            if not cache_paths:
                cache_paths = [arg]
            else:
                cache_paths.append(arg)

    func = None
    if not cache_paths:
        cache_paths = ['apicache', 'tests/apicache']
        # Also process the base directory, if it isnt the current directory
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(pywikibot.config2.base_dir, 'apicache')]
        # Also process the user home cache, if it isnt the config directory
        if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(os.path.expanduser('~/.pywikibot'), 'apicache')]

    if delete:
        action_func = lambda entry: entry._delete()
    else:
        action_func = lambda entry: pywikibot.output(entry)

    if command:
        # NOTE: eval of a user-supplied expression -- acceptable only
        # because this is a local maintenance script run by the operator.
        try:
            command_func = eval('lambda entry: ' + command)
        except:
            pywikibot.exception()
            pywikibot.error(u'Can not compile command: %s' % command)
            exit(1)
        # Only act on entries for which the filter expression is truthy.
        func = lambda entry: command_func(entry) and action_func(entry)
    else:
        func = action_func

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output(u'Processing %s' % cache_path)
        process_entries(cache_path, func)
def revert(self, item):
    """Revert or roll back the latest edit(s) on the page named in *item*.

    @param item: mapping with at least a 'title' key naming the page
    @return: the edit comment / status message on success, False when
        there is no previous revision or the rollback API call fails.
    """
    # Fetch the two most recent revisions; rev[1] is the one to restore.
    history = pywikibot.Page(self.site, item["title"]).fullVersionHistory(
        total=2, rollback=self.rollback)
    if len(history) > 1:
        rev = history[1]
    else:
        # Only one revision: nothing to revert to.
        return False
    # rev tuple layout (inferred from use): 0=revid, 1=timestamp,
    # 2=author, 3=text, 4=rollback token -- TODO confirm against
    # fullVersionHistory's documented return value.
    comment = i18n.twtranslate(
        pywikibot.Site(), "revertbot-revert",
        {"revid": rev[0], "author": rev[2], "timestamp": rev[1]}
    )
    if self.comment:
        comment += ": " + self.comment
    page = pywikibot.Page(self.site, item["title"])
    pywikibot.output(
        "\n\n>>> \03{lightpurple}%s\03{default} <<<"
        % page.title(asLink=True, forceInterwiki=True, textlink=True)
    )
    if not self.rollback:
        # Manual revert: save the older revision's text as a new edit.
        old = page.text
        page.text = rev[3]
        pywikibot.showDiff(old, page.text)
        page.save(comment)
        return comment
    # Rollback path: use the API rollback action with the stored token.
    try:
        pywikibot.data.api.Request(
            self.site,
            parameters={"action": "rollback", "title": page,
                        "user": self.user, "token": rev[4],
                        "markbot": True},
        ).submit()
    except pywikibot.data.api.APIError as e:
        if e.code == "badtoken":
            pywikibot.error("There was an API token error rollbacking the edit")
        else:
            pywikibot.exception()
        return False
    return "The edit(s) made in %s by %s was rollbacked" % (page.title(),
                                                            self.user)
def run(self): """Run bot.""" # early check that upload is enabled if self.targetSite.is_uploaddisabled(): pywikibot.error( "Upload error: Local file uploads are disabled on %s." % self.targetSite) return # early check that user has proper rights to upload if "upload" not in self.targetSite.userinfo["rights"]: pywikibot.error( "User '%s' does not have upload rights on site %s." % (self.targetSite.user(), self.targetSite)) return try: if isinstance(self.url, basestring): self._treat_counter = 1 return self.upload_file(self.url) for file_url in self.url: self.upload_file(file_url) self._treat_counter += 1 except QuitKeyboardInterrupt: pywikibot.output('\nUser quit %s bot run...' % self.__class__.__name__) except KeyboardInterrupt: if config.verbose_output: raise else: pywikibot.output('\nKeyboardInterrupt during %s bot run...' % self.__class__.__name__) finally: self.exit()
def put_page(self, page, new):
    """
    Print diffs between original and new (text), put new text for page.

    Prompts the user unless self.acceptall is already set; answering
    'a' enables accept-all for the rest of the run.
    """
    # NOTE(review): `async=True` is a syntax error on Python 3.7+ where
    # `async` is a keyword -- this method is Python 2 era code.
    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                     % page.title())
    pywikibot.showDiff(page.get(), new)
    if not self.acceptall:
        choice = pywikibot.inputChoice(u'Do you want to accept ' +
                                       u'these changes?',
                                       ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
        if choice == 'a':
            self.acceptall = True
        if choice == 'y':
            page.text = new
            page.save(self.msg, async=True)
    if self.acceptall:
        try:
            page.text = new
            page.save(self.msg)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title(),))
        except pywikibot.SpamfilterError as e:
            pywikibot.output(
                u'Cannot change %s because of blacklist entry %s'
                % (page.title(), e.url))
        except pywikibot.PageNotSaved as error:
            pywikibot.error(u'putting page: %s' % (error.args,))
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s (locked page)'
                             % (page.title(),))
        except pywikibot.ServerError as e:
            pywikibot.output(u'Server Error : %s' % e)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Non-option arguments are consumed pairwise as (property, value)
    claims; supported value datatypes are wikibase-item, string and
    globe-coordinate.

    @param args: command line arguments
    @type args: list of unicode
    @rtype: bool
    """
    exists_arg = ''
    commandline_claims = list()

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pagegenerators.GeneratorFactory()

    for arg in local_args:
        # Handle args specifying how to handle duplicate claims
        if arg.startswith('-exists:'):
            exists_arg = arg.split(':')[1]
            continue
        # Handle page generator args
        if gen.handleArg(arg):
            continue
        commandline_claims.append(arg)
    if len(commandline_claims) % 2:
        pywikibot.error('Incomplete command line property-value pair.')
        return False

    claims = list()
    repo = pywikibot.Site().data_repository()
    # Build one Claim per (property, value) pair from the command line.
    for i in range(0, len(commandline_claims), 2):
        claim = pywikibot.Claim(repo, commandline_claims[i])
        if claim.type == 'wikibase-item':
            target = pywikibot.ItemPage(repo, commandline_claims[i + 1])
        elif claim.type == 'string':
            target = commandline_claims[i + 1]
        elif claim.type == 'globe-coordinate':
            coord_args = [
                float(c) for c in commandline_claims[i + 1].split(',')]
            if len(coord_args) >= 3:
                precision = coord_args[2]
            else:
                precision = 0.0001  # Default value (~10 m at equator)
            target = pywikibot.Coordinate(coord_args[0], coord_args[1],
                                          precision=precision)
        else:
            raise NotImplementedError(
                "%s datatype is not yet supported by claimit.py"
                % claim.type)
        claim.setTarget(target)
        claims.append(claim)

    generator = gen.getCombinedGenerator()
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    bot = ClaimRobot(generator, claims, exists_arg)
    bot.run()
    return True
def add_mbid_claim_to_item(self, item, mbid):
    """
    Add a claim with property `self.property_id` and value `mbid` to
    `item` and call `self.donefunc` with `mbid` to signal completion.

    :type mbid: str
    :type item: pywikibot.ItemPage
    """
    claim = wp.Claim(const.WIKIDATA_DATASITE, self.property_id)
    claim.setTarget(mbid)
    wp.debug(u"Adding property {pid}, value {mbid} to {title}".format
             (pid=self.property_id, mbid=mbid, title=item.title()),
             layer="")
    if wp.config.simulate:
        wp.output("Simulation, no property has been added")
        return
    try:
        item.addClaim(claim, True)
    except wp.UserBlocked:
        # Unused exception binding removed; being blocked is fatal.
        wp.error("I have been blocked")
        exit(1)
    except wp.Error as e:
        wp.warning(e)
        return
    else:
        wp.debug("Adding the source Claim", layer="")
        claim.addSources([const.MUSICBRAINZ_CLAIM, const.RETRIEVED_CLAIM],
                         bot=True)
        self.donefunc(mbid)
def get_wikidata_itempage_from_wikilink(wikilink):
    """Given a link to a wikipedia page, retrieve its page on Wikidata"""
    parts = urlparse(wikilink)
    title = parts.path.replace(WIKI_PREFIX, "")

    if "wikipedia" in parts.netloc:
        # Language code is the first label of the hostname.
        language_code = parts.netloc.split(".")[0]
        source_site = wp.Site(language_code, "wikipedia")
        article = wp.Page(source_site, title)
        check_url_needs_to_be_skipped(wikilink, article)
        try:
            item = wp.ItemPage.fromPage(article)
        except wp.NoPage:
            wp.error("%s does not exist" % article)
            return None
    elif "wikidata" in parts.netloc:
        item = wp.ItemPage(const.WIKIDATA_DATASITE, title)
    else:
        raise ValueError("%s is not a link to a wikipedia page" % wikilink)

    # Load the item (following redirects) to make sure it exists.
    try:
        item.get(get_redirect=True)
    except wp.NoPage:
        wp.error("%s does not exist" % title)
        return None

    check_url_needs_to_be_skipped(wikilink, item)
    return item
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ # Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() csv_dir = None for arg in local_args: if arg.startswith("-csvdir:"): csv_dir = arg[8:] else: genFactory.handleArg(arg) config_generator = genFactory.getCombinedGenerator() if not config_generator or not csv_dir: pywikibot.bot.suggest_help( missing_parameters=[] if csv_dir else ["-csvdir"], missing_generator=not config_generator ) return False for config_page in config_generator: try: config_page.get() except pywikibot.NoPage: pywikibot.error("%s does not exist" % config_page) continue configuration = DataIngestionBot.parseConfigurationPage(config_page) filename = os.path.join(csv_dir, configuration["csvFile"]) try: f = codecs.open(filename, "r", configuration["csvEncoding"]) except (IOError, OSError) as e: pywikibot.error("%s could not be opened: %s" % (filename, e)) continue try: files = CSVReader( f, urlcolumn="url", site=config_page.site, dialect=configuration["csvDialect"], delimiter=str(configuration["csvDelimiter"]), ) bot = DataIngestionBot(files, configuration["titleFormat"], configuration["formattingTemplate"], site=None) bot.run() finally: f.close()
def searchSirutaInWD(self, siruta):
    """Find the unique Wikidata item whose SIRUTA code (P843) matches.

    Returns the single matching item, or None (after logging an error)
    when zero or several items carry the code.
    """
    query = "SELECT ?item WHERE { ?item wdt:P843 \"%d\" . SERVICE wikibase:label { bd:serviceParam wikibase:language \"ro\" }}" % siruta
    matches = sparql.SparqlQuery().get_items(query, result_type=list)
    if len(matches) == 1:
        return matches[0]
    pywikibot.error("There are %d items with siruta %d"
                    % (len(matches), siruta))
    return None
def main():
    """Process command line arguments and process the API cache entries.

    Supported arguments: ``-delete`` (delete matched entries),
    ``-password`` (select entries containing a password), ``-c EXPR``
    (a Python expression evaluated per entry as a filter); any other
    argument is treated as a cache directory path.
    """
    local_args = pywikibot.handleArgs()
    cache_paths = None
    delete = False
    command = None

    for arg in local_args:
        if command == "":
            # Previous argument was '-c'; this one is the expression.
            command = arg
        elif arg == "-delete":
            delete = True
        elif arg == "-password":
            command = "has_password(entry)"
        elif arg == "-c":
            if command:
                pywikibot.error("Only one command may be executed.")
                exit(1)
            command = ""
        else:
            if not cache_paths:
                cache_paths = [arg]
            else:
                cache_paths.append(arg)

    func = None
    if not cache_paths:
        cache_paths = ["apicache", "tests/apicache"]
        # Also process the base directory, if it isnt the current directory
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(pywikibot.config2.base_dir, "apicache")]
        # Also process the user home cache, if it isnt the config directory
        if os.path.expanduser("~/.pywikibot") != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(os.path.expanduser("~/.pywikibot"),
                             "apicache")]

    if delete:
        action_func = lambda entry: entry._delete()
    else:
        action_func = lambda entry: pywikibot.output(entry)

    if command:
        # NOTE: eval of a user-supplied expression -- acceptable only
        # because this is a local maintenance script run by the operator.
        try:
            command_func = eval("lambda entry: " + command)
        except:
            pywikibot.exception()
            pywikibot.error("Can not compile command: %s" % command)
            exit(1)
        # Only act on entries for which the filter expression is truthy.
        func = lambda entry: command_func(entry) and action_func(entry)
    else:
        func = action_func

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output("Processing %s" % cache_path)
        process_entries(cache_path, func)
def login(self, retry=False):
    """
    Attempt to log into the server.

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool

    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        self.check_user_exists()

        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)

    pywikibot.output(u"Logging in to %(site)s as %(name)s"
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u"Login failed (%s)." % e.code)
        # Known fatal error codes are turned into NoUsername exceptions.
        if e.code == 'NotExists':
            raise NoUsername(u"Username '%s' does not exist on %s"
                             % (self.login_name, self.site))
        elif e.code == 'Illegal':
            raise NoUsername(u"Username '%s' is invalid on %s"
                             % (self.login_name, self.site))
        elif e.code == 'readapidenied':
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}'.format(self.login_name, self.site))
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            # Drop the cached password so the user is prompted again.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u"Should be logged in now")
    #
    # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site
    # object after login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot "
    #         "before actually using it!")
    #     return False
    return True
def number_of_images(self):
    """Return the (cached) number of images in the djvu file.

    Runs ``djvused -e n`` on self.file_djvu the first time and caches
    the result in self._image_count.
    """
    if not hasattr(self, '_image_count'):
        dp = subprocess.Popen(['djvused', '-e', 'n', self.file_djvu],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = dp.communicate()
        if dp.returncode != 0:
            # NOTE(review): on failure stdout is likely empty, so the
            # int() below would raise ValueError anyway -- consider
            # returning/raising here instead of only logging.
            pywikibot.error('djvulibre library error!\n%s' % stderrdata)
        self._image_count = int(stdoutdata)
    return self._image_count
def main():
    """Process command line arguments and invoke bot.

    Supported arguments: ``-delete`` (delete matched entries),
    ``-password`` (select entries containing a password), ``-c EXPR``
    (filter expression), ``-o EXPR`` (output expression); any other
    argument is treated as a cache directory path.
    """
    local_args = pywikibot.handleArgs()
    cache_paths = None
    delete = False
    command = None
    output = None

    for arg in local_args:
        if command == '':
            # Previous argument was '-c'; this one is the expression.
            command = arg
        elif output == '':
            # Previous argument was '-o'; this one is the expression.
            output = arg
        elif arg == '-delete':
            delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        elif arg == '-o':
            if output:
                pywikibot.error('Only one output may be defined.')
                exit(1)
            output = ''
        else:
            if not cache_paths:
                cache_paths = [arg]
            else:
                cache_paths.append(arg)

    if not cache_paths:
        cache_paths = ['apicache', 'tests/apicache']
        # Also process the base directory, if it isnt the current directory
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(pywikibot.config2.base_dir, 'apicache')
            ]
        # Also process the user home cache, if it isnt the config directory
        if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(os.path.expanduser('~/.pywikibot'), 'apicache')
            ]

    if delete:
        action_func = CacheEntry._delete
    else:
        action_func = None

    if output:
        output_func = _parse_command(output, 'output')
        if output_func is None:
            return False
    else:
        output_func = None

    if command:
        filter_func = _parse_command(command, 'filter')
        if filter_func is None:
            return False
    else:
        filter_func = None

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output('Processing %s' % cache_path)
        process_entries(cache_path, filter_func, output_func=output_func,
                        action_func=action_func)
def main():
    """Process command line arguments and invoke the archive bot.

    Recognized arguments: -file/-filename, -locale, -timezone, -calc,
    -salt, -force, -page, -namespace; remaining positional arguments are
    the template name(s) to process.
    """
    global site, language
    import sys
    filename = None
    pagename = None
    namespace = None
    salt = None
    force = False
    calc = None
    args = []

    def if_arg_value(arg, name):
        # Yield the value part of '-name:value' style arguments, if any.
        if arg.startswith(name):
            yield arg[len(name) + 1:]

    for arg in pywikibot.handleArgs(*sys.argv):
        for v in if_arg_value(arg, '-file'):
            filename = v
        for v in if_arg_value(arg, '-locale'):
            # Required for english month names
            locale.setlocale(locale.LC_TIME, v.encode('utf8'))
        for v in if_arg_value(arg, '-timezone'):
            # FIX: was 'os.environ["TZ"] = v.timezone', but v is the plain
            # string taken from the command line and str has no 'timezone'
            # attribute (AttributeError).  Use the value itself.
            os.environ['TZ'] = v
            # Or use the preset value
            if hasattr(time, 'tzset'):
                time.tzset()
        for v in if_arg_value(arg, '-calc'):
            calc = v
        for v in if_arg_value(arg, '-salt'):
            salt = v
        for v in if_arg_value(arg, '-force'):
            force = True
        for v in if_arg_value(arg, '-filename'):
            filename = v
        for v in if_arg_value(arg, '-page'):
            pagename = v
        for v in if_arg_value(arg, '-namespace'):
            namespace = v
        if not arg.startswith('-'):
            args.append(arg)

    if calc:
        # -calc mode only prints the hashed key and exits.
        if not salt:
            pywikibot.error('Note: you must specify a salt to calculate '
                            'a key')
            return
        s = new_hash()
        s.update(salt + '\n')
        s.update(calc + '\n')
        pywikibot.output(u'key = ' + s.hexdigest())
        return

    if not salt:
        salt = ''

    site = pywikibot.Site()
    language = site.language()

    if not args or len(args) <= 1:
        pywikibot.output(u'NOTE: you must specify a template to run the bot')
        pywikibot.showHelp('archivebot')
        return

    for a in args[1:]:
        pagelist = []
        a = a.decode('utf8')
        # Build the list of pages to archive: transclusions of the
        # template, unless an explicit file or page name was given.
        if not filename and not pagename:
            if namespace is not None:
                ns = [str(namespace)]
            else:
                ns = []
            for pg in generate_transclusions(site, a, ns):
                pagelist.append(pg)
        if filename:
            for pg in file(filename, 'r').readlines():
                pagelist.append(pywikibot.Page(site, pg, ns=10))
        if pagename:
            pagelist.append(pywikibot.Page(site, pagename, ns=3))
        pagelist = sorted(pagelist)
        for pg in iter(pagelist):
            pywikibot.output(u'Processing %s' % pg)
            # Catching exceptions, so that errors in one page do not bail out
            # the entire process
            try:
                archiver = PageArchiver(pg, a, salt, force)
                archiver.run()
                time.sleep(10)
            except Exception as e:
                pywikibot.output(
                    u'Error occured while processing page %s: %s' % (pg, e))
                pywikibot.output(traceback.format_exc())
def process_entries(cache_path, func, use_accesstime=None, output_func=None,
                    action_func=None):
    """
    Check the contents of the cache.

    This program tries to use file access times to determine
    whether cache files are being used.
    However file access times are not always usable.
    On many modern filesystems, they have been disabled.
    On unix, check the filesystem mount options.  You may
    need to remount with 'strictatime'.

    @param cache_path: directory or file path of the cache; falls back to
        the default apicache under the config base dir when falsy
    @param func: filter predicate; an entry is processed when func is None
        or func(entry) is truthy
    @param use_accesstime: Whether access times should be used.
    @type use_accesstime: bool tristate:
        - None  = detect
        - False = dont use
        - True  = always use
    @param output_func: optional callable mapping an entry to the value to
        print (None suppresses output for that entry)
    @param action_func: optional callable applied to each matching entry
    """
    if not cache_path:
        cache_path = os.path.join(pywikibot.config2.base_dir, 'apicache')

    if not os.path.exists(cache_path):
        pywikibot.error('%s: no such file or directory' % cache_path)
        return

    if os.path.isdir(cache_path):
        filenames = [os.path.join(cache_path, filename)
                     for filename in os.listdir(cache_path)]
    else:
        filenames = [cache_path]

    for filepath in filenames:
        filename = os.path.basename(filepath)
        cache_dir = os.path.dirname(filepath)
        # Snapshot stat info BEFORE loading, so atime changes caused by
        # reading the cache file can be detected and undone below.
        if use_accesstime is not False:
            stinfo = os.stat(filepath)

        entry = CacheEntry(cache_dir, filename)
        try:
            entry._load_cache()
        except ValueError as e:
            pywikibot.error('Failed loading {0}'.format(
                entry._cachefile_path()))
            pywikibot.exception(e, tb=True)
            continue

        if use_accesstime is None:
            # Auto-detect: if loading the file changed its atime, the
            # filesystem records access times and we can rely on them.
            stinfo2 = os.stat(filepath)
            use_accesstime = stinfo.st_atime != stinfo2.st_atime

        if use_accesstime:
            # Reset access times to values before loading cache entry.
            os.utime(filepath, (stinfo.st_atime, stinfo.st_mtime))
            entry.stinfo = stinfo

        try:
            entry.parse_key()
        except ParseError:
            pywikibot.error('Problems parsing %s with key %s'
                            % (entry.filename, entry.key))
            pywikibot.exception()
            continue

        try:
            entry._rebuild()
        except Exception as e:
            pywikibot.error('Problems loading %s with key %s, %r'
                            % (entry.filename, entry.key, entry._parsed_key))
            pywikibot.exception(e, tb=True)
            continue

        if func is None or func(entry):
            # Print the entry (or output_func's view of it) when an output
            # was requested, or by default when no action was given.
            if output_func or action_func is None:
                if output_func is None:
                    output = entry
                else:
                    output = output_func(entry)
                if output is not None:
                    pywikibot.output(output)
            if action_func:
                action_func(entry)
def main(*args):
    """
    Process command line arguments and generate user-config.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    global base_dir

    # Settings as they stand before any family/lang was configured.
    default_args = (config.family, config.mylang, None)

    local_args = pywikibot.handle_args(args)
    if local_args:
        pywikibot.output('Unknown arguments: %s' % ' '.join(local_args))
        return False

    username = config.usernames[config.family].get(config.mylang)
    args = (config.family, config.mylang, username)

    # Only ask the user questions when nothing was preconfigured.
    if args != default_args:
        force = True
        pywikibot.output(u'Automatically generating user-config.py')
    else:
        force = False
        # Force default
        if config.family == 'wikipedia' and config.mylang == 'language':
            args = ('wikipedia', 'en', username)

    while not force or config.verbose_output:
        pywikibot.output(u'\nYour default user directory is "%s"' % base_dir)
        if pywikibot.input_yn("Do you want to use that directory?",
                              default=True, automatic_quit=False,
                              force=force):
            break
        else:
            new_base = change_base_dir()
            if new_base:
                base_dir = new_base
                break

    copied_config = False
    copied_fixes = False
    while not force or config.verbose_output:
        if os.path.exists(os.path.join(base_dir, "user-config.py")):
            break
        if pywikibot.input_yn(
                "Do you want to copy user files from an existing Pywikibot "
                "installation?",
                default=False, force=force, automatic_quit=False):
            oldpath = pywikibot.input("Path to existing user-config.py?")
            if not os.path.exists(oldpath):
                pywikibot.error("Not a valid path")
                continue
            if os.path.isfile(oldpath):
                # User probably typed /user-config.py at the end, so strip it
                oldpath = os.path.dirname(oldpath)
            if not os.path.isfile(os.path.join(oldpath, "user-config.py")):
                # FIX: message said 'user_config.py' although the file that
                # is actually looked for is 'user-config.py'.
                pywikibot.error("No user-config.py found in that directory")
                continue
            shutil.copyfile(os.path.join(oldpath, "user-config.py"),
                            os.path.join(base_dir, "user-config.py"))
            copied_config = True

            # Copy user-fixes.py alongside, when present.
            if os.path.isfile(os.path.join(oldpath, "user-fixes.py")):
                shutil.copyfile(os.path.join(oldpath, "user-fixes.py"),
                                os.path.join(base_dir, "user-fixes.py"))
                copied_fixes = True
        else:
            break

    if not os.path.isfile(os.path.join(base_dir, "user-config.py")):
        if ((force and not config.verbose_output) or
                pywikibot.input_yn('Create user-config.py file? Required for '
                                   'running bots.',
                                   default=True, automatic_quit=False,
                                   force=force)):
            create_user_config(args, force=force)
    elif not copied_config:
        pywikibot.output("user-config.py already exists in the directory")

    if not os.path.isfile(os.path.join(base_dir, "user-fixes.py")):
        if ((force and not config.verbose_output) or
                pywikibot.input_yn('Create user-fixes.py file? Optional and '
                                   'for advanced users.',
                                   force=force, default=False,
                                   automatic_quit=False)):
            create_user_fixes()
    elif not copied_fixes:
        pywikibot.output("user-fixes.py already exists in the directory")
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            # Remember if -replacementfile landed between an old/new pair,
            # which would silently shift the pairing; reported below.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True
            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line.
        # FIX: str.lstrip returns a new string; the previous code discarded
        # the result, so the BOM was never actually removed.
        file_replacements[0] = file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # Pair up consecutive old/new arguments into Replacement objects.
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-%s +%s)' % (replacement.old,
                                                replacement.new)}
            )
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s'
                             % single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary)
        for replacement in fix['replacements']:
            summary = None if len(replacement) < 3 else replacement[2]
            # Warn about invisible formatting characters which are easy to
            # miss in the fix definition.
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(
                                          replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(
                                          replacement[1])))
            replacements.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
            for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is
    # unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    url = u''
    description = ['Automatic upload by pywikibot']
    summary = None
    keepFilename = True
    always = False
    useFilename = None
    verifyDescription = False
    # aborts/ignorewarn: either a set of warning codes, or True for "all".
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    chunk_size_regex = r'^-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$'
    chunk_size_regex = re.compile(chunk_size_regex, re.I)
    recursive = False

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    for arg in pywikibot.handle_args(args):
        if arg:
            if arg == '-always':
                keepFilename = True
                always = True
                verifyDescription = False
            elif arg == '-recursive':
                recursive = True
            elif arg.startswith('-keep'):
                keepFilename = True
            elif arg.startswith('-filename:'):
                useFilename = arg[10:]
            elif arg.startswith('-summary'):
                summary = arg[9:]
            elif arg.startswith('-noverify'):
                verifyDescription = False
            elif arg.startswith('-abortonwarn'):
                # A bare '-abortonwarn' (or once True) means abort on ALL
                # warnings; otherwise collect the specific warning code.
                if len(arg) > len('-abortonwarn:') and aborts is not True:
                    aborts.add(arg[len('-abortonwarn:'):])
                else:
                    aborts = True
            elif arg.startswith('-ignorewarn'):
                if len(arg) > len('-ignorewarn:') and ignorewarn is not True:
                    ignorewarn.add(arg[len('-ignorewarn:'):])
                else:
                    ignorewarn = True
            elif arg.startswith('-chunked'):
                match = chunk_size_regex.match(arg)
                if match:
                    if match.group(1):  # number was in there
                        base = float(match.group(1))
                        if match.group(2):  # suffix too
                            suffix = match.group(2).lower()
                            if suffix == "k":
                                suffix = 1000
                            elif suffix == "m":
                                suffix = 1000000
                            elif suffix == "ki":
                                suffix = 1 << 10
                            elif suffix == "mi":
                                suffix = 1 << 20
                            else:
                                pass  # huh?
                        else:
                            suffix = 1
                        chunk_size = math.trunc(base * suffix)
                    else:
                        chunk_size = 1 << 20  # default to 1 MiB
                else:
                    pywikibot.error('Chunk size parameter is not valid.')
            elif url == u'':
                # First non-option argument is the URL/file/directory.
                url = arg
            else:
                # Remaining words are joined into the description below.
                description.append(arg)

    description = u' '.join(description)

    # Keep asking until a URL or an existing local path was supplied;
    # in -always mode we cannot ask and bail out with url = None.
    while not ("://" in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
        if not always:
            error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
            url = pywikibot.input(
                u'URL, file or directory where files are now:')

    if always and ((aborts is not True and ignorewarn is not True) or
                   not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # NOTE(review): description is always a joined str at this point,
        # so 'description is None' can never be True — presumably this was
        # meant to be 'not description'; verify before changing.
        if description is None:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        # Expand a directory into the list of not-yet-uploaded files.
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                if not (pywikibot.FilePage(pywikibot.Site(),
                                           "File:" + dir_file).exists()):
                    file_list.append(os.path.join(directory_info[0],
                                                  dir_file))
                else:
                    pywikibot.output(
                        "File:{0} has been uploaded, skipping".format(
                            dir_file))
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=useFilename,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription, aborts=aborts,
                      ignoreWarning=ignorewarn, chunk_size=chunk_size,
                      always=always, summary=summary)
    bot.run()
def _ocr_callback(self, cmd_uri, parser_func=None, ocr_tool=None):
    """OCR callback function.

    Fetches cmd_uri (retrying up to 5 times on read timeouts), parses the
    JSON answer of the OCR backend and returns the recognized text.

    @param cmd_uri: URI of the OCR service to query (mandatory)
    @param parser_func: optional callable applied to the recognized text;
        defaults to the identity function
    @param ocr_tool: which backend the URI belongs to; must be one of
        self._OCR_METHODS
    @return: tuple (error, text [error description in case of error]).
    @raises ValueError: if cmd_uri is empty
    @raises TypeError: if parser_func is not callable or ocr_tool invalid
    """
    def identity(x):
        return x

    if not cmd_uri:
        raise ValueError('Parameter cmd_uri is mandatory.')

    if parser_func is None:
        parser_func = identity

    if not callable(parser_func):
        raise TypeError('Keyword parser_func must be callable.')

    if ocr_tool not in self._OCR_METHODS:
        raise TypeError(
            "ocr_tool must be in %s, not '%s'."
            % (self._OCR_METHODS, ocr_tool))

    # wrong link fail with Exceptions
    retry = 0
    while retry < 5:
        pywikibot.debug('{0}: get URI {1!r}'.format(ocr_tool, cmd_uri),
                        _logger)
        try:
            response = http.fetch(cmd_uri)
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            pywikibot.warning('ReadTimeout %s: %s' % (cmd_uri, e))
            pywikibot.warning('retrying in %s seconds ...' % (retry * 5))
            time.sleep(retry * 5)
        except Exception as e:
            pywikibot.error('"%s": %s' % (cmd_uri, e))
            return (True, e)
        else:
            pywikibot.debug('{0}: {1}'.format(ocr_tool, response.text),
                            _logger)
            break
    else:
        # FIX: every attempt timed out, so the loop exhausted without a
        # break.  Previously 'response' was unbound here and the status
        # check below raised UnboundLocalError; return an error tuple
        # consistent with the documented contract instead.
        return (True, 'ReadTimeout %s: retries exhausted' % cmd_uri)

    if 400 <= response.status < 600:
        return (True, 'Http response status {0}'.format(response.status))

    data = json.loads(response.text)

    if ocr_tool == self._PHETOOLS:  # phetools
        assert 'error' in data, 'Error from phetools: %s' % data
        assert data['error'] in [0, 1, 2, 3], (
            'Error from phetools: %s' % data)
        error, _text = bool(data['error']), data['text']
    else:  # googleOCR
        if 'error' in data:
            error, _text = True, data['error']
        else:
            error, _text = False, data['text']

    if error:
        pywikibot.error('OCR query %s: %s' % (cmd_uri, _text))
        return (error, _text)
    else:
        return (error, parser_func(_text))
def treat(self, page, item):
    """Process a single page/item.

    Scans the page for the configured template(s), maps template fields
    to Wikidata properties via self.fields, and adds missing claims.
    """
    if willstop:
        raise KeyboardInterrupt
    self.current_page = page
    item.get()
    # Nothing to do when every mapped property already has a claim.
    if set(self.fields.values()) <= set(item.claims.keys()):
        pywikibot.output('%s item %s has claims for all properties. '
                         'Skipping.' % (page, item.title()))
        return
    pagetext = page.get()
    templates = textlib.extract_templates_and_params(pagetext)
    for (template, fielddict) in templates:
        # Clean up template
        try:
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(withNamespace=False)
        except pywikibot.exceptions.InvalidTitle:
            pywikibot.error(
                "Failed parsing template; '%s' should be the template name."
                % template)
            continue
        # We found the template we were looking for
        if template in self.templateTitles:
            for field, value in fielddict.items():
                field = field.strip()
                value = value.strip()
                if not field or not value:
                    continue
                # This field contains something useful for us
                if field in self.fields:
                    # Check if the property isn't already set
                    claim = pywikibot.Claim(self.repo, self.fields[field])
                    if claim.getID() in item.get().get('claims'):
                        pywikibot.output(
                            'A claim for %s already exists. Skipping.'
                            % claim.getID())
                        # TODO: Implement smarter approach to merging
                        # harvested values with existing claims esp.
                        # without overwriting humans unintentionally.
                    else:
                        # Build the claim target according to the datatype.
                        if claim.type == 'wikibase-item':
                            # Try to extract a valid page
                            match = re.search(pywikibot.link_regex, value)
                            if not match:
                                pywikibot.output(
                                    '%s field %s value %s is not a '
                                    'wikilink. Skipping.'
                                    % (claim.getID(), field, value))
                                continue
                            link_text = match.group(1)
                            linked_item = self._template_link_target(
                                item, link_text)
                            if not linked_item:
                                continue
                            claim.setTarget(linked_item)
                        elif claim.type in ('string', 'external-id'):
                            claim.setTarget(value.strip())
                        elif claim.type == 'commonsMedia':
                            # Resolve the value as a file on Commons,
                            # following redirects.
                            commonssite = pywikibot.Site('commons',
                                                         'commons')
                            imagelink = pywikibot.Link(
                                value, source=commonssite,
                                defaultNamespace=6)
                            image = pywikibot.FilePage(imagelink)
                            if image.isRedirectPage():
                                image = pywikibot.FilePage(
                                    image.getRedirectTarget())
                            if not image.exists():
                                pywikibot.output(
                                    "{0} doesn't exist. I can't link to it"
                                    ''.format(image.title(asLink=True)))
                                continue
                            claim.setTarget(image)
                        else:
                            pywikibot.output(
                                '%s is not a supported datatype.'
                                % claim.type)
                            continue
                        pywikibot.output(
                            'Adding %s --> %s'
                            % (claim.getID(), claim.getTarget()))
                        item.addClaim(claim)
                        # A generator might yield pages from multiple sites
                        source = self.getSource(page.site)
                        if source:
                            claim.addSource(source, bot=True)
def treat_page_and_item(self, page, item):
    """Process a single page/item.

    Extracts template parameters from the page, maps each configured
    field to a property (self.fields) and adds claims to the item unless
    they already exist (per the 'exists' option).
    """
    if willstop:
        raise KeyboardInterrupt

    templates = page.raw_extracted_templates
    for (template, fielddict) in templates:
        # Clean up template
        try:
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(with_ns=False)
        except pywikibot.exceptions.InvalidTitle:
            pywikibot.error("Failed parsing template; '{}' should be "
                            'the template name.'.format(template))
            continue

        if template not in self.templateTitles:
            continue
        # We found the template we were looking for
        for field, value in fielddict.items():
            field = field.strip()
            # todo: extend the list of tags to ignore
            value = textlib.removeDisabledParts(
                # todo: eventually we may want to import the references
                value, tags=['ref'], site=page.site).strip()
            if not field or not value:
                continue

            if field not in self.fields:
                continue
            # This field contains something useful for us
            prop, options = self.fields[field]
            claim = pywikibot.Claim(self.repo, prop)
            exists_arg = self._get_option_with_fallback(options, 'exists')
            if claim.type == 'wikibase-item':
                do_multi = self._get_option_with_fallback(options, 'multi')
                matched = False
                # Try to extract a valid page
                for match in pywikibot.link_regex.finditer(value):
                    matched = True
                    link_text = match.group(1)
                    linked_item = self._template_link_target(item,
                                                             link_text)
                    added = False
                    if linked_item:
                        claim.setTarget(linked_item)
                        added = self.user_add_claim_unless_exists(
                            item, claim, exists_arg, page.site,
                            pywikibot.output)
                        # A fresh Claim is needed for the next value;
                        # the previous one may now be attached to item.
                        claim = pywikibot.Claim(self.repo, prop)
                    # stop after the first match if not supposed to add
                    # multiple values
                    if not do_multi:
                        break
                    # update exists_arg, so we can add more values
                    if 'p' not in exists_arg and added:
                        exists_arg += 'p'
                if matched:
                    continue

                # No wikilink in the value: optionally treat the raw text
                # itself as a link target.
                if not self._get_option_with_fallback(options, 'islink'):
                    pywikibot.output(
                        '{} field {} value {} is not a wikilink. '
                        'Skipping.'.format(claim.getID(), field, value))
                    continue

                linked_item = self._template_link_target(item, value)
                if not linked_item:
                    continue
                claim.setTarget(linked_item)
            elif claim.type in ('string', 'external-id'):
                claim.setTarget(value.strip())
            elif claim.type == 'url':
                match = self.linkR.search(value)
                if not match:
                    continue
                claim.setTarget(match.group('url'))
            elif claim.type == 'commonsMedia':
                # Resolve the value as a file on Commons, following
                # redirects.
                commonssite = pywikibot.Site('commons', 'commons')
                imagelink = pywikibot.Link(value, source=commonssite,
                                           default_namespace=6)
                image = pywikibot.FilePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.FilePage(image.getRedirectTarget())
                if not image.exists():
                    pywikibot.output(
                        "{} doesn't exist. I can't link to it".format(
                            image.title(as_link=True)))
                    continue
                claim.setTarget(image)
            else:
                pywikibot.output('{} is not a supported datatype.'.format(
                    claim.type))
                continue

            # A generator might yield pages from multiple sites
            self.user_add_claim_unless_exists(item, claim, exists_arg,
                                              page.site, pywikibot.output)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    index = None
    djvu_path = '.'  # default djvu file directory
    pages = '1-'
    options = {}

    # Parse command line arguments.
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if arg.startswith('-index:'):
            index = arg[7:]
        elif arg.startswith('-djvu:'):
            djvu_path = arg[len('-djvu:'):]
        elif arg.startswith('-pages:'):
            pages = arg[7:]
        elif arg.startswith('-summary:'):
            options['summary'] = arg[len('-summary:'):]
        elif arg == '-force':
            options['force'] = True
        elif arg == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # If djvu_path is not a file, build djvu_path from dir+index.
    djvu_path = os.path.expanduser(djvu_path)
    djvu_path = os.path.abspath(djvu_path)
    if not os.path.exists(djvu_path):
        pywikibot.error('No such file or directory: %s' % djvu_path)
        return False
    if os.path.isdir(djvu_path):
        djvu_path = os.path.join(djvu_path, index)

    # Check the djvu file exists and, if so, create the DjVuFile wrapper.
    djvu = DjVuFile(djvu_path)
    if not djvu.has_text():
        pywikibot.error('No text layer in djvu file %s' % djvu.file_djvu)
        return False

    # Parse pages param.
    # Each comma-separated interval 'a-b' becomes a (start, end) tuple;
    # a bare 'a' means the single page a, and a missing end means "up to
    # the last image in the file".
    pages = pages.split(',')
    for interval in range(len(pages)):
        start, sep, end = pages[interval].partition('-')
        start = 1 if not start else int(start)
        if not sep:
            end = start
        else:
            end = int(end) if end else djvu.number_of_images()
        pages[interval] = (start, end)

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.'
                        % site)
        return False

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)
    if not index_page.exists():
        raise pywikibot.NoPage(index)

    pywikibot.output('uploading text from %s to %s'
                     % (djvu.file_djvu, index_page.title(asLink=True)))

    bot = DjVuTextBot(djvu, index_page, pages, **options)
    bot.run()
# # Distributed under the terms of the MIT license. # from __future__ import absolute_import, division, unicode_literals import re import sys import pywikibot from pywikibot import i18n try: import pycountry except ImportError: pywikibot.error('This script requires the python-pycountry module') pywikibot.error('See: https://pypi.org/project/pycountry') pywikibot.exception() sys.exit(1) class StatesRedirectBot(pywikibot.Bot): """Bot class used for implementation of re-direction norms.""" def __init__(self, start, force): """Initializer. Parameters: @param start:xxx Specify the place in the alphabet to start searching. @param force: Don't ask whether to create pages, just create them.
gens = [ pagegenerators.ReferringPageGenerator(t, onlyTemplateInclusion=True) for t in oldTemplates ] gen = pagegenerators.CombinedPageGenerator(gens) gen = pagegenerators.DuplicateFilterPageGenerator(gen) if user: gen = pagegenerators.UserEditFilterGenerator(gen, user, timestamp, skip, max_revision_depth=100, show_filtered=True) if not genFactory.gens: # make sure that proper namespace filtering etc. is handled gen = genFactory.getCombinedGenerator(gen) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = TemplateRobot(preloadingGen, templates, **options) bot.run() if __name__ == "__main__": try: main() except Exception: pywikibot.error("Fatal error:", exc_info=True)
def replacements(self):
    """Yield the (regex, callback) replacement pairs for this fix.

    Generator: yields a single (FULL_ARTICLE_REGEX, self.replace) pair,
    or logs an error and yields nothing when mwparserfromhell is missing.
    """
    # Guard clause: without the parser library this fix cannot run.
    if not self.can_load:
        pywikibot.error('Cannot run SectionsFix when mwparserfromhell '
                        'is not installed')
        return
    yield (FULL_ARTICLE_REGEX, self.replace)
text = rule.apply(text, replaced) page.text = text count = len(replaced) if count > 0: # todo: separate function if count > 1: max_typos = self.maxsummarytypos summary = 'oprava překlepů: %s' % ', '.join( replaced[:max_typos]) if count > max_typos: if count - max_typos > 1: summary += ' a %s dalších' % (count - max_typos) else: summary += ' a jednoho dalšího' else: summary = 'oprava překlepu: %s' % replaced[0] summaries.append(summary) lazy_fixes = { fix.key: fix for fix in (CategoriesFix, CheckWikiFix, FilesFix, RedirectFix, RedirectsFromFileFix, RefSortFix, SectionsFix, TemplateFix, TypoFix) } all_fixes = {fix.key: fix for fix in (AdataFix, InterwikiFix, StyleFix)} all_fixes.update(lazy_fixes) if __name__ == '__main__': pywikibot.error('Run wikitext.py instead')
def test_error(self):
    """Check that error() writes to stderr only, with an ERROR prefix."""
    pywikibot.error('error')
    captured_stdout = newstdout.getvalue()
    captured_stderr = newstderr.getvalue()
    self.assertEqual(captured_stdout, '')
    self.assertEqual(captured_stderr, 'ERROR: error\n')
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    template_title = None

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pg.GeneratorFactory()

    # current_args accumulates a positional [field, property, options-dict]
    # triple; when a new field starts, the finished triple is flushed into
    # 'fields'.
    current_args = []
    fields = {}
    options = {}
    for arg in local_args:
        if arg.startswith('-template'):
            if len(arg) == 9:
                template_title = pywikibot.input(
                    'Please enter the template to work on:')
            else:
                template_title = arg[10:]
        elif arg.startswith('-create'):
            options['create'] = True
        elif gen.handleArg(arg):
            if arg.startswith('-transcludes:'):
                template_title = arg[13:]
        else:
            optional = arg.startswith('-')
            complete = len(current_args) == 3
            if optional:
                # An option between a field and its property is invalid:
                # leave the loop with one leftover arg so the error below
                # fires.
                needs_second = len(current_args) == 1
                if needs_second:
                    break  # will stop below

                arg, sep, value = arg[1:].partition(':')
                if len(current_args) == 0:
                    # Global bot option (before any field/property pair).
                    assert not fields
                    options[arg] = value or True
                else:
                    # Per-property option, attached to the current triple.
                    assert complete
                    current_args[2][arg] = value or True
            else:
                if complete:
                    # Flush the finished triple before starting a new one.
                    handler = PropertyOptionHandler(**current_args[2])
                    fields[current_args[0]] = (current_args[1], handler)
                    del current_args[:]
                current_args.append(arg)
                if len(current_args) == 2:
                    current_args.append({})

    # handle leftover
    if len(current_args) == 3:
        handler = PropertyOptionHandler(**current_args[2])
        fields[current_args[0]] = (current_args[1], handler)
    elif len(current_args) == 1:
        pywikibot.error('Incomplete command line param-property pair.')
        return False

    if not template_title:
        pywikibot.error(
            'Please specify either -template or -transcludes argument')
        return

    generator = gen.getCombinedGenerator(preload=True)
    if not generator:
        # Fall back to pages transcluding the template.
        gen.handleArg('-transcludes:' + template_title)
        generator = gen.getCombinedGenerator(preload=True)

    bot = HarvestRobot(generator, template_title, fields, **options)
    bot.run()
def process_filename(self, file_url=None):
    """Return base filename portion of file_url.

    Interactively validates the target filename (forbidden characters,
    allowed extensions, collisions on the target wiki) and makes sure a
    non-empty description exists, unless running in 'always' mode.
    Returns None when the file should be skipped.
    """
    if not file_url:
        file_url = self.url
        pywikibot.warning('file_url is not given. '
                          'Set to self.url by default.')

    always = self.getOption('always')
    # Isolate the pure name
    filename = file_url
    # Filename may be either a URL or a local file path
    if '://' in filename:
        # extract the path portion of the URL
        filename = urlparse(filename).path
    filename = os.path.basename(filename)
    if self.use_filename:
        filename = self.use_filename
    if self.filename_prefix:
        filename = self.filename_prefix + filename
    if not self.keep_filename:
        pywikibot.output(
            'The filename on the target wiki will default to: %s'
            % filename)
        # 'always' mode cannot prompt; keep_filename must be set with it.
        assert not always
        newfn = pywikibot.input(
            'Enter a better name, or press enter to accept:')
        if newfn != '':
            filename = newfn
    # FIXME: these 2 belong somewhere else, presumably in family
    # forbidden characters are handled by pywikibot/page.py
    forbidden = ':*?/\\'  # to be extended
    try:
        allowed_formats = self.target_site.siteinfo.get(
            'fileextensions', get_default=False)
    except KeyError:
        allowed_formats = []
    else:
        allowed_formats = [item['ext'] for item in allowed_formats]

    # ask until it's valid
    first_check = True
    while True:
        if not first_check:
            # A previous candidate was rejected; ask again (or give up
            # silently in 'always' mode).
            if always:
                filename = None
            else:
                filename = pywikibot.input('Enter a better name, or press '
                                           'enter to skip the file:')
            if not filename:
                return None
        first_check = False
        ext = os.path.splitext(filename)[1].lower().strip('.')
        # are any chars in forbidden also in filename?
        invalid = set(forbidden) & set(filename)
        if invalid:
            c = ''.join(invalid)
            pywikibot.output(
                'Invalid character(s): %s. Please try again' % c)
            continue
        if allowed_formats and ext not in allowed_formats:
            if always:
                pywikibot.output('File format is not one of '
                                 '[{0}]'.format(' '.join(allowed_formats)))
                continue
            elif not pywikibot.input_yn(
                    'File format is not one of [%s], but %s. Continue?'
                    % (' '.join(allowed_formats), ext),
                    default=False, automatic_quit=False):
                continue
        potential_file_page = pywikibot.FilePage(self.target_site, filename)
        if potential_file_page.exists():
            overwrite = self._handle_warning('exists')
            if overwrite is False:
                pywikibot.output(
                    'File exists and you asked to abort. Skipping.')
                return None
            if potential_file_page.has_permission():
                if overwrite is None:
                    overwrite = not pywikibot.input_yn(
                        'File with name %s already exists. '
                        'Would you like to change the name? '
                        '(Otherwise file will be overwritten.)'
                        % filename, default=True,
                        automatic_quit=False)
                if not overwrite:
                    continue
                else:
                    break
            else:
                pywikibot.output('File with name %s already exists and '
                                 'cannot be overwritten.' % filename)
                continue
        else:
            try:
                if potential_file_page.fileIsShared():
                    pywikibot.output(
                        'File with name %s already exists in shared '
                        'repository and cannot be overwritten.' % filename)
                    continue
                else:
                    break
            except pywikibot.NoPage:
                break

    # A proper description for the submission.
    # Empty descriptions are not accepted.
    if self.description:
        pywikibot.output('The suggested description is:\n%s'
                         % self.description)
    while not self.description or self.verify_description:
        if not self.description:
            pywikibot.output(
                color_format(
                    '{lightred}It is not possible to upload a file '
                    'without a description.{default}'))
        # 'always' mode cannot prompt for a description either.
        assert not always
        # if no description, ask if user want to add one or quit,
        # and loop until one is filled.
        # if self.verify_description, ask if user want to change it
        # or continue.
        if self.description:
            question = 'Do you want to change this description?'
        else:
            question = 'No description was given. Add one?'
        if pywikibot.input_yn(question, default=not self.description,
                              automatic_quit=self.description):
            from pywikibot import editor as editarticle
            editor = editarticle.TextEditor()
            try:
                new_description = editor.edit(self.description)
            except ImportError:
                raise
            except Exception as e:
                pywikibot.error(e)
                continue
            # if user saved / didn't press Cancel
            if new_description:
                self.description = new_description
        elif not self.description:
            raise QuitKeyboardInterrupt
        self.verify_description = False

    return filename
start = 1 if not start else int(start) if not sep: end = start else: end = int(end) if end else djvu.number_of_images() pages[i] = (start, end) site = pywikibot.Site() if not site.has_extension('ProofreadPage'): pywikibot.error( 'Site {} must have ProofreadPage extension.'.format(site)) return index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns) if not index_page.exists(): raise NoPageError(index) pywikibot.output('uploading text from {} to {}'.format( djvu.file, index_page.title(as_link=True))) bot = DjVuTextBot(djvu, index_page, pages=pages, site=site, **options) bot.run() if __name__ == '__main__': try: main() except Exception: pywikibot.error('Fatal error:', exc_info=True)
def _ocr_callback(self, cmd_uri, parser_func=None, ocr_tool=None):
    """OCR callback function.

    Fetches cmd_uri with up to 5 retries (increasing back-off), then
    parses the JSON answer according to the selected OCR tool.

    :param cmd_uri: OCR service URI to fetch (mandatory)
    :param parser_func: optional callable applied to the OCR text;
        defaults to the identity function
    :param ocr_tool: one of self._OCR_METHODS
    :return: tuple (error, text [error description in case of error]).
    """
    def identity(x):
        return x

    if not cmd_uri:
        raise ValueError('Parameter cmd_uri is mandatory.')

    if parser_func is None:
        parser_func = identity

    if not callable(parser_func):
        raise TypeError('Keyword parser_func must be callable.')

    if ocr_tool not in self._OCR_METHODS:
        raise TypeError("ocr_tool must be in {}, not '{}'.".format(
            self._OCR_METHODS, ocr_tool))

    # wrong link fail with Exceptions
    # Retry loop: on ReadTimeout wait 'retry' seconds and try again;
    # any other exception aborts immediately.
    for retry in range(5, 30, 5):
        pywikibot.debug('{}: get URI {!r}'.format(ocr_tool, cmd_uri),
                        _logger)
        try:
            response = http.fetch(cmd_uri)
        except ReadTimeout as e:
            pywikibot.warning('ReadTimeout {}: {}'.format(cmd_uri, e))
        except Exception as e:
            pywikibot.error('"{}": {}'.format(cmd_uri, e))
            return True, e
        else:
            pywikibot.debug('{}: {}'.format(ocr_tool, response.text),
                            _logger)
            break

        pywikibot.warning('retrying in {} seconds ...'.format(retry))
        time.sleep(retry)
    else:
        # All retries timed out; NOTE: returns the exception class
        # itself, not an instance.
        return True, ReadTimeout

    if HTTPStatus.BAD_REQUEST <= response.status_code < 600:
        return True, 'Http response status {}'.format(
            response.status_code)

    data = json.loads(response.text)

    if ocr_tool == self._PHETOOLS:  # phetools
        # phetools always reports an integer error code 0..3.
        assert 'error' in data, 'Error from phetools: {}'.format(data)
        assert data['error'] in [0, 1, 2, 3], \
            'Error from phetools: {}'.format(data)
        error, _text = bool(data['error']), data['text']
    else:  # googleOCR
        if 'error' in data:
            error, _text = True, data['error']
        else:
            error, _text = False, data['text']

    if error:
        pywikibot.error('OCR query {}: {}'.format(cmd_uri, _text))
        return error, _text
    # Only successful text is run through the parser function.
    return error, parser_func(_text)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Collects replacement pairs (from the command line, a -pairsfile or
    interactive input), predefined fixes and exception rules, builds a
    page generator and runs ReplaceRobot.

    @param args: command line arguments
    @type args: str
    """
    options = {}
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # Set the default regular expression flags
    flags = 0
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    local_args = genFactory.handle_args(local_args)
    for arg in local_args:
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith('-mysqlquery'):
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            options['sleep'] = float(arg[7:])
        elif arg in ('-always', '-recursive', '-allowoverlap'):
            options[arg[1:]] = True
        elif arg == '-nocase':
            flags |= re.IGNORECASE
        elif arg == '-dotall':
            flags |= re.DOTALL
        elif arg == '-multiline':
            flags |= re.MULTILINE
        elif arg.startswith('-addcat:'):
            options['addcat'] = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-pairsfile'):
            # A -pairsfile between an old/new pair is an error.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-pairsfile used between a pattern replacement pair.')
        return

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except OSError as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'
                .format(replacement_file))
            return

        # Strip BOM from first line.
        # BUGFIX: str.lstrip returns a new string; the previous code
        # discarded the result, leaving the BOM in the first pattern.
        file_replacements[0] = file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        # Ask interactively for replacement pairs until blank input.
        old = pywikibot.input(
            'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-{0} +{1})'.format(replacement.old,
                                                      replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], str):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, str):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handle_arg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{0}" (replacement #{1})'.format(fix_name, index))
            # Warn about invisible formatting characters which are easy
            # to overlook in the fix definition.
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output(
                'The summary will not be used when the fix has '
                'one defined but the following fix(es) do(es) '
                'not have a summary defined: '
                '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        # -xmlstart may never have been assigned; default it to None.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join(
                "old_text RLIKE '%s'"
                % prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements)
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join(
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions)
            else:
                exceptClause = ''
        # 'or' short-circuits, so the %-formatting below is only
        # evaluated when no custom query was given.
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = ReplaceRobot(gen, replacements, exceptions, site=site,
                       summary=edit_summary, **options)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
def treat_page_and_item(self, page, item):
    """Process a single page/item.

    Scans the page's templates for harvested fields and adds the
    corresponding claims to the item (unless they already exist).
    """
    if willstop:
        raise KeyboardInterrupt
    templates = page.raw_extracted_templates
    for (template, fielddict) in templates:
        # Clean up template
        try:
            # Normalize the template name through the Template namespace.
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(withNamespace=False)
        except pywikibot.exceptions.InvalidTitle:
            pywikibot.error(
                "Failed parsing template; '%s' should be the template name."
                % template)
            continue

        if template not in self.templateTitles:
            continue
        # We found the template we were looking for
        for field, value in fielddict.items():
            field = field.strip()
            value = value.strip()
            if not field or not value:
                continue

            if field not in self.fields:
                continue

            # This field contains something useful for us
            prop, options = self.fields[field]
            claim = pywikibot.Claim(self.repo, prop)
            # Build the claim target according to the property datatype.
            if claim.type == 'wikibase-item':
                # Try to extract a valid page
                match = pywikibot.link_regex.search(value)
                if match:
                    link_text = match.group(1)
                else:
                    if self._get_option_with_fallback(options, 'islink'):
                        link_text = value
                    else:
                        pywikibot.output(
                            '%s field %s value %s is not a wikilink. '
                            'Skipping.' % (claim.getID(), field, value))
                        continue

                linked_item = self._template_link_target(item, link_text)
                if not linked_item:
                    continue

                claim.setTarget(linked_item)
            elif claim.type in ('string', 'external-id'):
                claim.setTarget(value.strip())
            elif claim.type == 'url':
                match = self.linkR.search(value)
                if not match:
                    continue

                claim.setTarget(match.group('url'))
            elif claim.type == 'commonsMedia':
                # Resolve the file (following redirects) on Commons.
                commonssite = pywikibot.Site('commons', 'commons')
                imagelink = pywikibot.Link(value, source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.FilePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.FilePage(image.getRedirectTarget())

                if not image.exists():
                    pywikibot.output(
                        "{0} doesn't exist. I can't link to it"
                        ''.format(image.title(asLink=True)))
                    continue

                claim.setTarget(image)
            else:
                pywikibot.output('%s is not a supported datatype.'
                                 % claim.type)
                continue

            # A generator might yield pages from multiple sites
            self.user_add_claim_unless_exists(
                item, claim,
                self._get_option_with_fallback(options, 'exists'),
                page.site, pywikibot.output)
def handle_bad_page(self, *values):
    """Process one bad page.

    Shows the page content and interactively asks what to do:
    q=quit, d=delete, e=edit, b=blank, Enter=skip, or one or more
    (comma-separated) template numbers from the question list to add.
    """
    try:
        self.content = self.page.get()
    except IsRedirectPageError:
        pywikibot.output('Already redirected, skipping.')
        return
    except NoPageError:
        pywikibot.output('Already deleted')
        return

    # Skip pages that already carry one of the 'done' markers.
    for d in pywikibot.translate(self.site.code, done):
        if d in self.content:
            pywikibot.output(
                'Found: "{}" in content, nothing necessary'.format(d))
            return
    pywikibot.output('---- Start content ----------------')
    pywikibot.output(self.content)
    pywikibot.output('---- End of content ---------------')

    # Loop other user answer
    answered = False
    while not answered:
        answer = pywikibot.input(self.question)

        if answer == 'q':
            raise QuitKeyboardInterrupt
        if answer == 'd':
            pywikibot.output('Trying to delete page [[{}]].'.format(
                self.page.title()))
            self.page.delete()
            return
        if answer == 'e':
            old = self.content
            new = editor.TextEditor().edit(old)
            msg = pywikibot.input('Summary message:')
            self.userPut(self.page, old, new, summary=msg)
            return
        if answer == 'b':
            pywikibot.output('Blanking page [[{}]].'.format(
                self.page.title()))
            try:
                self.page.put('', summary=i18n.twtranslate(
                    self.site.lang, 'followlive-blanking',
                    {'content': self.content}))
            except EditConflictError:
                pywikibot.output(
                    'An edit conflict occurred! Automatically retrying')
                # NOTE(review): self is passed again explicitly here; it
                # ends up in *values and is otherwise unused — verify.
                self.handle_bad_page(self)
            return
        if answer == '':
            pywikibot.output('Page correct! Proceeding with next pages.')
            return
        # Check user input:
        if answer[0] == 'u':
            # Answer entered as string
            answer = answer[1:]
        try:
            choices = answer.split(',')
        except ValueError:
            # User entered wrong value
            # NOTE(review): str.split never raises ValueError, so this
            # branch appears unreachable — verify intent.
            pywikibot.error('"{}" is not valid'.format(answer))
            continue
        # test input
        for choice in choices:
            try:
                x = int(choice)
            except ValueError:
                break
            else:
                # x from the last iteration decides; a non-numeric choice
                # breaks out and leaves 'answered' False.
                answered = (x >= 1 and x <= len(self.questionlist))
        if not answered:
            pywikibot.error('"{}" is not valid'.format(answer))
            continue

        # All choices validated; apply the selected templates.
        summary = ''
        for choice in choices:
            answer = int(choice)
            # grab the template parameters
            tpl = pywikibot.translate(
                self.site, templates)[self.questionlist[answer]]
            if tpl['pos'] == 'top':
                pywikibot.output('prepending {}...'.format(
                    self.questionlist[answer]))
                self.content = (self.questionlist[answer] + '\n'
                                + self.content)
            elif tpl['pos'] == 'bottom':
                pywikibot.output('appending {}...'.format(
                    self.questionlist[answer]))
                self.content += '\n' + self.questionlist[answer]
            else:
                raise RuntimeError(
                    '"pos" should be "top" or "bottom" for template {}. '
                    'Contact a developer.'.format(
                        self.questionlist[answer]))
            summary += tpl['msg'] + ' '
            pywikibot.output('Probably added '
                             + self.questionlist[answer])

        self.page.put(self.content, summary=summary)
        pywikibot.output('with comment {}\n'.format(summary))
def _closed_error(self, notice=''):
    """Log an error instead of performing a pointless API call.

    :param notice: extra text appended to the error message
    """
    message = 'Site {} has been closed. {}'.format(self.sitename, notice)
    pywikibot.error(message)
def main():
    """Process command line arguments and add templates to orphan pages."""
    enablePage = None  # Check if someone set an enablePage or not
    limit = 50000  # Hope that there aren't so many lonely pages in a project
    generator = None  # Check if bot should use default generator or not
    nwpages = False  # Check variable for newpages
    always = False  # Check variable for always
    disambigPage = None  # If no disambigPage given, not use it.
    # Arguments!
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-enable'):
            if len(arg) == 7:
                enablePage = pywikibot.input(
                    u'Would you like to check if the bot should run or not?')
            else:
                enablePage = arg[8:]
        # NOTE(review): plain 'if' (not 'elif') below — an '-enable...'
        # argument therefore also falls through to the final 'else' and is
        # handed to genFactory.handleArg(); confirm whether 'elif' was
        # intended.
        if arg.startswith('-disambig'):
            if len(arg) == 9:
                disambigPage = pywikibot.input(
                    u'In which page should the bot save the disambig pages?')
            else:
                disambigPage = arg[10:]
        elif arg.startswith('-limit'):
            if len(arg) == 6:
                limit = int(pywikibot.input(
                    u'How many pages do you want to check?'))
            else:
                limit = int(arg[7:])
        elif arg.startswith('-newpages'):
            if len(arg) == 9:
                nwlimit = 50  # Default: 50 pages
            else:
                nwlimit = int(arg[10:])
            generator = pywikibot.Site().newpages(number=nwlimit)
            nwpages = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    # Retrive the site
    wikiSite = pywikibot.Site()

    if not generator:
        generator = genFactory.getCombinedGenerator()
    # If the generator is not given, use the default one
    if not generator:
        generator = wikiSite.lonelypages(repeat=True, number=limit)
    # Take the configurations according to our project
    comment = i18n.twtranslate(wikiSite, 'lonelypages-comment-add-template')
    commentdisambig = i18n.twtranslate(
        wikiSite, 'lonelypages-comment-add-disambig-template')
    template = i18n.translate(wikiSite, Template)
    exception = i18n.translate(wikiSite, exception_regex)
    if template is None or exception is None:
        raise Exception("Missing configuration for site %r" % wikiSite)
    # EnablePage part
    if enablePage is not None:
        # Define the Page Object
        enable = pywikibot.Page(wikiSite, enablePage)
        # Loading the page's data
        try:
            getenable = enable.text
        except pywikibot.NoPage:
            pywikibot.output(
                u"%s doesn't esist, I use the page as if it was blank!"
                % enable.title())
            getenable = ''
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % enable.title())
            getenable = ''
        # If the enable page is set to disable, turn off the bot
        # (useful when the bot is run on a server)
        if getenable != 'enable':
            pywikibot.output('The bot is disabled')
            return
    # DisambigPage part
    if disambigPage is not None:
        disambigpage = pywikibot.Page(wikiSite, disambigPage)
        try:
            disambigtext = disambigpage.get()
        except pywikibot.NoPage:
            pywikibot.output(u"%s doesn't esist, skip!"
                             % disambigpage.title())
            disambigtext = ''
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, don't use it!"
                             % disambigpage.title())
            disambigPage = None
    # Main Loop
    for page in generator:
        if nwpages:
            # The newpages generator returns a tuple, not a Page object.
            page = page[0]
        pywikibot.output(u"Checking %s..." % page.title())
        if page.isRedirectPage():  # If redirect, skip!
            pywikibot.output(u'%s is a redirect! Skip...' % page.title())
            continue
        # refs is not a list, it's a generator while resList... is a list,
        # yes.
        refs = page.getReferences()
        refsList = list()
        for j in refs:
            if j is None:
                # We have to find out why the function returns that value
                pywikibot.error(u'1 --> Skip page')
                continue
            refsList.append(j)
        # This isn't possible with a generator
        if refsList != []:
            pywikibot.output(u"%s isn't orphan! Skip..." % page.title())
            continue
        # Never understood how a list can turn in "None",
        # but it happened :-S
        elif refsList is None:
            # We have to find out why the function returns that value
            # NOTE(review): refsList is always a list here, so this branch
            # looks unreachable — verify.
            pywikibot.error(u'2 --> Skip page')
            continue
        else:
            # no refs, no redirect; check if there's already the template
            try:
                oldtxt = page.get()
            except pywikibot.NoPage:
                pywikibot.output(u"%s doesn't exist! Skip..."
                                 % page.title())
                continue
            except pywikibot.IsRedirectPage:
                pywikibot.output(u"%s is a redirect! Skip..."
                                 % page.title())
                continue
            # I've used a loop in a loop. If I use continue in the second
            # loop, it won't do anything in the first. So let's create a
            # variable to avoid this problem.
            # NOTE(review): despite the comment above, no flag variable is
            # used here; after 'break' control still falls through to the
            # template-adding code below, so the "skipping..." message does
            # not actually skip the page — verify against upstream.
            for regexp in exception:
                res = re.findall(regexp, oldtxt.lower())
                # Found a template! Let's skip the page!
                if res != []:
                    pywikibot.output(
                        u'Your regex has found something in %s, skipping...'
                        % page.title())
                    break
                else:
                    continue
            if page.isDisambig() and disambigPage is not None:
                pywikibot.output(u'%s is a disambig page, report..'
                                 % page.title())
                if not page.title().lower() in disambigtext.lower():
                    disambigtext = u"%s\n*[[%s]]" % (disambigtext,
                                                     page.title())
                    disambigpage.put(disambigtext, commentdisambig)
                continue
            # Is the page a disambig but there's not disambigPage? Skip!
            elif page.isDisambig():
                pywikibot.output(u'%s is a disambig page, skip...'
                                 % page.title())
                continue
            else:
                # Ok, the page need the template. Let's put it there!
                # Adding the template in the text
                newtxt = u"%s\n%s" % (template, oldtxt)
                pywikibot.output(u"\t\t>>> %s <<<" % page.title())
                pywikibot.showDiff(oldtxt, newtxt)
                choice = 'y'
                if not always:
                    choice = pywikibot.inputChoice(
                        u'Orphan page found, add template?',
                        ['Yes', 'No', 'All'], 'yna')
                    if choice == 'a':
                        always = True
                        choice = 'y'
                if choice == 'y':
                    page.text = newtxt
                    try:
                        page.save(comment)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit Conflict! Skip...')
                        continue
def main(*args: Tuple[str, ...]):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    index = None
    djvu_path = '.'  # default djvu file directory
    pages = '1-'
    options = {}

    # Parse command line arguments.
    for cli_arg in pywikibot.handle_args(args):
        key, _, value = cli_arg.partition(':')
        if key == '-index':
            index = value
        elif key == '-djvu':
            djvu_path = value
        elif key == '-pages':
            pages = value
        elif key == '-summary':
            options['summary'] = value
        elif key in ('-force', '-always'):
            options[key[1:]] = True
        else:
            pywikibot.output('Unknown argument ' + cli_arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return

    # If djvu_path is not a file, build djvu_path from dir+index.
    djvu_path = os.path.abspath(os.path.expanduser(djvu_path))
    if not os.path.exists(djvu_path):
        pywikibot.error('No such file or directory: ' + djvu_path)
        return
    if os.path.isdir(djvu_path):
        djvu_path = os.path.join(djvu_path, index)

    # Check the djvu file exists and, if so, create the DjVuFile wrapper.
    djvu = DjVuFile(djvu_path)
    if not djvu.has_text():
        pywikibot.error('No text layer in djvu file {}'.format(djvu.file))
        return

    # Parse pages param: each comma-separated token is 'first', 'first-'
    # or 'first-last'; an open end means 'up to the last image'.
    spans = []
    for token in pages.split(','):
        first, dash, last = token.partition('-')
        begin = int(first) if first else 1
        if not dash:
            final = begin
        else:
            final = int(last) if last else djvu.number_of_images()
        spans.append((begin, final))
    pages = spans

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)
    if not index_page.exists():
        raise NoPageError(index)

    pywikibot.output('uploading text from {} to {}'.format(
        djvu.file, index_page.title(as_link=True)))

    bot = DjVuTextBot(djvu, index_page, pages=pages, site=site, **options)
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    url = ''
    description = []
    summary = None
    keep_filename = False
    always = False
    use_filename = None
    filename_prefix = None
    verify_description = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    recursive = False
    description_file = None

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keep_filename = True
            always = True
            verify_description = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keep_filename = True
        elif arg == '-filename':
            use_filename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verify_description = False
        elif arg == '-abortonwarn':
            # Without a code, abort on every warning (aborts becomes True).
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            # Without a code, ignore every warning (ignorewarn becomes
            # True).
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            match = CHUNK_SIZE_REGEX.match(option)
            chunk_size = get_chunk_size(match)
        elif arg == '-descfile':
            description_file = value
        elif not url:
            # First free-standing argument is the file/URL ...
            url = option
        else:
            # ... every further one is part of the description.
            description.append(option)

    description = ' '.join(description)

    if description_file:
        if description:
            pywikibot.error('Both a description and a -descfile were '
                            'provided. Please specify only one of those.')
            return False
        with codecs.open(description_file,
                         encoding=pywikibot.config.textfile_encoding) as f:
            description = f.read().replace('\r\n', '\n')

    # Ask until the given url is a URL or an existing local path;
    # in -always mode give up instead of prompting.
    while not ('://' in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
        if not always:
            error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
            url = pywikibot.input(
                'URL, file or directory where files are now:')

    # Sanity check for -always mode: everything required must be present.
    if always and (aborts is not True and ignorewarn is not True
                   or not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            # 'error' still holds the message from the loop above.
            additional = error + ' '
        if description is None:
            # NOTE(review): description is a joined string here, never
            # None, so this check looks dead — verify.
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing,
                     additional_text=additional)
        return False

    if os.path.isdir(url):
        # Collect the files of the directory (optionally recursively).
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                file_list.append(os.path.join(directory_info[0], dir_file))
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description,
                      useFilename=use_filename,
                      keepFilename=keep_filename,
                      verifyDescription=verify_description,
                      aborts=aborts, ignoreWarning=ignorewarn,
                      chunk_size=chunk_size, always=always,
                      summary=summary, filename_prefix=filename_prefix)
    bot.run()
def review_hunks(self) -> None:
    """Interactively review pending hunks, similar to ``git add --patch``.

    Walks through the super hunks generated from all pending hunks and
    asks the user to accept, reject, navigate, split, or list them.
    Mutates the ``reviewed`` state of the underlying hunks in place.
    """
    def find_pending(start: int, end: int) -> Optional[int]:
        # Scan from start towards end (in either direction) and return the
        # index of the first super hunk still awaiting review, else None.
        step = -1 if start > end else +1
        for pending in range(start, end, step):
            if super_hunks[pending].reviewed == Hunk.PENDING:
                return pending
        return None

    # TODO: Missing commands (compared to git --patch): edit and search
    help_msg = {
        'y': 'accept this hunk',
        'n': 'do not accept this hunk',
        'q': 'do not accept this hunk and quit reviewing',
        'a': 'accept this hunk and all other pending',
        'd': 'do not apply this hunk or any of the later hunks in '
             'the file',
        'g': 'select a hunk to go to',
        'j': 'leave this hunk undecided, see next undecided hunk',
        'J': 'leave this hunk undecided, see next hunk',
        'k': 'leave this hunk undecided, see previous undecided '
             'hunk',
        'K': 'leave this hunk undecided, see previous hunk',
        's': 'split this hunk into smaller ones',
        '?': 'help',
    }

    # Only hunks that are still pending take part in the review session.
    super_hunks = self._generate_super_hunks(
        h for h in self.hunks if h.reviewed == Hunk.PENDING)
    position = 0  # type: Optional[int]

    # Loop until every hunk in every super hunk has been decided.
    while any(
            any(hunk.reviewed == Hunk.PENDING for hunk in super_hunk)
            for super_hunk in super_hunks):
        assert position is not None
        super_hunk = super_hunks[position]

        next_pending = find_pending(position + 1, len(super_hunks))
        prev_pending = find_pending(position - 1, -1)

        # Only offer navigation/split commands that are currently possible.
        answers = ['y', 'n', 'q', 'a', 'd', 'g']
        if next_pending is not None:
            answers += ['j']
        if position < len(super_hunks) - 1:
            answers += ['J']
        if prev_pending is not None:
            answers += ['k']
        if position > 0:
            answers += ['K']
        if len(super_hunk) > 1:
            answers += ['s']
        answers += ['?']

        pywikibot.output(self._generate_diff(super_hunk))
        choice = pywikibot.input('Accept this hunk [{}]?'.format(
            ','.join(answers)))
        if choice not in answers:
            # Any unknown input falls through to the help text.
            choice = '?'

        if choice in ['y', 'n']:
            # Decide this super hunk, then advance to the next pending
            # one (wrapping back to the front if none follows).
            super_hunk.reviewed = \
                Hunk.APPR if choice == 'y' else Hunk.NOT_APPR
            if next_pending is not None:
                position = next_pending
            else:
                position = find_pending(0, position)
        elif choice == 'q':
            # Reject everything still pending and thereby end the loop.
            for super_hunk in super_hunks:
                for hunk in super_hunk:
                    if hunk.reviewed == Hunk.PENDING:
                        hunk.reviewed = Hunk.NOT_APPR
        elif choice in ['a', 'd']:
            # Decide this and all later pending hunks in one go, then go
            # back to the earliest remaining pending hunk (if any).
            for super_hunk in super_hunks[position:]:
                for hunk in super_hunk:
                    if hunk.reviewed == Hunk.PENDING:
                        hunk.reviewed = \
                            Hunk.APPR if choice == 'a' else Hunk.NOT_APPR
            position = find_pending(0, position)
        elif choice == 'g':
            # Print a table of all super hunks (status, header, first
            # changed line) and let the user jump to one by number.
            hunk_list = []
            rng_width = 18
            for index, super_hunk in enumerate(super_hunks, start=1):
                assert -1 <= super_hunk.reviewed <= 1, \
                    "The super hunk's review status is unknown."
                status = ' +-'[super_hunk.reviewed]
                if super_hunk[0].a_rng[1] - super_hunk[0].a_rng[0] > 0:
                    mode = '-'
                    first = self.a[super_hunk[0].a_rng[0]]
                else:
                    mode = '+'
                    first = self.b[super_hunk[0].b_rng[0]]
                hunk_list += [(status, index, Hunk.get_header_text(
                    *self._get_context_range(super_hunk), affix=''),
                    mode, first)]
                rng_width = max(len(hunk_list[-1][2]), rng_width)
            line_template = ('{0}{1} {2: >' +
                             str(int(math.log10(len(super_hunks)) + 1)) +
                             '}: {3: <' + str(rng_width) + '} {4}{5}')
            # the last entry is the first changed line which usually ends
            # with a \n (only the last may not, which is covered by the
            # if-condition following this block)
            hunk_list_str = ''.join(
                line_template.format(
                    '*' if hunk_entry[1] == position + 1 else ' ',
                    *hunk_entry)
                for hunk_entry in hunk_list)
            if hunk_list_str.endswith('\n'):
                hunk_list_str = hunk_list_str[:-1]
            pywikibot.output(hunk_list_str)
            next_hunk = pywikibot.input('Go to which hunk?')
            try:
                next_hunk_position = int(next_hunk) - 1
            except ValueError:
                # False marks "not a number"; distinguishable from 0.
                next_hunk_position = False
            if (next_hunk_position is not False
                    and 0 <= next_hunk_position < len(super_hunks)):
                position = next_hunk_position
            elif next_hunk:  # nothing entered is silently ignored
                pywikibot.error(
                    'Invalid hunk number "{}"'.format(next_hunk))
        elif choice == 'j':
            assert next_pending is not None
            position = next_pending
        elif choice == 'J':
            position += 1
        elif choice == 'k':
            assert prev_pending is not None
            position = prev_pending
        elif choice == 'K':
            position -= 1
        elif choice == 's':
            # Replace the current super hunk with its split parts;
            # `super_hunk` still refers to the pre-split object, so its
            # internal hunk count equals the number of new parts.
            super_hunks = (super_hunks[:position]
                           + super_hunks[position].split()
                           + super_hunks[position + 1:])
            pywikibot.output('Split into {} hunks'.format(
                len(super_hunk._hunks)))
        else:  # choice == '?':
            pywikibot.output(
                color_format(
                    '{purple}{0}{default}',
                    '\n'.join('{0} -> {1}'.format(answer, help_msg[answer])
                              for answer in answers)))
(page.title(asLink=True), targetpage.title(asLink=True))) pywikibot.log("Getting page text.") text = page.get(get_redirect=True) text += ("<noinclude>\n\n<small>This page was moved from %s. It's " "edit history can be viewed at %s</small></noinclude>" % (page.title(asLink=True, insite=targetpage.site), edithistpage.title(asLink=True, insite=targetpage.site))) pywikibot.log("Getting edit history.") historytable = page.getVersionHistoryTable() pywikibot.log("Putting page text.") targetpage.put(text, summary=summary) pywikibot.log("Putting edit history.") edithistpage.put(historytable, summary=summary) if __name__ == "__main__": try: main() except TargetSiteMissing as e: pywikibot.error(u'Need to specify a target site and/or language') pywikibot.error(u'Try running this script with -help for help/usage') pywikibot.exception() except TargetPagesMissing as e: pywikibot.error(u'Need to specify a page range') pywikibot.error(u'Try running this script with -help for help/usage') pywikibot.exception()
def main(*args):
    """
    Process command line arguments and invoke the archive bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: False on parameter error, otherwise None.
    """
    filename = None
    pagename = None
    namespace = None
    salt = ''
    force = False
    calc = None
    # Positional (non-option) arguments: the template names to process.
    # BUG FIX: the original reassigned ``args = []`` here, which shadowed
    # the *args parameter and handed an empty list to handle_args(), so
    # the real command line arguments were never processed.
    templates = []

    def if_arg_value(arg, name):
        # Yield the value part of ``arg`` iff it is the option ``name``
        # (expects the form "-name:value"); yields nothing otherwise.
        if arg.startswith(name):
            yield arg[len(name) + 1:]

    for arg in pywikibot.handle_args(args):
        for v in if_arg_value(arg, '-file'):
            filename = v
        for v in if_arg_value(arg, '-locale'):
            # Required for english month names
            locale.setlocale(locale.LC_TIME, v.encode('utf8'))
        for v in if_arg_value(arg, '-timezone'):
            # BUG FIX: was ``v.timezone`` — v is a plain str option value
            # and has no such attribute (raised AttributeError).
            os.environ['TZ'] = v
            # Or use the preset value
            if hasattr(time, 'tzset'):
                time.tzset()
        for v in if_arg_value(arg, '-calc'):
            calc = v
        for v in if_arg_value(arg, '-salt'):
            salt = v
        for v in if_arg_value(arg, '-force'):
            force = True
        for v in if_arg_value(arg, '-filename'):
            filename = v
        for v in if_arg_value(arg, '-page'):
            pagename = v
        for v in if_arg_value(arg, '-namespace'):
            namespace = v
        if not arg.startswith('-'):
            templates.append(arg)

    site = pywikibot.Site()

    if calc:
        # -calc only prints the archive key for the given page and exits.
        if not salt:
            pywikibot.bot.suggest_help(missing_parameters=['-salt'])
            return False
        page = pywikibot.Page(site, calc)
        if page.exists():
            calc = page.title()
        else:
            pywikibot.output(
                u'NOTE: the specified page "%s" does not (yet) exist.' % calc)
        pywikibot.output('key = %s' % calc_md5_hexdigest(calc, salt))
        return

    if not templates:
        pywikibot.bot.suggest_help(
            additional_text='No template was specified.')
        return False

    for template_name in templates:
        pagelist = []
        # Normalize the template name in the Template namespace (ns 10).
        tmpl = pywikibot.Page(site, template_name, ns=10).title()
        if not filename and not pagename:
            # Default: archive every page transcluding the template.
            if namespace is not None:
                ns = [str(namespace)]
            else:
                ns = []
            for pg in generate_transclusions(site, tmpl, ns):
                pagelist.append(pg)
        if filename:
            # BUG FIX: close the file deterministically instead of
            # leaking the handle from a bare open().
            with open(filename, 'r') as page_file:
                for pg in page_file.readlines():
                    pagelist.append(pywikibot.Page(site, pg, ns=10))
        if pagename:
            pagelist.append(pywikibot.Page(site, pagename, ns=3))

        pagelist = sorted(pagelist)
        for pg in pagelist:
            pywikibot.output(u'Processing %s' % pg)
            # Catching exceptions, so that errors in one page do not bail out
            # the entire process
            try:
                archiver = PageArchiver(pg, tmpl, salt, force)
                archiver.run()
            except ArchiveBotSiteConfigError as e:
                # no stack trace for errors originated by pages on-site
                pywikibot.error(
                    'Missing or malformed template in page %s: %s' % (pg, e))
            except Exception:
                pywikibot.error(u'Error occurred while processing page %s'
                                % pg)
                pywikibot.exception(tb=True)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when no page generator could be built, None otherwise.
    """
    # If none, the var is setted only for check purpose.
    summary = None
    addText = None
    regexSkip = None
    regexSkipUrl = None
    always = False
    textfile = None
    talkPage = False
    reorderEnabled = True

    # Put the text above or below the text?
    up = False

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Loading the arguments
    # NOTE: the options use startswith() matching, so the order of the
    # branches matters: '-textfile' must be tested before '-text' and
    # '-excepturl' before '-except'.  A len(arg) equal to the bare option
    # length means no ':value' suffix was given, so the user is prompted;
    # otherwise the value starts right after the colon (hence the +1 in
    # each slice offset, e.g. arg[10:] for the 9-char '-textfile').
    for arg in local_args:
        if arg.startswith('-textfile'):
            if len(arg) == 9:
                textfile = pywikibot.input(
                    u'Which textfile do you want to add?')
            else:
                textfile = arg[10:]
        elif arg.startswith('-text'):
            if len(arg) == 5:
                addText = pywikibot.input(u'What text do you want to add?')
            else:
                addText = arg[6:]
        elif arg.startswith('-summary'):
            if len(arg) == 8:
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-excepturl'):
            if len(arg) == 10:
                regexSkipUrl = pywikibot.input(u'What text should I skip?')
            else:
                regexSkipUrl = arg[11:]
        elif arg.startswith('-except'):
            if len(arg) == 7:
                regexSkip = pywikibot.input(u'What text should I skip?')
            else:
                regexSkip = arg[8:]
        elif arg == '-up':
            up = True
        elif arg == '-noreorder':
            reorderEnabled = False
        elif arg == '-always':
            always = True
        elif arg == '-talk' or arg == '-talkpage':
            talkPage = True
        else:
            # Anything unrecognized is a page generator argument.
            genFactory.handleArg(arg)

    # An explicit -text takes precedence over -textfile.
    if textfile and not addText:
        with codecs.open(textfile, 'r', config.textfile_encoding) as f:
            addText = f.read()

    generator = genFactory.getCombinedGenerator()
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
    if not addText:
        pywikibot.error("The text to add wasn't given.")
        return
    if talkPage:
        generator = pagegenerators.PageWithTalkPageGenerator(generator, True)
    for page in generator:
        # add_text returns (old text, new text, updated always-flag);
        # the always flag is threaded through so one confirmation can
        # apply to all remaining pages.
        (text, newtext, always) = add_text(page, addText, summary, regexSkip,
                                           regexSkipUrl, always, up, True,
                                           reorderEnabled=reorderEnabled,
                                           create=talkPage)