def read_replace_log(self):
    """Fetch the command page and its history since the last logged entry.

    Reads the newest ``timestamp`` stored in the table named by
    ``self.config['replacer_table']`` and asks the command page
    (``self.config['command_page']``) for at most 500 revisions since
    that moment, then fetches the current page text.

    Returns the result of ``time.sleep(self.config['timeout'])`` (i.e.
    ``None``, after sleeping) when the page or its history cannot be read;
    otherwise falls off the end of the visible code and returns ``None``.

    NOTE(review): ``insert``, ``revisions``, ``old_text`` and ``text`` are
    prepared here but not consumed in the part of the method visible in
    this excerpt.
    """
    # Local import: only needed to fetch the exception being handled.
    import sys

    # FIXME: Make sqlite3 compatible
    insert = ("INSERT INTO %s (timestamp, old_image, new_image, "
              "status, user, comment) VALUES (%%s, %%s, %%s, "
              "'pending', %%s, %%s)") % self.config['replacer_table']

    page = wikipedia.Page(self.site, self.config['command_page'])

    # Get last revision date.  The return value of execute() is not
    # defined by the DB-API, so rely on fetchone() (None when the table
    # is empty) instead of the driver-specific row count.
    self.cursor.execute(
        """SELECT timestamp FROM %s ORDER BY timestamp DESC LIMIT 1"""
        % self.config['replacer_table'])
    newest = self.cursor.fetchone()
    if newest is not None:
        since = mw_timestamp(newest[0])
    else:
        since = None

    try:
        revisions = page.fullVersionHistory(max = 500, since = since)
        # Fetch the page any way, to prevent editconflicts
        old_text = text = page.get()
    except StandardError:
        # Network error, not critical
        error = sys.exc_info()[1]
        output(u'Warning! Unable to read replacement log.', False)
        output('%s: %s' % (error.__class__.__name__, str(error)), False)
        return time.sleep(self.config['timeout'])
def do(self, args):
    """Run ``self.report(args)``; treat any failure as fatal.

    When ``self.report`` raises an ``Exception`` the error is logged via
    ``output``, the traceback is written to ``sys.stderr``,
    ``self.exit()`` is called and SIGTERM is sent with ``os.kill(0, ...)``
    (pid 0 addresses the caller's process group under POSIX ``kill``
    semantics).

    Returns ``None``.
    """
    try:
        self.report(args)
    except Exception:
        error = sys.exc_info()[1]
        output(u'A critical error during reporting has occured!')
        output('%s: %s' % (error.__class__.__name__, str(error)), False)
        traceback.print_exc(file = sys.stderr)
        # Push the traceback out before the SIGTERM below can cut the
        # process off with the text still sitting in a buffer.
        sys.stderr.flush()
        self.exit()
        os.kill(0, signal.SIGTERM)
def do(self, args):
    """Report one replacement; shut everything down if reporting fails.

    Delegates to ``self.report(args)``.  Any ``Exception`` escaping from
    it is logged through ``output``, its traceback is printed to and
    flushed from ``sys.stderr``, then ``self.exit()`` runs and SIGTERM is
    sent via ``os.kill(0, signal.SIGTERM)``.
    """
    try:
        self.report(args)
        return
    except Exception:
        failure = sys.exc_info()[1]

    # Only reached when report() failed.
    summary = '%s: %s' % (failure.__class__.__name__, str(failure))
    output(u'A critical error during reporting has occured!', False)
    output(summary, False)
    traceback.print_exc(file = sys.stderr)
    sys.stderr.flush()

    self.exit()
    os.kill(0, signal.SIGTERM)
def examine_revision_history(self, revisions, replacement, username):
    """ Find out who is to blame for a replacement

    Walks ``revisions`` in the given order and stops at the first one
    whose ``'*'`` entry contains the full matched command
    (``replacement.group(0)``).

    revisions   -- iterable of mappings with the keys ``'*'``,
                   ``'timestamp'`` and ``'user'``.
    replacement -- regex match object; groups 1 and 2 are passed through
                   ``strip_image``, group 3 is returned untouched.
    username    -- not used by this implementation.

    Returns a 5-tuple ``(db_time, group 1, group 2, user, group 3)`` or
    ``None`` (after logging a warning) when no revision contains the
    command.  As a side effect ``self.first_revision`` is lowered to the
    matched revision's timestamp when that is earlier (or when nothing
    was recorded yet).
    """
    command = replacement.group(0)
    for revision in revisions:
        if command not in revision['*']:
            continue

        db_time = db_timestamp(revision['timestamp'])
        # self.first_revision is stored as an int, so compare as ints.
        # NOTE(review): db_timestamp's return type is not visible here;
        # converting first keeps the comparison well defined either way
        # (comparing a str with an int never orders them numerically).
        numeric_time = int(db_time)
        if not self.first_revision or numeric_time < self.first_revision:
            self.first_revision = numeric_time
        return (db_time,
                strip_image(replacement.group(1)),
                strip_image(replacement.group(2)),
                revision['user'],
                replacement.group(3))

    output('Warning! Could not find out who did %s' % repr(command), False)
    return None
def examine_revision_history(self, revisions, replacement, username):
    """Find out who added a replacement command to the command page.

    revisions   -- sequence of ``(timestamp, user, text)`` tuples.
    replacement -- regex match object; groups 1 and 2 are passed through
                   ``strip_image``, group 3 is returned untouched.
    username    -- revisions by this user are skipped (except for the
                   first entry of ``revisions``, see below).

    If the command is already present in the first entry of
    ``revisions`` the author is reported as ``'<Unknown>'``.  Otherwise
    the remaining entries are searched in order for the first one that
    contains the command and was not made by ``username``.

    Returns a 5-tuple ``(db timestamp, group 1, group 2, user, group 3)``
    or ``None`` (after logging a warning) when nobody can be blamed,
    which now includes the case of an empty ``revisions`` sequence.
    """
    command = replacement.group(0)

    # Guard against an empty history: revisions[0] would raise IndexError.
    if revisions and command in revisions[0][2]:
        return (db_timestamp(revisions[0][0]),
                strip_image(replacement.group(1)),
                strip_image(replacement.group(2)),
                '<Unknown>',
                replacement.group(3))

    for timestamp, user, text in revisions[1:]:
        if command in text and user != username:
            return (db_timestamp(timestamp),
                    strip_image(replacement.group(1)),
                    strip_image(replacement.group(2)),
                    user,
                    replacement.group(3))

    output('Warning! Could not find out who did %s' % repr(command), False)
    return None
def examine_revision_history(self, revisions, replacement, username):
    """Find out who added a replacement command to the command page.

    revisions   -- iterable of ``(timestamp, user, text)`` tuples,
                   searched in the given order.
    replacement -- regex match object; groups 1 and 2 are passed through
                   ``strip_image``, group 3 is returned untouched.
    username    -- revisions made by this user are ignored.

    Returns a 5-tuple ``(db_time, group 1, group 2, user, group 3)`` for
    the first matching revision, or ``None`` (after logging a warning)
    when there is none.  As a side effect ``self.first_revision`` is
    lowered to the matched revision's timestamp when that is earlier (or
    when nothing was recorded yet).
    """
    command = replacement.group(0)
    for timestamp, user, text in revisions:
        if command not in text or user == username:
            continue

        db_time = db_timestamp(timestamp)
        # self.first_revision is stored as an int, so compare as ints.
        # NOTE(review): db_timestamp's return type is not visible here;
        # converting first keeps the comparison well defined either way
        # (comparing a str with an int never orders them numerically).
        numeric_time = int(db_time)
        if not self.first_revision or numeric_time < self.first_revision:
            self.first_revision = numeric_time
        return (db_time,
                strip_image(replacement.group(1)),
                strip_image(replacement.group(2)),
                user,
                replacement.group(3))

    output('Warning! Could not find out who did %s' % repr(command), False)
    return None
def read_replace_log(self):
    """ The actual worker method

    Reads the newest ``timestamp`` stored in the table named by
    ``self.config['replacer_table']``, fetches the history of the command
    page (``self.config['command_page']``) since that moment through
    ``self.get_history`` and then fetches the current page text.

    When the ``clean_list`` option is enabled the bot's own account name
    (looked up in ``config.sysopnames``) is passed along as ``username``;
    otherwise ``username`` is ``None``.

    Returns the result of ``time.sleep(self.config['timeout'])`` (i.e.
    ``None``, after sleeping) when the page or its history cannot be
    read.  ``SystemExit`` and ``KeyboardInterrupt`` are re-raised.

    NOTE(review): ``insert``, ``revisions``, ``old_text`` and ``text`` are
    prepared here but not consumed in the part of the method visible in
    this excerpt.
    """
    # Local import: only needed to fetch the exception being handled.
    import sys

    # FIXME: Make sqlite3 compatible
    insert = ("INSERT INTO %s (timestamp, old_image, new_image, "
              "status, user, comment) VALUES (%%s, %%s, %%s, "
              "'pending', %%s, %%s)") % self.config['replacer_table']

    page = wikipedia.Page(self.site, self.config['command_page'])

    # Get last revision date.  The return value of execute() is not
    # defined by the DB-API, so rely on fetchone() (None when the table
    # is empty) instead of the driver-specific row count.
    self.cursor.execute(
        """SELECT timestamp FROM %s ORDER BY timestamp DESC LIMIT 1"""
        % self.config['replacer_table'])
    newest = self.cursor.fetchone()
    if newest is not None:
        since = mw_timestamp(newest[0])
    else:
        since = None

    if self.config.get('clean_list', False):
        username = config.sysopnames[self.site.family.name][self.site.lang]
    else:
        username = None

    try:
        # Fetch revision history
        revisions = self.get_history(page.title(), since, username)
        # Fetch the page any way, to prevent editconflicts
        old_text = text = page.get()
    except (SystemExit, KeyboardInterrupt):
        raise
    except Exception:
        # Network error, not critical
        error = sys.exc_info()[1]
        output(u'Warning! Unable to read replacement log.', False)
        output('%s: %s' % (error.__class__.__name__, str(error)), False)
        return time.sleep(self.config['timeout'])
def main():
    """Script entry point: create the global ``Replacer`` and run it.

    Handles the command line arguments through ``wikipedia.handleArgs``,
    stores the new ``Replacer`` in the module global ``R`` and calls its
    ``start`` method.  ``SystemExit`` and ``KeyboardInterrupt`` pass
    through; any other exception is logged with its traceback on
    ``sys.stderr``.  ``wikipedia.stopme()`` is called on the way out in
    every case.
    """
    global R

    import sys
    import traceback

    wikipedia.handleArgs()
    output(u'Running ' + __version__)

    try:
        try:
            # FIXME: Add support for single-process replacer.
            R = Replacer()
            output(u'This bot runs from: ' + str(R.site))
            R.start()
        except (SystemExit, KeyboardInterrupt):
            # Deliberate shutdown requests are not errors.
            raise
        except Exception:
            output('A critical error has occured! Aborting!')
            traceback.print_exc(file = sys.stderr)
    finally:
        output('Exitting replacer')
        wikipedia.stopme()
class Replacer(object):
    """Turns replacement commands on a wiki page into database rows.

    Commands are template calls matching ``self.template`` on the page
    named by the ``command_page`` option.  Each accepted command is
    inserted with status ``'pending'`` into the table named by the
    ``replacer_table`` option.
    """

    def __init__(self):
        """Load the configuration, log in and open the database."""
        # Note: this is the config.CommonsDelinker dict itself, so the
        # update below merges the 'Replacer' overrides into it in place.
        self.config = config.CommonsDelinker
        self.config.update(getattr(config, 'Replacer', ()))

        # {{<replace_template>|old|new[|reason=...]}}
        self.template = re.compile(
            r'\{\{%s\|([^|]*?)\|([^|]*?)(?:(?:\|reason\=(.*?))?)\}\}'
            % self.config['replace_template'])
        # Pairs of case-insensitive patterns taken from the
        # 'disallowed_replacements' option.
        self.disallowed_replacements = [
            (re.compile(i[0], re.I), re.compile(i[1], re.I))
            for i in self.config.get('disallowed_replacements', ())]

        self.site = wikipedia.getSite()
        self.site.forceLogin()

        self.database = connect_database()
        self.cursor = self.database.cursor()

        self.first_revision = 0
        if self.config.get('replacer_report_replacements', False):
            self.reporters = []
        else:
            self.reporters = None

    def read_replace_log(self):
        """ The actual worker method

        Fetches the command page and its history since the newest
        timestamp in the database, queues every allowed replacement
        command (at most ``replacer_rate_limit`` per call when that
        option is set) and commits.  When the ``clean_list`` option is
        enabled the queued commands are then removed from the command
        page, retrying on edit conflicts.

        Returns ``None``.  If the page cannot be read, or it contains
        ``{{stop}}``, the method sleeps for ``self.config['timeout']``
        seconds and returns without queueing anything.
        """
        # Local import: only needed to fetch the exception being handled.
        import sys

        # FIXME: Make sqlite3 compatible
        insert = ("INSERT INTO %s (timestamp, old_image, new_image, "
                  "status, user, comment) VALUES (%%s, %%s, %%s, "
                  "'pending', %%s, %%s)") % self.config['replacer_table']

        page = wikipedia.Page(self.site, self.config['command_page'])

        # Get last revision date.  The return value of execute() is not
        # defined by the DB-API, so rely on fetchone() (None when the
        # table is empty) instead of the driver-specific row count.
        self.cursor.execute(
            """SELECT timestamp FROM %s ORDER BY timestamp DESC LIMIT 1"""
            % self.config['replacer_table'])
        newest = self.cursor.fetchone()
        if newest is not None:
            since = mw_timestamp(newest[0])
        else:
            since = None

        if self.config.get('clean_list', False):
            username = config.sysopnames[self.site.family.name][self.site.lang]
        else:
            username = None

        try:
            # Fetch revision history
            revisions = self.get_history(page.title(), since, username)
            # Fetch the page any way, to prevent editconflicts
            text = page.get()
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # Network error, not critical
            error = sys.exc_info()[1]
            output(u'Warning! Unable to read replacement log.', False)
            output('%s: %s' % (error.__class__.__name__, str(error)), False)
            return time.sleep(self.config['timeout'])

        # We're being killed
        if '{{stop}}' in text.lower():
            output(u'Found {{stop}} on command page. Not replacing anything.')
            return time.sleep(self.config['timeout'])

        # Sort oldest first
        revisions.sort(key = lambda rev: rev['timestamp'])

        # Find all commands
        replacements = self.template.finditer(text)

        remove_from_list = []
        count = 0
        for replacement in replacements:
            # The default of -1 never equals count, i.e. no limit.
            if count == self.config.get('replacer_rate_limit', -1):
                break

            # Find out who's to blame
            res = self.examine_revision_history(
                revisions, replacement, username)
            if res and self.allowed_replacement(replacement) and \
                    replacement.group(1) != replacement.group(2):
                # Insert replace command into database
                self.cursor.execute(insert, res)
                # Tag line for removal
                remove_from_list.append(replacement.group(0))
                output('Replacing %s by %s: %s' % replacement.groups())
                count += 1

        # Save all replaces to database
        self.database.commit()

        if remove_from_list and self.config.get('clean_list', False):
            # Cleanup the command page
            while True:
                try:
                    for remove in remove_from_list:
                        text = text.replace(remove, u'')
                    # Kill the freaky CommonsDupes
                    text = text.replace('== Dummy section, heading can be deleted (using [http://tools.wikimedia.de/~magnus/commons_dupes.php CommonsDupes]) ==', '')
                    # Kill the freaky whitespace
                    text = text.replace('\r', '')
                    while '\n\n\n' in text:
                        text = text.replace('\n\n\n', '\n')
                    # Save the page
                    page.put(text.strip(),
                             comment = 'Removing images being processed')
                    return
                except wikipedia.EditConflict:
                    # Try again on a fresh copy of the page
                    text = page.get()
not_ok_items.append(u'[[:%s%s]]' % \ (namespace_name, page_title)) else: not_ok_items.append(u'[[:%s:%s%s]]' % (site_prefix(site), namespace_name, page_title)) template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%s}}' % \ (self.config['replacer_report_template'], new_image, user, comment, self.config.get('replacer_report_seperator', u', ').join(not_ok_items)) page = wikipedia.Page(self.site, u'Image:' + old_image) try: text = page.get() except wikipedia.NoPage: output(u'Warning! Unable to report replacement to %s. Page does not exist!' % old_image) return except wikipedia.IsRedirectPage: output(u'Warning! %s is a redirect; not reporting replacement!' % old_image) return try: page.put(u'%s\n%s' % (template, text), comment = u'This image has been replaced by ' + new_image) except wikipedia.PageNotSaved, e: output(u'Warning! Unable to report replacement to %s.' % old_image, False) output('%s: %s' % (e.__class__.__name__, str(e)), False) except wikipedia.ServerError, e: output(u'Warning! Server error while reporting replacement to %s.' % old_image, False) output('%s: %s' % (e.__class__.__name__, str(e)), False) return self.report((old_image, new_image, user, comment, not_ok)) else:
not_ok_items.append(u'[[:%s%s]]' % \ (namespace_name, page_title)) else: not_ok_items.append(u'[[:%s:%s%s]]' % (site_prefix(site), namespace_name, page_title)) template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%s}}' % \ (self.config['replacer_report_template'], new_image, user, comment, self.config.get('replacer_report_seperator', u', ').join(not_ok_items)) page = wikipedia.Page(self.site, u'Image:' + old_image) try: text = page.get() except wikipedia.NoPage: output(u'Warning! Unable to report replacement to %s. Page does not exist!' % old_image) return try: page.put(u'%s\n%s' % (template, text), comment = u'This image has been replaced by ' + new_image) except PageNotSaved, e: output(u'Warning! Unable to report replacement to %s.' % old_image, False) output('%s: %s' % (e.__class__.__name__, str(e)), False) else: output(u'Reporting replacement of %s by %s.' % \ (old_image, new_image)) if __name__ == '__main__': import sys, traceback
for timestamp, user, text in revisions[1:]: if replacement.group(0) in text and user != username: return (db_timestamp(timestamp), strip_image(replacement.group(1)), strip_image(replacement.group(2)), user, replacement.group(3)) output('Warning! Could not find out who did %s' % \ repr(replacement.group(0)), False) return def start(self): while True: self.read_replace_log() # Replacer should not loop as often as delinker time.sleep(self.config['timeout'] * 2) if __name__ == '__main__': import sys, cgitb try: # FIXME: Add support for single-process replacer. r = Replacer() r.start() except StandardError, e: if type(e) not in (SystemExit, KeyboardInterrupt): output('A critical error has occured! Aborting!') print >>sys.stderr, cgitb.text(sys.exc_info()) except: pass wikipedia.stopme()