Exemplo n.º 1
0
def notify(user):
	"""Post a notice about inadequately sourced files on the user's talk page.

	The talk page is only edited when three checks pass, in this order:

	1. ``already_told(user)`` compares equal to False,
	2. ``bot.nobots(page=...)`` compares equal to True for the talk page,
	3. ``notified_already(user)`` compares equal to True.

	When a check fails a message is printed and the function returns
	without editing. Otherwise the output of generate_subst() is appended
	to the current talk page text and saved.

	After a successful save the module-level counter ``total_done_now`` is
	incremented, and ``user`` followed by the current UTC datetime are
	appended to the module-level list ``users_notified``.

	Ordinary exceptions raised while saving or book-keeping are swallowed
	after printing a generic message; KeyboardInterrupt and SystemExit are
	deliberately allowed to propagate.
	"""
	# Only the counter is rebound; users_notified is mutated in place and
	# therefore needs no global declaration.
	global total_done_now

	usertalk = 'User talk:' + user

	# The helpers' return types are not visible from here, so the explicit
	# comparisons with True/False are kept instead of relying on truthiness.
	if not (already_told(user) == False):
		print("User has already been notified in past 7 days.")
		return

	if not (bot.nobots(page=usertalk) == True):
		print("Bot denied!")
		return
	print("We're cleared to edit.")

	# NOTE(review): the edit proceeds when notified_already() is True, which
	# reads inverted relative to the helper's name -- confirm its contract.
	if not (notified_already(user) == True):
		print("User was notifed already.")
		return
	print("User hasn't already been notified!")

	page = site.Pages[usertalk]
	text = page.edit()
	text = text + generate_subst(user)
	try:
		page.save(text,summary="Notifying user about file(s) with inadequate source information ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/bsr|disable]])")
		total_done_now = total_done_now + 1
		users_notified.append(user)
		now123 = datetime.datetime.utcnow()
		users_notified.append(now123)
		print(user.encode('ascii', 'ignore') + " notified")
	except Exception:
		# Best effort: a failure for one user must not abort the caller.
		print("Unknown error // skipping user.")
Exemplo n.º 2
0
def notify(user):
    """Post a notice about missing file descriptions on the user's talk page.

    The talk page is only edited when three checks pass, in this order:

    1. ``were_they_told_in_three(user)`` compares equal to False,
    2. ``bot.nobots(page=...)`` compares equal to True for the talk page,
    3. ``notified_already(user)`` compares equal to True.

    When a check fails a message is printed and the function returns
    without editing. Otherwise the output of generate_subst() is appended
    to the current talk page text and saved.

    After the save the module-level counter ``total_done_now`` is
    incremented, and ``user`` followed by the current UTC datetime are
    appended to the module-level list ``users_notified``. Exceptions from
    the save are not caught here and propagate to the caller.
    """
    # Only the counter is rebound; users_notified is mutated in place and
    # therefore needs no global declaration.
    global total_done_now

    usertalk = "User talk:" + user

    # The helpers' return types are not visible from here, so the explicit
    # comparisons with True/False are kept instead of relying on truthiness.
    if not (were_they_told_in_three(user) == False):
        print("User has already been notified in past 3 days.")
        return

    if not (bot.nobots(page=usertalk) == True):
        print("Bot denied!")
        return
    print("We're cleared to edit.")

    # NOTE(review): the edit proceeds when notified_already() is True, which
    # reads inverted relative to the helper's name -- confirm its contract.
    if not (notified_already(user) == True):
        print("User was notifed already.")
        return
    print("User hasn't already been notified!")

    page = site.Pages[usertalk]
    text = page.edit()
    text = text + generate_subst(user)
    page.save(text, summary="Notifying user about missing file description(s) ([[WP:BOT|bot]] on trial)")

    total_done_now = total_done_now + 1
    users_notified.append(user)
    now123 = datetime.datetime.utcnow()
    users_notified.append(now123)
    print(user.encode("ascii", "ignore") + " notified")
Exemplo n.º 3
0
def process_page(page):
	"""Prepend an {{Information}} template to a file page and notify the uploader.

	Given an image page object, the function:

	* takes the user of the first revision returned by
	  ``page.revisions(dir='newer')`` as the uploader;
	* uses get_exif_date() for the date, falling back to that revision's
	  timestamp formatted as ``"%d %B %Y"`` when it returns None;
	* derives a description from the existing page text (templates and
	  headings stripped, pipes escaped, embedded images turned into links);
	* saves the page with the generated template placed before the old text;
	* appends a notice to the uploader's talk page when bot.nobots() allows.

	Pages whose cleaned description contains ``<nowiki`` are skipped without
	any edit. The module-level ``donenow`` counter is incremented after the
	file page is saved. When bot.donenow() does not return True the process
	exits via sys.exit().
	"""
	global donenow

	revision = next(page.revisions(dir='newer'))
	user = revision['user']

	date = get_exif_date(page)
	if date is None:
		date = time.strftime("%d %B %Y",revision['timestamp'])

	contents = page.edit()

	if contents != "":
		description = contents.strip()
		desc_code = mwparserfromhell.parse(description)
		for bad_code in desc_code.ifilter_templates(): # Remove templates
			description = description.replace(unicode(bad_code),'')
		for bad_code in desc_code.ifilter_headings(): # Remove headers
			description = description.replace(unicode(bad_code),'')
		if '<nowiki' in description:
			return # Skip complex descriptions
		description = description.replace('|','{{!}}') # Escape pipe symbols
		description = re.sub(r"""[ ]{2,}"""," ",description,flags=re.U) # Remove excessive spaces
		# NOTE(review): the pipe group below is not optional, so a bare
		# [[File:Name]] without parameters may not be rewritten -- confirm intent.
		description = re.sub(r"""\[\[(?:File|Image):(.*?)(?:\|.*?)\]\]""",r"[[:File:\1]]",description,flags=re.U) # Turn images into links
		description = re.sub(r"""\[\[User:.*?\]\] \(\[\[User talk:J.*?\]\]\).*?\(UTC\)""",'',description,flags=re.U) # Remove signatures when possible
	else:
		description = ""

	contents = u"""{{Information
| description = """+description+"""
| source      = {{own}}
| date        = """ + unicode(date) + """
| author      = {{subst:usernameexpand|""" + user.replace(" ","_") + """}}
}}\n""" + contents

	if not (bot.donenow("User:Theo's Little Bot/disable/selfimages",donenow=donenow,donenow_div=5,shutdown=100) == True):
		sys.exit()

	# adding the template
	# NOTE(review): the parentheses in both edit summaries are unbalanced;
	# left untouched because they are user-visible text.
	page.save(contents,"[[WP:BOT|Bot]]: Automatically adding {{[[Template:Information|Information]]}} to self-published work) ([[User:Theo's Little Bot/disable/selfimages|disable]]")
	donenow += 1

	# notifying the uploader
	usertalktitle = "User talk:"+user
	if bot.nobots(usertalktitle,user="******",task='selfimages') == True:
		usertalk = site.Pages[usertalktitle]
		notification = "\n\n== Notification of automated file description generation ==\n{{subst:Un-botfill|file="+page.page_title+"|sig=~~~~}}"
		# The summary must be passed by keyword: a positional argument after
		# appendtext= is a SyntaxError.
		# NOTE(review): assumes Page.save() treats its text argument as optional
		# and forwards appendtext/redirect to the edit API -- confirm against the
		# installed mwclient version.
		usertalk.save(appendtext=notification,summary="[[WP:BOT|Bot]]: Notifying user about autogenerated {{[[Template:Information|Information]]}} addition) ([[User:Theo's Little Bot/disable/selfimages|disable]]",redirect=True)
Exemplo n.º 4
0
def main():
	"""Log in, query the replica database and process each matching article.

	Sets the module-level ``site`` to a logged-in mwclient Site for
	en.wikipedia.org, then selects main-namespace page titles whose external
	links match ``%&utm_%=%`` from ``enwiki_p``. For every title, process()
	is called on the page provided bot.donenow() and bot.nobots() both
	return True. A local counter of pages for which process() returned True
	is passed to bot.donenow().

	The process exits via sys.exit() as soon as bot.donenow() does not
	return True.
	"""
	global site
	site = mwclient.Site('en.wikipedia.org')
	site.login(password.username, password.password)

	print("And we're live.")
	connection = MySQLdb.connect(
		host = 'enwiki.labsdb',
		db = 'enwiki_p',
		read_default_file = '~/replica.my.cnf'
	)

	# Each run handles at most the rows returned by the query below.
	# NOTE(review): this comment used to say "500 article increments" while
	# the query says LIMIT 5000 -- confirm which figure is intended.
	# !todo figure out how long a run takes vs replag
	# and then optimize crontab
	query = """\
	SELECT page_title
	FROM externallinks
	JOIN page
	ON page_id = el_from
	WHERE el_to LIKE "%&utm_%=%"
	AND page_namespace = 0
	LIMIT 5000;
	"""

	# All rows are fetched up front, so the cursor and connection are
	# released before the (potentially long) editing loop starts.
	try:
		cursor = connection.cursor()
		try:
			cursor.execute(query)
			rows = cursor.fetchall()
		finally:
			cursor.close()
	finally:
		connection.close()

	donenow = 0
	for row in rows:
		title = row[0].decode("utf-8") # since tuples are returned
		if not (bot.donenow("User:Theo's Little Bot/disable/tracking",donenow=donenow,donenow_div=5) == True):
			print("Bot was disabled...shutting down...")
			sys.exit()

		if not (bot.nobots(page=title,task='tracking') == True):
			print("Bot was denied, boo hoo.")
			continue

		if process(site.Pages[title]) == True:
			donenow += 1
		else:
			print("No changes to make.")
Exemplo n.º 5
0
def notify(user):
    """Post a notice about missing file descriptions on the user's talk page.

    The talk page is only edited when three checks pass, in this order:

    1. ``already_told(user)`` compares equal to False,
    2. ``bot.nobots(page=...)`` compares equal to True for the talk page,
    3. ``notified_already(user)`` compares equal to True.

    When a check fails a message is printed and the function returns
    without editing. Otherwise the output of generate_subst() is appended
    to the current talk page text and saved.

    After a successful save the module-level counter ``total_done_now`` is
    incremented, and ``user`` followed by the current UTC datetime are
    appended to the module-level list ``users_notified``.

    Ordinary exceptions raised while saving or book-keeping are swallowed
    after printing a generic message; KeyboardInterrupt and SystemExit are
    deliberately allowed to propagate.
    """
    # Only the counter is rebound; users_notified is mutated in place and
    # therefore needs no global declaration.
    global total_done_now

    usertalk = 'User talk:' + user

    # The helpers' return types are not visible from here, so the explicit
    # comparisons with True/False are kept instead of relying on truthiness.
    if not (already_told(user) == False):
        print("User has already been notified in past 3 days.")
        return

    if not (bot.nobots(page=usertalk) == True):
        print("Bot denied!")
        return
    print("We're cleared to edit.")

    # NOTE(review): the edit proceeds when notified_already() is True, which
    # reads inverted relative to the helper's name -- confirm its contract.
    if not (notified_already(user) == True):
        print("User was notifed already.")
        return
    print("User hasn't already been notified!")

    page = site.Pages[usertalk]
    text = page.edit()
    text = text + generate_subst(user)
    try:
        page.save(
            text,
            summary=
            "Notifying user about missing file description(s) ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/desc_notifier|disable]])"
        )
        total_done_now = total_done_now + 1
        users_notified.append(user)
        now123 = datetime.datetime.utcnow()
        users_notified.append(now123)
        print(user.encode('ascii', 'ignore') + " notified")
    except Exception:
        # Best effort: a failure for one user must not abort the caller.
        print("Unknown error // skipping user.")
Exemplo n.º 6
0
	def run(self):
		"""Add Wayback Machine archive links to dead ``cite web`` references.

		For every page in 'Category:All articles with dead external links':

		1. collect the ``<ref>...</ref>`` elements whose lower-cased text
		   contains ``{{`` followed by one of ``self.deadlink_names``;
		2. for each ``cite web`` template in those refs that has no
		   ``archiveurl``, request the cited URL and, when it does not answer
		   with an OK status (or the request raises), ask web.archive.org for
		   a snapshot -- near the ``accessdate`` when that can be parsed;
		3. when the archive answers OK, add ``archiveurl`` (and
		   ``archivedate`` when the date can be read from the snapshot URL),
		   and remove the dead-link templates from that ref;
		4. save the page unless ``self.DRYRUN`` is set; in a verbose dry run a
		   unified diff is printed instead.

		``self.donenow`` is incremented after each successful save. The
		process exits via sys.exit() when bot.donenow() does not return True.
		"""
		category = mwclient.listing.Category(site, 'Category:All articles with dead external links')
		# category = [site.Pages['10 Hronia Mazi']] - debugging only

		# Loop-invariant: the opening-brace form of every dead-link template name.
		dead_markers = ['{{' + name for name in self.deadlink_names]

		for page in category:
			print(page.page_title)
			# Fetched once; the text is never mutated in place, so the pristine
			# copy for the diff and the working copy can start as the same object.
			orig_contents = page.edit()
			contents = orig_contents
			number_done = 0

			all_refs = re.findall(r"""<ref[^>]*>.*?</ref>""",contents,flags=re.UNICODE | re.IGNORECASE)
			dead_refs = []
			for ref in all_refs:
				ref_lower = ref.lower()
				if any(marker in ref_lower for marker in dead_markers):
					dead_refs.append(ref)

			for ref in dead_refs:
				ref_code = mwparserfromhell.parse(ref)
				updated = False
				for template in ref_code.filter_templates():
					if "cite web" not in template.name or template.has_param('archiveurl'):
						continue
					if not template.has_param('url'):
						# template.get() raises ValueError for a missing parameter;
						# without a URL there is nothing to look up.
						continue
					url = unicode(template.get('url').value.strip())

					# Decide whether the citation needs an archive link. The URL is
					# requested exactly once so the decision cannot be left unset
					# by two requests disagreeing with each other.
					try:
						if url.find('web.archive.org') != -1:
							okay_to_edit = False
							print("The url is already an archive link!")
						elif requests.get(url, timeout=15).status_code != requests.codes.ok:
							okay_to_edit = True
						else:
							okay_to_edit = False
							print("No need to add an archive, since the citations's URL currently works!")
					except Exception:
						# Any failure to fetch the URL is treated as a dead link.
						okay_to_edit = True

					if not okay_to_edit:
						print("Not adding archive link, per above.")
						continue

					if template.has_param('accessdate'):
						try:
							accessdate = parser.parse(str(template.get('accessdate').value))
							wayback_date = accessdate.strftime("%Y%m%d%H%M%S")
							r = requests.get("http://web.archive.org/web/{date}/{url}".format(date=wayback_date,url=url))
						except ValueError: # in case we can't parse the accessdate
							r = requests.get("http://web.archive.org/web/form-submit.jsp", params={'url':url, 'type':'replay'})
					else:
						r = requests.get("http://web.archive.org/web/form-submit.jsp", params={'url':url, 'type':'replay'})
					print(r.url)
					print(r.status_code)

					if r.status_code != requests.codes.ok:
						print("{url} not archived in wayback machine.".format(url=url))
						continue # this url was not archived by the wayback machine; nothing we can do here.

					number_done += 1
					updated = True
					wayback_url = r.url
					try:
						# The fifth '/'-separated piece of the final URL is expected
						# to be the snapshot timestamp.
						wayback_date_object = datetime.strptime(wayback_url.split('/')[4],"%Y%m%d%H%M%S")
						wayback_date = wayback_date_object.strftime('%d %B %Y')
						template.add('archivedate',wayback_date)
					except (ValueError, IndexError):
						print("Unable to fetch date...no worries, we have exception handing!")
					template.add('archiveurl',wayback_url)

				if updated == True:
					# The dead-link marker is only dropped once an archive was added.
					for template in ref_code.filter_templates():
						nameoftemp = template.name.lower()
						if any(name in nameoftemp for name in self.deadlink_names):
							ref_code.remove(template)
					new_ref = unicode(ref_code)
					# Plain replacement: re.sub() would interpret backslashes in
					# new_ref as escape sequences / group references.
					contents = contents.replace(ref, new_ref)

			if self.DRYRUN == False and number_done > 0:
				if not (bot.donenow("User:Theo's Little Bot/disable/deadlinks",donenow=self.donenow,donenow_div=5) == True):
					print("Bot was disabled.")
					sys.exit()
				if bot.nobots(page=page.page_title) == True:
					try:
						page.save(contents,summary="Adding archiveurl for {0} dead link{1} ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/deadlinks|disable]])".format(number_done,'s' if number_done > 1 else ''))
						print("{0} saved!".format(page.page_title))
						self.donenow += 1
					except mwclient.errors.EditError as e:
						print("ERROR - unable to save page:  %s" % (e,))
				else:
					print("Could not save page...bot not authorized.")
			elif self.DRYRUN == True and self.VERBOSE == True:
				diff = difflib.unified_diff(orig_contents.splitlines(), contents.splitlines())
				print('\n'.join(diff))