def callback(link, text, groups, rng):
    self.assertEqual(link.site, self.wp_site)
    if link.title == 'World':
        return pywikibot.Link('Homeworld', link.site)
    elif link.title.lower() == 'you':
        return False
def get_redirects_from_dump(self, alsoGetPageTitles=False):
    """
    Extract redirects from dump.

    Load a local XML dump file, look at all pages which have the
    redirect flag set, and find out where they're pointing at. Return
    a dictionary where the redirect names are the keys and the redirect
    targets are the values.
    """
    xmlFilename = self.xmlFilename
    redict = {}
    # open xml dump and read page titles out of it
    dump = xmlreader.XmlDump(xmlFilename)
    redirR = self.site.redirectRegex()
    readPagesCount = 0
    if alsoGetPageTitles:
        pageTitles = set()
    for entry in dump.parse():
        readPagesCount += 1
        # always print status message after 10000 pages
        if readPagesCount % 10000 == 0:
            pywikibot.output('{0} pages read...'.format(readPagesCount))
        if len(self.namespaces) > 0:
            if pywikibot.Page(self.site, entry.title).namespace() \
                    not in self.namespaces:
                continue
        if alsoGetPageTitles:
            pageTitles.add(space_to_underscore(
                pywikibot.Link(entry.title, self.site)))

        m = redirR.match(entry.text)
        if m:
            target = m.group(1)
            # There might be redirects to another wiki. Ignore these.
            target_link = pywikibot.Link(target, self.site)
            try:
                target_link.parse()
            except pywikibot.SiteDefinitionError as e:
                pywikibot.log(e)
                pywikibot.output(
                    'NOTE: Ignoring {0} which is a redirect ({1}) to an '
                    'unknown site.'.format(entry.title, target))
                target_link = None
            else:
                if target_link.site != self.site:
                    pywikibot.output(
                        'NOTE: Ignoring {0} which is a redirect to '
                        'another site {1}.'
                        .format(entry.title, target_link.site))
                    target_link = None
            # if the redirect does not link to another wiki
            if target_link and target_link.title:
                source = pywikibot.Link(entry.title, self.site)
                if target_link.anchor:
                    pywikibot.output(
                        'HINT: {0} is a redirect with a pipelink.'
                        .format(entry.title))
                redict[space_to_underscore(source)] = (
                    space_to_underscore(target_link))
    if alsoGetPageTitles:
        return redict, pageTitles
    else:
        return redict
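# A minimal usage sketch for the method above. Everything hypothetical is
# named as such: the owning bot class ('RedirectRobot'), its constructor
# arguments, and the 'redirs.xml' dump path are assumptions; the keyword
# argument and return shapes come straight from the method.
#
#     bot = RedirectRobot(xmlFilename='redirs.xml')  # hypothetical ctor
#     redirects, titles = bot.get_redirects_from_dump(
#         alsoGetPageTitles=True)
#     for source, target in redirects.items():
#         print('{0} -> {1}'.format(source, target))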
def treat_page_and_item(self, page, item):
    """Process a single page/item."""
    if willstop:
        raise KeyboardInterrupt
    templates = page.raw_extracted_templates
    for (template, fielddict) in templates:
        # Clean up template
        try:
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(with_ns=False)
        except pywikibot.exceptions.InvalidTitle:
            pywikibot.error("Failed parsing template; '{}' should be "
                            'the template name.'.format(template))
            continue
        if template not in self.templateTitles:
            continue
        # We found the template we were looking for
        for field, value in fielddict.items():
            field = field.strip()
            value = value.strip()
            if not field or not value:
                continue
            if field not in self.fields:
                continue
            # This field contains something useful for us
            prop, options = self.fields[field]
            claim = pywikibot.Claim(self.repo, prop)
            if claim.type == 'wikibase-item':
                # Try to extract a valid page
                match = pywikibot.link_regex.search(value)
                if match:
                    link_text = match.group(1)
                else:
                    if self._get_option_with_fallback(options, 'islink'):
                        link_text = value
                    else:
                        pywikibot.output(
                            '{} field {} value {} is not a wikilink. '
                            'Skipping.'.format(claim.getID(), field,
                                               value))
                        continue
                linked_item = self._template_link_target(item, link_text)
                if not linked_item:
                    continue
                claim.setTarget(linked_item)
            elif claim.type in ('string', 'external-id'):
                claim.setTarget(value.strip())
            elif claim.type == 'url':
                match = self.linkR.search(value)
                if not match:
                    continue
                claim.setTarget(match.group('url'))
            elif claim.type == 'commonsMedia':
                commonssite = pywikibot.Site('commons', 'commons')
                imagelink = pywikibot.Link(value, source=commonssite,
                                           default_namespace=6)
                image = pywikibot.FilePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.FilePage(image.getRedirectTarget())
                if not image.exists():
                    pywikibot.output(
                        "{0} doesn't exist. I can't link to it"
                        ''.format(image.title(as_link=True)))
                    continue
                claim.setTarget(image)
            else:
                pywikibot.output('{} is not a supported datatype.'.format(
                    claim.type))
                continue
            # A generator might yield pages from multiple sites
            self.user_add_claim_unless_exists(
                item, claim,
                self._get_option_with_fallback(options, 'exists'),
                page.site, pywikibot.output)
def run(self):
    """Run the bot."""
    # validate L10N
    try:
        self.template_list = self.site.family.category_redirect_templates[
            self.site.code]
    except KeyError:
        pywikibot.warning(u"No redirect templates defined for %s"
                          % self.site)
        return
    if not self.get_cat_title():
        pywikibot.warning(u"No redirect category found for %s" % self.site)
        return
    user = self.site.user()  # invokes login()
    newredirs = []
    localtime = time.localtime()
    today = "%04d-%02d-%02d" % localtime[:3]
    edit_request_page = pywikibot.Page(
        self.site, u"User:%s/category edit requests" % user)
    datafile = pywikibot.config.datafilepath("%s-catmovebot-data"
                                             % self.site.dbName())
    try:
        with open(datafile, "rb") as inp:
            record = cPickle.load(inp)
    except IOError:
        record = {}
    if record:
        with open(datafile + ".bak", "wb") as f:
            cPickle.dump(record, f, protocol=config.pickle_protocol)
    # regex to match soft category redirects
    # note that any templates containing optional "category:" are
    # incorrect and will be fixed by the bot
    template_regex = re.compile(
        r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
            (?:%(template)s)\s*\|        # catredir template name
            (\s*%(catns)s\s*:\s*)?       # optional "category:"
            ([^|}]+)                     # redirect target cat
            (?:\|[^|}]*)*}}              # optional arguments 2+, ignored
        """ % {'prefix': self.site.namespace(10).lower(),
               'template': "|".join(item.replace(" ", "[ _]+")
                                    for item in self.template_list),
               'catns': self.site.namespace(14)},
        re.I | re.X)

    self.check_hard_redirect()

    comment = i18n.twtranslate(self.site, self.move_comment)
    counts = {}
    nonemptypages = []
    redircat = pywikibot.Category(
        pywikibot.Link(self.cat_title, self.site))

    pywikibot.output(u"\nChecking %d category redirect pages"
                     % redircat.categoryinfo['subcats'])
    catpages = set()
    for cat in redircat.subcategories():
        catpages.add(cat)
        cat_title = cat.title(withNamespace=False)
        if "category redirect" in cat_title:
            self.log_text.append(u"* Ignoring %s"
                                 % cat.title(asLink=True, textlink=True))
            continue
        if hasattr(cat, "_catinfo"):
            # skip empty categories that don't return a "categoryinfo" key
            catdata = cat.categoryinfo
            if "size" in catdata and int(catdata['size']):
                # save those categories that have contents
                nonemptypages.append(cat)
        if cat_title not in record:
            # make sure every redirect has a record entry
            record[cat_title] = {today: None}
            try:
                newredirs.append("*# %s -> %s"
                                 % (cat.title(asLink=True, textlink=True),
                                    cat.getCategoryRedirectTarget().title(
                                        asLink=True, textlink=True)))
            except pywikibot.Error:
                pass
        # do a null edit on cat
        try:
            cat.save()
        except Exception:
            pass

    # delete record entries for non-existent categories; iterate over a
    # copy of the keys so the dict can be modified safely
    for cat_name in list(record.keys()):
        if pywikibot.Category(self.site,
                              self.catprefix + cat_name) not in catpages:
            del record[cat_name]

    pywikibot.output(u"\nMoving pages out of %s redirected categories."
                     % len(nonemptypages))
    for cat in pagegenerators.PreloadingGenerator(nonemptypages):
        try:
            if not cat.isCategoryRedirect():
                self.log_text.append(u"* False positive: %s"
                                     % cat.title(asLink=True,
                                                 textlink=True))
                continue
        except pywikibot.Error:
            self.log_text.append(u"* Could not load %s; ignoring"
                                 % cat.title(asLink=True, textlink=True))
            continue
        cat_title = cat.title(withNamespace=False)
        if not self.readyToEdit(cat):
            counts[cat_title] = None
            self.log_text.append(u"* Skipping %s; in cooldown period."
                                 % cat.title(asLink=True, textlink=True))
            continue
        dest = cat.getCategoryRedirectTarget()
        if not dest.exists():
            self.problems.append("# %s redirects to %s"
                                 % (cat.title(asLink=True, textlink=True),
                                    dest.title(asLink=True,
                                               textlink=True)))
            # do a null edit on cat to update any special redirect
            # categories this wiki might maintain
            try:
                cat.save()
            except Exception:
                pass
            continue
        if dest.isCategoryRedirect():
            double = dest.getCategoryRedirectTarget()
            if double == dest or double == cat:
                self.log_text.append(u"* Redirect loop from %s"
                                     % dest.title(asLink=True,
                                                  textlink=True))
                # do a null edit on cat
                try:
                    cat.save()
                except Exception:
                    pass
            else:
                self.log_text.append(
                    u"* Fixed double-redirect: %s -> %s -> %s"
                    % (cat.title(asLink=True, textlink=True),
                       dest.title(asLink=True, textlink=True),
                       double.title(asLink=True, textlink=True)))
                oldtext = cat.text
                # remove the old redirect from the old text,
                # leaving behind any non-redirect text
                oldtext = template_regex.sub("", oldtext)
                newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
                           % {'redirtemp': self.template_list[0],
                              'ncat': double.title(withNamespace=False)})
                newtext = newtext + oldtext.strip()
                try:
                    cat.text = newtext
                    cat.save(i18n.twtranslate(self.site,
                                              self.dbl_redir_comment))
                except pywikibot.Error as e:
                    self.log_text.append("** Failed: %s" % e)
            continue
        found, moved = self.move_contents(
            cat_title, dest.title(withNamespace=False),
            editSummary=comment)
        if found is None:
            self.log_text.append(u"* [[:%s%s]]: error in move_contents"
                                 % (self.catprefix, cat_title))
        elif found:
            record[cat_title][today] = found
            self.log_text.append(u"* [[:%s%s]]: %d found, %d moved"
                                 % (self.catprefix, cat_title,
                                    found, moved))
        counts[cat_title] = found
        # do a null edit on cat
        try:
            cat.save()
        except Exception:
            pass

    with open(datafile, "wb") as f:
        cPickle.dump(record, f, protocol=config.pickle_protocol)

    self.log_text.sort()
    self.problems.sort()
    newredirs.sort()
    comment = i18n.twtranslate(self.site, self.maint_comment)
    self.log_page.text = (u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n"
                          % time.gmtime()[:6]
                          + u'\n'.join(self.log_text)
                          + u'\n* New redirects since last report:\n'
                          + u'\n'.join(newredirs)
                          + u'\n' + u'\n'.join(self.problems)
                          + u'\n' + self.get_log_text())
    self.log_page.save(comment)
    if self.edit_requests:
        edit_request_page.text = (self.edit_request_text
                                  % {'itemlist': u"\n"
                                     + u"\n".join(
                                         (self.edit_request_item % item)
                                         for item in self.edit_requests)})
        edit_request_page.save(comment)
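import re

# For intuition: a standalone stand-in for the soft-redirect pattern
# built in run() above. The real pattern derives the "template" and
# "category" namespace names and the template list from the wiki; the
# English defaults hard-coded here are assumptions for illustration.
EXAMPLE_CATREDIR_REGEX = re.compile(
    r"""{{\s*(?:template\s*:\s*)?       # optional "template:"
        (?:category[ _]+redirect)\s*\|  # catredir template name
        (\s*category\s*:\s*)?           # optional "category:"
        ([^|}]+)                        # redirect target cat
        (?:\|[^|}]*)*}}                 # optional arguments 2+, ignored
    """, re.I | re.X)

# EXAMPLE_CATREDIR_REGEX.search('{{Category redirect|Cats of Norway}}')
# .group(2) would yield 'Cats of Norway'.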
def handleOneLink(match):
    titleWithSection = match.group('titleWithSection')
    label = match.group('label')
    trailingChars = match.group('linktrail')
    newline = match.group('newline')
    try:
        is_interwiki = self.site.isInterwikiLink(titleWithSection)
    except ValueError:  # T111513
        is_interwiki = True
    if not is_interwiki:
        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # We only work on namespace 0 because pipes and linktrails work
        # differently for images and categories.
        page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
        try:
            namespace = page.namespace()
        except pywikibot.InvalidTitle:
            return match.group()
        if namespace == 0:
            # Replace underlines by spaces, also multiple underlines
            titleWithSection = re.sub('_+', ' ', titleWithSection)
            # Remove double spaces
            titleWithSection = re.sub(' +', ' ', titleWithSection)
            # Remove unnecessary leading spaces from title,
            # but remember if we did this because we eventually want
            # to re-add it outside of the link later.
            titleLength = len(titleWithSection)
            titleWithSection = titleWithSection.lstrip()
            hadLeadingSpaces = (len(titleWithSection) != titleLength)
            hadTrailingSpaces = False
            # Remove unnecessary trailing spaces from title,
            # but remember if we did this because it may affect
            # the linktrail and because we eventually want to
            # re-add it outside of the link later.
            if not trailingChars:
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.rstrip()
                hadTrailingSpaces = (len(titleWithSection) != titleLength)

            # Convert URL-encoded characters to unicode
            from pywikibot.page import url2unicode
            titleWithSection = url2unicode(titleWithSection,
                                           encodings=self.site)

            if titleWithSection == '':
                # just skip empty links.
                return match.group()

            # Remove unnecessary initial and final spaces from label.
            # Please note that some editors prefer spaces around pipes.
            # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
            if label is not None:
                # Remove unnecessary leading spaces from label,
                # but remember if we did this because we want
                # to re-add it outside of the link later.
                labelLength = len(label)
                label = label.lstrip()
                hadLeadingSpaces = (len(label) != labelLength)
                # Remove unnecessary trailing spaces from label,
                # but remember if we did this because it affects
                # the linktrail.
                if not trailingChars:
                    labelLength = len(label)
                    label = label.rstrip()
                    hadTrailingSpaces = (len(label) != labelLength)
            else:
                label = titleWithSection
            if trailingChars:
                label += trailingChars

            if self.site.siteinfo['case'] == 'first-letter':
                firstcase_title = first_lower(titleWithSection)
                firstcase_label = first_lower(label)
            else:
                firstcase_title = titleWithSection
                firstcase_label = label

            if firstcase_label == firstcase_title:
                newLink = '[[%s]]' % label
            # Check if we can create a link with trailing characters
            # instead of a pipelink
            elif (firstcase_label.startswith(firstcase_title)
                    and trailR.sub('',
                                   label[len(titleWithSection):]) == ''):
                newLink = '[[%s]]%s' % (label[:len(titleWithSection)],
                                        label[len(titleWithSection):])
            else:
                # Try to capitalize the first letter of the title.
                # Not useful for languages that don't capitalize nouns.
                # TODO: Add a configuration variable for each site,
                # which determines if the link target is written in
                # uppercase
                if self.site.sitename == 'wikipedia:de':
                    titleWithSection = first_upper(titleWithSection)
                newLink = "[[%s|%s]]" % (titleWithSection, label)
            # re-add spaces that were pulled out of the link.
            # Examples:
            #   text[[ title ]]text        -> text [[title]] text
            #   text[[ title | name ]]text -> text [[title|name]] text
            #   text[[ title |name]]text   -> text[[title|name]]text
            #   text[[title| name]]text    -> text [[title|name]]text
            if hadLeadingSpaces and not newline:
                newLink = ' ' + newLink
            if hadTrailingSpaces:
                newLink = newLink + ' '
            if newline:
                newLink = newline + newLink
            return newLink
    # don't change anything
    return match.group()
def findAlternatives(self, disambPage):
    if disambPage.isRedirectPage() and not self.primary:
        if (disambPage.site.lang in self.primary_redir_template
                and self.primary_redir_template[disambPage.site.lang]
                in disambPage.templates(get_redirect=True)):
            baseTerm = disambPage.title()
            for template in disambPage.templatesWithParams(
                    get_redirect=True):
                if template[0] == self.primary_redir_template[
                        disambPage.site.lang] \
                        and len(template[1]) > 0:
                    baseTerm = template[1][1]
            disambTitle = primary_topic_format[self.mylang] % baseTerm
            try:
                disambPage2 = pywikibot.Page(
                    pywikibot.Link(disambTitle, self.mysite))
                links = disambPage2.linkedPages()
                links = [correctcap(link, disambPage2.get())
                         for link in links]
            except pywikibot.NoPage:
                pywikibot.output(u"No page at %s, using redirect target."
                                 % disambTitle)
                links = disambPage.linkedPages()[:1]
                links = [correctcap(link,
                                    disambPage.get(get_redirect=True))
                         for link in links]
            self.alternatives += links
        else:
            try:
                target = disambPage.getRedirectTarget().title()
                self.alternatives.append(target)
            except pywikibot.NoPage:
                pywikibot.output(u"The specified page was not found.")
                user_input = pywikibot.input(u"""\
Please enter the name of the page where the redirect should have pointed at,
or press enter to quit:""")
                if user_input == "":
                    sys.exit(1)
                else:
                    self.alternatives.append(user_input)
            except pywikibot.IsNotRedirectPage:
                pywikibot.output(
                    u"The specified page is not a redirect. Skipping.")
                return False
    elif self.getAlternatives:
        try:
            if self.primary:
                try:
                    disambPage2 = pywikibot.Page(
                        pywikibot.Link(
                            primary_topic_format[self.mylang]
                            % disambPage.title(),
                            self.mysite))
                    links = disambPage2.linkedPages()
                    links = [correctcap(link, disambPage2.get())
                             for link in links]
                except pywikibot.NoPage:
                    pywikibot.output(
                        u"Page does not exist, using the first link "
                        u"in page %s." % disambPage.title())
                    links = disambPage.linkedPages()[:1]
                    links = [correctcap(link, disambPage.get())
                             for link in links]
            else:
                try:
                    links = disambPage.linkedPages()
                    links = [correctcap(link, disambPage.get())
                             for link in links]
                except pywikibot.NoPage:
                    pywikibot.output(u"Page does not exist, skipping.")
                    return False
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page is a redirect, skipping.")
            return False
        self.alternatives += links
    return True
def test_invalid_link_as_source(self):
    """Test IndexPage from invalid Link as source."""
    source = pywikibot.Link(self.not_existing_invalid_title,
                            source=self.site)
    self.assertRaises(ValueError, IndexPage, source)
def move_to_category(self, article, original_cat, current_cat):
    """
    Given an article which is in category original_cat, ask the user if
    it should be moved to one of original_cat's subcategories.
    Recursively run through subcategories' subcategories.

    NOTE: current_cat is only used for internal recursion. You should
    always use current_cat = original_cat.
    """
    pywikibot.output(u'')
    # Show the title of the page where the link was found.
    # Highlight the title in purple.
    pywikibot.output(u'Treating page \03{lightpurple}%s\03{default}, '
                     u'currently in \03{lightpurple}%s\03{default}'
                     % (article.title(), current_cat.title()))

    # Determine a reasonable amount of context to print
    try:
        full_text = article.get(get_redirect=True)
    except pywikibot.NoPage:
        pywikibot.output(u'Page %s not found.' % article.title())
        return
    try:
        contextLength = full_text.index('\n\n')
    except ValueError:  # substring not found
        contextLength = 500
    if full_text.startswith(u'[['):  # probably an image
        # Add extra paragraph.
        contextLength = full_text.find('\n\n', contextLength + 2)
    if contextLength > 1000 or contextLength < 0:
        contextLength = 500
    pywikibot.output('\n' + full_text[:contextLength] + '\n')

    # we need list to index the choice
    subcatlist = list(self.catDB.getSubcats(current_cat))
    supercatlist = list(self.catDB.getSupercats(current_cat))

    if not subcatlist:
        pywikibot.output('This category has no subcategories.\n')
    if not supercatlist:
        pywikibot.output('This category has no supercategories.\n')
    # show subcategories as possible choices (with numbers)
    for i, supercat in enumerate(supercatlist):
        # layout: we don't expect a cat to have more than 10 supercats
        pywikibot.output(u'u%d - Move up to %s' % (i, supercat.title()))
    for i, subcat in enumerate(subcatlist):
        # layout: we don't expect a cat to have more than 100 subcats
        pywikibot.output(u'%2d - Move down to %s' % (i, subcat.title()))
    pywikibot.output(' j - Jump to another category\n'
                     ' s - Skip this article\n'
                     ' r - Remove this category tag\n'
                     ' ? - Print first part of the page (longer and '
                     'longer)\n'
                     u'Enter - Save category as %s' % current_cat.title())

    flag = False
    while not flag:
        pywikibot.output('')
        choice = pywikibot.input(u'Choice:')
        if choice in ['s', 'S']:
            flag = True
        elif choice == '':
            pywikibot.output(u'Saving category as %s'
                             % current_cat.title())
            if current_cat == original_cat:
                pywikibot.output('No changes necessary.')
            else:
                article.change_category(original_cat, current_cat,
                                        comment=self.editSummary)
            flag = True
        elif choice in ['j', 'J']:
            newCatTitle = pywikibot.input(u'Please enter the category the '
                                          u'article should be moved to:')
            newCat = pywikibot.Category(
                pywikibot.Link('Category:' + newCatTitle))
            # recurse into chosen category
            self.move_to_category(article, original_cat, newCat)
            flag = True
        elif choice in ['r', 'R']:
            # remove the category tag
            article.change_category(original_cat, None,
                                    comment=self.editSummary)
            flag = True
        elif choice == '?':
            contextLength += 500
            pywikibot.output('\n' + full_text[:contextLength] + '\n')
            # if categories possibly weren't visible, show them
            # additionally (maybe this should always be shown?)
            if len(full_text) > contextLength:
                pywikibot.output('')
                pywikibot.output('Original categories: ')
                for cat in article.categories():
                    pywikibot.output(u'* %s' % cat.title())
        elif choice[0] == 'u':
            try:
                choice = int(choice[1:])
            except ValueError:
                # user pressed an unknown command. Prompt again.
                continue
            self.move_to_category(article, original_cat,
                                  supercatlist[choice])
            flag = True
        else:
            try:
                choice = int(choice)
            except ValueError:
                # user pressed an unknown command. Prompt again.
                continue
            # recurse into subcategory
            self.move_to_category(article, original_cat,
                                  subcatlist[choice])
            flag = True
def treat_page_and_item(self, page, item) -> None:
    """Process a single page/item."""
    if willstop:
        raise KeyboardInterrupt

    templates = page.raw_extracted_templates
    for (template, fielddict) in templates:
        # Clean up template
        try:
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(with_ns=False)
        except InvalidTitleError:
            pywikibot.error("Failed parsing template; '{}' should be "
                            'the template name.'.format(template))
            continue
        if template not in self.templateTitles:
            continue
        # We found the template we were looking for
        for field, value in fielddict.items():
            field = field.strip()
            # todo: extend the list of tags to ignore
            value = textlib.removeDisabledParts(
                # todo: eventually we may want to import the references
                value, tags=['ref'], site=page.site).strip()
            if not field or not value:
                continue
            if field not in self.fields:
                continue
            # This field contains something useful for us
            prop, options = self.fields[field]
            claim = pywikibot.Claim(self.repo, prop)
            exists_arg = self._get_option_with_fallback(options, 'exists')
            if claim.type == 'wikibase-item':
                do_multi = self._get_option_with_fallback(options,
                                                          'multi')
                matched = False
                # Try to extract a valid page
                for match in pywikibot.link_regex.finditer(value):
                    matched = True
                    link_text = match.group(1)
                    linked_item = self._template_link_target(item,
                                                             link_text)
                    added = False
                    if linked_item:
                        claim.setTarget(linked_item)
                        added = self.user_add_claim_unless_exists(
                            item, claim, exists_arg, page.site,
                            pywikibot.output)
                        claim = pywikibot.Claim(self.repo, prop)

                    # stop after the first match if not supposed to add
                    # multiple values
                    if not do_multi:
                        break

                    # update exists_arg, so we can add more values
                    if 'p' not in exists_arg and added:
                        exists_arg += 'p'
                if matched:
                    continue

                if not self._get_option_with_fallback(options, 'islink'):
                    pywikibot.output(
                        '{} field {} value {} is not a wikilink. '
                        'Skipping.'.format(claim.getID(), field, value))
                    continue

                linked_item = self._template_link_target(item, value)
                if not linked_item:
                    continue

                claim.setTarget(linked_item)
            elif claim.type in ('string', 'external-id'):
                claim.setTarget(value.strip())
            elif claim.type == 'url':
                match = self.linkR.search(value)
                if not match:
                    continue
                claim.setTarget(match.group('url'))
            elif claim.type == 'commonsMedia':
                commonssite = pywikibot.Site('commons', 'commons')
                imagelink = pywikibot.Link(value, source=commonssite,
                                           default_namespace=6)
                image = pywikibot.FilePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.FilePage(image.getRedirectTarget())
                if not image.exists():
                    pywikibot.output(
                        "{} doesn't exist. I can't link to it".format(
                            image.title(as_link=True)))
                    continue
                claim.setTarget(image)
            else:
                pywikibot.output('{} is not a supported datatype.'.format(
                    claim.type))
                continue
            # A generator might yield pages from multiple sites
            self.user_add_claim_unless_exists(item, claim, exists_arg,
                                              page.site, pywikibot.output)
def example(self, textvalue):
    prop = self.current_page
    # todo: scope constraint
    if any(map(methodcaller('target_equals', 'Q15720608'),
               prop.claims.get('P31', []))):
        pywikibot.output('{} is for qualifier use'.format(prop.title()))
        return False
    if prop.type in ('external-id', 'string'):
        regex = self.get_regex_from_prop(prop)
        if regex is None:
            pywikibot.output('Regex for "{}" not found'.format(
                prop.title()))
            return False

        formatter = None
        for claim in prop.claims.get('P1630', []):
            if claim.snaktype != 'value':
                continue
            searchObj = self.get_formatter_regex().search(
                claim.getTarget())
            if searchObj is None:
                pywikibot.output('Found wrongly formatted formatter URL '
                                 'for "{}"'.format(prop.title()))
                continue
            formatter = searchObj.group()
            break

        if formatter is None:
            if prop.type == 'external-id':
                pywikibot.output('Info: No formatter found for "{}"'
                                 ''.format(prop.title()))
            try:
                regex = re.compile('^(?P<value>{})$'.format(regex))
            except re.error as e:
                pywikibot.output("Couldn't create a regex")
                pywikibot.exception(e)
                return False
        else:
            split = formatter.split('$1')
            full_regex = ''
            full_regex += '(?P<value>{})'.format(regex).join(
                map(re.escape, split[:2]))
            full_regex += '(?P=value)'.join(map(re.escape, split[2:]))
            if full_regex.endswith(re.escape('/')):
                full_regex += '?'
            else:
                full_regex += re.escape('/') + '?'
            full_regex = ('(?:' + full_regex
                          + r'|(?:^["\'<]?|\s)(?P<value2>' + regex
                          + r')(?:["\'>]?$|\]))')
            try:
                regex = re.compile(full_regex)
            except re.error as e:
                pywikibot.output("Couldn't create a regex")
                pywikibot.exception(e)
                return False
    elif prop.type == 'commonsMedia':
        regex = self.get_regex_from_prop(prop)
        if regex is None:
            regex = self.regexes[prop.type]
        else:
            flags = 0
            if regex.startswith('(?i)'):
                regex = regex[4:]
                flags |= re.I
            regex = re.compile(
                r'\b(?:[Ff]il|[Ii]mag)e:(?P<value>{})'.format(regex),
                flags)
    else:
        if prop.type in self.regexes:
            regex = self.regexes[prop.type]
        else:
            pywikibot.output(
                '"{}" is not a supported datatype for matching examples'
                .format(prop.type))
            return False

    remove = True
    split = self.regexes['split-break'].split(textvalue)
    if len(split) == 1:
        split = self.regexes['split-comma'].split(textvalue)
    for match in split:
        if match.strip() == '':
            continue
        pair = re.split(self.regexes['arrow'], match)
        if len(pair) == 1:
            pywikibot.output(
                'Example pair not recognized in "{}"'.format(match))
            remove = False
            continue
        pair = [pair[i] for i in (0, -1)]
        searchObj = self.regexes['wikibase-item'].search(pair[0])
        if searchObj is None:
            pywikibot.output('No item id found in "{}"'.format(pair[0]))
            remove = False
            continue
        item_match = 'Q' + searchObj.group('value')
        target = pywikibot.ItemPage(self.repo, item_match)
        while target.isRedirectPage():
            target = target.getRedirectTarget()
        if any(map(methodcaller('target_equals', target),
                   prop.claims.get('P1855', []))):
            pywikibot.output(
                'There is already one example with "{}"'.format(
                    item_match))
            continue

        qual_match = regex.search(pair[1])
        if not qual_match:
            pywikibot.output(
                'Couldn\'t match example value in "{}"'.format(pair[1]))
            remove = False
            continue
        for g in ('value', 'value2', 'url'):
            if g in qual_match.groupdict():
                if qual_match.group(g):
                    qual_target = qual_match.group(g)
                    break

        if prop.type == 'wikibase-item':
            qual_target = pywikibot.ItemPage(self.repo, 'Q' + qual_target)
            if not qual_target.exists():
                pywikibot.output('"{}" doesn\'t exist'.format(
                    qual_target.title()))
                remove = False
                continue
            while qual_target.isRedirectPage():
                qual_target = qual_target.getRedirectTarget()
        elif prop.type == 'wikibase-property':
            qual_target = pywikibot.PropertyPage(self.repo,
                                                 'P' + qual_target)
        elif prop.type == 'commonsMedia':
            commons = pywikibot.Site('commons', 'commons')
            imagelink = pywikibot.Link(qual_target, source=commons,
                                       defaultNamespace=6)
            qual_target = pywikibot.FilePage(imagelink)
            if not qual_target.exists():
                pywikibot.output('"{}" doesn\'t exist'.format(
                    qual_target.title()))
                remove = False
                continue
            while qual_target.isRedirectPage():
                qual_target = pywikibot.FilePage(
                    qual_target.getRedirectTarget())
        elif prop.type == 'quantity':
            try:
                amount = parse_float(qual_match.group('amount'))
            except ValueError:
                pywikibot.output(
                    'Couldn\'t parse "{}"'.format(qual_target))
                remove = False
                continue
            error = qual_match.group('error')
            unit = qual_match.group('unit')
            if error:
                try:
                    error = parse_float(error)
                except ValueError:
                    pywikibot.output(
                        'Couldn\'t parse "{}"'.format(qual_target))
                    remove = False
                    continue
            if unit:
                search = self.regexes['wikibase-item'].search(unit)
                unit = pywikibot.ItemPage(self.repo,
                                          'Q' + search.group('value'))
                if unit.isRedirectPage():
                    unit = unit.getRedirectTarget()
            else:
                unit = None
            qual_target = pywikibot.WbQuantity(amount, unit, error,
                                               site=self.repo)

        claim = pywikibot.Claim(self.repo, 'P1855')
        claim.setTarget(target)
        qualifier = prop.newClaim(is_qualifier=True)
        qualifier.setTarget(qual_target)
        claim.addQualifier(qualifier)
        claim.addSource(self.get_source())
        ok = self.user_add_claim(prop, claim, summary=self.make_summary())
        remove = ok and remove
    return remove
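import re

# Standalone sketch of the pair-splitting step in example() above. The
# 'split-break', 'arrow' and 'wikibase-item' keys mirror self.regexes;
# the patterns themselves are assumptions, not the bot's real ones.
EXAMPLE_REGEXES = {
    'split-break': re.compile(r'<br\s*/?>'),
    'arrow': re.compile(r'\s*(?:=>|\u2192)\s*'),
    'wikibase-item': re.compile(r'Q(?P<value>\d+)'),
}


def split_example_pairs(textvalue):
    """Yield (item id, raw example value) pairs from an example blob,
    mimicking the split/arrow/item-id steps of example() above."""
    parts = EXAMPLE_REGEXES['split-break'].split(textvalue)
    if len(parts) == 1:
        parts = textvalue.split(',')
    for part in parts:
        pair = EXAMPLE_REGEXES['arrow'].split(part)
        if len(pair) == 1:
            continue  # no arrow found: not an "item => value" pair
        match = EXAMPLE_REGEXES['wikibase-item'].search(pair[0])
        if match:
            yield 'Q' + match.group('value'), pair[-1].strip()

# list(split_example_pairs('Q42 => 0000 0001 2145 4211'))
# -> [('Q42', '0000 0001 2145 4211')]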
def find_add(self, page):
    """
    Return (user, oldid, timestamp) where

    * user is the user who added the {{Mort récente}} template
      (pywikibot.User)
    * oldid is the oldid of the revision of this add (int)
    * timestamp is the timestamp of that revision
    """
    death_found = True
    history = page.getVersionHistory()
    if len(history) == 1:
        [(id, timestamp, user, comment)] = history
        return (pywikibot.User(self.site, user), id, timestamp)
    oldid = None
    requester = None
    timestamp = None
    previous_timestamp = None
    for (id, timestamp, user, comment) in history:
        pywikibot.output("Analyzing id %i: timestamp is %s and user is %s"
                         % (id, timestamp, user))
        text = page.getOldVersion(id)
        templates_params_list = textlib.extract_templates_and_params(text)
        death_found = False
        for (template_name, dict_param) in templates_params_list:
            try:
                template_page = pywikibot.Page(
                    pywikibot.Link(template_name, self.site,
                                   defaultNamespace=10), self.site)
                # TODO: auto-finding redirections
                if template_page.title(withNamespace=False) in [
                        u"Mort récente", u"Décès récent"]:
                    death_found = True
                    break
            except Exception as myexception:
                pywikibot.output(
                    u'An error occurred while analyzing template %s'
                    % template_name)
                pywikibot.output(u'%s %s' % (type(myexception),
                                             myexception.args))
        if oldid:
            print("id is %i ; oldid is %i" % (id, oldid))
        else:
            print("id is %i ; no oldid" % id)
        if not death_found:
            if id == oldid:
                pywikibot.output("Last revision does not contain any "
                                 "{{Mort récente}} template!")
                return None
            else:
                pywikibot.output(u"-------------------------------------")
                triplet = (requester, oldid, previous_timestamp)
                pywikibot.output(
                    u"Found it: user is %s; oldid is %i and "
                    u"timestamp is %s" % triplet)
                return triplet
        else:
            requester = pywikibot.User(self.site, user)
            oldid = id
            previous_timestamp = timestamp
def run(self):
    """Start the robot."""
    for monument in self.generator:
        try:
            monumentItem = None
            newclaims = []
            if monument.get('id') in self.monumentIds:
                monumentItemTitle = u'Q%s' % (self.monumentIds.get(
                    monument.get('id')), )
                print(monument)
                print(monumentItemTitle)
                monumentItem = pywikibot.ItemPage(
                    self.repo, title=monumentItemTitle)
            else:
                print('bla')
                # monumentItem = pywikibot.ItemPage(self.repo, title=u'')

                # Clean up wikitext in the name before using it as a label
                monumentName = monument.get('name')
                # monumentName = re.sub(
                #     '^\[\[([^\|]+)\|([^\]]+)\]\](.+)$', u'\\2\\3',
                #     monumentName)
                monumentName = re.sub(r'\[\[([^\|]+)\|([^\]]+)\]\]',
                                      u'\\2', monumentName)
                # monumentName = re.sub('^\[\[([^\]]+)\]\](.+)$',
                #                       u'\\1\\2', monumentName)
                monumentName = re.sub(r'\[\[([^\]]+)\]\]', u'\\1',
                                      monumentName)
                if len(monumentName) > 200:
                    monumentName = re.sub(r'^(.{20,200})\.(.+)$', u'\\1.',
                                          monumentName)
                if len(monumentName) > 200:
                    monumentName = re.sub(r'^(.{20,200}),(.+)$', u'\\1.',
                                          monumentName)
                # More cleanup is still needed here
                data = {'labels': {monument.get('lang'): {
                    'language': monument.get('lang'),
                    'value': monumentName}}}
                identification = {}
                summary = u'Creating new item with data from %s' % (
                    monument.get('source'), )
                pywikibot.output(summary)
                # monumentItem.editEntity(data, summary=summary)
                result = self.repo.editEntity(identification, data,
                                              summary=summary)
                # print(result)
                monumentItemTitle = result.get(u'entity').get('id')
                monumentItem = pywikibot.ItemPage(
                    self.repo, title=monumentItemTitle)
                '''
                Sample editEntity results:
                {u'success': 1, u'entity': {u'lastrevid': 134951692,
                 u'labels': {u'nl': {u'value': u'[[Elswout]]: hoofdgebouw',
                 u'language': u'nl'}}, u'descriptions': [], u'claims': [],
                 u'type': u'item', u'id': u'Q17000292', u'aliases': []}}
                {u'success': 1, u'entity': {u'lastrevid': 134951703,
                 u'labels': {u'nl': {u'value': u'Elswout: landgoed',
                 u'language': u'nl'}}, u'descriptions': [], u'claims': [],
                 u'type': u'item', u'id': u'Q17000293', u'aliases': []}}
                {u'success': 1, u'entity': {u'lastrevid': 134951710,
                 u'labels': {u'nl': {u'value': u'Elswout: keermuren van
                 het voorplein', u'language': u'nl'}},
                 u'descriptions': [], u'claims': [], u'type': u'item',
                 u'id': u'Q17000294', u'aliases': []}}
                '''
                # print(monumentItem.title())
                newclaim = pywikibot.Claim(
                    self.repo, u'P%s' % (self.monumentIdProperty, ))
                newclaim.setTarget(monument.get('id'))
                pywikibot.output('Adding new id claim to %s'
                                 % monumentItem)
                monumentItem.addClaim(newclaim)

            if monumentItem and monumentItem.exists():
                data = monumentItem.get()
                descriptions = data.get('descriptions')
                claims = data.get('claims')
                print(claims)
                if monument.get('address') and not descriptions.get(
                        monument.get('lang')):
                    # FIXME: If it contains links like '[[]]' it will
                    # break
                    if u'(' not in monument.get('address'):
                        monumentDescription = u'Rijksmonument op %s' % (
                            monument.get('address'), )
                        summary = u'Setting %s description to "%s"' % (
                            monument.get('lang'), monumentDescription, )
                        try:
                            monumentItem.editDescriptions(
                                {monument.get('lang'):
                                 monumentDescription},
                                summary=summary)
                        except pywikibot.exceptions.APIError:
                            pywikibot.output(
                                u"Oops, that didn't work. Another item "
                                u'already has the same description')

                if u'P31' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P31')
                    monumentTypeItem = pywikibot.ItemPage(
                        self.repo, title=self.monumentType)
                    newclaim.setTarget(monumentTypeItem)
                    pywikibot.output('Adding instance claim to %s'
                                     % monumentItem)
                    monumentItem.addClaim(newclaim)

                if monument.get('adm0') and u'P17' not in claims:
                    print(u'no country found')
                    if monument.get('adm0').upper() in \
                            self.iso3166_1Codes:
                        # print(u'Found an item for the ISO code')
                        adm0ItemTitle = u'Q%s' % (self.iso3166_1Codes.get(
                            monument.get('adm0').upper()), )
                        adm0Item = pywikibot.ItemPage(
                            self.repo, title=adm0ItemTitle)
                        newclaim = pywikibot.Claim(self.repo, u'P17')
                        newclaim.setTarget(adm0Item)
                        pywikibot.output('Adding country claim to %s'
                                         % monumentItem)
                        monumentItem.addClaim(newclaim)
                else:
                    print(u'country found')

                foundProv = False
                if u'P131' in claims and len(claims.get('P131')) == 1:
                    if monument.get('adm1').upper() in \
                            self.iso3166_2Codes:
                        if claims.get('P131')[0].getTarget().title() == \
                                u'Q%s' % (self.iso3166_2Codes.get(
                                    monument.get('adm1').upper()), ):
                            print(u'This item only contains a province '
                                  u'claim')
                            foundProv = True

                if u'P131' not in claims or foundProv:
                    print(u'no administrative thingie found')
                    for adm in [monument.get('adm1'),
                                monument.get('adm2'),
                                monument.get('adm3'),
                                monument.get('adm4')]:
                        if adm:
                            if adm.upper() in self.iso3166_2Codes:
                                if not foundProv:
                                    print(u'Found an item for the ISO '
                                          u'code')
                                    admItemTitle = u'Q%s' % (
                                        self.iso3166_2Codes.get(
                                            adm.upper()), )
                                    admItem = pywikibot.ItemPage(
                                        self.repo, title=admItemTitle)
                                    newclaim = pywikibot.Claim(
                                        self.repo, u'P131')
                                    newclaim.setTarget(admItem)
                                    pywikibot.output(
                                        u'Adding %s to %s'
                                        % (admItem.title(),
                                           monumentItem.title()))
                                    monumentItem.addClaim(newclaim)
                                    # print(adm1Item.get())
                            else:
                                adm = adm.replace(u'[', u'').replace(
                                    u']', u'')
                                site = pywikibot.Site(
                                    monument.get('lang'), u'wikipedia')
                                admLink = pywikibot.Link(
                                    adm, source=site, defaultNamespace=0)
                                admPage = pywikibot.Page(admLink)
                                if admPage.isRedirectPage():
                                    admPage = pywikibot.Page(
                                        admPage.getRedirectTarget())
                                if not admPage.exists():
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I "
                                        "can't link to it"
                                        % (admPage.title(), ))
                                elif admPage.isDisambig():
                                    pywikibot.output(
                                        '[[%s]] is a disambiguation '
                                        "page so I can't link to it"
                                        % (admPage.title(), ))
                                else:
                                    admItem = \
                                        pywikibot.ItemPage.fromPage(
                                            admPage)
                                    if admItem.exists():
                                        munFound = False
                                        if 'P31' in admItem.claims:
                                            for instClaim in \
                                                    admItem.claims.get('P31'):
                                                if instClaim.getTarget().title() == 'Q2039348':
                                                    munFound = True
                                        if not munFound:
                                            # It's not an administrative
                                            # division, but it might be
                                            # in one
                                            if 'P131' in admItem.claims:
                                                for possAdmClaim in \
                                                        admItem.claims.get('P131'):
                                                    possAdmItem = possAdmClaim.getTarget()
                                                    possAdmItem.get()
                                                    if 'P31' in possAdmItem.claims:
                                                        for instClaim in possAdmItem.claims.get('P31'):
                                                            if instClaim.getTarget().title() == 'Q2039348':
                                                                admItem = possAdmItem
                                                                munFound = True
                                                                continue
                                        if munFound:
                                            newclaim = pywikibot.Claim(
                                                self.repo, u'P131')
                                            newclaim.setTarget(admItem)
                                            pywikibot.output(
                                                u'Adding %s to %s'
                                                % (admItem.title(),
                                                   monumentItem.title()))
                                            monumentItem.addClaim(
                                                newclaim)
                else:
                    print(u'administrative thingie found')

                if monument.get('address') and u'P969' not in claims:
                    if (u'[' not in monument.get('address')
                            and u']' not in monument.get('address')
                            and u'|' not in monument.get('address')):
                        newclaim = pywikibot.Claim(self.repo, u'P969')
                        newclaim.setTarget(monument.get('address'))
                        pywikibot.output(u'Adding %s to %s'
                                         % (monument.get('address'),
                                            monumentItem.title()))
                        monumentItem.addClaim(newclaim)
                    else:
                        print(u'Contains funky chars, skipping')
                    print(u'no address found')
                    # Clean up the address and add it
                else:
                    print(u'address found')

                if (monument.get('lat') and monument.get('lon')
                        and u'P625' not in claims):
                    print(u'no coordinates found')
                    # Build coordinates and add them
                    coordinate = pywikibot.Coordinate(
                        monument.get('lat'), monument.get('lon'), dim=100)
                    newclaim = pywikibot.Claim(self.repo, u'P625')
                    newclaim.setTarget(coordinate)
                    pywikibot.output(u'Adding %s, %s to %s'
                                     % (coordinate.lat, coordinate.lon,
                                        monumentItem.title()))
                    monumentItem.addClaim(newclaim)
                else:
                    print(u'coordinates found')

                if monument.get('image') and u'P18' not in claims:
                    print(u'no image found')
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    commonssite = pywikibot.Site("commons", "commons")
                    imagelink = pywikibot.Link(monument.get('image'),
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(
                            image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            "[[%s]] doesn't exist so I can't link to it"
                            % (image.title(), ))
                    else:
                        newclaim.setTarget(image)
                        pywikibot.output('Adding %s --> %s'
                                         % (newclaim.getID(),
                                            newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
                else:
                    print(u'image found')

                # Europeana ID
                if u'P727' not in claims:
                    europeanaID = u'2020718/DR_%s' % (
                        monument.get('id'), )
                    newclaim = pywikibot.Claim(self.repo, u'P727')
                    newclaim.setTarget(europeanaID)
                    pywikibot.output('Adding Europeana ID claim to %s'
                                     % monumentItem)
                    monumentItem.addClaim(newclaim)

                if monument.get('commonscat') and u'P373' not in claims:
                    print(u'no commons category found')
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P373')
                    commonssite = pywikibot.Site("commons", "commons")
                    commonslink = pywikibot.Link(
                        monument.get('commonscat'), source=commonssite,
                        defaultNamespace=14)
                    commonscat = pywikibot.Page(commonslink)
                    if commonscat.isRedirectPage():
                        commonscat = pywikibot.Page(
                            commonscat.getRedirectTarget())
                    if not commonscat.exists():
                        pywikibot.output(
                            "[[%s]] doesn't exist so I can't link to it"
                            % (commonscat.title(), ))
                    else:
                        newclaim.setTarget(
                            commonscat.title(withNamespace=False))
                        pywikibot.output('Adding %s --> %s'
                                         % (newclaim.getID(),
                                            newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
        except Exception:
            # Something went wrong; just continue with the next monument
            print(u'Something went wrong, continuing anyway')
def find_add(page):
    """
    Return (user, oldid) where

    * user is the user who added the {{Déblocage}} template
      (pywikibot.User)
    * oldid is the oldid of the revision of this add (int)
    """
    site = pywikibot.Site()
    unblock_found = True
    history = page.getVersionHistory()
    if len(history) == 1:
        [(id, timestamp, user, comment)] = history
        return (pywikibot.User(site, user), id)
    oldid = None
    requester = None
    for (id, timestamp, user, comment) in history:
        pywikibot.output("Analyzing id %i: timestamp is %s and user is %s"
                         % (id, timestamp, user))
        text = page.getOldVersion(id)
        templates_params_list = textlib.extract_templates_and_params(text)
        unblock_found = False
        for (template_name, dict_param) in templates_params_list:
            # pywikibot.output((template_name, dict_param))
            try:
                template_page = pywikibot.Page(
                    pywikibot.Link(template_name, site,
                                   defaultNamespace=10), site)
                pywikibot.output(template_page)
                pywikibot.output(template_page.title(withNamespace=False))
                # TODO: auto-finding redirections
                if template_page.title(withNamespace=False) in [
                        u"Déblocage", u"Unblock"]:
                    # the {{déblocage}} template may no longer be active
                    if (('nocat' not in dict_param
                         or dict_param['nocat'] in ["non", ''])
                            and not ('1' in dict_param
                                     and dict_param['1'] in
                                     ['nocat', 'oui', 'non',
                                      u'traité', u'traitée'])):
                        pywikibot.output('Found unblock request')
                        pywikibot.output((template_name, dict_param))
                        unblock_found = True
                        break
            except Exception as myexception:
                pywikibot.output(
                    u'An error occurred while analyzing template %s'
                    % template_name)
                pywikibot.output(u'%s %s' % (type(myexception),
                                             myexception.args))
        print("id is %i" % id)
        if oldid:
            print("oldid is %i" % oldid)
        else:
            print("no oldid")
        if not unblock_found:
            if id == oldid:
                pywikibot.output("Last revision does not contain any "
                                 "{{Déblocage}} template!")
                return None
            else:
                return (requester, oldid)
        else:
            requester = pywikibot.User(site, user)
            oldid = id
def processArtist(artist, ulanwd, gndwd, repo):
    """Get the artist info and look for ULAN, GND or Wikipedia links."""
    itemPage = requests.get(artist.get('url'))
    ulanregex = u'\<a href\=\"http\:\/\/vocab\.getty\.edu\/page\/ulan\/(\d+)\"\>ULAN\<\/a\>'
    gndregex = u'\<a href\=\"http\:\/\/d-nb\.info\/gnd\/([^\"]+)\"\>GND\<\/a\>'
    wikiregex = u'\<a href\=\"https\:\/\/de\.wikipedia\.org\/wiki\/([^\"]+)">Wikipedia</a>'

    ulanmatch = re.search(ulanregex, itemPage.text)
    gndmatch = re.search(gndregex, itemPage.text)
    wikimatch = re.search(wikiregex, itemPage.text)

    if ulanmatch:
        ulanid = ulanmatch.group(1).encode(u'utf-8')  # Force it to string
        pywikibot.output(u'Found an ULAN match on %s to %s'
                         % (artist.get('url'), ulanid))
        if ulanid in ulanwd:
            itemTitle = ulanwd.get(ulanid).get('qid')
            pywikibot.output(u'Found %s as the Wikidata item to link to'
                             % (itemTitle, ))
            item = pywikibot.ItemPage(repo, title=itemTitle)
            if not item.exists():
                return False
            if item.isRedirectPage():
                item = item.getRedirectTarget()
            data = item.get()
            claims = data.get('claims')
            if u'P3421' in claims:
                # Already has Belvedere, great!
                return True
            newclaim = pywikibot.Claim(repo, u'P3421')
            newclaim.setTarget(artist.get('id'))
            pywikibot.output('Adding Belvedere %s claim to %s'
                             % (artist.get('id'), item.title(), ))
            # Default text is "Created claim: Belvedere identifier
            # (P3421): 123, "
            summary = (u'based on link to ULAN %s on entry "%s" on '
                       u'Belvedere website'
                       % (ulanid, artist.get(u'name'), ))
            item.addClaim(newclaim, summary=summary)
            return True
    if gndmatch:
        gndid = gndmatch.group(1).encode(u'utf-8')  # Force it to string
        pywikibot.output(u'Found a GND match on %s to %s'
                         % (artist.get('url'), gndid))
        if gndid in gndwd:
            itemTitle = gndwd.get(gndid).get('qid')
            pywikibot.output(u'Found %s as the Wikidata item to link to'
                             % (itemTitle, ))
            item = pywikibot.ItemPage(repo, title=itemTitle)
            if not item.exists():
                return False
            if item.isRedirectPage():
                item = item.getRedirectTarget()
            data = item.get()
            claims = data.get('claims')
            if u'P3421' in claims:
                # Already has Belvedere, great!
                return True
            newclaim = pywikibot.Claim(repo, u'P3421')
            newclaim.setTarget(artist.get('id'))
            pywikibot.output('Adding Belvedere %s claim to %s'
                             % (artist.get('id'), item.title(), ))
            # Default text is "Created claim: Belvedere identifier
            # (P3421): 123, "
            summary = (u'based on link to GND %s on entry "%s" on '
                       u'Belvedere website'
                       % (gndid, artist.get(u'name'), ))
            item.addClaim(newclaim, summary=summary)
            return True
    if wikimatch:
        articleTitle = u':de:%s' % (wikimatch.group(1), )
        page = pywikibot.Page(pywikibot.Link(articleTitle))
        if not page.exists():
            return False
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        item = page.data_item()
        if not item or not item.exists():
            return False
        if item.isRedirectPage():
            item = item.getRedirectTarget()
        data = item.get()
        claims = data.get('claims')
        if u'P3421' in claims:
            # Already has Belvedere, great!
            return True
        newclaim = pywikibot.Claim(repo, u'P3421')
        newclaim.setTarget(artist.get('id'))
        pywikibot.output('Adding Belvedere %s claim to %s'
                         % (artist.get('id'), item.title(), ))
        # Default text is "Created claim: Belvedere identifier
        # (P3421): 123, "
        summary = (u'based on link to [[%s]] on entry "%s" on '
                   u'Belvedere website'
                   % (articleTitle, artist.get(u'name'), ))
        item.addClaim(newclaim, summary=summary)
        return True
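import pywikibot

# The three branches above repeat the same "add P3421" tail. A possible
# shared helper is sketched here; this is an illustration, not the
# author's code, and the function name and 'source_label' parameter are
# hypothetical.
def add_belvedere_claim(repo, item, artist, source_label):
    """Add a Belvedere identifier (P3421) claim unless one exists."""
    while item.isRedirectPage():
        item = item.getRedirectTarget()
    claims = item.get().get('claims')
    if u'P3421' in claims:
        return True  # already has a Belvedere identifier
    newclaim = pywikibot.Claim(repo, u'P3421')
    newclaim.setTarget(artist.get('id'))
    summary = (u'based on link to %s on entry "%s" on Belvedere website'
               % (source_label, artist.get(u'name')))
    item.addClaim(newclaim, summary=summary)
    return True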
def main(*args):
    # the option that's always selected when the bot wonders what to do
    # with a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    # if the -file argument is used, page titles are dumped in this array.
    # otherwise it will only contain one page.
    generator = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    primary = False
    main_only = False
    # For sorting the linked pages, case can be ignored
    minimum = 0

    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            if arg[5] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(arg[5:], mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    answer = pywikibot.inputChoice(
                        u'Possibility %s does not actually exist. '
                        u'Use it anyway?' % page.title(),
                        ['yes', 'no'], ['y', 'N'], 'N')
                    if answer == 'y':
                        alternatives.append(page.title())
            else:
                alternatives.append(arg[5:])
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                if len(arg) <= len('-start:'):
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory())
                else:
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory(), start=arg[7:])
                generator = pagegenerators.NamespaceFilterPageGenerator(
                    generator, [0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif arg.startswith("-"):
            pywikibot.output("Unrecognized command line argument: %s"
                             % arg)
            # show help text and exit
            pywikibot.showHelp()
        else:
            pageTitle.append(arg)

    site = pywikibot.Site()
    site.login()

    # if the disambiguation page is given as a command line argument,
    # connect the title's parts with spaces
    if pageTitle != []:
        pageTitle = ' '.join(pageTitle)
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    # if no disambiguation page was given as an argument, and none was
    # read from a file, query the user
    if not generator:
        pageTitle = pywikibot.input(
            u'On which disambiguation page do you want to work?')
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    bot = DisambiguationRobot(always, alternatives, getAlternatives,
                              dnSkip, generator, primary, main_only,
                              minimum=minimum)
    bot.run()
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true. Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.
    """
    site = pywikibot.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = i18n.input(
                'pywikibot-enter-file-links-processing')
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = pywikibot.ImagePage(
                site, 'Image:' + fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        gen = UserContributionsGenerator(arg[14:])
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = i18n.input('pywikibot-enter-page-processing')
        page = pywikibot.Page(pywikibot.Link(title, pywikibot.Site()))
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-recentchanges'):
        if len(arg) >= 15:
            gen = RecentChangesPageGenerator(total=int(arg[15:]))
        else:
            gen = RecentChangesPageGenerator(total=60)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = pywikibot.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        return True
    elif arg.startswith('-ns'):
        if len(arg) == len('-ns'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-ns:'):].split(","))
        return True
    elif arg.startswith('-step'):
        if len(arg) == len('-step'):
            self.step = int(pywikibot.input("What is the step value?"))
        else:
            self.step = int(arg[len('-step:'):])
        return True
    elif arg.startswith('-limit'):
        if len(arg) == len('-limit'):
            self.limit = int(pywikibot.input("What is the limit value?"))
        else:
            self.limit = int(arg[len('-limit:'):])
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    elif arg.startswith('-page'):
        if len(arg) == len('-page'):
            gen = [pywikibot.Page(
                pywikibot.Link(
                    pywikibot.input(u'What page do you want to use?'),
                    pywikibot.getSite()))]
        else:
            gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
                                                 pywikibot.getSite()))]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = pywikibot.input(
                u'Links to which page should be processed?')
        referredPage = pywikibot.Page(
            pywikibot.Link(referredPageTitle, pywikibot.Site()))
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = pywikibot.input(
                u'Links from which page should be processed?')
        linkingPage = pywikibot.Page(
            pywikibot.Link(linkingPageTitle, pywikibot.Site()))
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = pywikibot.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = pywikibot.input(
                u'Pages that transclude which page should be processed?')
        transclusionPage = pywikibot.Page(
            pywikibot.Link(transclusionPageTitle, defaultNamespace=10,
                           source=pywikibot.Site()))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-start'):
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        firstpagelink = pywikibot.Link(firstPageTitle, pywikibot.Site())
        namespace = firstpagelink.namespace
        firstPageTitle = firstpagelink.title
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = pywikibot.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or pywikibot.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(total=int(limit))
    elif arg.startswith('-newpages'):
        if len(arg) >= 10:
            gen = NewpagesPageGenerator(total=int(arg[10:]))
        else:
            gen = NewpagesPageGenerator(total=60)
    elif arg.startswith('-imagesused'):
        imagelinkstitle = arg[len('-imagesused:'):]
        if not imagelinkstitle:
            imagelinkstitle = pywikibot.input(
                u'Images on which page should be processed?')
        imagelinksPage = pywikibot.Page(
            pywikibot.Link(imagelinkstitle, pywikibot.Site()))
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = pywikibot.input(
                u'What do you want to search for?')
        # In order to be useful, all namespaces are required
        gen = SearchPageGenerator(mediawikiQuery, namespaces=[])
    elif arg.startswith('-google'):
        gen = GoogleSearchPageGenerator(arg[8:])
    elif arg.startswith('-titleregex'):
        # slice by the option's own length so the regex is read correctly
        if len(arg) == len('-titleregex'):
            regex = pywikibot.input(
                u'What page names are you looking for?')
        else:
            regex = arg[len('-titleregex:'):]
        gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex)
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    else:
        pass

    if gen:
        self.gens.append(gen)
        return True
    else:
        return False
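# Hedged usage sketch for handleArg/getCombinedGenerator, following the
# docstring above. It assumes this method lives on a factory object such
# as pagegenerators.GeneratorFactory (as in older pywikibot releases);
# the driver function name is hypothetical.
def _example_drive_generator_factory():
    genFactory = GeneratorFactory()  # assumption: the owning class
    unhandled = []
    for arg in pywikibot.handleArgs():
        if not genFactory.handleArg(arg):
            unhandled.append(arg)  # not a generator arg; handle elsewhere
    gen = genFactory.getCombinedGenerator()
    if gen:
        for page in gen:
            pywikibot.output(page.title())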
def treat(self, refPage, disambPage): """ Parameters: disambPage - The disambiguation page or redirect we don't want anything to link to refPage - A page linking to disambPage Returns False if the user pressed q to completely quit the program. Otherwise, returns True. """ # TODO: break this function up into subroutines! dn_template_str = i18n.translate(self.mysite, dn_template) include = False unlink = False new_targets = [] try: text = refPage.get(throttle=False) ignoreReason = self.checkContents(text) if ignoreReason: pywikibot.output( '\n\nSkipping %s because it contains %s.\n\n' % (refPage.title(), ignoreReason)) else: include = True except pywikibot.IsRedirectPage: pywikibot.output(u'%s is a redirect to %s' % (refPage.title(), disambPage.title())) if disambPage.isRedirectPage(): target = self.alternatives[0] choice = pywikibot.inputChoice( u'Do you want to make redirect %s point to %s?' % (refPage.title(), target), ['yes', 'no'], ['y', 'N'], 'N') if choice == 'y': redir_text = '#%s [[%s]]' \ % (self.mysite.redirect(default=True), target) try: refPage.put_async(redir_text, comment=self.comment) except pywikibot.PageNotSaved as error: pywikibot.output(u'Page not saved: %s' % error.args) else: choice = pywikibot.inputChoice( u'Do you want to work on pages linking to %s?' % refPage.title(), ['yes', 'no', 'change redirect'], ['y', 'N', 'c'], 'N') if choice == 'y': gen = ReferringPageGeneratorWithIgnore( refPage, self.primary) preloadingGen = pagegenerators.PreloadingGenerator(gen) for refPage2 in preloadingGen: # run until the user selected 'quit' if not self.treat(refPage2, refPage): break elif choice == 'c': text = refPage.get(throttle=False, get_redirect=True) include = "redirect" except pywikibot.NoPage: pywikibot.output( u'Page [[%s]] does not seem to exist?! Skipping.' % refPage.title()) include = False if include in (True, "redirect"): # make a backup of the original text so we can show the changes later original_text = text n = 0 curpos = 0 dn = False edited = False # This loop will run until we have finished the current page while True: m = self.linkR.search(text, pos=curpos) if not m: if n == 0: pywikibot.output(u"No changes necessary in %s" % refPage.title()) return True else: # stop loop and save page break # Make sure that next time around we will not find this same hit. curpos = m.start() + 1 try: foundlink = pywikibot.Link(m.group('title'), disambPage.site) except pywikibot.Error: continue # ignore interwiki links if foundlink.site != disambPage.site: continue # Check whether the link found is to disambPage. try: if foundlink.canonical_title() != disambPage.title(): continue except pywikibot.Error: # must be a broken link pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" % (m.group('title'), refPage.title())) continue n += 1 # how many bytes should be displayed around the current link context = 60 #there's a {{dn}} here already already_dn = text[m.end():m.end() + 8].find( dn_template_str[:4]) > -1 if already_dn and self.dnSkip: continue # This loop will run while the user doesn't choose an option # that will actually change the page while True: # Show the title of the page where the link was found. # Highlight the title in purple. pywikibot.output( u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % refPage.title()) if not self.always: # at the beginning of the link, start red color. 
# at the end of the link, reset the color to default pywikibot.output(text[max(0, m.start() - context):m.start()] + '\03{lightred}' + text[m.start():m.end()] + '\03{default}' + text[m.end():m.end() + context]) if edited: choice = pywikibot.input( u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n" u" [t]ag template " + dn_template_str + ",\n" u" [m]ore context, [l]ist, [a]dd new, x=save in this form):" ) else: choice = pywikibot.input( u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n" u" [t]ag template " + dn_template_str + ",\n" u" [m]ore context, show [d]isambiguation page, [l]ist, [a]dd new):" ) else: choice = self.always if choice in ['a', 'A']: newAlternative = pywikibot.input(u'New alternative:') self.alternatives.append(newAlternative) self.listAlternatives() elif choice in ['e', 'E']: editor = editarticle.TextEditor() newText = editor.edit(text, jumpIndex=m.start(), highlight=disambPage.title()) # if user didn't press Cancel if newText and newText != text: text = newText break elif choice in ['d', 'D']: editor = editarticle.TextEditor() if disambPage.isRedirectPage(): disambredir = disambPage.getRedirectTarget() editor.edit(disambredir.get(), jumpIndex=m.start(), highlight=disambredir.title()) else: editor.edit(disambPage.get(), jumpIndex=m.start(), highlight=disambPage.title()) elif choice in ['l', 'L']: self.listAlternatives() elif choice in ['m', 'M']: # show more text around the link we're working on context *= 2 else: break if choice in ['e', 'E']: # user has edited the page and then pressed 'OK' edited = True curpos = 0 continue elif choice in ['n', 'N']: # skip this page if self.primary: # If run with the -primary argument, skip this # occurence next time. self.primaryIgnoreManager.ignore(refPage) return True elif choice in ['q', 'Q']: # quit the program return False elif choice in ['s', 'S']: # Next link on this page n -= 1 continue elif choice in ['x', 'X'] and edited: # Save the page as is break # The link looks like this: # [[page_title|link_text]]trailing_chars page_title = m.group('title') link_text = m.group('label') if not link_text: # or like this: [[page_title]]trailing_chars link_text = page_title if m.group('section') is None: section = '' else: section = m.group('section') trailing_chars = m.group('linktrail') if trailing_chars: link_text += trailing_chars # '?', '/' for old choice if choice in ['t', 'T', '?', '/']: # small chunk of text to search search_text = text[m.end():m.end() + context] # figure out where the link (and sentance) ends, put note # there end_of_word_match = re.search("\s", search_text) if end_of_word_match: position_split = end_of_word_match.start(0) else: position_split = 0 #insert dab needed template text = (text[:m.end() + position_split] + dn_template_str + text[m.end() + position_split:]) dn = True continue elif choice in ['u', 'U']: # unlink - we remove the section if there's any text = text[:m.start()] + link_text + text[m.end():] unlink = True continue else: if len(choice) > 0 and choice[0] == 'r': # we want to throw away the original link text replaceit = True choice = choice[1:] elif include == "redirect": replaceit = True else: replaceit = False try: choice = int(choice) except ValueError: pywikibot.output(u"Unknown option") # step back to ask the user again what to do with the # current link curpos -= 1 continue if choice >= len(self.alternatives) or choice < 0: pywikibot.output( u"Choice out of range. Please select a number " u"between 0 and %i." 
% (len(self.alternatives) - 1)) # show list of possible choices self.listAlternatives() # step back to ask the user again what to do with the # current link curpos -= 1 continue new_page_title = self.alternatives[choice] repPl = pywikibot.Page( pywikibot.Link(new_page_title, disambPage.site)) if (new_page_title[0].isupper() or link_text[0].isupper()): new_page_title = repPl.title() else: new_page_title = repPl.title() new_page_title = (new_page_title[0].lower() + new_page_title[1:]) if new_page_title not in new_targets: new_targets.append(new_page_title) if replaceit and trailing_chars: newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars) elif replaceit or (new_page_title == link_text and not section): newlink = "[[%s]]" % new_page_title # check if we can create a link with trailing characters # instead of a pipelink elif ((len(new_page_title) <= len(link_text)) and (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and (not section)): newlink = "[[%s]]%s" \ % (link_text[:len(new_page_title)], link_text[len(new_page_title):]) else: newlink = "[[%s%s|%s]]" \ % (new_page_title, section, link_text) text = text[:m.start()] + newlink + text[m.end():] continue pywikibot.output(text[max(0, m.start() - 30):m.end() + 30]) if text == original_text: pywikibot.output(u'\nNo changes have been made:\n') else: pywikibot.output(u'\nThe following changes have been made:\n') pywikibot.showDiff(original_text, text) pywikibot.output(u'') # save the page self.setSummaryMessage(disambPage, new_targets, unlink, dn) try: refPage.put_async(text, comment=self.comment) except pywikibot.LockedPage: pywikibot.output(u'Page not saved: page is locked') except pywikibot.PageNotSaved as error: pywikibot.output(u'Page not saved: %s' % error.args) return True
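# A minimal, self-contained sketch of the four link-rewriting cases in
# treat() above (label replacement, plain link, linktrail merge, pipelink).
# build_replacement_link and the simple '[a-z]*' trail pattern are
# illustrative assumptions, not the bot's actual helpers; as in treat(),
# link_text is assumed to already include the trailing characters.
import re


def build_replacement_link(new_title, link_text, section='',
                           trailing_chars='', replaceit=False):
    """Return wikitext linking to new_title, mirroring treat() above."""
    def firstcap(s):
        return s[:1].upper() + s[1:]

    trail_regex = re.compile('[a-z]*')
    if replaceit and trailing_chars:
        # discard the original label, keep the trailing characters
        return '[[%s%s]]%s' % (new_title, section, trailing_chars)
    if replaceit or (new_title == link_text and not section):
        return '[[%s]]' % new_title
    if (len(new_title) <= len(link_text)
            and firstcap(link_text[:len(new_title)]) == firstcap(new_title)
            and trail_regex.fullmatch(link_text[len(new_title):])
            and not section):
        # the label starts with the title: let the linktrail absorb the rest
        return '[[%s]]%s' % (link_text[:len(new_title)],
                             link_text[len(new_title):])
    return '[[%s%s|%s]]' % (new_title, section, link_text)


# build_replacement_link('Berlin', 'Berliner') -> '[[Berlin]]er'
# build_replacement_link('Berlin (band)', 'Berliner') -> a pipelink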
def setpage(self): """Set page and page title.""" page_title = self.options.page or pywikibot.input('Page to edit:') self.page = pywikibot.Page(pywikibot.Link(page_title, self.site)) if not self.options.edit_redirect and self.page.isRedirectPage(): self.page = self.page.getRedirectTarget()
def test_invalid_link_source(self): """Test ProofreadPage from invalid Link as source.""" source = pywikibot.Link(self.not_existing_invalid['title'], source=self.site) self.assertRaises(ValueError, ProofreadPage, source)
def run(self): """ Starts the robot. """ for painting in self.generator: # Buh, for this one I know for sure it's in there #print painting[u'id'] print painting[u'url'] paintingItem = None newclaims = [] if painting[u'id'] in self.paintingIds: paintingItemTitle = u'Q%s' % (self.paintingIds.get( painting[u'id']), ) print paintingItemTitle paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle) else: #Break for now print u'Let us create stuff' #continue #print u'WTFTFTFTFT???' #print 'bla' data = { 'labels': {}, 'descriptions': {}, } data['labels']['en'] = { 'language': 'en', 'value': painting[u'title'] } data['descriptions']['en'] = { 'language': u'en', 'value': u'painting by %s' % (painting[u'creator'], ) } data['descriptions']['nl'] = { 'language': u'nl', 'value': u'schilderij van %s' % (painting[u'creator'], ) } print data identification = {} summary = u'Creating new item with data from %s ' % ( painting[u'url'], ) pywikibot.output(summary) #monumentItem.editEntity(data, summary=summary) try: result = self.repo.editEntity(identification, data, summary=summary) except pywikibot.exceptions.APIError: # We got ourselves a duplicate label and description, let's correct that pywikibot.output( u'Oops, already had that one. Trying again') data['descriptions']['en'] = { 'language': u'en', 'value': u'painting by %s (%s, %s)' % (painting[u'creator'], painting[u'collectionshort'], painting[u'id']) } result = self.repo.editEntity(identification, data, summary=summary) pass #print result paintingItemTitle = result.get(u'entity').get('id') paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle) # Add to self.paintingIds so that we don't create dupes self.paintingIds[painting[u'id']] = paintingItemTitle.replace( u'Q', u'') newclaim = pywikibot.Claim( self.repo, u'P%s' % (self.paintingIdProperty, )) newclaim.setTarget(painting[u'id']) pywikibot.output('Adding new id claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting[u'url']) newqualifier = pywikibot.Claim( self.repo, u'P195') #Add collection, isQualifier=True newqualifier.setTarget(self.collectionitem) pywikibot.output('Adding new qualifier claim to %s' % paintingItem) newclaim.addQualifier(newqualifier) collectionclaim = pywikibot.Claim(self.repo, u'P195') collectionclaim.setTarget(self.collectionitem) pywikibot.output('Adding collection claim to %s' % paintingItem) paintingItem.addClaim(collectionclaim) # Add the date they got it as a qualifier to the collection if painting.get(u'acquisitiondate'): colqualifier = pywikibot.Claim(self.repo, u'P580') acdate = None if len(painting[u'acquisitiondate']) == 4 and painting[ u'acquisitiondate'].isnumeric(): # It's a year acdate = pywikibot.WbTime( year=painting[u'acquisitiondate']) elif len(painting[u'acquisitiondate'].split(u'-', 2)) == 3: (acday, acmonth, acyear) = painting[u'acquisitiondate'].split(u'-', 2) acdate = pywikibot.WbTime(year=int(acyear), month=int(acmonth), day=int(acday)) if acdate: colqualifier.setTarget(acdate) pywikibot.output( 'Adding new acquisition date qualifier claim to collection on %s' % paintingItem) collectionclaim.addQualifier(colqualifier) self.addReference(paintingItem, collectionclaim, painting[u'url']) if paintingItem and paintingItem.exists(): painting['wikidata'] = paintingItem.title() data = paintingItem.get() claims = data.get('claims') #print claims if painting.get(u'creator'): self.fixDescription(paintingItem, painting.get(u'creator')) # located in if u'P276' not in claims and 
painting.get(u'location'): newclaim = pywikibot.Claim(self.repo, u'P276') location = pywikibot.ItemPage(self.repo, painting.get(u'location')) newclaim.setTarget(location) pywikibot.output('Adding located in claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting['url']) # instance of always painting while working on the painting collection if u'P31' not in claims: dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213') newclaim = pywikibot.Claim(self.repo, u'P31') newclaim.setTarget(dcformatItem) pywikibot.output('Adding instance claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting['url']) # creator if u'P170' not in claims and painting.get(u'creator'): #print painting[u'creator'] creategen = pagegenerators.PreloadingEntityGenerator( pagegenerators.WikidataItemGenerator( pagegenerators.SearchPageGenerator( painting[u'creator'], step=None, total=10, namespaces=[0], site=self.repo))) newcreator = None try: for creatoritem in creategen: print creatoritem.title() if creatoritem.get().get('labels').get( 'en' ) == painting[u'creator'] or creatoritem.get( ).get('labels').get('nl') == painting[u'creator']: #print creatoritem.get().get('labels').get('en') #print creatoritem.get().get('labels').get('nl') # Check occupation and country of citizinship if u'P106' in creatoritem.get().get('claims'): existing_claims = creatoritem.get().get( 'claims').get('P106') for existing_claim in existing_claims: if existing_claim.target_equals( u'Q1028181'): newcreator = creatoritem continue elif ( creatoritem.get().get('aliases').get('en') and painting[u'creator'] in creatoritem.get().get('aliases').get('en') ) or (creatoritem.get().get('aliases').get('nl') and painting[u'creator'] in creatoritem.get().get('aliases').get('nl')): if u'P106' in creatoritem.get().get('claims'): existing_claims = creatoritem.get().get( 'claims').get('P106') for existing_claim in existing_claims: if existing_claim.target_equals( u'Q1028181'): newcreator = creatoritem continue except pywikibot.exceptions.APIError: print u'Search API is acting up, just let it be' pass if newcreator: pywikibot.output(newcreator.title()) newclaim = pywikibot.Claim(self.repo, u'P170') newclaim.setTarget(newcreator) pywikibot.output('Adding creator claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting[u'url']) #print creatoritem.title() #print creatoritem.get() else: pywikibot.output('No item found for %s' % (painting[u'creator'], )) else: print u'Already has a creator' # date of creation if u'P571' not in claims and painting.get(u'date'): if len( painting[u'date'] ) == 4 and painting[u'date'].isnumeric(): # It's a year newdate = pywikibot.WbTime(year=painting[u'date']) newclaim = pywikibot.Claim(self.repo, u'P571') newclaim.setTarget(newdate) pywikibot.output( 'Adding date of creation claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting[u'url']) # material used if u'P186' not in claims and painting.get(u'medium'): if painting.get(u'medium') == u'Oil on canvas': olieverf = pywikibot.ItemPage(self.repo, u'Q296955') doek = pywikibot.ItemPage(self.repo, u'Q4259259') oppervlak = pywikibot.ItemPage(self.repo, u'Q861259') newclaim = pywikibot.Claim(self.repo, u'P186') newclaim.setTarget(olieverf) pywikibot.output('Adding new oil paint claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, 
newclaim, painting[u'url']) newclaim = pywikibot.Claim(self.repo, u'P186') newclaim.setTarget(doek) pywikibot.output('Adding new canvas claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, painting[u'url']) newqualifier = pywikibot.Claim( self.repo, u'P518') #Applies to part newqualifier.setTarget(oppervlak) pywikibot.output('Adding new qualifier claim to %s' % paintingItem) newclaim.addQualifier(newqualifier) # Described at url if u'P973' not in claims: newclaim = pywikibot.Claim(self.repo, u'P973') newclaim.setTarget(painting[u'url']) pywikibot.output('Adding described at claim to %s' % paintingItem) paintingItem.addClaim(newclaim) # self.addReference(paintingItem, newclaim, uri) # Upload an image baby! BUT NOT NOW imagetitle = u'' if painting.get(u'imageurl') and u'P18' not in claims: commonssite = pywikibot.Site("commons", "commons") photo = Photo(painting[u'imageurl'], painting) titlefmt = u'%(creator)s - %(title)s - %(id)s - Minneapolis Institute of Arts.%(_ext)s' pagefmt = u'User:Multichill/Minneapolis Institute of Arts' duplicates = photo.findDuplicateImages() if duplicates: pywikibot.output(u"Skipping duplicate of %r" % duplicates) imagetitle = duplicates[0] #return duplicates[0] else: imagetitle = self.cleanUpTitle( photo.getTitle(titlefmt)) pywikibot.output(imagetitle) description = photo.getDescription(pagefmt) pywikibot.output(description) handle, tempname = tempfile.mkstemp() with os.fdopen(handle, "wb") as t: t.write(photo.downloadPhoto().getvalue()) t.close() #tempname bot = upload.UploadRobot(url=tempname, description=description, useFilename=imagetitle, keepFilename=True, verifyDescription=False, uploadByUrl=False, targetSite=commonssite) #bot._contents = photo.downloadPhoto().getvalue() #bot._retrieved = True bot.run() if u'P18' not in claims and imagetitle: newclaim = pywikibot.Claim(self.repo, u'P18') imagelink = pywikibot.Link(imagetitle, source=commonssite, defaultNamespace=6) image = pywikibot.ImagePage(imagelink) if image.isRedirectPage(): image = pywikibot.ImagePage(image.getRedirectTarget()) newclaim.setTarget(image) pywikibot.output('Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) paintingItem.addClaim(newclaim)
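# A hedged sketch of the acquisition-date handling in run() above: a bare
# four-digit year becomes a year-precision WbTime, a 'dd-mm-yyyy' string a
# day-precision one. parse_acquisition_date is an illustrative helper and
# not part of the bot; note it casts the year to int, which the code above
# leaves as a string.
import pywikibot


def parse_acquisition_date(datestring):
    """Return a WbTime for '1984' or '17-03-1984' style input, else None."""
    if len(datestring) == 4 and datestring.isnumeric():
        return pywikibot.WbTime(year=int(datestring))
    parts = datestring.split('-', 2)
    if len(parts) == 3:
        day, month, year = parts
        return pywikibot.WbTime(year=int(year), month=int(month),
                                day=int(day))
    return None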
def procesPage(self, page):
    """Process a single page."""
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo,
                                                self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % claim.getID())
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            if claim.getType() == 'wikibase-item':
                                # Try to extract a valid page
                                match = re.search(pywikibot.link_regex,
                                                  value)
                                if match:
                                    try:
                                        link = pywikibot.Link(match.group(1))
                                        linkedPage = pywikibot.Page(link)
                                        if linkedPage.isRedirectPage():
                                            linkedPage = \
                                                linkedPage.getRedirectTarget()
                                        linkedItem = \
                                            pywikibot.ItemPage.fromPage(
                                                linkedPage)
                                        claim.setTarget(linkedItem)
                                    except pywikibot.exceptions.NoPage:
                                        pywikibot.output(
                                            '[[%s]] doesn\'t exist so I '
                                            'can\'t link to it'
                                            % (match.group(1), ))
                                        continue
                            elif claim.getType() == 'string':
                                claim.setTarget(value.strip())
                            else:
                                pywikibot.output(
                                    '%s is not a supported datatype.'
                                    % claim.getType())
                                continue
                            pywikibot.output(
                                'Adding %s --> %s'
                                % (claim.getID(), claim.getTarget()))
                            item.addClaim(claim)
                            if self.source:
                                claim.addSource(self.source, bot=True)
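# Small illustration of the wikilink extraction used in procesPage() above:
# pywikibot.link_regex captures the target title as group 1, so a template
# parameter value keeps only the page name, without label or brackets.
# The sample value is made up.
import re

import pywikibot

value = 'painted by [[Rembrandt|Rembrandt van Rijn]] in 1642'
match = re.search(pywikibot.link_regex, value)
if match:
    pywikibot.output(match.group(1))  # -> Rembrandt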
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ # the option that's always selected when the bot wonders what to do with # a link. If it's None, the user is prompted (default behaviour). always = None alternatives = [] getAlternatives = True dnSkip = False generator = None primary = False main_only = False # For sorting the linked pages, case can be ignored minimum = 0 local_args = pywikibot.handle_args(args) generator_factory = pagegenerators.GeneratorFactory( positional_arg_name='page') for arg in local_args: if arg.startswith('-primary:'): primary = True getAlternatives = False alternatives.append(arg[9:]) elif arg == '-primary': primary = True elif arg.startswith('-always:'): always = arg[8:] elif arg.startswith('-pos:'): if arg[5] != ':': mysite = pywikibot.Site() page = pywikibot.Page(pywikibot.Link(arg[5:], mysite)) if page.exists(): alternatives.append(page.title()) else: if pywikibot.input_yn( u'Possibility %s does not actually exist. Use it ' 'anyway?' % page.title(), default=False, automatic_quit=False): alternatives.append(page.title()) else: alternatives.append(arg[5:]) elif arg == '-just': getAlternatives = False elif arg == '-dnskip': dnSkip = True elif arg == '-main': main_only = True elif arg.startswith('-min:'): minimum = int(arg[5:]) elif arg.startswith('-start'): try: generator = pagegenerators.CategorizedPageGenerator( pywikibot.Site().disambcategory(), start=arg[7:], namespaces=[0]) except pywikibot.NoPage: pywikibot.output( 'Disambiguation category for your wiki is not known.') raise else: generator_factory.handleArg(arg) site = pywikibot.Site() generator = generator_factory.getCombinedGenerator(generator) if not generator: pywikibot.bot.suggest_help(missing_generator=True) return False site.login() bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip, generator, primary, main_only, minimum=minimum) bot.run()
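# Hedged usage examples for main() above, using only the flags it parses;
# the script invocation and page names are illustrative:
#
#   python pwb.py solve_disambiguation Mercury
#   python pwb.py solve_disambiguation -just -pos:Mercury_(planet) Mercury
#   python pwb.py solve_disambiguation -primary -main -min:2 Mercury
#   python pwb.py solve_disambiguation -start:A -dnskip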
def treat(self, page, item): """Process a single page/item.""" if willstop: raise KeyboardInterrupt self.current_page = page item.get() if set(val[0] for val in self.fields.values()) <= set(item.claims.keys()): pywikibot.output('%s item %s has claims for all properties. ' 'Skipping.' % (page, item.title())) return templates = page.raw_extracted_templates for (template, fielddict) in templates: # Clean up template try: template = pywikibot.Page(page.site, template, ns=10).title(withNamespace=False) except pywikibot.exceptions.InvalidTitle: pywikibot.error( "Failed parsing template; '%s' should be the template name." % template) continue # We found the template we were looking for if template in self.templateTitles: for field, value in fielddict.items(): field = field.strip() value = value.strip() if not field or not value: continue # This field contains something useful for us if field in self.fields: prop, options = self.fields[field] # Check if the property isn't already set claim = pywikibot.Claim(self.repo, prop) if claim.getID() in item.get().get('claims'): pywikibot.output( 'A claim for %s already exists. Skipping.' % claim.getID()) # TODO: Implement smarter approach to merging # harvested values with existing claims esp. # without overwriting humans unintentionally. else: if claim.type == 'wikibase-item': # Try to extract a valid page match = pywikibot.link_regex.search(value) if match: link_text = match.group(1) else: if self._get_option_with_fallback( options, 'islink'): link_text = value else: pywikibot.output( '%s field %s value %s is not a ' 'wikilink. Skipping.' % (claim.getID(), field, value)) continue linked_item = self._template_link_target( item, link_text) if not linked_item: continue claim.setTarget(linked_item) elif claim.type in ('string', 'external-id'): claim.setTarget(value.strip()) elif claim.type == 'url': match = self.linkR.search(value) if not match: continue claim.setTarget(match.group('url')) elif claim.type == 'commonsMedia': commonssite = pywikibot.Site( 'commons', 'commons') imagelink = pywikibot.Link(value, source=commonssite, defaultNamespace=6) image = pywikibot.FilePage(imagelink) if image.isRedirectPage(): image = pywikibot.FilePage( image.getRedirectTarget()) if not image.exists(): pywikibot.output( "{0} doesn't exist. I can't link to it" ''.format(image.title(asLink=True))) continue claim.setTarget(image) else: pywikibot.output( '%s is not a supported datatype.' % claim.type) continue # A generator might yield pages from multiple sites self.user_add_claim(item, claim, page.site)
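# The early exit in treat() above hinges on a set comparison: when every
# harvested property already has a claim, the item is skipped. A minimal
# stand-alone illustration (field names and property ids are made up):
fields = {'birth_date': ('P569', {}), 'birth_place': ('P19', {})}
claims = {'P569': [], 'P19': [], 'P31': []}
wanted = set(val[0] for val in fields.values())
print(wanted <= set(claims.keys()))  # True -> nothing left to harvest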
def treat_disamb_only(self, refPage, disambPage): """Resolve the links to disambPage but don't look for its redirects. @param disambPage: the disambiguation page or redirect we don't want anything to link to @type disambPage: pywikibot.Page @param refPage: a page linking to disambPage @type refPage: pywikibot.Page @return: "nextpage" if the user enters "n" to skip this page, "nochange" if the page needs no change, and "done" if the page is processed successfully @rtype: str """ # TODO: break this function up into subroutines! self.current_page = refPage include = False unlink_counter = 0 new_targets = [] try: text = refPage.get() ignoreReason = self.checkContents(text) if ignoreReason: pywikibot.output( '\n\nSkipping %s because it contains %s.\n\n' % (refPage.title(), ignoreReason)) else: include = True except pywikibot.IsRedirectPage: pywikibot.output(u'%s is a redirect to %s' % (refPage.title(), disambPage.title())) if disambPage.isRedirectPage(): target = self.alternatives[0] if pywikibot.input_yn(u'Do you want to make redirect %s point ' 'to %s?' % (refPage.title(), target), default=False, automatic_quit=False): redir_text = '#%s [[%s]]' \ % (self.mysite.redirect(), target) try: refPage.put(redir_text, summary=self.comment, asynchronous=True) except pywikibot.PageNotSaved as error: pywikibot.output(u'Page not saved: %s' % error.args) else: choice = pywikibot.input_choice( u'Do you want to work on pages linking to %s?' % refPage.title(), [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n', automatic_quit=False) if choice == 'y': gen = ReferringPageGeneratorWithIgnore( refPage, self.primary, main_only=self.main_only) preloadingGen = pagegenerators.PreloadingGenerator(gen) for refPage2 in preloadingGen: # run until the user selected 'quit' self.treat(refPage2, refPage) elif choice == 'c': text = refPage.get(get_redirect=True) include = "redirect" except pywikibot.NoPage: pywikibot.output( u'Page [[%s]] does not seem to exist?! Skipping.' % refPage.title()) include = False if include in (True, "redirect"): # save the original text so we can show the changes later original_text = text n = 0 curpos = 0 dn = False edited = False # This loop will run until we have finished the current page while True: m = self.linkR.search(text, pos=curpos) if not m: if n == 0: # No changes necessary for this disambiguation title. return 'nochange' else: # stop loop and save page break # Ensure that next time around we will not find this same hit. curpos = m.start() + 1 try: foundlink = pywikibot.Link(m.group('title'), disambPage.site) foundlink.parse() except pywikibot.Error: continue # ignore interwiki links if foundlink.site != disambPage.site: continue # Check whether the link found is to disambPage. 
try: if foundlink.canonical_title() != disambPage.title(): continue except pywikibot.Error: # must be a broken link pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" % (m.group('title'), refPage.title())) continue n += 1 # how many bytes should be displayed around the current link context = 60 # check if there's a dn-template here already if (self.dnSkip and self.dn_template_str and self.dn_template_str[:-2] in text[m.end():m.end() + len(self.dn_template_str) + 8]): continue edit = EditOption('edit page', 'e', text, m.start(), disambPage.title()) context_option = HighlightContextOption('more context', 'm', text, 60, start=m.start(), end=m.end()) context_option.before_question = True options = [ ListOption(self.alternatives, ''), ListOption(self.alternatives, 'r'), StandardOption('skip link', 's'), edit, StandardOption('next page', 'n'), StandardOption('unlink', 'u') ] if self.dn_template_str: # '?', '/' for old choice options += [ AliasOption('tag template %s' % self.dn_template_str, ['t', '?', '/']) ] options += [context_option] if not edited: options += [ ShowPageOption('show disambiguation page', 'd', m.start(), disambPage) ] options += [ OutputProxyOption('list', 'l', SequenceOutputter(self.alternatives)), AddAlternativeOption('add new', 'a', SequenceOutputter(self.alternatives)) ] if edited: options += [StandardOption('save in this form', 'x')] # TODO: Output context on each question answer = pywikibot.input_choice('Option', options, default=self.always, force=bool(self.always)) if answer == 'x': assert edited, 'invalid option before editing' break elif answer == 's': n -= 1 # TODO what's this for? continue elif answer == 'e': text = edit.new_text edited = True curpos = 0 continue elif answer == 'n': # skip this page if self.primary: # If run with the -primary argument, skip this # occurrence next time. self.primaryIgnoreManager.ignore(refPage) return 'nextpage' # The link looks like this: # [[page_title|link_text]]trailing_chars page_title = m.group('title') link_text = m.group('label') if not link_text: # or like this: [[page_title]]trailing_chars link_text = page_title if m.group('section') is None: section = '' else: section = m.group('section') trailing_chars = m.group('linktrail') if trailing_chars: link_text += trailing_chars if answer == 't': assert self.dn_template_str # small chunk of text to search search_text = text[m.end():m.end() + context] # figure out where the link (and sentance) ends, put note # there end_of_word_match = re.search(r'\s', search_text) if end_of_word_match: position_split = end_of_word_match.start(0) else: position_split = 0 # insert dab needed template text = (text[:m.end() + position_split] + self.dn_template_str + text[m.end() + position_split:]) dn = True continue elif answer == 'u': # unlink - we remove the section if there's any text = text[:m.start()] + link_text + text[m.end():] unlink_counter += 1 continue else: # Check that no option from above was missed assert isinstance(answer, tuple), 'only tuple answer left.' assert answer[0] in ['r', ''], 'only valid tuple answers.' 
if answer[0] == 'r': # we want to throw away the original link text replaceit = link_text == page_title elif include == "redirect": replaceit = True else: replaceit = False new_page_title = answer[1] repPl = pywikibot.Page( pywikibot.Link(new_page_title, disambPage.site)) if (new_page_title[0].isupper() or link_text[0].isupper()): new_page_title = repPl.title() else: new_page_title = repPl.title() new_page_title = first_lower(new_page_title) if new_page_title not in new_targets: new_targets.append(new_page_title) if replaceit and trailing_chars: newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars) elif replaceit or (new_page_title == link_text and not section): newlink = "[[%s]]" % new_page_title # check if we can create a link with trailing characters # instead of a pipelink elif ((len(new_page_title) <= len(link_text)) and (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and (not section)): newlink = "[[%s]]%s" \ % (link_text[:len(new_page_title)], link_text[len(new_page_title):]) else: newlink = "[[%s%s|%s]]" \ % (new_page_title, section, link_text) text = text[:m.start()] + newlink + text[m.end():] continue # Todo: This line is unreachable (T155337) pywikibot.output(text[max(0, m.start() - 30):m.end() + 30]) if text == original_text: pywikibot.output(u'\nNo changes have been made:\n') else: pywikibot.output(u'\nThe following changes have been made:\n') pywikibot.showDiff(original_text, text) pywikibot.output(u'') # save the page self.setSummaryMessage(disambPage, new_targets, unlink_counter, dn) try: refPage.put(text, summary=self.comment, asynchronous=True) except pywikibot.LockedPage: pywikibot.output(u'Page not saved: page is locked') except pywikibot.PageNotSaved as error: pywikibot.output(u'Page not saved: %s' % error.args) return 'done'
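# A stand-alone sketch of the {{dn}} placement rule shared by both treat
# methods above: the tag is inserted at the first whitespace after the
# link, so it lands behind any linktrail characters the word still has.
# insert_dn is an illustrative name, not a bot helper.
import re


def insert_dn(text, link_end, dn_template_str='{{dn}}', context=60):
    """Insert dn_template_str after the word ending at or after link_end."""
    search_text = text[link_end:link_end + context]
    end_of_word = re.search(r'\s', search_text)
    split = end_of_word.start(0) if end_of_word else 0
    return text[:link_end + split] + dn_template_str + text[link_end + split:]


# insert_dn('See [[Mercury]]s orbit.', len('See [[Mercury]]'))
# -> 'See [[Mercury]]s{{dn}} orbit.'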
def translate(page=None, hints=None, auto=True, removebrackets=False, site=None): """ Return a list of links to pages on other sites based on hints. Entries for single page titles list those pages. Page titles for entries such as "all:" or "xyz:" or "20:" are first built from the page title of 'page' and then listed. When 'removebrackets' is True, a trailing pair of brackets and the text between them is removed from the page title. If 'auto' is true, known year and date page titles are autotranslated to all known target languages and inserted into the list. """ result = set() assert page or site if site is None and page: site = page.site if hints: for h in hints: if ':' not in h: # argument given as -hint:xy where xy is a language code codes = h newname = '' else: codes, newname = h.split(':', 1) if newname == '': # if given as -hint:xy or -hint:xy:, assume that there should # be a page in language xy with the same title as the page # we're currently working on ... if page is None: continue newname = page.title(withNamespace=False) # ... unless we do want brackets if removebrackets: newname = re.sub( re.compile(r"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname) try: number = int(codes) codes = site.family.languages_by_size[:number] except ValueError: if codes == 'all': codes = site.family.languages_by_size elif codes in site.family.language_groups: codes = site.family.language_groups[codes] else: codes = codes.split(',') for newcode in codes: if newcode in site.languages(): if newcode != site.code: ns = page.namespace() if page else 0 x = pywikibot.Link(newname, site.getSite(code=newcode), defaultNamespace=ns) result.add(x) else: if config.verbose_output: pywikibot.output(u"Ignoring unknown language code %s" % newcode) # Autotranslate dates into all other languages, the rest will come from # existing interwiki links. if auto and page: # search inside all dictionaries for this link sitelang = page.site.code dictName, value = date.getAutoFormat(sitelang, page.title()) if dictName: if True: pywikibot.output( u'TitleTranslate: %s was recognized as %s with value %d' % (page.title(), dictName, value)) for entryLang, entry in date.formats[dictName].items(): if entryLang not in site.languages(): continue if entryLang != sitelang: if True: newname = entry(value) x = pywikibot.Link( newname, pywikibot.Site(code=entryLang, fam=site.family)) result.add(x) return list(result)
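# Hedged usage sketch for translate() above, matching the hint syntax it
# parses: a bare count expands to the family's largest wikis, 'all:' to
# every known language, and 'xy:Title' to an explicit title on the xy
# wiki. The page object is illustrative.
import pywikibot

page = pywikibot.Page(pywikibot.Site('en', 'wikipedia'), 'Amsterdam')
links = translate(page, hints=['10:', 'de:Amsterdam (stad)', 'all:'],
                  auto=False)
for link in links:
    pywikibot.output(str(link))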
def run(self): """Run the bot""" global destmap, catlist, catmap user = self.site.user() problems = [] newredirs = [] l = time.localtime() today = "%04d-%02d-%02d" % l[:3] edit_request_page = pywikibot.Page( self.site, u"User:%(user)s/category edit requests" % locals()) datafile = pywikibot.config.datafilepath("%s-catmovebot-data" % self.site.dbName()) try: inp = open(datafile, "rb") record = cPickle.load(inp) inp.close() except IOError: record = {} if record: cPickle.dump(record, open(datafile + ".bak", "wb"), -1) try: template_list = self.site.family.category_redirect_templates[ self.site.code] except KeyError: pywikibot.output(u"No redirect templates defined for %s" % self.site.sitename()) return # regex to match soft category redirects # note that any templates containing optional "category:" are # incorrect and will be fixed by the bot template_regex = re.compile( r"""{{\s*(?:%(prefix)s\s*:\s*)? # optional "template:" (?:%(template)s)\s*\| # catredir template name (\s*%(catns)s\s*:\s*)? # optional "category:" ([^|}]+) # redirect target cat (?:\|[^|}]*)*}} # optional arguments 2+, ignored """ % { 'prefix': self.site.namespace(10).lower(), 'template': "|".join(item.replace(" ", "[ _]+") for item in template_list), 'catns': self.site.namespace(14) }, re.I | re.X) # check for hard-redirected categories that are not already marked # with an appropriate template comment = i18n.twtranslate(self.site.lang, self.redir_comment) for page in pagegenerators.PreloadingGenerator(self.site.allpages( namespace=14, filterredir=True), step=250): # generator yields all hard redirect pages in namespace 14 if page.isCategoryRedirect(): # this is already a soft-redirect, so skip it (for now) continue try: target = page.getRedirectTarget() except pywikibot.CircularRedirect: target = page problems.append(u"# %s is a self-linked redirect" % page.title(asLink=True, textlink=True)) except RuntimeError: # race condition: someone else removed the redirect while we # were checking for it continue if target.namespace() == 14: # this is a hard-redirect to a category page newtext = (u"{{%(template)s|%(cat)s}}" % { 'cat': target.title(withNamespace=False), 'template': template_list[0] }) try: page.put(newtext, comment, minorEdit=True) self.log_text.append( u"* Added {{tl|%s}} to %s" % (template_list[0], page.title(asLink=True, textlink=True))) except pywikibot.Error as e: self.log_text.append( u"* Failed to add {{tl|%s}} to %s" % (template_list[0], page.title(asLink=True, textlink=True))) else: problems.append(u"# %s is a hard redirect to %s" % (page.title(asLink=True, textlink=True), target.title(asLink=True, textlink=True))) pywikibot.output("Done checking hard-redirect category pages.") comment = i18n.twtranslate(self.site.lang, self.move_comment) counts, destmap, catmap = {}, {}, {} catlist, nonemptypages = [], [] redircat = pywikibot.Category( pywikibot.Link( self.cat_redirect_cat[self.site.family.name][self.site.code], self.site)) # get a list of all members of the category-redirect category catpages = dict((c, None) for c in redircat.subcategories()) # check the category pages for redirected categories pywikibot.output(u"") pywikibot.output(u"Checking %s category redirect pages" % len(catpages)) for cat in catpages: cat_title = cat.title(withNamespace=False) if "category redirect" in cat_title: self.log_text.append(u"* Ignoring %s" % cat.title(asLink=True, textlink=True)) continue if hasattr(cat, "_catinfo"): # skip empty categories that don't return a "categoryinfo" key catdata = cat.categoryinfo if "size" in 
catdata and int(catdata['size']): # save those categories that have contents nonemptypages.append(cat) if cat_title not in record: # make sure every redirect has a record entry record[cat_title] = {today: None} try: newredirs.append("*# %s -> %s" % (cat.title(asLink=True, textlink=True), cat.getCategoryRedirectTarget().title( asLink=True, textlink=True))) except pywikibot.Error: pass # do a null edit on cat try: cat.put(cat.get(get_redirect=True)) except: pass # delete record entries for non-existent categories for cat_name in record.keys(): if pywikibot.Category(self.site, self.catprefix + cat_name) not in catpages: del record[cat_name] pywikibot.output(u"") pywikibot.output(u"Moving pages out of %s redirected categories." % len(nonemptypages)) for cat in pagegenerators.PreloadingGenerator(nonemptypages): try: if not cat.isCategoryRedirect(): self.log_text.append(u"* False positive: %s" % cat.title(asLink=True, textlink=True)) continue except pywikibot.Error: self.log_text.append(u"* Could not load %s; ignoring" % cat.title(asLink=True, textlink=True)) continue cat_title = cat.title(withNamespace=False) if not self.readyToEdit(cat): counts[cat_title] = None self.log_text.append(u"* Skipping %s; in cooldown period." % cat.title(asLink=True, textlink=True)) continue dest = cat.getCategoryRedirectTarget() if not dest.exists(): problems.append("# %s redirects to %s" % (cat.title(asLink=True, textlink=True), dest.title(asLink=True, textlink=True))) # do a null edit on cat to update any special redirect # categories this wiki might maintain try: cat.put(cat.get(get_redirect=True)) except: pass continue if dest.isCategoryRedirect(): double = dest.getCategoryRedirectTarget() if double == dest or double == cat: self.log_text.append( u"* Redirect loop from %s" % dest.title(asLink=True, textlink=True)) # do a null edit on cat try: cat.put(cat.get(get_redirect=True)) except: pass else: self.log_text.append( u"* Fixed double-redirect: %s -> %s -> %s" % (cat.title(asLink=True, textlink=True), dest.title(asLink=True, textlink=True), double.title(asLink=True, textlink=True))) oldtext = cat.get(get_redirect=True) # remove the old redirect from the old text, # leaving behind any non-redirect text oldtext = template_regex.sub("", oldtext) newtext = (u"{{%(redirtemp)s|%(ncat)s}}" % { 'redirtemp': template_list[0], 'ncat': double.title(withNamespace=False) }) newtext = newtext + oldtext.strip() try: cat.put(newtext, i18n.twtranslate(self.site.lang, self.dbl_redir_comment), minorEdit=True) except pywikibot.Error as e: self.log_text.append("** Failed: %s" % e) continue found, moved = self.move_contents(cat_title, dest.title(withNamespace=False), editSummary=comment) if found is None: self.log_text.append(u"* [[:%s%s]]: error in move_contents" % (self.catprefix, cat_title)) elif found: record[cat_title][today] = found self.log_text.append(u"* [[:%s%s]]: %d found, %d moved" % (self.catprefix, cat_title, found, moved)) counts[cat_title] = found # do a null edit on cat try: cat.put(cat.get(get_redirect=True)) except: pass continue cPickle.dump(record, open(datafile, "wb"), -1) pywikibot.setAction( i18n.twtranslate(self.site.lang, self.maint_comment)) self.log_text.sort() problems.sort() newredirs.sort() self.log_page.put(u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n" % time.gmtime()[:6] + u"\n".join(self.log_text) + u"\n* New redirects since last report:\n" + u"\n".join(newredirs) + u"\n" + u"\n".join(problems) + u"\n" + self.get_log_text()) if self.edit_requests: edit_request_page.put( self.edit_request_text % { 
'itemlist': u"\n" + u"\n".join((self.edit_request_item % item) for item in self.edit_requests) })
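# A simplified stand-alone version of the soft-redirect template_regex
# built in run() above, hard-coded for an English wiki with one
# 'Category redirect' template; the production pattern also covers
# template aliases and localized namespace names:
import re

template_regex = re.compile(
    r"""{{\s*(?:template\s*:\s*)?      # optional "template:" prefix
    (?:Category[ _]+redirect)\s*\|     # the redirect template name
    (\s*Category\s*:\s*)?              # optional "category:" prefix
    ([^|}]+)                           # the redirect target category
    (?:\|[^|}]*)*}}                    # further arguments, ignored
    """, re.I | re.X)

m = template_regex.search('{{Category redirect|Category:New name|keep}}')
print(m.group(2).strip())  # -> New name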
def parse_page_tuples(self, wikitext, user=None):
    """Parse page details apart from 'user:' links."""
    whitelist = defaultdict(list)
    current_user = False
    parsed = mwparserfromhell.parse(wikitext)
    for node in parsed.nodes:
        if isinstance(node, mwparserfromhell.nodes.tag.Tag):
            # a list item opens a new rule; its owner is not known yet
            if node.tag == 'li':
                current_user = None
        elif isinstance(node, mwparserfromhell.nodes.text.Text):
            if node.endswith('\n'):
                current_user = False
        elif isinstance(node, mwparserfromhell.nodes.wikilink.Wikilink):
            if current_user is False:
                pywikibot.debug('Link to "{0}" ignored as outside '
                                'list'.format(node.title), _logger)
                continue
            obj = pywikibot.Link(node.title, self.site)
            if obj.namespace == -1:
                # the parser accepts 'special:prefixindex/' as a wildcard
                # this allows a prefix that doesn't match an existing page
                # to be a blue link, and can be clicked to see what pages
                # will be included in the whitelist
                name, sep, prefix = obj.title.partition('/')
                if name.lower() in self._prefixindex_aliases:
                    if not prefix:
                        if pywikibot.config.verbose_output:
                            pywikibot.output(u'Whitelist everything')
                        page = ''
                    else:
                        page = prefix
                        if pywikibot.config.verbose_output:
                            pywikibot.output(u'Whitelist prefixindex hack '
                                             u'for: %s' % page)
                # p = pywikibot.Page(self.site, obj.target[20:])
                # obj.namespace = p.namespace
                # obj.target = p.title()
            elif obj.namespace == 2 and not current_user:
                # if a target user hasn't been found yet, and the link is
                # to a 'user:' page, that user is the target of the rules
                # that follow in this list item
                current_user = obj.title
                if pywikibot.config.verbose_output:
                    pywikibot.output('Whitelist user: %s' % current_user)
                continue
            else:
                page = obj.canonical_title()
            if current_user:
                if not user or current_user == user:
                    if self.is_wikisource_author_page(page):
                        if pywikibot.config.verbose_output:
                            pywikibot.output('Whitelist author: %s' % page)
                        page = LinkedPagesRule(page)
                    else:
                        if pywikibot.config.verbose_output:
                            pywikibot.output(u'Whitelist page: %s' % page)
                    if pywikibot.config.verbose_output:
                        pywikibot.output('Adding {0}:{1}'.format(
                            current_user, page))
                    whitelist[current_user].append(page)
                elif pywikibot.config.verbose_output:
                    pywikibot.output(u'Discarding whitelist page for '
                                     u'another user: %s' % page)
            else:
                raise Exception(u'No user set for page %s' % page)
    return dict(whitelist)
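# Minimal demonstration of the mwparserfromhell node walk that
# parse_page_tuples() above relies on: '*' list items arrive as Tag nodes
# (tag 'li'), raw text including the newline that ends an item as Text
# nodes, and [[...]] links as Wikilink nodes. The wikitext is made up.
import mwparserfromhell

wikitext = '* [[User:Example]] [[Special:PrefixIndex/Foo]]\n'
for node in mwparserfromhell.parse(wikitext).nodes:
    print(type(node).__name__, repr(str(node)))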
def treat(self, page, item): """Process a single page/item.""" self.current_page = page item.get() if set(self.fields.values()) <= set(item.claims.keys()): pywikibot.output( u'%s item %s has claims for all properties. Skipping' % (page, item.title())) return pagetext = page.get() templates = textlib.extract_templates_and_params(pagetext) for (template, fielddict) in templates: # Clean up template try: template = pywikibot.Page(page.site, template, ns=10).title(withNamespace=False) except pywikibot.exceptions.InvalidTitle: pywikibot.error( u"Failed parsing template; '%s' should be the template name." % template) continue # We found the template we were looking for if template in self.templateTitles: for field, value in fielddict.items(): field = field.strip() value = value.strip() if not field or not value: continue # This field contains something useful for us if field in self.fields: # Check if the property isn't already set claim = pywikibot.Claim(self.repo, self.fields[field]) if claim.getID() in item.get().get('claims'): pywikibot.output( u'A claim for %s already exists. Skipping' % claim.getID()) # TODO: Implement smarter approach to merging # harvested values with existing claims esp. # without overwriting humans unintentionally. else: if claim.type == 'wikibase-item': # Try to extract a valid page match = re.search(pywikibot.link_regex, value) if not match: pywikibot.output( u'%s field %s value %s isnt a wikilink. Skipping' % (claim.getID(), field, value)) continue link_text = match.group(1) linked_item = self._template_link_target( item, link_text) if not linked_item: continue claim.setTarget(linked_item) elif claim.type == 'string': claim.setTarget(value.strip()) elif claim.type == 'commonsMedia': commonssite = pywikibot.Site( "commons", "commons") imagelink = pywikibot.Link(value, source=commonssite, defaultNamespace=6) image = pywikibot.FilePage(imagelink) if image.isRedirectPage(): image = pywikibot.FilePage( image.getRedirectTarget()) if not image.exists(): pywikibot.output( '[[%s]] doesn\'t exist so I can\'t link to it' % (image.title(), )) continue claim.setTarget(image) else: pywikibot.output( "%s is not a supported datatype." % claim.type) continue pywikibot.output( 'Adding %s --> %s' % (claim.getID(), claim.getTarget())) item.addClaim(claim) # A generator might yield pages from multiple sites source = self.getSource(page.site) if source: claim.addSource(source, bot=True)
def new_title(self): """Return page object of the new title.""" if not hasattr(self, '_new_title'): self._new_title = pywikibot.Page( pywikibot.Link(self.data['move']['new_title'])) return self._new_title
def make_image_item(self, filename):
    """Return a FilePage for the given file name on Commons."""
    commonssite = utils.create_site_instance("commons", "commons")
    imagelink = pywikibot.Link(filename, source=commonssite,
                               defaultNamespace=6)
    return pywikibot.FilePage(imagelink)
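# Hedged usage sketch for make_image_item() above (the bot instance, file
# name, and repo attribute are illustrative): the returned FilePage can be
# used directly as the target of an image (P18) claim.
#
#   file_page = bot.make_image_item('Example.jpg')
#   claim = pywikibot.Claim(bot.repo, 'P18')
#   claim.setTarget(file_page)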