def template_processor(self, page, introtext):
    """Process templates in a page's introduction.

    Infoboxes in the introduction will be processed and removed.
    Other templates will just be removed (most often these are amboxes).

    @param page: the page being processed; stored in the result dict so the
        harvested date can be attributed back to its page.
    @param introtext: wikitext of the page introduction.
    @return: introtext with all templates stripped out.
    """
    templates = pywikibot.extract_templates_and_params(introtext)
    for t in templates:
        # t is (template_name, params_dict); `infobox` is a module-level
        # pattern matching infobox template names — TODO confirm definition.
        if re.search(infobox, t[0]):
            # print t  # debug only
            for k in t[1].keys():
                # pywikibot.output(k)  # debug only
                # pywikibot.output(t[1][k])
                # Look for a year-bearing date in each infobox parameter value.
                m = self.dateregexwithyear.search(t[1][k])
                if m:
                    # We have just found the date we are looking for :-)
                    d = {
                        'page': page,
                        'year': int(m.group('year')),
                        # Record "param = matched date text" for reporting.
                        'text': k + ' = ' + m.group()
                    }
                    self.data['infobox'].append(d)
                    # pywikibot.output('\03{green}Bingó! ' + d['text'] + '\03{default}')
    # Removal (must be repeated for nested templates): each pass strips one
    # nesting level, so loop until no template markup remains.
    while TEMP_REGEX.search(introtext):
        introtext = TEMP_REGEX.sub('', introtext)
    return introtext
def procesPage(self, page):
    """Proces a single page: harvest template fields into Wikidata claims.

    Looks up the page's Wikidata item, scans the page's templates for the
    configured template titles, and adds one claim per mapped field that is
    not already present on the item.

    @param page: pywikibot.Page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # Clean up template: normalise via the Template namespace title.
            template = pywikibot.Page(page.site, template,
                                      ns=10).title(withNamespace=False)
            # We found the template we were looking for
            if template in self.templateTitles:
                for field, value in fielddict.items():
                    field = field.strip()
                    value = value.strip()
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo, self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % claim.getID())
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            if claim.getType() == 'wikibase-item':
                                # Try to extract a valid page
                                match = re.search(pywikibot.link_regex, value)
                                if not match:
                                    # BUGFIX: without a wikilink there is no
                                    # target; addClaim below would fail, so
                                    # skip this field.
                                    continue
                                try:
                                    link = pywikibot.Link(match.group(1))
                                    linkedPage = pywikibot.Page(link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    claim.setTarget(linkedItem)
                                except pywikibot.exceptions.NoPage:
                                    # BUGFIX: report the link text; linkedItem
                                    # may be unbound when NoPage is raised.
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % match.group(1))
                                    continue
                            elif claim.getType() == 'string':
                                claim.setTarget(value.strip())
                            else:
                                # BUGFIX: was a Python 2 `print` statement;
                                # use pywikibot.output like the rest of the
                                # function (also Python 3 compatible).
                                pywikibot.output(
                                    "%s is not a supported datatype."
                                    % claim.getType())
                                continue
                            pywikibot.output('Adding %s --> %s'
                                             % (claim.getID(), claim.getTarget()))
                            item.addClaim(claim)
                            # A generator might yield pages from multiple sites
                            source = self.getSource(page.site.language())
                            if source:
                                claim.addSource(source, bot=True)
def procesPage(self, page):
    """Process a single page/item.

    Compat-era variant: uses pywikibot.DataPage and item.editclaim instead
    of ItemPage/Claim.  Harvests fields of self.templateTitle into claims.

    @param page: wiki page to harvest.
    """
    item = pywikibot.DataPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            # (underscores and spaces in template names are equivalent).
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        # (self.fields maps field name -> property id).
                        claim = self.fields[field]
                        if claim in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim, ))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            match = re.search(
                                re.compile(
                                    r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'
                                ), value)
                            if match:
                                try:
                                    link = match.group(1)
                                    linkedPage = pywikibot.Page(
                                        self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget(
                                        )
                                    linkedItem = pywikibot.DataPage(
                                        linkedPage)
                                    pywikibot.output(
                                        'Adding %s --> %s' %
                                        (claim, linkedItem.getID()))
                                    # NOTE(review): self.site is called here
                                    # (`self.site()`) but used as a plain
                                    # attribute above — confirm which is
                                    # correct for this class.
                                    refs = self.setSource(
                                        self.site().language())
                                    if refs:
                                        item.editclaim(str(claim),
                                                       linkedItem.getID(),
                                                       refs=set(refs))
                                    else:
                                        item.editclaim(
                                            str(claim), linkedItem.getID())
                                except pywikibot.NoPage:
                                    # NOTE(review): linkedItem may be unbound
                                    # if NoPage is raised before it is
                                    # assigned — potential NameError.
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % linkedItem.title())
def procesPage(self, page):
    """Proces a single page: harvest template fields into item claims.

    Compat-era variant (DataPage/editclaim).  Scans self.templateTitle on
    the page and adds one claim per mapped field not already on the item.

    @param page: wiki page to harvest.
    """
    item = pywikibot.DataPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = self.fields[field]
                        if claim in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim,))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            match = re.search(
                                re.compile(
                                    r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'),
                                value)
                            if match:
                                try:
                                    link = match.group(1)
                                    linkedPage = pywikibot.Page(self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    linkedItem = pywikibot.DataPage(linkedPage)
                                    pywikibot.output('Adding %s --> %s'
                                                     % (claim,
                                                        linkedItem.getID()))
                                    # BUGFIX: setSource was evaluated twice
                                    # (once in the condition, once for refs);
                                    # evaluate once and reuse.
                                    # NOTE(review): self.site is called here
                                    # (`self.site()`) but used as an attribute
                                    # above — confirm which is correct.
                                    source = self.setSource(
                                        self.site().language())
                                    if source:
                                        item.editclaim(str(claim),
                                                       linkedItem.getID(),
                                                       refs={source})
                                    else:
                                        item.editclaim(str(claim),
                                                       linkedItem.getID())
                                except pywikibot.NoPage:
                                    # BUGFIX: report the link text; linkedItem
                                    # may be unbound when NoPage is raised.
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % link)
def removeTopTemplates(self, text):
    """Drop the first template if it sits at the very top of *text*.

    Returns *text* (stripped) with the leading template removed when one of
    the page's templates matches at position 0; otherwise the stripped text
    unchanged.
    """
    stripped = text.strip()
    for tpl_name, tpl_params in pywikibot.extract_templates_and_params(stripped):
        pattern = self.getRegexForTemplate(tpl_name, tpl_params)
        for candidate in re.findall(pattern, stripped):
            # Only a template that the text actually begins with counts.
            if stripped.startswith(candidate):
                return stripped[len(candidate):].strip()
    return stripped
def cosmeticChanges(self, sectionText):
    """Substitute configured templates in a section.

    Rewrites every occurrence of a template listed in
    archiveConfig.templatesToSubstitute from '{{Name...' to
    '{{subst:Name...' and returns the stripped, rewritten section text.
    """
    result = sectionText.strip()
    for tpl, params in pywikibot.extract_templates_and_params(result):
        if tpl not in archiveConfig.templatesToSubstitute:
            continue
        pattern = self.getRegexForTemplate(tpl, params)
        for occurrence in re.findall(pattern, result):
            # Keep everything after the opening '{{' and prepend 'subst:'.
            result = result.replace(occurrence, u'{{subst:' + occurrence[2:])
    return result
def processCreatorTemplate(name, conf):
    """Resolve a Creator page and format its author if the occupation is valid.

    Follows redirects from the Creator page *name*, reads its {{Creator}}
    template, and — when the Occupation parameter contains one of
    conf['validOccupations'] — returns "Author (occupation label)".

    @param name: title of the Creator page.
    @param conf: configuration dict with a 'validOccupations' mapping of
        occupation substring -> display label.
    @return: formatted author string, or u"" when the page is missing or no
        valid occupation is found.
    """
    trace = Trace(sys._getframe().f_code.co_name)
    site = pywikibot.Site()
    creator = pywikibot.Page(site, name)
    # BUGFIX: idiomatic existence check instead of `== False`.
    if not creator.exists():
        return u""
    while creator.isRedirectPage():
        creator = creator.getRedirectTarget()
    tls = pywikibot.extract_templates_and_params(creator.get())
    for (template, params) in tls:
        # BUGFIX: removed stray Python 2 debug `print params` statement.
        if template != u"Creator":
            continue
        # BUGFIX: tolerate Creator templates without an Occupation parameter
        # instead of raising KeyError.
        occupation = params.get(u"Occupation", u"")
        for valid in conf['validOccupations']:
            if valid in occupation:
                # print occupation
                return (formatAuthor(name) + u" ("
                        + conf['validOccupations'][valid] + u")")
    return u""
def procesPage(self, page):
    """Proces a single page: harvest self.templateTitle fields into claims.

    Adds one claim per mapped field that is not already present on the
    page's Wikidata item; sources each new claim with self.source if set.

    @param page: pywikibot.Page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo, self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim.getID(),))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            match = re.search(pywikibot.link_regex, value)
                            if match:
                                try:
                                    link = pywikibot.Link(match.group(1))
                                    linkedPage = pywikibot.Page(link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    claim.setTarget(linkedItem)
                                    pywikibot.output(
                                        'Adding %s --> %s'
                                        % (claim.getID(),
                                           claim.getTarget().getID()))
                                    item.addClaim(claim)
                                    if self.source:
                                        claim.addSource(self.source, bot=True)
                                except pywikibot.exceptions.NoPage:
                                    # BUGFIX: report the link text; linkedItem
                                    # may be unbound when NoPage is raised
                                    # (e.g. from isRedirectPage), which would
                                    # have caused a NameError here.
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % (match.group(1),))
def procesPage(self, site, page):
    """Proces a single page.

    Harvests fields of self.templateTitle into claims, with extra handling:
    - follows template redirects (strips a 9-char namespace prefix — locale
      specific, TODO confirm);
    - parses date values for property P585 into WbTime;
    - enforces self.valuesConstraints and, for P541, an instance-of (P31)
      constraint requiring Q4164871.

    @param site: site whose data repository receives the claims.
    @param page: wiki page to harvest.
    """
    pywikibot.output('Processing %s' % page)
    try:
        item = pywikibot.ItemPage.fromPage(page)
    except pywikibot.exceptions.NoPage:
        pywikibot.output(u'No wikidata for: %s ' % page)
        return
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        # pywikibot.output(u'Templates: %s' % templates)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            linkedTemplate = pywikibot.Page(self.site, template, ns=10)
            try:
                if linkedTemplate.isRedirectPage():
                    template2 = linkedTemplate.getRedirectTarget().title()
                    pywikibot.output(
                        u'Template redirection from %s to %s'
                        % (template, template2))
                    # Drop the namespace prefix ("Template:" is 9 chars in
                    # some locales) — assumes a fixed-length prefix; TODO
                    # confirm for this wiki's language.
                    template = template2[9:]
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.output("[[%s]] contains illegal char(s)" % template)
            if template.replace(u'_', u' ') == self.templateTitle:
                # pywikibot.output(u'Template: %s' % template)
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    field = field.strip()
                    # pywikibot.output(' field <%s>' % field)
                    # pywikibot.output(' self.fields %s' % (field in self.fields))
                    if (value != "") and (field in self.fields):
                        # Check if the property isn't already set.
                        # self.fields values look like "[[P123]]"; claim[2:-2]
                        # strips the brackets to get the bare property id.
                        claim = self.fields[field]
                        if claim[2:-2] in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim,))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            # NOTE(review): `match` is computed but never used
                            # below (`if True:` makes it dead) — the link is
                            # extracted manually from `value` instead.
                            match = re.search(re.compile(
                                r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'),
                                value)
                            # pywikibot.output(u' cherche %s ' % value)
                            if True:
                                try:
                                    value = value.strip()
                                    # Date treatment: P585 (point in time)
                                    # values are parsed as dates, not links.
                                    if claim == "[[P585]]" and value != "":
                                        try:
                                            pywikibot.output(u' Date: <%s> ' % value)
                                            laDate = parser.parse(value)
                                            pywikibot.output(u' Date: <%s> ' % laDate)
                                            # TODO: could self.repo be used
                                            # instead of refetching here?
                                            repo = site.data_repository()
                                            theClaim = pywikibot.Claim(repo, claim[2:-2])
                                            pywikibot.output('Adding %s --> %s'
                                                             % (claim, laDate))
                                            laDate = pywikibot.WbTime(
                                                year=laDate.year,
                                                month=laDate.month,
                                                day=laDate.day)
                                            theClaim.setTarget(laDate)
                                            item.addClaim(theClaim)
                                            if self.source:
                                                theClaim.addSource(self.source,
                                                                   bot=True)
                                        except ValueError:
                                            pywikibot.output(
                                                u' Impossible to parse this date : %s '
                                                % value)
                                            continue
                                        # Date handled; never fall through to
                                        # the wikilink branch below.
                                        continue
                                    # Unwrap "[[Target]]" to its bare title.
                                    if value[:2] == "[[" and value[-2:] == "]]":
                                        link = value[2:-2]
                                    else:
                                        link = value
                                    # pywikibot.output(u' link: <%s> ' % link)
                                    if link == "":
                                        continue
                                    # link = match.group(1)
                                    linkedPage = pywikibot.Page(self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    # pywikibot.output(u' linkedPage %s ' % linkedPage)
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    linkedItem.get()
                                    if not linkedItem.exists():
                                        pywikibot.output(
                                            '%s doesn\'t have a wikidata item :'
                                            % linkedPage)
                                        continue
                                    # Value constraints treatment: only allow
                                    # whitelisted target items per property.
                                    if (claim in self.valuesConstraints) and \
                                            (linkedItem.getID() not in
                                             self.valuesConstraints[claim]):
                                        pywikibot.output(
                                            u'The value of the property %s is %s does not respect the constraint %s'
                                            % (claim, linkedItem.title(),
                                               self.valuesConstraints[claim]))
                                        continue
                                    # Instance-of constraint treatment for
                                    # P541: target's P31 must be Q4164871.
                                    if claim == "[[P541]]":
                                        # get() must be called before reading
                                        # any item data.
                                        linkedItem.get()
                                        if linkedItem.claims and ('P31' in linkedItem.claims):
                                            if linkedItem.claims['P31'][0].getTarget().title(withNamespace=False) != "Q4164871":
                                                pywikibot.output(
                                                    u'The P31 value is not Q4164871 but %s '
                                                    % linkedItem.claims['P31'][0].getTarget().title(withNamespace=True))
                                                continue
                                        else:
                                            pywikibot.output(u'The P31 value is missing ')
                                            continue
                                    # pywikibot.output(u' linkedItem %s ' % linkedItem)
                                    # pywikibot.output(u' linkedItem.getID() %s ' % linkedItem.title()[1:])
                                    pywikibot.output('Adding %s --> %s'
                                                     % (claim, linkedItem.getID()))
                                    # TODO: could self.repo be used instead?
                                    repo = site.data_repository()
                                    theClaim = pywikibot.Claim(repo, claim[2:-2])
                                    theClaim.setTarget(linkedItem)
                                    item.addClaim(theClaim)
                                    if self.source:
                                        theClaim.addSource(self.source, bot=True)
                                except pywikibot.NoPage:
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % linkedPage)
                                except pywikibot.exceptions.InvalidTitle:
                                    pywikibot.output(
                                        "[[%s]] is an invalid title" % link)
def processPage(self, page):
    """Process a single page.

    Harvests configured template fields into claims on the page's Wikidata
    item.  Supports wikibase-item, string and commonsMedia datatypes, skips
    pages whose item already has claims for every mapped property, and
    sources new claims via self.getSource(page.site).

    @param page: pywikibot.Page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
        return
    item.get()
    # Fast exit: nothing to do when every mapped property already has a claim.
    if set(self.fields.values()) <= set(item.claims.keys()):
        pywikibot.output(
            u'%s item %s has claims for all properties. Skipping'
            % (page, item.title()))
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # Clean up template: normalise via the Template namespace title.
            try:
                template = pywikibot.Page(page.site, template,
                                          ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle as e:
                pywikibot.error(
                    u"Failed parsing template; '%s' should be the template name."
                    % template)
                continue
            # We found the template we were looking for
            if template in self.templateTitles:
                for field, value in fielddict.items():
                    field = field.strip()
                    value = value.strip()
                    if not field or not value:
                        continue
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo, self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % claim.getID())
                            # TODO: Implement smarter approach to merging
                            # harvested values with existing claims esp.
                            # without overwriting humans unintentionally.
                        else:
                            if claim.type == 'wikibase-item':
                                # Try to extract a valid page
                                match = re.search(pywikibot.link_regex, value)
                                if not match:
                                    pywikibot.output(
                                        u'%s field %s value %s isnt a wikilink. Skipping'
                                        % (claim.getID(), field, value))
                                    continue
                                link_text = match.group(1)
                                linked_item = self._template_link_target(
                                    item, link_text)
                                if not linked_item:
                                    continue
                                claim.setTarget(linked_item)
                            elif claim.type == 'string':
                                claim.setTarget(value.strip())
                            elif claim.type == 'commonsMedia':
                                # Media values live on Commons; resolve the
                                # file (namespace 6) there, following
                                # redirects.
                                commonssite = pywikibot.Site("commons",
                                                             "commons")
                                imagelink = pywikibot.Link(
                                    value, source=commonssite,
                                    defaultNamespace=6)
                                image = pywikibot.ImagePage(imagelink)
                                if image.isRedirectPage():
                                    image = pywikibot.ImagePage(
                                        image.getRedirectTarget())
                                if not image.exists():
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % (image.title(),))
                                    continue
                                claim.setTarget(image)
                            else:
                                pywikibot.output(
                                    "%s is not a supported datatype."
                                    % claim.type)
                                continue
                            pywikibot.output(
                                'Adding %s --> %s'
                                % (claim.getID(), claim.getTarget()))
                            item.addClaim(claim)
                            # A generator might yield pages from multiple sites
                            source = self.getSource(page.site)
                            if source:
                                claim.addSource(source, bot=True)
def procesPage(self, site, page):
    """Proces a single page.

    Harvests fields of self.templateTitle into claims on the page's item,
    following template redirects and enforcing self.valuesConstraints.

    @param site: site whose data repository receives the claims.
    @param page: wiki page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            linkedTemplate = pywikibot.Page(self.site, template, ns=10)
            if linkedTemplate.isRedirectPage():
                template2 = linkedTemplate.getRedirectTarget().title()
                pywikibot.output(
                    u'Template redirection from %s to %s'
                    % (template, template2))
                # Drop a 9-char namespace prefix — locale specific; TODO
                # confirm for this wiki's language.
                template = template2[9:]
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    # pywikibot.output(' attribut %s' % field)
                    if (value != "") and (field in self.fields):
                        # Check if the property isn't already set.
                        # self.fields values look like "[[P123]]"; claim[2:-2]
                        # strips the brackets to the bare property id.
                        claim = self.fields[field]
                        if claim[2:-2] in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim,))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            # NOTE(review): `match` is never used below
                            # (`if True:` makes it dead) — the link is taken
                            # from `value` directly.
                            match = re.search(re.compile(
                                r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'),
                                value)
                            pywikibot.output(u' cherche %s ' % value)
                            if True:
                                try:
                                    # (Commented-out P585 date-parsing
                                    # experiment removed; see the sibling
                                    # variant that implements it.)
                                    # Unwrap "[[Target]]" to its bare title.
                                    if value[:2] == "[[" and value[-2:] == "]]":
                                        link = value[2:-2]
                                    else:
                                        link = value
                                    # link = match.group(1)
                                    linkedPage = pywikibot.Page(self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    pywikibot.output(u' linkedPage %s ' % linkedPage)
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    linkedItem.get()
                                    if not linkedItem.exists():
                                        pywikibot.output(
                                            '%s doesn\'t have a wikidata item :'
                                            % linkedPage)
                                        continue
                                    # Value constraints treatment: only allow
                                    # whitelisted target items per property.
                                    if (claim in self.valuesConstraints) and \
                                            (linkedItem.getID() not in
                                             self.valuesConstraints[claim]):
                                        pywikibot.output(
                                            u'The value of the property %s is %s does not respect the constraint %s'
                                            % (claim, linkedItem.title(),
                                               self.valuesConstraints[claim]))
                                        continue
                                    # pywikibot.output(u' linkedItem %s ' % linkedItem)
                                    pywikibot.output(u' linkedItem.getID() %s '
                                                     % linkedItem.title()[1:])
                                    pywikibot.output('Adding %s --> %s'
                                                     % (claim, linkedItem.getID()))
                                    # TODO: could self.repo be used instead?
                                    repo = site.data_repository()
                                    theClaim = pywikibot.Claim(repo, claim[2:-2])
                                    theClaim.setTarget(linkedItem)
                                    item.addClaim(theClaim)
                                    if self.source:
                                        theClaim.addSource(self.source, bot=True)
                                except pywikibot.NoPage:
                                    # NOTE(review): linkedItem may be unbound
                                    # if NoPage is raised before it is
                                    # assigned — potential NameError.
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % linkedItem.title())
                                except pywikibot.exceptions.InvalidTitle:
                                    pywikibot.output(
                                        "[[%s]] is an invalid title" % link)
def procesPage(self, page):
    """Proces a single page: harvest self.templateTitle fields into claims.

    Supports wikibase-item and string datatypes; sources each new claim
    with self.source when configured.

    @param page: pywikibot.Page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo, self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % claim.getID())
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            if claim.getType() == 'wikibase-item':
                                # Try to extract a valid page
                                match = re.search(pywikibot.link_regex, value)
                                if not match:
                                    # BUGFIX: without a wikilink there is no
                                    # target; addClaim below would fail, so
                                    # skip this field.
                                    continue
                                try:
                                    link = pywikibot.Link(match.group(1))
                                    linkedPage = pywikibot.Page(link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    claim.setTarget(linkedItem)
                                except pywikibot.exceptions.NoPage:
                                    # BUGFIX: report the link text; linkedItem
                                    # may be unbound when NoPage is raised.
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % (match.group(1),))
                                    continue
                            elif claim.getType() == 'string':
                                claim.setTarget(value.strip())
                            else:
                                # BUGFIX: was a Python 2 `print` statement;
                                # use pywikibot.output like the rest of the
                                # function (also Python 3 compatible).
                                pywikibot.output(
                                    "%s is not a supported datatype."
                                    % claim.getType())
                                continue
                            pywikibot.output(
                                'Adding %s --> %s'
                                % (claim.getID(), claim.getTarget()))
                            item.addClaim(claim)
                            if self.source:
                                claim.addSource(self.source, bot=True)
def procesPage(self, site, page):
    """Proces a single page.

    Variant harvesting fields of self.templateTitle into claims, following
    template redirects and enforcing self.valuesConstraints whitelists.

    @param site: site whose data repository receives the claims.
    @param page: wiki page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            linkedTemplate = pywikibot.Page(self.site, template, ns=10)
            if linkedTemplate.isRedirectPage():
                template2 = linkedTemplate.getRedirectTarget().title()
                pywikibot.output(u'Template redirection from %s to %s'
                                 % (template, template2))
                # Drop a 9-char namespace prefix — locale specific; TODO
                # confirm for this wiki's language.
                template = template2[9:]
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    # pywikibot.output(' attribut %s' % field)
                    if (value != "") and (field in self.fields):
                        # Check if the property isn't already set.
                        # self.fields values look like "[[P123]]"; claim[2:-2]
                        # strips the brackets to the bare property id.
                        claim = self.fields[field]
                        if claim[2:-2] in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim, ))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            # NOTE(review): `match` is never used below
                            # (`if True:` makes it dead) — the link is taken
                            # from `value` directly.
                            match = re.search(
                                re.compile(
                                    r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'
                                ), value)
                            pywikibot.output(u' cherche %s ' % value)
                            if True:
                                try:
                                    # (Commented-out P585 date-parsing
                                    # experiment removed; see the sibling
                                    # variant that implements it.)
                                    # Unwrap "[[Target]]" to its bare title.
                                    if value[:2] == "[[" and value[
                                            -2:] == "]]":
                                        link = value[2:-2]
                                    else:
                                        link = value
                                    # link = match.group(1)
                                    linkedPage = pywikibot.Page(
                                        self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget(
                                        )
                                    pywikibot.output(
                                        u' linkedPage %s ' % linkedPage)
                                    linkedItem = pywikibot.ItemPage.fromPage(
                                        linkedPage)
                                    linkedItem.get()
                                    if not linkedItem.exists():
                                        pywikibot.output(
                                            '%s doesn\'t have a wikidata item :'
                                            % linkedPage)
                                        continue
                                    # Value constraints treatment: only allow
                                    # whitelisted target items per property.
                                    if (claim in self.valuesConstraints
                                            ) and (linkedItem.getID()
                                                   not in self.
                                                   valuesConstraints[claim]):
                                        pywikibot.output(
                                            u'The value of the property %s is %s does not respect the constraint %s'
                                            % (claim, linkedItem.title(),
                                               self.valuesConstraints[claim])
                                        )
                                        continue
                                    # pywikibot.output(u' linkedItem %s ' % linkedItem)
                                    pywikibot.output(
                                        u' linkedItem.getID() %s '
                                        % linkedItem.title()[1:])
                                    pywikibot.output(
                                        'Adding %s --> %s'
                                        % (claim, linkedItem.getID()))
                                    # TODO: could self.repo be used instead?
                                    repo = site.data_repository()
                                    theClaim = pywikibot.Claim(
                                        repo, claim[2:-2])
                                    theClaim.setTarget(linkedItem)
                                    item.addClaim(theClaim)
                                    if self.source:
                                        theClaim.addSource(self.source,
                                                           bot=True)
                                except pywikibot.NoPage:
                                    # NOTE(review): linkedItem may be unbound
                                    # if NoPage is raised before it is
                                    # assigned — potential NameError.
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % linkedItem.title())
                                except pywikibot.exceptions.InvalidTitle:
                                    pywikibot.output(
                                        "[[%s]] is an invalid title" % link)
def processPage(self, page):
    """Process a single page.

    Harvests configured template fields into claims on the page's Wikidata
    item.  Supports wikibase-item, string and commonsMedia datatypes, skips
    pages whose item already has claims for every mapped property, and
    sources new claims via self.getSource(page.site).

    @param page: pywikibot.Page to harvest.
    """
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        # TODO FIXME: We should provide an option to create the page
        return
    item.get()
    # Fast exit: nothing to do when every mapped property already has a claim.
    if set(self.fields.values()) <= set(item.claims.keys()):
        pywikibot.output(
            u'%s item %s has claims for all properties. Skipping'
            % (page, item.title()))
    else:
        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # Clean up template: normalise via the Template namespace title.
            try:
                template = pywikibot.Page(page.site, template,
                                          ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle as e:
                pywikibot.error(
                    u"Failed parsing template; '%s' should be the template name."
                    % template)
                continue
            # We found the template we were looking for
            if template in self.templateTitles:
                for field, value in fielddict.items():
                    field = field.strip()
                    value = value.strip()
                    if not field or not value:
                        continue
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = pywikibot.Claim(self.repo, self.fields[field])
                        if claim.getID() in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % claim.getID())
                            # TODO: Implement smarter approach to merging
                            # harvested values with existing claims esp.
                            # without overwriting humans unintentionally.
                        else:
                            if claim.type == 'wikibase-item':
                                # Try to extract a valid page
                                match = re.search(pywikibot.link_regex, value)
                                if not match:
                                    pywikibot.output(
                                        u'%s field %s value %s isnt a wikilink. Skipping'
                                        % (claim.getID(), field, value))
                                    continue
                                link_text = match.group(1)
                                linked_item = self._template_link_target(
                                    item, link_text)
                                if not linked_item:
                                    continue
                                claim.setTarget(linked_item)
                            elif claim.type == 'string':
                                claim.setTarget(value.strip())
                            elif claim.type == 'commonsMedia':
                                # Media values live on Commons; resolve the
                                # file (namespace 6) there, following
                                # redirects.
                                commonssite = pywikibot.Site(
                                    "commons", "commons")
                                imagelink = pywikibot.Link(
                                    value, source=commonssite,
                                    defaultNamespace=6)
                                image = pywikibot.ImagePage(imagelink)
                                if image.isRedirectPage():
                                    image = pywikibot.ImagePage(
                                        image.getRedirectTarget())
                                if not image.exists():
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % (image.title(), ))
                                    continue
                                claim.setTarget(image)
                            else:
                                pywikibot.output(
                                    "%s is not a supported datatype."
                                    % claim.type)
                                continue
                            pywikibot.output(
                                'Adding %s --> %s'
                                % (claim.getID(), claim.getTarget()))
                            item.addClaim(claim)
                            # A generator might yield pages from multiple sites
                            source = self.getSource(page.site)
                            if source:
                                claim.addSource(source, bot=True)