def test_methods(self):
    """Test basic Page methods on a Flow page."""
    site = self.get_site()
    page = pywikibot.Page(site, u'Talk:Sandbox')
    self.assertEqual(page.exists(), True)
    page.get()
    self.assertEqual(page.isRedirectPage(), False)
def setSource(self, lang):
    """Set the source claim (P143) for the given language's Wikipedia."""
    page = pywikibot.Page(self.repo, 'Wikidata:List of wikis/python')
    source_values = json.loads(page.get())
    source_values = source_values['wikipedia']
    for langue in source_values:
        source_values[langue] = pywikibot.ItemPage(self.repo, source_values[langue])
    if lang in source_values:
        self.source = pywikibot.Claim(self.repo, 'p143')
        self.source.setTarget(source_values.get(lang))
def analyse_une_section(self, page, match_debut):
    # TODO: handle, or at least avoid, problems caused by duplicate titles.
    text = page.get()
    # Keep only the text containing the requests to analyse,
    # since several sections share the same page.
    if match_debut == u'Requêtes en cours d\'examen':
        text = text[0:text.index(u"= Requêtes à traiter =")]
    elif match_debut == u'Requêtes à traiter':
        text = text[text.index(u"= Requêtes à traiter ="):]
    titres = complements.extract_titles(
        text, beginning=None, match_title=self.match_titre_requete)
    sections = complements.extract_sections(text, titres)
    return {'titres': titres, 'sections': sections}
def main():
    locale.setlocale(locale.LC_ALL, 'fr_FR.utf8')
    site = pywikibot.Site()
    now = datetime.now()
    page = pywikibot.Page(site, u"Wikipédia:Le Bistro/%i %s"
                          % (int(now.strftime("%d")),
                             now.strftime("%B %Y").decode('utf-8')))
    text = page.get()
    text_part = text[text.index(u"\n== Aujourd'hui, dans Wikipédia =="):]
    text_part_old = text_part
    text_part = text_part.replace(u"Actuellement, Wikipédia compte",
                                  u"Le ~~~~~, Wikipédia comptait")
    text_part = text_part.replace(u"{{NUMBEROFARTICLES}}",
                                  u"{{subst:NUMBEROFARTICLES}}")
    text_part = text_part.replace(
        u"{{Nombre d'articles de qualité}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Article de qualité|R}}-3}}}}")
    text_part = text_part.replace(
        u"{{Nombre de bons articles}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Bon article|R}}-3}}}}")
    text_part = text_part.replace(
        u"{{Nombre d'articles géolocalisés sur Terre}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Article géolocalisé sur Terre|R}}}}}}")
    text = text.replace(text_part_old, text_part)
    page.put(text, comment=u"Statistiques fixes dans la section [[#Aujourd.27hui.2C_dans_Wikip.C3.A9dia|#Aujourd'hui, dans Wikipédia]]")
def main():
    locale.setlocale(locale.LC_ALL, 'fr_FR.utf8')
    site = pywikibot.Site()
    now = datetime.now()
    page = pywikibot.Page(
        site, u"Wikipédia:Le Bistro/%i %s" % (int(now.strftime("%d")),
                                              now.strftime("%B %Y").decode('utf-8')))
    text = page.get()
    text_part = text[text.index(u"\n== Aujourd'hui, dans Wikipédia =="):]
    text_part_old = text_part
    text_part = text_part.replace(u"Actuellement, Wikipédia compte",
                                  u"Le ~~~~~, Wikipédia comptait")
    text_part = text_part.replace(u"{{NUMBEROFARTICLES}}",
                                  u"{{subst:NUMBEROFARTICLES}}")
    text_part = text_part.replace(
        u"{{Nombre d'articles de qualité}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Article de qualité|R}}-3}}}}")
    text_part = text_part.replace(
        u"{{Nombre de bons articles}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Bon article|R}}-3}}}}")
    text_part = text_part.replace(
        u"{{Nombre d'articles géolocalisés sur Terre}}",
        u"{{subst:formatnum:{{subst:#expr:{{subst:PAGESINCATEGORY:Article géolocalisé sur Terre|R}}}}}}")
    text_part = text_part.replace(u"{{Wikipédia:Le Bistro/Labels}}",
                                  u"{{subst:Wikipédia:Le Bistro/Labels}}")
    text_part = text_part.replace(u"{{Wikipédia:Le Bistro/Test}}",
                                  u"{{subst:Wikipédia:Le Bistro/Test}}")
    text = text.replace(text_part_old, text_part)
    page.put(
        text,
        comment=u"Bot: Substitution des modèles afin de rendre fixes les statistiques fixes dans la section [[#Aujourd.27hui.2C_dans_Wikip.C3.A9dia|#Aujourd'hui, dans Wikipédia]]")
def procesPage(self, site, page):
    """Process a single page."""
    pywikibot.output('Processing %s' % page)
    try:
        item = pywikibot.ItemPage.fromPage(page)
    except pywikibot.exceptions.NoPage:
        pywikibot.output(u'No wikidata for: %s ' % page)
        return
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        # pywikibot.output(u'Templates: %s' % templates)
        for (template, fielddict) in templates:
            # Check whether this is the template we are looking for
            linkedTemplate = pywikibot.Page(self.site, template, ns=10)
            try:
                if linkedTemplate.isRedirectPage():
                    template2 = linkedTemplate.getRedirectTarget().title()
                    pywikibot.output(
                        u'Template redirection from %s to %s' % (template, template2))
                    template = template2[9:]
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.output("[[%s]] contains illegal char(s)" % template)
            if template.replace(u'_', u' ') == self.templateTitle:
                # pywikibot.output(u'Template: %s' % template)
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    field = field.strip()
                    # pywikibot.output(' field <%s>' % field)
                    # pywikibot.output(' self.fields %s' % (field in self.fields))
                    if (value != "") and (field in self.fields):
                        # Check if the property isn't already set
                        # pywikibot.output(' attribute %s' % field)
                        claim = self.fields[field]
                        if claim[2:-2] in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping' % (claim,))
                            # TODO FIXME: This is a very crude way of dupe checking
                        else:
                            # Try to extract a valid page
                            match = re.search(re.compile(
                                r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'), value)
                            # pywikibot.output(u' searching %s ' % value)
                            if True:
                                try:
                                    value = value.strip()
                                    # Date treatment
                                    if claim == "[[P585]]" and value != "":
                                        try:
                                            pywikibot.output(u' Date: <%s> ' % value)
                                            laDate = parser.parse(value)
                                            pywikibot.output(u' Date: <%s> ' % laDate)
                                            repo = site.data_repository()  # needed? does self.repo already exist?
                                            theClaim = pywikibot.Claim(repo, claim[2:-2])
                                            # pywikibot.output(u' Year: %s, Month: %s, Day: %s ' % laDateText[0:3], laDateText[5:6], laDateText[7:8])
                                            pywikibot.output('Adding %s --> %s' % (claim, laDate))
                                            laDate = pywikibot.WbTime(
                                                year=laDate.year, month=laDate.month, day=laDate.day)
                                            theClaim.setTarget(laDate)
                                            item.addClaim(theClaim)
                                            if self.source:
                                                theClaim.addSource(self.source, bot=True)
                                        except ValueError:
                                            pywikibot.output(
                                                u' Impossible to parse this date : %s ' % value)
                                            continue
                                        continue
                                    if value[:2] == "[[" and value[-2:] == "]]":
                                        link = value[2:-2]
                                    else:
                                        link = value
                                    # pywikibot.output(u' link: <%s> ' % link)
                                    if link == "":
                                        continue
                                    # link = match.group(1)
                                    linkedPage = pywikibot.Page(self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    # pywikibot.output(u' linkedPage %s ' % linkedPage)
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    linkedItem.get()
                                    if not linkedItem.exists():
                                        pywikibot.output(
                                            '%s doesn\'t have a wikidata item :' % linkedPage)
                                        continue
                                    # Value constraints treatment
                                    if (claim in self.valuesConstraints) and \
                                            (linkedItem.getID() not in self.valuesConstraints[claim]):
                                        pywikibot.output(
                                            u'The value of the property %s is %s, which does not respect the constraint %s'
                                            % (claim, linkedItem.title(), self.valuesConstraints[claim]))
                                        continue
                                    # Instance-of constraint treatment
                                    if claim == "[[P541]]":
                                        linkedItem.get()  # you need to call it to access any data.
                                        if linkedItem.claims and ('P31' in linkedItem.claims):
                                            if linkedItem.claims['P31'][0].getTarget().title(withNamespace=False) != "Q4164871":
                                                pywikibot.output(
                                                    u'The P31 value is not Q4164871 but %s '
                                                    % linkedItem.claims['P31'][0].getTarget().title(withNamespace=True))
                                                continue
                                        else:
                                            pywikibot.output(u'The P31 value is missing ')
                                            continue
                                    # pywikibot.output(u' linkedItem %s ' % linkedItem)
                                    # pywikibot.output(u' linkedItem.getID() %s ' % linkedItem.title()[1:])
                                    pywikibot.output('Adding %s --> %s' % (claim, linkedItem.getID()))
                                    repo = site.data_repository()  # needed? does self.repo already exist?
                                    theClaim = pywikibot.Claim(repo, claim[2:-2])
                                    theClaim.setTarget(linkedItem)
                                    item.addClaim(theClaim)
                                    if self.source:
                                        theClaim.addSource(self.source, bot=True)
                                except pywikibot.NoPage:
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it" % linkedPage)
                                except pywikibot.exceptions.InvalidTitle:
                                    pywikibot.output("[[%s]] is an invalid title" % link)
def procesPage(self, site, page):
    """Process a single page."""
    item = pywikibot.ItemPage.fromPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :' % page)
        # TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # Check whether this is the template we are looking for
            linkedTemplate = pywikibot.Page(self.site, template, ns=10)
            if linkedTemplate.isRedirectPage():
                template2 = linkedTemplate.getRedirectTarget().title()
                pywikibot.output(
                    u'Template redirection from %s to %s' % (template, template2))
                template = template2[9:]
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    # pywikibot.output(' attribute %s' % field)
                    if (value != "") and (field in self.fields):
                        # Check if the property isn't already set
                        claim = self.fields[field]
                        if claim[2:-2] in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping' % (claim,))
                            # TODO FIXME: This is a very crude way of dupe checking
                        else:
                            # Try to extract a valid page
                            match = re.search(re.compile(
                                r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'), value)
                            pywikibot.output(u' searching %s ' % value)
                            if True:
                                try:
                                    # Date treatment (disabled)
                                    # if claim == "[[P585]]":
                                    #     try:
                                    #         pywikibot.output(u' Date: %s ' % value)
                                    #         pywikibot.output(u' Date: %s ' % parser.parse(value))
                                    #         theClaim = pywikibot.Claim(repo, claim[2:-2])
                                    #         theClaim.setTarget(parser.parse(value))
                                    #         item.addClaim(theClaim)
                                    #     except ValueError:
                                    #         pywikibot.output(u' Impossible to parse this date : %s ' % value)
                                    #         continue
                                    #     continue
                                    if value[:2] == "[[" and value[-2:] == "]]":
                                        link = value[2:-2]
                                    else:
                                        link = value
                                    # link = match.group(1)
                                    linkedPage = pywikibot.Page(self.site, link)
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget()
                                    pywikibot.output(u' linkedPage %s ' % linkedPage)
                                    linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                                    linkedItem.get()
                                    if not linkedItem.exists():
                                        pywikibot.output(
                                            '%s doesn\'t have a wikidata item :' % linkedPage)
                                        continue
                                    # Value constraints treatment
                                    if (claim in self.valuesConstraints) and \
                                            (linkedItem.getID() not in self.valuesConstraints[claim]):
                                        pywikibot.output(
                                            u'The value of the property %s is %s, which does not respect the constraint %s'
                                            % (claim, linkedItem.title(), self.valuesConstraints[claim]))
                                        continue
                                    # pywikibot.output(u' linkedItem %s ' % linkedItem)
                                    pywikibot.output(u' linkedItem.getID() %s ' % linkedItem.title()[1:])
                                    pywikibot.output('Adding %s --> %s' % (claim, linkedItem.getID()))
                                    repo = site.data_repository()  # needed? does self.repo already exist?
                                    theClaim = pywikibot.Claim(repo, claim[2:-2])
                                    theClaim.setTarget(linkedItem)
                                    item.addClaim(theClaim)
                                    if self.source:
                                        theClaim.addSource(self.source, bot=True)
                                except pywikibot.NoPage:
                                    # Report linkedPage, which is always bound here (linkedItem may not be)
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it" % linkedPage)
                                except pywikibot.exceptions.InvalidTitle:
                                    pywikibot.output("[[%s]] is an invalid title" % link)