def makeList(self):
    """Collect the members of two hard-coded categories into one list.

    Side effects: stores the default site in ``self.site``, resets
    ``self.err`` to ``False`` and leaves ``self.cat`` / ``self.gen`` bound to
    'Category:Nuclear technology' and its (by then consumed) generator.

    Returns:
        list: pages of 'Category:Nuclear power' followed by the pages of
        'Category:Nuclear technology', in generator order.
    """
    self.site = pywikibot.Site()

    # First category.
    self.cat = pywikibot.Category(self.site, 'Category:Nuclear power')
    self.gen = pagegenerators.CategorizedPageGenerator(self.cat)
    self.err = False
    pages = list(self.gen)

    # Second category, appended to the same result list.
    self.cat = pywikibot.Category(self.site, 'Category:Nuclear technology')
    self.gen = pagegenerators.CategorizedPageGenerator(self.cat)
    pages.extend(self.gen)

    return pages
def getCategory(cat):
    """Return the names of the templates found in category *cat*.

    'Kategoria:Szablony odmiany' is special-cased: it is walked with
    ``recurse=1`` and its template names are returned bare.  For every other
    category the names are wrapped as ``{{name}}``.

    Args:
        cat: full category title, looked up on the module-level ``site``.

    Returns:
        list of str: one entry per category member whose namespace compares
        equal to 'Template:'.
    """
    is_special = cat == 'Kategoria:Szablony odmiany'
    category = pywikibot.Category(site, cat)
    if is_special:
        members = pagegenerators.CategorizedPageGenerator(category, recurse=1)
    else:
        members = pagegenerators.CategorizedPageGenerator(category)

    pattern = '%s' if is_special else '{{%s}}'
    return [
        pattern % member.title(withNamespace=False)
        for member in members
        if member.namespace() == 'Template:'
    ]
def result():
    """Render the articles of a portal category that carry a single portal.

    On a POST request, reads the ``portal`` form field, walks the matching
    'Catégorie:Portail:<portal>/Articles liés' category on fr.wikipedia
    (``recurse=False``) and keeps every page for which
    ``getTemplateContentCount('Portail', page)`` equals 1.

    Returns:
        The rendered "result.html" template for POST requests; ``None`` for
        any other request method.
    """
    if request.method != 'POST':
        # Non-POST requests produce no response, exactly as before.
        return None

    catname = 'Catégorie:Portail:' + request.form['portal'] + '/Articles liés'
    wikisite = pywikibot.Site('fr', u'wikipedia')
    wikicat = pywikibot.Category(wikisite, catname)

    single_portal_titles = []
    for page in pagegenerators.CategorizedPageGenerator(wikicat,
                                                        recurse=False):
        title = page.title()
        print(title)
        # Inspect the "Portail" template; the article is kept only when the
        # template has exactly one parameter.
        if getTemplateContentCount('Portail', page) == 1:
            single_portal_titles.append(title)
            print("compteur : " + str(len(single_portal_titles)))

    message = "".join([
        "Recherche récursive d'articles à un seul portail dans la Catégorie:",
        catname,
        " : ",
        str(len(single_portal_titles)),
        " article(s) concerné(s)",
    ])
    return render_template("result.html", message=message,
                           articles=single_portal_titles)
def main(site, Wikiproject, Category, Threshold):
    """Count red links for a WikiProject and publish the most requested ones.

    For every member of *Category* the counterpart page returned by
    ``toggleTalkPage()`` is inspected, and each linked page that does not
    exist is tallied by title.  The tally is written to
    'User:ProjectRequestedPagesBot/Most Requested <Wikiproject> pages'.

    Args:
        site: pywikibot site to work on.
        Wikiproject: project name, used in the report title and edit summary.
        Category: title of the category whose members are scanned.
        Threshold: forwarded unchanged to ``createPage``.
    """
    cat = pywikibot.Category(site, Category)
    redlinks = {}
    for page in pagegenerators.CategorizedPageGenerator(cat):
        article = page.toggleTalkPage()
        for link in article.linkedPages():
            if not link.exists():
                title = link.title()
                redlinks[title] = redlinks.get(title, 0) + 1

    # Writing output to page
    listpage = pywikibot.Page(
        site,
        'User:ProjectRequestedPagesBot/Most Requested ' + Wikiproject
        + ' pages')
    entries = len(redlinks)
    if entries < 1:
        listpage.put('No redlinks found', summary='No redlinks found',
                     minorEdit=False)
        return

    text = createPage(redlinks, entries, Threshold)
    # ``entries`` is an int: it must be converted before concatenation,
    # otherwise building the summary raises TypeError.
    summary = ('Adding the ' + str(entries)
               + ' most requested articles in the ' + Wikiproject + ' scope')
    listpage.put(text, summary=summary, minorEdit=False)
    # Function-call form works on both Python 2 and Python 3.
    print(text)
def get_authors_list():
    """Return the titles of all pages in 'Categoria:Autori'.

    The category is read from the module-level ``site`` through a preloading
    generator; every occurrence of 'Autore:' is removed from each title.

    Returns:
        list of str: the cleaned page titles, in generator order.
    """
    members = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(site, 'Categoria:Autori'))
    return [
        page.title().replace('Autore:', '')
        for page in pagegenerators.PreloadingGenerator(members)
    ]
def main(*args):
    """Add missing English Wikidata descriptions for en.wikipedia articles.

    Walks 'Category:Articles with short description'.  For each page the
    linked Wikidata item is loaded; when it has no (or an empty) English
    description, ``addDesc(page, item)`` is called.  Titles whose item cannot
    be loaded are appended to 'WD.txt'.

    Args:
        *args: accepted for interface compatibility; not used.
    """
    site = pywikibot.Site(u'en', u'wikipedia')
    site.login()
    cat = pywikibot.Category(site, 'Category:Articles with short description')
    gen = pagegenerators.CategorizedPageGenerator(cat)
    blacklist = {'16th Brigade (Australia)'}

    # The context manager closes the log even if the loop raises.
    with open('WD.txt', 'w+') as failed_log:
        for page in gen:
            title = page.title()
            if title in blacklist:
                continue
            try:
                # fromPage() works for any page object.
                item = pywikibot.ItemPage.fromPage(page)
                item.get()
            except Exception:
                # Best effort: record the page and move on.  Unlike a bare
                # ``except`` this lets KeyboardInterrupt/SystemExit through.
                failed_log.write("%s\n" % title)
                continue

            # ``.get`` covers both a missing and an empty description while
            # keeping addDesc() outside any exception handler, so an error
            # raised by addDesc() no longer triggers a second call.
            if not item.descriptions.get('en'):
                print(title)
                addDesc(page, item)
async def download_category(category, search_depth, limit_number_of_articles,
                            output_path):
    """Write the text of every article of a category to disk.

    Builds a page generator for *category* and awaits ``write_to_file`` once
    per page, numbering the articles from 1.

    Parameters
    ----------
    category : Category
        Category whose articles are downloaded.
    search_depth : int
        Passed as ``recurse`` to the page generator.
    limit_number_of_articles : int
        Passed as ``total`` to the page generator.
    output_path : str
        Output folder, forwarded to ``write_to_file``.
    """
    logger.info("Downloading: " + category.title())
    # Colons are replaced in the name handed to write_to_file.
    category_title = category.title().replace(":", "_")
    generated_pages = pagegenerators.CategorizedPageGenerator(
        category, recurse=search_depth, total=limit_number_of_articles)
    for article_idx, page in enumerate(tqdm(generated_pages), start=1):
        await write_to_file(page.text, category_title, article_idx,
                            output_path)
def main():
    """Main loop. Get a generator and options. Work on all images in the generator.

    Recognised local arguments: ``-onlyfilter``, ``-onlyuncat``,
    ``-hint:<wiki>`` (stored in the global ``hint_wiki``) and ``-onlyhint``
    (stored in the global ``search_wikis``).  Everything else is handed to the
    generator factory.  Without a generator, 'Category:Media needing
    categories' on Commons is used, recursively.
    """
    global search_wikis
    global hint_wiki

    only_filter = False
    only_uncat = False

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    factory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-onlyfilter':
            only_filter = True
            continue
        if arg == '-onlyuncat':
            only_uncat = True
            continue
        if arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
            continue
        if arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
            continue
        factory.handleArg(arg)

    generator = factory.getCombinedGenerator()
    if not generator:
        site = pywikibot.Site(u'commons', u'commons')
        fallback = pywikibot.Category(site,
                                      u'Category:Media needing categories')
        generator = pagegenerators.CategorizedPageGenerator(fallback,
                                                            recurse=True)

    initLists()
    categorizeImages(generator, only_filter, only_uncat)
    pywikibot.output(u'All done')
def files_to_check(start: Optional[str] = None) -> pywikibot.page.BasePage:
    """Yield the pages of "Category:INaturalist review needed".

    Only namespace 6 is requested from the module-level ``site``.

    Args:
        start: forwarded as ``start`` to the category generator.
    """
    review_category = pywikibot.Category(
        site, "Category:INaturalist review needed")
    yield from pagegenerators.CategorizedPageGenerator(
        review_category, namespaces=6, start=start)
def _emuso_is_stale(page, limit_seconds):
    """Return True when the {{Emuso}} tag on *page* is older than the limit.

    Revisions are read oldest first.  The first revision containing
    "{{emuso" identifies the tagging user; that revision and every later
    revision by the same user are compared against *limit_seconds*.  As soon
    as one of them is recent enough the tag is kept (``False``).
    """
    found = False
    tagger = None
    stale = False
    for rev in page.revisions(reverse=True, content=True):
        # Age is computed from the revision's calendar day only.
        day = str(rev.timestamp).split("T")[0]
        rev_epoch = time.mktime(
            datetime.datetime.strptime(day, "%Y-%m-%d").timetuple())
        age = int(time.time()) - int(rev_epoch)

        # Guard against revisions without text content.
        text_lower = (rev.text or "").lower()

        if not found and "{{emuso" in text_lower:
            print("Achei!")
            found = True
            tagger = rev.user
            # NOTE(review): the middle of this expression was garbled in the
            # source; it is reconstructed as the tagging user's name.
            print(" ID: " + str(rev.revid) + "\n User: " + str(tagger)
                  + "\n Qndo: " + str(rev.timestamp))
        elif not (found and rev.user == tagger):
            print("Ainda não... ")
            continue

        if age > limit_seconds:
            stale = True
        else:
            pywikibot.output(
                "\03{lightgreen}Mantendo esta {{Emuso}}...\03{default}")
            return False
    return stale


def _strip_first_emuso(text):
    """Return *text* without its first {{Emuso...}} call, or None if absent.

    The template name is matched case-insensitively and the removed span ends
    at the first "}}" after it.  Everything else, including any later
    occurrence of the template, is preserved.
    """
    import re

    match = re.search(r"\{\{emuso", text, re.IGNORECASE)
    if match is None:
        return None
    end = text.find("}}", match.end())
    if end == -1:
        return None
    return text[:match.start()] + text[end + 2:]


def main():
    """Remove stale {{Emuso}} tags from "Categoria:Páginas sendo editadas".

    Each category member (except "Predefinição:Emuso/preload") is checked; when
    its tag is older than roughly three months the first {{Emuso}} call is
    removed from the page text and the page is saved.
    """
    cat = pywikibot.Category(swwsite, "Categoria:Páginas sendo editadas")
    gen = pagegenerators.CategorizedPageGenerator(cat, recurse=False)
    limit_seconds = 60 * 60 * 24 * 30 * 3  # three 30-day months
    for page in gen:
        nome = page.title()
        if nome == "Predefinição:Emuso/preload":
            continue
        pywikibot.output(u">>> \03{lightpurple}%s\03{default} <<<" % nome)

        if not _emuso_is_stale(page, limit_seconds):
            continue

        pywikibot.output(
            "\03{red}Partiu retirar esta {{Emuso}}\03{default}!")
        new_text = _strip_first_emuso(page.text)
        if new_text is None:
            # Previously this case raised IndexError; skip the page instead.
            pywikibot.output("No complete {{Emuso}} call found; skipping.")
            continue
        page.text = new_text
        page.save(u'([[User:Thales César|Thales]]) 5.3 Emuso removida')
        time.sleep(3)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.  ``-onlyuncat`` is handled
    here; every other local argument goes to the generator factory.  Without a
    generator, 'Media needing categories' on Commons is used, recursively.

    :param args: command line arguments
    :type args: str
    """
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site('commons', 'commons')
    factory = pagegenerators.GeneratorFactory(site=site)

    only_uncat = False
    for arg in local_args:
        if arg == '-onlyuncat':
            only_uncat = True
            continue
        factory.handle_arg(arg)

    generator = factory.getCombinedGenerator()
    if not generator:
        fallback = pywikibot.Category(site, 'Media needing categories')
        generator = pagegenerators.CategorizedPageGenerator(fallback,
                                                            recurse=True)

    initLists()
    categorizeImages(generator, only_uncat)
    pywikibot.output('All done')
def add_template(cat):
    """Insert ``{{Do not crop}}`` into the files of category *cat*.

    A file is skipped when the comment of its oldest revision mentions
    'crop' or when its text already contains 'do not crop'.  Otherwise the
    template is inserted at the offset returned by
    ``findEndOfTemplate(text, "[Ss]pecimen")`` and saved through ``commit``.

    Args:
        cat: title of the category to process on ``SITE``.
    """
    gen = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(SITE, cat))
    summary = ("Adding {{Do not crop}} template, requested by Pigsonthewing @ "
               "commons.wikimedia.org/w/index.php"
               "?title=Commons:Bots/Work_requests&oldid=414102776")
    for member in gen:
        file_name = member.title()
        if not file_name.startswith("File:"):
            continue

        page = pywikibot.Page(SITE, file_name)
        old_rev = page.oldest_revision
        # The comment may be missing; treat that as an empty comment.
        if 'crop' in (old_rev.comment or '').lower():
            out('already cropped', color="white")
            continue

        old_text = page.get()
        if 'do not crop' in old_text.lower():
            out('already marked with DNC', color="white")
            continue

        end = findEndOfTemplate(old_text, "[Ss]pecimen")
        new_text = old_text[:end] + "\n{{Do not crop}}\n" + old_text[end:]
        try:
            commit(old_text, new_text, page, summary)
        except Exception as error:
            # Still best effort, but the failure is now reported and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            out('could not save %s: %s' % (file_name, error), color="white")
def main():
    """Check the merge targets named on pages of 'Categoría:Wikipedia:Fusionar'.

    For every page (namespaces 0 and 100) the templates whose lower-cased name
    is in ``categories_fusionar`` are inspected.  A line is written to
    "fusionar.txt" when parameter '1' is missing, when the target page does not
    exist, or when checking the target raises.
    """
    site = pywikibot.Site("es", "wikipedia")
    cat = pywikibot.Category(site, 'Categoría:Wikipedia:Fusionar')
    articles = pg.PreloadingGenerator(
        pg.CategorizedPageGenerator(cat, namespaces=[0, 100]))

    for page in articles:
        print("<<<<<<<<<<<<<< {0} ".format(page.title()))
        templates = pywikibot.textlib.extract_templates_and_params(page.get())
        merge_templates = [
            entry for entry in templates
            if entry[0].lower() in categories_fusionar
        ]
        for plantilla, parametros in merge_templates:
            if '1' not in parametros:
                tools.write(
                    data='[!!] Falta parámetro en {0}'.format(
                        str(page.title())),
                    filename="fusionar.txt")
                continue
            try:
                nueva_pagina = pywikibot.Page(site, str(parametros['1']))
                if not nueva_pagina.exists():
                    tools.write(data=str(page.title()),
                                filename="fusionar.txt")
                else:
                    print('La página {0} existe'.format(
                        nueva_pagina.title()))
            except Exception:
                tools.write(
                    data='[!] Error en {0}'.format(str(page.title())),
                    filename="fusionar.txt")
def main():
    """Synchronise course grades on the wiki with ``newGrades``.

    Fills the global ``allcourses`` with the members of 'Category:Courses',
    keyed by the first seven characters of each title.  Courses whose text
    already contains a ``{{Grades ...}}`` block are updated when their grades
    differ from ``newGrades``; courses present in ``newGrades`` without such a
    block get grades added.
    """
    global allcourses

    cat = pywikibot.Category(site, 'Category:Courses')
    gen = pagegenerators.CategorizedPageGenerator(cat)
    allcourses = {page.title()[:7]: page for page in gen}

    # Compiled once instead of re-evaluating the pattern for every course.
    grades_pattern = re.compile(r'{{Grades.*[0-9].* }}', re.DOTALL)

    # Update existing courses
    already_graded = []
    print('Fetching existing grades')
    with progressbar.ProgressBar(max_value=len(allcourses)) as bar:
        for n, (code, page) in enumerate(allcourses.items()):
            if grades_pattern.search(page.text):
                already_graded.append(code)
            bar.update(n)

    for code in already_graded:
        try:
            if currentGradesOnWiki(code) != newGrades[code]['grades']:
                updateGrades(code)
        except Exception as error:
            # Still best effort, but the failure is now visible and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Skipping grade update for %s: %r' % (code, error))

    # Add grades for new courses
    graded = set(already_graded)  # O(1) membership tests below
    for code in allcourses:
        if code in newGrades and code not in graded:
            addGrades(code)
def createPageGenerator(
        self, firstPageTitle) -> Generator[pywikibot.Page, None, None]:
    """Generator to retrieve misspelling pages or misspelling redirects.

    Uses the misspelling categories configured for the site code when there
    are any (recursively, starting at *firstPageTitle*), otherwise the pages
    transcluding the configured misspelling templates.  When neither is
    configured, the help message is shown and an empty generator is returned.
    """
    mycode = self.site.code
    has_categories = mycode in self.misspellingCategory

    if not has_categories and mycode not in self.misspellingTemplate:
        pywikibot.output(HELP_MSG.format(site=self.site))
        return (i for i in [])

    if has_categories:
        titles = self.misspellingCategory[mycode]
        if isinstance(titles, UnicodeType):
            # A single title is treated as a one-element collection.
            titles = (titles, )
        generators = (
            pagegenerators.CategorizedPageGenerator(
                pywikibot.Category(self.site, title),
                recurse=True, start=firstPageTitle)
            for title in titles)
    else:
        names = self.misspellingTemplate[mycode]
        if isinstance(names, UnicodeType):
            names = (names, )
        generators = (
            pagegenerators.ReferringPageGenerator(
                pywikibot.Page(self.site, name, ns=10),
                onlyTemplateInclusion=True)
            for name in names)
        if firstPageTitle:
            pywikibot.output(
                '-start parameter unsupported on this wiki because there '
                'is no category for misspellings.')

    return pagegenerators.PreloadingGenerator(chain(*generators))
def main():
    """Log multi-word entries of 'Kategoria:francuski (indeks)' to a file.

    For every entry whose ``type`` is not 4 and whose title contains a space,
    each language section with ``type != 2`` and ``lang == 'hiszpański'`` is
    checked: when its meanings text contains 'rzeczownik' but not
    'rzeczownika', the title is printed and appended to "log/rzeczownik.txt".
    """
    data = '20110310'  # dump date, only used by the disabled XML source below
    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat)
    # Alternative source (disabled): read the entries from an XML dump.
    # lista_stron1 = xmlreader.XmlDump(
    #     'plwiktionary-%s-pages-articles.xml' % data)
    # lista = xmlreader.XmlDump.parse(lista_stron1)
    for a in lista:
        h = Haslo(a.title())
        # h = HasloXML(a.title, a.text)
        if h.type == 4 or ' ' not in h.title:
            continue
        h.langs()
        for c in h.list_lang:
            c.pola()
            if c.type == 2 or c.lang != 'hiszpański':
                continue
            tresc = c.znaczenia.tresc
            if 'rzeczownik' in tresc and 'rzeczownika' not in tresc:
                print('\n' + h.title)
                text = '*[[%s]]\n' % h.title
                # Binary append mode matches the explicitly encoded bytes.
                # The context manager closes the file, which the original
                # ``file.close`` (no parentheses) never did.
                with open("log/rzeczownik.txt", 'ab') as log:
                    log.write(text.encode("utf-8"))
def main():
    """Run ``workon`` over every disambiguation page and its links.

    Command line arguments are joined into the start title (default "!").
    Pages of the site's disambiguation category in namespace 0 are collected
    in batches; once a batch holds at least 50 linked pages those links are
    preloaded and ``workon(page, links)`` is called for each collected page.

    Raises:
        pywikibot.NoPage: re-raised when the site has no known disambiguation
            category.
    """
    global mysite, linktrail, page

    args = list(pywikibot.handleArgs())
    start = " ".join(args) if args else "!"

    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except pywikibot.NoPage:
        pywikibot.output(
            "The bot does not know the disambiguation category for your wiki.")
        raise

    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)

    pagestodo = []
    pagestoload = []

    def flush():
        """Preload the pending links, process the batch, then reset it."""
        global page
        # PreloadingGenerator is lazy: it only fetches while being iterated.
        for _ in pagegenerators.PreloadingGenerator(pagestoload):
            pass
        for page, links in pagestodo:
            workon(page, links)
        del pagestoload[:]
        del pagestodo[:]

    for page in generator:
        if page.isRedirectPage():
            continue
        # Materialise the links: an iterator would be consumed by ``+=``
        # below and reach workon() empty.
        linked = list(page.linkedPages())
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            flush()

    # Process the final, partially filled batch, which used to be dropped.
    if pagestodo:
        flush()
def createPageGenerator(self, firstPageTitle):
    """Return a preloading generator over the wiki's misspelling pages.

    Uses the misspelling category configured for the site code when there is
    one (recursively, starting at *firstPageTitle*), otherwise the pages
    transcluding the configured misspelling template.  When neither is
    configured, the help message is shown and an empty generator is returned.
    """
    mysite = pywikibot.Site()
    mylang = mysite.code
    has_category = mylang in self.misspellingCategory

    if not has_category and mylang not in self.misspellingTemplate:
        pywikibot.output(HELP_MSG.format(site=mysite))
        return (i for i in [])

    if has_category:
        category = pywikibot.Category(mysite,
                                      self.misspellingCategory[mylang])
        generator = pagegenerators.CategorizedPageGenerator(
            category, recurse=True, start=firstPageTitle)
    else:
        template_title = 'Template:%s' % self.misspellingTemplate[mylang]
        template = pywikibot.Page(mysite, template_title)
        generator = pagegenerators.ReferringPageGenerator(
            template, onlyTemplateInclusion=True)
        if firstPageTitle:
            pywikibot.output(
                u'-start parameter unsupported on this wiki because there '
                u'is no category for misspellings.')

    return pagegenerators.PreloadingGenerator(generator)
def main(argv):
    """Parse the command line and run ``PhotoCountyBot`` for one place.

    The global ``startCat`` template is filled in with the place, and the
    members of the resulting category are handed to the bot.

    Args:
        argv: full argument vector; ``argv[0]`` is skipped.
    """
    global startCat
    global debug

    debug = False

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', '-d', action='store_true',
                        help='enable debugging output')
    parser.add_argument('--place', '-p', '--location', '-l', required=True,
                        help='specify location to start (required)')
    options = parser.parse_args(argv[1:])

    debug = options.debug
    startCat = startCat % options.place

    site = pywikibot.Site()
    members = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(site, startCat))
    PhotoCountyBot(state=options.place, generator=members).run()
def actuallistes(nomorigen, diccipa, diccigpcv, diccbic, diccsipca,
                 diccmerimee, diccart, existents, pagprova=False):
    """Run ``actuallista`` on one list page or on a whole category of lists.

    When *nomorigen* starts with "llistes" (case-insensitively) it is taken as
    a category name and every page in it, recursively, is processed.
    Otherwise it is taken as the title of a single page.

    Args:
        nomorigen: category name (without the 'Category:' prefix) or page
            title.
        diccipa, diccigpcv, diccbic, diccsipca, diccmerimee, diccart,
        existents: forwarded unchanged to ``actuallista``.
        pagprova: forwarded to ``actuallista``.  It used to be ignored and
            ``False`` was always passed.

    Returns:
        tuple: always the empty tuple.
    """
    if nomorigen.casefold().startswith("llistes"):
        cat = pwb.Category(site, 'Category:' + nomorigen)
        print(cat)
        llistes = pagegenerators.CategorizedPageGenerator(cat, recurse=True)
    else:
        llistes = [pwb.Page(site, nomorigen)]

    for llista in llistes:
        print(llista)
        actuallista(llista, diccipa, diccigpcv, diccbic, diccsipca,
                    diccmerimee, diccart, existents, pagprova=pagprova)
    return ()
def get_authors_list():
    """Return the titles, without namespace, of all pages in 'Categoria:Autori'.

    The category is read from the module-level ``site`` through a preloading
    generator.

    Returns:
        list of str: page titles in generator order.
    """
    members = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(site, 'Categoria:Autori'))
    return [
        page.title(withNamespace=False)
        for page in pagegenerators.PreloadingGenerator(members)
    ]
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.  The first local argument, if
    any, is the title to start from (default '!').  When the site has no
    disambiguation category, help is suggested and False is returned.

    @param args: command line arguments
    @type args: list of unicode
    """
    local_args = pywikibot.handle_args(args)
    start = '!' if not local_args else local_args[0]

    mysite = pywikibot.Site()
    try:
        disamb_category = mysite.disambcategory()
    except pywikibot.Error as error:
        pywikibot.bot.suggest_help(exception=error)
        return False

    generator = pagegenerators.CategorizedPageGenerator(
        disamb_category, start=start, content=True, namespaces=[0])
    DisambiguationRedirectBot(generator=generator).run()
def main():
    """Parse the command line and run ``NoReferencesBot``.

    Recognised arguments: ``-xml[:file]`` (use an XML dump),
    ``-namespace:<n>``, ``-always``; anything else is offered to the generator
    factory and, if rejected, treated as part of a page title.  Without any
    generator the site's maintenance category is used (defaulting to
    namespace 0); when none is configured, the script help is shown.
    """
    # page generator
    gen = None
    # Collects the words of the page title if one single page to work on is
    # specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif not genFactory.handleArg(arg):
            pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # No maintenance category configured for this family/language.
            # Only the lookup failure is tolerated, not every exception.
            pass
        else:
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(),
                                                   cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)

    if not gen:
        pywikibot.showHelp('noreferences')
        return

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = NoReferencesBot(preloadingGen, always)
    bot.run()
def run(self):
    """Call ``self.treat`` on every page of ``self.cat`` with a wanted suffix.

    A page is skipped when the lower-cased last four characters of its title
    are one of '.pdf', '.svg', '.ogg' or 'webm'.
    """
    # 'webm' has no dot because only the last four characters are compared.
    ignored_suffixes = ('.pdf', '.svg', '.ogg', 'webm')
    members = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(self.site, self.cat))
    for page in pagegenerators.PreloadingGenerator(members):
        if page.title()[-4:].lower() not in ignored_suffixes:
            self.treat(page)
def main(dry_run):
    """Run ``edit_page`` over every page of 'Virtue Deeds', recursively.

    The value returned by each ``edit_page`` call is passed as the ``always``
    argument of the next call, starting from ``False``.

    Args:
        dry_run: forwarded unchanged to ``edit_page``.
    """
    site = pywikibot.Site()
    virtue_category = pywikibot.Category(site, 'Virtue Deeds')
    always = False
    for page in pagegenerators.CategorizedPageGenerator(virtue_category,
                                                        recurse=True):
        always = edit_page(page, 'Drono-bot:virtue conversion', always,
                           dry_run)
def iterate_category(
        site, action,
        category_name=u'Категория:Списки культурного наследия России'):
    """Call *action* once for every page in a category.

    Args:
        site: pywikibot site the category lives on.
        action: callable invoked with each member page.
        category_name: full title of the category to walk.
    """
    members = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(site, category_name))
    for list_page in members:
        action(list_page)
def test_intersect_newpages_csd(self):
    """Check ``assertEqualItertools`` with new pages and the CSD category.

    The two generators passed are the ten newest pages and the members of
    'Category:Candidates_for_speedy_deletion'.
    """
    site = self.get_site()
    newest_pages = pagegenerators.NewpagesPageGenerator(site=site, total=10)
    csd_pages = pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(site, 'Category:Candidates_for_speedy_deletion'))
    self.assertEqualItertools([newest_pages, csd_pages])
def main(*args: str) -> None:
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.  Page sources are tried in
    this order: ``-protectedpages`` / ``-moveprotected``, the generator
    factory, and finally the categories translated from ``categoryToCheck``.

    :param args: command line arguments
    """
    # Loading the comments
    global categoryToCheck, project_inserted

    options = {}
    generator = None

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)

    site = pywikibot.Site()
    if site.code not in project_inserted:
        pywikibot.output('Your project is not supported by this script.\n'
                         'You have to edit the script and add it!')
        return

    # Process pagegenerators arguments
    gen_factory = pagegenerators.GeneratorFactory(site)
    local_args = gen_factory.handle_args(local_args)

    # Process local args
    for raw_arg in local_args:
        name, _sep, value = raw_arg.partition(':')
        option = name[1:]
        if name in ('-always', '-move', '-show'):
            options[option] = True
        elif name in ('-protectedpages', '-moveprotected'):
            protect_type = 'move' if option.startswith('move') else 'edit'
            generator = site.protectedpages(namespace=int(value or 0),
                                            type=protect_type)

    if not generator:
        generator = gen_factory.getCombinedGenerator()

    if not generator:
        # Define the category if no other generator has been set.  The
        # per-category generators are built eagerly, then chained.
        category_generators = [
            pagegenerators.CategorizedPageGenerator(
                pywikibot.Category(site, category_name))
            for category_name in i18n.translate(site, categoryToCheck)
        ]
        generator = chain.from_iterable(category_generators)

    if not gen_factory.nopreload:
        generator = pagegenerators.PreloadingGenerator(generator,
                                                       groupsize=60)

    CheckerBot(site=site, generator=generator, **options).run()
def get_candidate_items():
    """Return a generator over the members of "Category:Items".

    The result is not definitive, since the category can contain non-items,
    but it is still usable as a filter.

    Returns:
        The page generator for the category on the module-level ``site``.
    """
    cat = pywikibot.Category(site, "Category:Items")
    # Build the generator here so the return value is always defined.
    return pagegenerators.CategorizedPageGenerator(cat)
def setup(self):
    """Refresh generator.

    Rebuilds ``self.generator`` from ``self.csd_cat``, starting at
    ``self.saved_progress``, then clears ``self.saved_progress``.
    """
    csd_pages = pagegenerators.CategorizedPageGenerator(
        self.csd_cat, start=self.saved_progress)
    # Wrap another generator around it so that we won't produce orphaned
    # talk pages.
    with_talk_pages = pagegenerators.PageWithTalkPageGenerator(csd_pages)
    self.generator = pagegenerators.PreloadingGenerator(with_talk_pages,
                                                        groupsize=20)
    self.saved_progress = None