def main():
    """Run addAspekt over every page transcluding {{ndk}} or {{dk}}."""
    global odmOlafa
    odmOlafa = OdmianaOlafa()
    global site
    site = pywikibot.getSite()
    ndk_template = pywikibot.Page(site, 'Szablon:ndk')
    ndk_pages = pagegenerators.ReferringPageGenerator(
        ndk_template, True, True, True)
    dk_template = pywikibot.Page(site, 'Szablon:dk')
    dk_pages = pagegenerators.ReferringPageGenerator(
        dk_template, True, True, True)
    # Union of both transclusion lists, de-duplicated.
    combined = set(list(ndk_pages) + list(dk_pages))
    for entry in combined:
        addAspekt(entry.title())
def main():
    """Report eswiki Commonscat transclusions that lack P373 on Wikidata.

    Pages already holding the Commons category property (P373) are logged
    to has.csv; pages missing it are written to hasno.csv and hasno.json.
    The hasno report is rebuilt from scratch on every run.
    """
    # Cleanup: start with a fresh "missing" report each run.
    if path.isfile('hasno.csv'):
        remove('hasno.csv')
    site = pywikibot.Site('es', 'wikipedia')
    listaRevision = getCacheDump('has.csv')
    generator = pagegenerators.ReferringPageGenerator(
        pywikibot.Page(source=site, title='Template:Commonscat'))
    for p in generator:
        # Only main (0) and Anexo (104) namespaces; skip already-checked pages.
        if p.namespace() not in [0, 104] or p.title() in listaRevision:
            print('<<< {0} skipped'.format(p.title()))
            continue
        # Fixed: was `pageHasP(p, 'P373') == False`, which silently treated a
        # None/falsy return as "has P373"; `not` covers all falsy results.
        if not pageHasP(p, 'P373'):
            print('>>> {0} has no P373'.format(p.title()))
            lista = hasTemplate(p, [
                'Commonscat', 'Commons cat', 'Categoría Commons',
                'Commonscat-inline', 'Commons category',
                'Commons category-inline'])
            parameters = lista[0][1]
            # The template's first positional parameter is the Commons
            # category; fall back to the article title when absent.
            if parameters:
                category = parameters[0]
            else:
                category = p.title(withNamespace=False)
            printToCsv(
                line=[p.full_url(), getQ(p).full_url(), p.title(), category],
                archivo='hasno.csv')
            createJSON(
                'hasno.csv',
                ['wikipedia', 'wikidata', 'article', 'category_commons'])
        else:
            print('{0} has P373'.format(p.title()))
            printToCsv(line=[p.title()], archivo='has.csv')
def __init__(self, configurl=u'https://raw.githubusercontent.com/wikimedia/labs-tools-heritage/master/erfgoedbot/monuments_config/nl_nl.json'):
    """
    Grab generator based on search to work on.

    Downloads the heritage monuments JSON config, resolves the wiki and
    its data repository, finds which template field maps to the Wikidata
    item, and builds a generator over pages transcluding the configured
    row template, restricted to the configured namespaces.
    """
    response = requests.get(configurl)
    self.monumentsconfig = response.json()
    self.site = pywikibot.Site(self.monumentsconfig.get(u'lang'),
                               self.monumentsconfig.get(u'project'))
    self.repo = self.site.data_repository()
    # Locate the source field whose destination is the Wikidata item.
    self.wikidatafield = None
    for field in self.monumentsconfig.get('fields'):
        if field.get(u'dest') and field.get(u'dest') == 'wd_item':
            self.wikidatafield = field.get(u'source')
    # Property/designation pair used when querying monuments on Wikidata.
    self.property = u'P359'
    self.designation = u'Q916333'
    self.monuments = self.getMonumentsOnWikidata(self.property,
                                                 self.designation)
    row_template = pywikibot.Page(
        self.site,
        '{0}:{1}'.format(self.site.namespace(10),
                         self.monumentsconfig.get('rowTemplate')))
    transclusions = pagegenerators.ReferringPageGenerator(
        row_template, onlyTemplateInclusion=True)
    self.generator = pagegenerators.NamespaceFilterPageGenerator(
        transclusions, self.monumentsconfig.get('namespaces'),
        site=self.site)
def main():
    """Fix redirects on pages from the CLI generator, or on pages linking
    to the wiki's featured-articles list when -featured is given."""
    only_featured = False
    generator = None
    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg == '-featured':
            only_featured = True
        else:
            genFactory.handleArg(arg)
    home = pywikibot.Site()
    if home.sitename() == 'wikipedia:nl':
        # nlwiki community consensus forbids this bot task.
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()
    if only_featured:
        featuredList = i18n.translate(home, featured_articles)
        ref = pywikibot.Page(pywikibot.Site(), featuredList)
        generator = pagegenerators.ReferringPageGenerator(ref)
        generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        pywikibot.showHelp('fixing_redirects')
        return
    for page in pagegenerators.PreloadingGenerator(generator):
        workon(page)
def run(self):
    """Substitute transclusions of each template from _templates_generator,
    skipping instances that carry a nosubst or demo parameter."""
    for template in self._templates_generator():
        wanted = template.title(withNamespace=False).lower()
        pages = pagegenerators.ReferringPageGenerator(
            template, onlyTemplateInclusion=True)
        for page in pages:
            try:
                text = page.get()
            except pywikibot.Error:
                continue
            code = mwparserfromhell.parse(text)
            for tpl in code.ifilter_templates():
                # Explicit opt-outs on the transclusion itself.
                if tpl.has_param("nosubst") or tpl.has_param("demo"):
                    continue
                name = tpl.name.lower().strip()
                should_subst = (
                    name == wanted
                    or (name.startswith("template:")
                        and name[9:] == wanted))
                if should_subst:
                    tpl.name = "subst:%s" % template.title()
                    tpl.add("subst", "subst:")
            if text != code:
                pywikibot.showDiff(text, code)
                try:
                    page.put(
                        code,
                        "Bot: Substituting {{%s}}"
                        % template.title(asLink=True, allowInterwiki=False))
                except pywikibot.Error:
                    continue
def createPageGenerator(
    self, firstPageTitle) -> Generator[pywikibot.Page, None, None]:
    """Generator to retrieve misspelling pages or misspelling redirects."""
    mycode = self.site.code
    if mycode in self.misspellingCategory:
        # Category-driven wiki: walk each misspelling category recursively.
        cats = self.misspellingCategory[mycode]
        if isinstance(cats, UnicodeType):
            cats = (cats, )
        generators = (
            pagegenerators.CategorizedPageGenerator(
                pywikibot.Category(self.site, cat_title),
                recurse=True, start=firstPageTitle)
            for cat_title in cats)
    elif mycode in self.misspellingTemplate:
        # Template-driven wiki: follow transclusions of each template.
        templates = self.misspellingTemplate[mycode]
        if isinstance(templates, UnicodeType):
            templates = (templates, )
        generators = (
            pagegenerators.ReferringPageGenerator(
                pywikibot.Page(self.site, template_name, ns=10),
                onlyTemplateInclusion=True)
            for template_name in templates)
        if firstPageTitle:
            pywikibot.output(
                '-start parameter unsupported on this wiki because there '
                'is no category for misspellings.')
    else:
        pywikibot.output(HELP_MSG.format(site=self.site))
        return (i for i in [])
    return pagegenerators.PreloadingGenerator(chain(*generators))
def main():
    """Fan 'Ficha de persona' transclusions out to daemon worker threads,
    skipping pages already recorded in the ficha_no.csv cache."""
    worker_count = 5
    cola = Queue(400)
    for i in range(worker_count):
        worker = Thread(target=procesador, args=(cola, i,))
        worker.setDaemon(True)
        worker.start()
    site = pywikibot.Site('es', 'wikipedia')
    generator = pagegenerators.ReferringPageGenerator(
        pywikibot.Page(source=site, title='Template:Ficha de persona'),
        onlyTemplateInclusion=True)
    pages = pagegenerators.PreloadingGenerator(generator, getLimite(site))
    listaRevisados = getCacheDump(dump='ficha_no.csv')
    for page in pages:
        if page.title() not in listaRevisados:
            cola.put(page)
    # Block until every queued page has been processed.
    cola.join()
def getOorlogsmonumentenDataGenerator():
    """
    Generator to parse https://nl.wikipedia.org/w/index.php?title=Speciaal:VerwijzingenNaarHier/Sjabloon:Tabelrij_oorlogsmonument_Nederland&namespace=0&limit=500

    Yields one metadata dict (template field name -> raw value) per
    'Tabelrij oorlogsmonument Nederland' transclusion found in the main
    namespace of nl.wikipedia.
    """
    site = pywikibot.Site('nl', 'wikipedia')
    row_template = pywikibot.Page(
        site, 'Template:Tabelrij oorlogsmonument Nederland')
    trans_gen = pagegenerators.ReferringPageGenerator(
        row_template, onlyTemplateInclusion=True)
    filtered_gen = pagegenerators.NamespaceFilterPageGenerator(
        trans_gen, [0], site=site)
    for page in filtered_gen:
        # Fixed: was the Python 2 statement `print page.title()`, a
        # SyntaxError on Python 3 (the rest of the code uses the modern
        # pywikibot API, e.g. title(with_ns=False)).
        print(page.title())
        templates = page.templatesWithParams()
        for (template, params) in templates:
            if template.title(
                    with_ns=False) == u'Tabelrij oorlogsmonument Nederland':
                metadata = {}
                for param in params:
                    (field, _, value) = param.partition(u'=')
                    # Remove leading or trailing spaces
                    field = field.strip()
                    metadata[field] = value
                yield metadata
def __init__(self, pageToUnlink, namespaces, always):
    """Prepare an unlink run: a preloading generator over pages linking to
    pageToUnlink, optionally restricted to the given namespaces."""
    self.pageToUnlink = pageToUnlink
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # Wikilink matcher with four named groups:
    #   title     - target page title (everything before | or ] or #)
    #   section   - '#...' section anchor, kept with the leading #
    #   label     - alternative link text between | and ]
    #   linktrail - letters after ]] that belong to the linked word;
    #               what counts as a 'letter' varies per language.
    self.linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    self.always = always
    self.done = False
    self.comment = i18n.twtranslate(pywikibot.getSite(),
                                    'unlink-unlinking',
                                    self.pageToUnlink.title())
def test_non_item_gen(self):
    """Test TestPreloadingItemGenerator with ReferringPageGenerator."""
    site = self.get_site()
    prop_page = pywikibot.Page(site, 'Property:P31')
    backlinks = pagegenerators.ReferringPageGenerator(prop_page, total=5)
    items = pagegenerators.PreloadingItemGenerator(backlinks)
    self.assertTrue(all(isinstance(entry, pywikibot.ItemPage)
                        for entry in items))
def __init__(self, pageToUnlink, **kwargs):
    """Set up the unlink bot for pageToUnlink.

    Recognized option: 'namespaces' (list) — restrict processing to these
    namespaces; the default [] means all namespaces are processed.
    """
    self.availableOptions.update({
        'namespaces': [],
    })
    super(UnlinkBot, self).__init__(**kwargs)
    self.pageToUnlink = pageToUnlink
    linktrail = self.pageToUnlink.site.linktrail()
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if self.getOption('namespaces') != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(
            gen, self.getOption('namespaces'))
    self.generator = pagegenerators.PreloadingGenerator(gen)
    # Wikilink matcher with four named groups:
    #   title     - target page title (everything before | or ] or #)
    #   section   - '#...' section anchor, kept with the leading #
    #   label     - alternative link text between | and ]
    #   linktrail - letters after ]] that belong to the linked word;
    #               what counts as a 'letter' varies per language.
    self.linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    self.comment = i18n.twtranslate(self.pageToUnlink.site,
                                    'unlink-unlinking',
                                    self.pageToUnlink.title())
def get_local_dict_list(self, variant):
    """Yield pages transcluding the variant's modernize template
    (following redirects)."""
    template_page = self.get_page(self.config[variant]['modernize_template'])
    yield from pagegen.ReferringPageGenerator(
        template_page, followRedirects=True, onlyTemplateInclusion=True)
def createPageGenerator(self, firstPageTitle):
    """Return a preloading generator of misspelling pages for this wiki,
    sourced from its misspelling category or misspelling template."""
    mysite = pywikibot.Site()
    mylang = mysite.code
    if mylang in self.misspellingCategory:
        # Category-driven wiki: walk the category recursively.
        cat = pywikibot.Category(mysite, self.misspellingCategory[mylang])
        generator = pagegenerators.CategorizedPageGenerator(
            cat, recurse=True, start=firstPageTitle)
    elif mylang in self.misspellingTemplate:
        # Template-driven wiki: follow template transclusions.
        template = pywikibot.Page(
            mysite, 'Template:%s' % self.misspellingTemplate[mylang])
        generator = pagegenerators.ReferringPageGenerator(
            template, onlyTemplateInclusion=True)
        if firstPageTitle:
            pywikibot.output(
                u'-start parameter unsupported on this wiki because there '
                u'is no category for misspellings.')
    else:
        pywikibot.output(HELP_MSG.format(site=mysite))
        return (i for i in [])
    return pagegenerators.PreloadingGenerator(generator)
def get_template_generator(lng, tpl):
    """Create a generator of articles linking to template."""
    site = pwb.Site(lng, "wikipedia")
    template_page = pwb.Page(site,
                             "{}:{}".format(site.namespace(10), tpl))
    backlinks = pg.ReferringPageGenerator(template_page,
                                          onlyTemplateInclusion=True)
    mainspace_only = pg.NamespaceFilterPageGenerator(backlinks,
                                                     namespaces=[0])
    return site.preloadpages(mainspace_only, pageprops=True)
def PagesToArchiveGenerator(sites):
    """Yield archive-config pages (not in skipList) transcluding the
    configuration template."""
    for site in sites:
        template = pywikibot.Page(site, archiveConfig.configTemplateName)
        transclusions = pagegenerators.ReferringPageGenerator(
            template, onlyTemplateInclusion=True)
        for page in transclusions:
            if page.title() not in skipList:
                yield page
        # NOTE(review): unconditional break — only the FIRST site in
        # `sites` is ever processed; confirm this is intentional.
        break
def makeGenerator(lang):
    """Return a transclusion generator for the language's template,
    using the per-language namespace and title lookup tables."""
    tsite = pywikibot.Site(lang, 'wikipedia')
    template_page = pywikibot.Page(
        tsite, templateNSDict[lang] + templateTitleDict[lang])
    return pg.ReferringPageGenerator(template_page,
                                     followRedirects=False,
                                     withTemplateInclusion=True,
                                     onlyTemplateInclusion=True,
                                     step=None,
                                     total=None,
                                     content=False)
def list_template_usage(row_template_name):
    """Return a generator of main space pages transcluding a given template."""
    site = wikipedia.getSite('de', 'wikipedia')
    template_page = wikipedia.Page(
        site, u'%s:%s' % (site.namespace(10), row_template_name))
    backlinks = pagegenerators.ReferringPageGenerator(
        template_page, onlyTemplateInclusion=True)
    mainspace = pagegenerators.NamespaceFilterPageGenerator(backlinks, [0])
    return pagegenerators.PreloadingGenerator(mainspace)
def generate_transclusions(site, template, namespaces=None):
    """Yield pages transcluding *template* (template namespace) on *site*.

    @param site: the wiki to query
    @param template: template title (without namespace prefix)
    @param namespaces: optional namespace filter; a falsy value (the
        previous default was the mutable `[]`) disables filtering.
        Fixed: mutable default argument replaced with a None sentinel —
        behavior is unchanged since the value is only truth-tested.
    """
    pywikibot.output(u'Fetching template transclusions...')
    transclusion_page = pywikibot.Page(site, template, ns=10)
    gen = pagegenerators.ReferringPageGenerator(transclusion_page,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(
            gen, namespaces, site)
    for page in gen:
        yield page
def main():
    """Run the RKD images importer over Commons files transcluding
    {{RKDimages}}."""
    commonssite = pywikibot.Site(u'commons', u'commons')
    templatepage = pywikibot.Page(commonssite, title=u'Template:RKDimages')
    transclusions = pagegenerators.ReferringPageGenerator(
        templatepage, onlyTemplateInclusion=True)
    # Namespace 6 = File pages only.
    files_only = pagegenerators.NamespaceFilterPageGenerator(
        transclusions, 6)
    gen = pagegenerators.PreloadingGenerator(files_only)
    rkdimagesImporter = RKDImagesImporter(gen)
    rkdimagesImporter.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    generator = None
    checkcurrent = False
    namespaces = [14]  # category namespace only
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                options['summary'] = pywikibot.input(
                    u'What summary do you want to use?')
            else:
                options['summary'] = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
        elif arg == '-always':
            options['always'] = True
        else:
            genFactory.handleArg(arg)
    if checkcurrent:
        primaryCommonscat, commonscatAlternatives = \
            CommonscatBot.getCommonscatTemplate(pywikibot.Site().code)
        template_page = pywikibot.Page(
            pywikibot.Site(), u'Template:' + primaryCommonscat)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                template_page, onlyTemplateInclusion=True),
            namespaces)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        bot = CommonscatBot(pregenerator, **options)
        bot.run()
    else:
        pywikibot.showHelp()
def _templates_generator(self):
    """Yield templates transcluding the source template (and their
    redirects), resolving /doc subpages to their parent template."""
    transclusions = pagegenerators.NamespaceFilterPageGenerator(
        pagegenerators.ReferringPageGenerator(
            self.source_template, onlyTemplateInclusion=True),
        [10])
    for page in transclusions:
        template = page
        if template.title().endswith("/doc"):
            # Documentation subpage: use the parent template if it exists.
            parent = pywikibot.Page(self.site, template.title()[:-4])
            if parent.exists():
                template = parent
        if template != self.source_template:
            yield template
            for redirect in template.getReferences(
                    redirectsOnly=True, withTemplateInclusion=False):
                yield redirect
def iterate_games_pages():
    """Yield game pages transcluding the games template, keeping only
    titles that start with the games page prefix.

    :rtype: generator of pywikibot.page.Page
    """
    games_template_page = pw.Page(site, games_template_name, ns="תבנית")
    for game_page in pagegenerators.ReferringPageGenerator(
            games_template_page):
        if not game_page.title().startswith(games_page_prefix):
            # Fixed log message: the original had an unbalanced '(' and
            # read "with uses"; no behavior change beyond the debug text.
            logging.debug(
                f"Skipping '{game_page.title()}', which uses "
                f"'{games_template_name}' but does not start with the "
                f"'{games_page_prefix}' prefix"
            )
            continue
        yield game_page
def getPageGenerator(self):
    """Build the file-page generator: the hash-based generator when the
    use_hash option is set, otherwise pages transcluding any NowCommons
    template (File namespace only, de-duplicated, preloaded)."""
    if self.getOption('use_hash'):
        return self.useHashGenerator()
    template_pages = [pywikibot.Page(self.site, title, ns=10)
                      for title in self.ncTemplates()]
    per_template = [pg.ReferringPageGenerator(t, followRedirects=True,
                                              onlyTemplateInclusion=True)
                    for t in template_pages]
    gen = pg.CombinedPageGenerator(per_template)
    gen = pg.NamespaceFilterPageGenerator(gen, [6])
    gen = pg.DuplicateFilterPageGenerator(gen)
    return pg.PreloadingGenerator(gen)
def list_template_usage(site_obj, tmpl_name):
    """
    Takes Site object and template name and returns a generator.

    The function expects a Site object (pywikibot.Site()) and a template
    name (String). It creates a list of all pages using that template and
    returns them as a generator. The generator will load 50 pages at a
    time for iteration.
    """
    template_page = pywikibot.Page(
        site_obj, "{}:{}".format(site_obj.namespace(10), tmpl_name))
    backlinks = pg.ReferringPageGenerator(template_page,
                                          onlyTemplateInclusion=True)
    mainspace = pg.NamespaceFilterPageGenerator(backlinks, namespaces=[0])
    return site_obj.preloadpages(mainspace, pageprops=True)
def template_dict_generator(templates, namespaces):
    """For each template name, yield (name, list of transcluding pages),
    optionally restricted to the given namespaces."""
    mysite = pywikibot.Site()
    template_ns = mysite.getNamespaceIndex(mysite.template_namespace())
    for template in templates:
        gen = pagegenerators.ReferringPageGenerator(
            pywikibot.Page(mysite, template, ns=template_ns),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen,
                                                              namespaces)
        yield template, list(gen)
def main():
    """
    Parse the command line arguments and get a pagegenerator to work on.

    Iterate through all the pages.
    """
    summary = None
    generator = None
    checkcurrent = False
    always = False
    namespaces = [14]  # category namespace only
    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if checkcurrent:
        primaryCommonscat, commonscatAlternatives = \
            CommonscatBot.getCommonscatTemplate(pywikibot.Site().code)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.Site(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True),
            namespaces)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(u'You have to specify the generator you '
                                    u'want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
def run(self):
    """Collect open discussions transcluding the watched templates and
    publish a newest-first digest to the post-template page."""
    global err_cnt
    pages = []
    dic = {}
    templates = self.templates
    bottomtxt = '%s' % self.bottomtxt
    post_template = self.post_template
    # Regex matching any of the watched template names.
    if len(templates) == 1:
        regex = '%s' % templates[0]
    else:
        regex = '(' + '|'.join(templates) + ')'
    for template in templates:
        pagegen = pagegenerators.ReferringPageGenerator(
            pywikibot.Page(site, u'틀:%s' % template),
            onlyTemplateInclusion=True)
        for page in pagegen:
            pages.append(page)
    pages = sorted(set(pages))
    message = u''
    if pages:
        for page in pages:
            Time, line = pageparse(page, regex)
            # NOTE(review): two discussions with an identical timestamp
            # overwrite each other here — confirm acceptable.
            dic[Time] = line
        # Fixed: the original did `keys = dic.keys(); keys.sort(...)`,
        # which raises AttributeError on Python 3 (dict_keys has no
        # .sort()). sorted() works on both Python 2 and 3.
        for key in sorted(dic, reverse=True):
            message += u'%s' % dic[key]
    else:
        message = u'* 목록에 토론이 없습니다.'
    pagetext = u"'''아래의 토론들은 여러 편집자의 참여와 관심을 필요로 하고 있습니다.'''\n----\n<onlyinclude>%s\n</onlyinclude>\n%s" % (
        message, bottomtxt)
    templatepage = pywikibot.Page(site, '%s' % post_template)
    templatepage.put(
        pagetext,
        comment=u'수정, 현재 %s 개의 토론이 있습니다.' % str(len(pages) - err_cnt))
    pywikibot.output(
        'Update, %s current discussions\n Sleeping for 5 minutes'
        % str(len(pages) - err_cnt))
    gc.collect()
def main(*args):
    """Scan eswiki Commonscat transclusions — or a single page given via
    -page: — and rebuild the has/has-not reports.

    @param args: command line arguments
    """
    site = pywikibot.Site('es', 'wikipedia')
    local_args = pywikibot.handle_args(args)
    page = None
    pages = None
    for arg in local_args:
        if arg.startswith('-page:'):
            page = arg[len('-page:'):]
            pages = [pywikibot.Page(source=site, title=page)]
    # Fixed: identity comparison with None (`== None` → `is None`).
    if page is None:
        tpl = pywikibot.Page(source=site, title='Template:Commonscat')
        pages = pagegenerators.ReferringPageGenerator(tpl)
    # Cleanup: rebuild the "missing" reports from scratch each run.
    if path.isfile('hasno.csv'):
        remove('hasno.csv')
    if path.isfile('hasno.json'):
        remove('hasno.json')
    work(pages)
    write_result()
def main():
    """Dispatch commons_link actions (pages/categories) over a generator
    built from the command-line arguments."""
    gen = None
    start = None
    action = None
    for arg in pywikibot.handleArgs():
        if arg == 'pages':
            action = 'pages'
        elif arg == 'categories':
            action = 'categories'
        elif arg.startswith('-start:'):
            start = pywikibot.Page(pywikibot.Site(), arg[7:])
            gen = pagegenerators.AllpagesPageGenerator(
                start.title(withNamespace=False),
                namespace=start.namespace(),
                includeredirects=False)
        elif arg.startswith('-cat:'):
            cat = pywikibot.Category(pywikibot.Site(),
                                     'Category:%s' % arg[5:])
            gen = pagegenerators.CategorizedPageGenerator(cat)
        elif arg.startswith('-ref:'):
            ref = pywikibot.Page(pywikibot.Site(), arg[5:])
            gen = pagegenerators.ReferringPageGenerator(ref)
        elif arg.startswith('-link:'):
            link = pywikibot.Page(pywikibot.Site(), arg[6:])
            gen = pagegenerators.LinkedPageGenerator(link)
        elif arg.startswith('-page:'):
            gen = iter([pywikibot.Page(pywikibot.Site(), arg[6:])])
        # unrecognized arguments fall through silently (known gap,
        # flagged in the original as "#bug")
    if action == 'pages':
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = CommonsLinkBot(preloadingGen, acceptall=False)
        bot.pages()
    elif action == 'categories':
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = CommonsLinkBot(preloadingGen, acceptall=False)
        bot.categories()
    else:
        pywikibot.showHelp(u'commons_link')
def iterate_games_pages():
    """Yield game pages transcluding the games template, keeping only
    titles that carry the games page prefix.

    :rtype: list of pywikibot.page.Page
    """
    restrict_to = set()
    # Uncomment the next line in order to iterate only on this page
    # restrict_to.add("משחק: 16-09-2020 מכבי תל אביב נגד דינמו ברסט - מוקדמות ליגת האלופות")
    games_template_page = pw.Page(site, games_template_name, ns="תבנית")
    referring = pagegenerators.ReferringPageGenerator(games_template_page)
    for index, game_page in enumerate(referring):
        logging.info(f"Page number: {index}")
        if not game_page.title().startswith(games_page_prefix):
            logging.debug(
                f"Skipping ({game_page.title()} with uses '{games_template_name}' but does not start with '{games_page_prefix}' prefix"
            )
            continue
        # An empty restriction set means "yield everything".
        if not restrict_to or game_page.title() in restrict_to:
            yield game_page