def test_first_edit(self):
    """Test EdittimeFilterPageGenerator with first-edit bounds.

    Pages first edited before 2006-01-01 must pass the ``first_edit_end``
    filter; the complementary ``first_edit_start`` filter must yield only
    pages outside that expected set.
    """
    expect = (
        u'The Addams Family (pinball)',
        u'Talk:Nowy Sącz',
        u'Template:Template',
    )
    # Keep only pages whose first edit is before 2006-01-01.
    gen = PagesFromTitlesGenerator(self.titles, self.site)
    gen = pagegenerators.EdittimeFilterPageGenerator(
        gen, first_edit_end=datetime.datetime(2006, 1, 1))
    self.assertPagelistTitles(gen, titles=expect, site=self.site)

    # The opposite bound must yield only pages NOT in ``expect``.
    gen = PagesFromTitlesGenerator(self.titles, self.site)
    gen = pagegenerators.EdittimeFilterPageGenerator(
        gen, first_edit_start=datetime.datetime(2006, 1, 1))
    opposite_pages = list(gen)
    self.assertTrue(
        all(isinstance(p, pywikibot.Page) for p in opposite_pages))
    # BUG FIX: ``p.title`` is a bound method; it must be CALLED.
    # The original compared the method object itself to the title
    # strings, which made the assertion vacuously true.
    self.assertTrue(all(p.title() not in expect for p in opposite_pages))
def test_last_edit(self):
    """Check that EdittimeFilterPageGenerator honours last-edit bounds."""
    two_days_ago = datetime.datetime.now() - datetime.timedelta(days=2)
    nine_days_ago = datetime.datetime.now() - datetime.timedelta(days=9)

    # The sandbox is edited constantly: its last edit falls within the
    # last two days, so the start-bound keeps it ...
    pages = PagesFromTitlesGenerator(['Wikipedia:Sandbox'], self.site)
    recent = pagegenerators.EdittimeFilterPageGenerator(
        pages, last_edit_start=two_days_ago)
    self.assertEqual(len(list(recent)), 1)

    # ... while the end-bound filters it out.
    pages = PagesFromTitlesGenerator(['Wikipedia:Sandbox'], self.site)
    stale = pagegenerators.EdittimeFilterPageGenerator(
        pages, last_edit_end=two_days_ago)
    self.assertEqual(len(list(stale)), 0)

    # Template:Sidebox is rarely touched: its last edit is older than
    # nine days, so the end-bound keeps it ...
    pages = PagesFromTitlesGenerator(['Template:Sidebox'], self.site)
    dormant = pagegenerators.EdittimeFilterPageGenerator(
        pages, last_edit_end=nine_days_ago)
    self.assertEqual(len(list(dormant)), 1)

    # ... and the start-bound filters it out.
    pages = PagesFromTitlesGenerator(['Template:Sidebox'], self.site)
    active = pagegenerators.EdittimeFilterPageGenerator(
        pages, last_edit_start=nine_days_ago)
    self.assertEqual(len(list(active)), 0)
def process_parameter(self, parameter):
    '''
    Process the given template parameter supplied to the request
    template and return a set of articles referred to by the parameter.

    A parameter of the localised form "category=Title" (optionally with a
    digit suffix on the keyword) is expanded into the category page and,
    if configured, a suffixed "WikiProject" variant; any other parameter
    is treated as a plain page title.  Categories are expanded into up to
    256 member articles via the site API.

    @param parameter: the parameter
    @type parameter: unicode
    @return: set of pywikibot.Page objects referred to by the parameter
    '''
    # Regular expression to match parameters on the form "category1= ..."
    # and so on, localised to the language we're talking
    catparam_regex = re.compile(r"\s*{catparam}\d*=".format(
        catparam=config.th_category[self.lang]), re.I)
    # Suffix to use when using categories to fetch pages of interest
    catname_suffix = config.wikiproject_suffix[self.lang]

    # Titles as a set to automatically ignore duplicates
    listed_titles = set()
    # All articles we've found (stored as pywikibot.Page objects)
    found_articles = set()

    # Support "category=" parameter by removing the keyword and adding
    # the category namespace name to the title.
    if catparam_regex.match(parameter):
        # BUG FIX: was "cataparam_regex" (misspelled), which raised a
        # NameError whenever a category parameter was actually matched.
        parameter = self.site.category_namespace() \
            + ":" + catparam_regex.sub("", parameter)
        listed_titles.add(parameter)
        # Support category suffixes by adding that as well
        if catname_suffix:
            listed_titles.add('{param}{suffix}'.format(
                param=parameter, suffix=catname_suffix))
    else:
        # Just this one title
        listed_titles.add(parameter)

    try:
        # Turn title set into a list and process articles/categories
        seed_pages = PagesFromTitlesGenerator(list(listed_titles),
                                              site=self.site)
        for seedpage in seed_pages:
            if not seedpage.exists():
                logging.warning("listed {page} does not exist".format(
                    page=seedpage.title()))
                continue
            if not seedpage.isCategory():
                if seedpage.namespace() != 0:
                    logging.warning(
                        "listed page {page} not in ns 0".format(
                            page=seedpage.title()))
                elif seedpage.isRedirectPage():
                    try:
                        found_articles.add(seedpage.getRedirectTarget())
                    except pywikibot.exceptions.NoPage:
                        logging.warning(
                            "listed {page} redirects to a non-existent page".format(
                                page=seedpage.title()))
                else:
                    # Everything looks OK, add the page
                    found_articles.add(seedpage)
            else:
                # We use the site's categorymembers() method to get
                # articles and talk pages, ordered by when they were
                # added to the category.  For large categories we
                # should thereby get the oldest and hopefully most
                # developed articles in our seed set of articles.
                if seedpage.isCategoryRedirect():
                    try:
                        seedpage = seedpage.getCategoryRedirectTarget()
                    except pywikibot.exceptions.NoPage:
                        logging.warning(
                            "listed {title} redirects to a non-existent category".format(
                                title=seedpage.title()))
                        continue  # skip this category

                cat_articles = set()
                # namespaces=[0, 1]: main articles and their talk pages;
                # talk pages are mapped back to the article side below.
                for catmember in self.site.categorymembers(
                        seedpage, namespaces=[0, 1],
                        sortby="timestamp", reverse=True):
                    if catmember.namespace() == 0:
                        cat_articles.add(catmember)
                    else:
                        cat_articles.add(catmember.toggleTalkPage())
                    # Cap the per-category contribution at 256 articles.
                    if len(cat_articles) >= 256:
                        break
                # Add the category's articles to our result
                found_articles = found_articles.union(cat_articles)
    except pywikibot.Error:
        logging.warning("Failed to instantiate and iterate over list generator, or something else went wrong")

    # okay, done
    return found_articles