def main(*args):
    """Check the articles of all categories and publish a summary.

    Besides the arguments understood by ``pywikibot.handle_args`` and by the
    ``ArticleIteratorArgumentParser``, two options are handled here:

    * ``-outputpage:<title>`` -- stored on the bot as ``outputpage``; the
      summary is saved to ``<title>/Zusammenfassung``.
    * ``-exclude-articles:<title>`` -- wiki page from which the excluded
      articles are loaded.

    Without an output page the summary is written via ``pywikibot.output``.
    """
    outputpage_prefix = "-outputpage:"
    exclude_prefix = "-exclude-articles:"

    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)

    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    checker_bot = CheckerBot(checker, site)

    all_categories = fetcher.get_categories()
    article_iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=checker_bot.cb_store_category_result,
            article_callback=checker_bot.cb_check_article,
            logging_callback=pywikibot.log,
        ),
        categories=all_categories,
    )
    parser = ArticleIteratorArgumentParser(article_iterator, fetcher)

    for argument in pywikibot.handle_args(list(args)):
        # Give the shared iterator parser the first chance at every argument.
        if parser.check_argument(argument):
            continue
        if argument.startswith(outputpage_prefix):
            checker_bot.outputpage = argument[len(outputpage_prefix):]
        elif argument.startswith(exclude_prefix):
            exclusion_page = pywikibot.Page(site, argument[len(exclude_prefix):])
            article_iterator.excluded_articles = load_excluded_articles_from_wiki(exclusion_page)

    article_iterator.iterate_categories()

    if article_iterator.categories != all_categories:
        # Don't update summary page if only single categories were crawled
        return

    summary = checker_bot.generate_summary_page()
    if not checker_bot.outputpage:
        for text in (u"Zusammenfassung", u"===============", summary):
            pywikibot.output(text)
        # NOTE(review): the nesting of this call was reconstructed from
        # flattened source -- confirm it belongs to the console-only branch.
        pywikibot.output(checker_bot.generate_config_table())
    else:
        checker_bot.save_wikipage(summary, checker_bot.outputpage + u"/Zusammenfassung")
def main(*args):
    """Export list articles (or their categories) to stdout or to files.

    Arguments handled here, after ``pywikibot.handle_args``:

    * ``-out:categories`` -- write one ``<category title>.txt`` file per
      category containing that category's articles.
    * ``-out:file`` -- write to ``Denkmallistenliste.txt`` instead of stdout.
    * ``-fmt:wiki`` -- format every entry as ``[[{}]]``.
    * ``-fmt:url`` -- format every entry as
      ``https://de.wikipedia.org/wiki/{}``.
    * ``-list-cat-only`` -- export the result of
      ``fetcher.get_categories(False)`` and stop; it has no effect when
      combined with ``-out:categories``.

    The file opened for ``-out:file`` is closed before returning, including
    on the early-return path and when an exception is raised.
    """
    utf8_writer = codecs.getwriter('utf8')
    single_categories = False
    categories_only = False
    formatstring = "{}\n"
    output_destination = utf8_writer(sys.stdout)
    # Only a file opened here is ours to close; stdout must stay open.
    opened_file = None

    try:
        for arg in pywikibot.handle_args(list(args)):
            if arg == "-out:categories":
                single_categories = True
            elif arg == "-out:file":
                if opened_file is not None:
                    # A repeated -out:file must not leak the earlier handle.
                    opened_file.close()
                opened_file = codecs.open("Denkmallistenliste.txt", "w", 'utf-8')
                output_destination = opened_file
            elif arg == "-fmt:wiki":
                formatstring = "[[{}]]\n"
            elif arg == "-fmt:url":
                formatstring = "https://de.wikipedia.org/wiki/{}\n"
            elif arg == "-list-cat-only":
                categories_only = True

        site = pywikibot.Site()
        fetcher = CategoryFetcher(site)

        if categories_only and not single_categories:
            export_to_file(output_destination, fetcher.get_categories(False), formatstring)
            return

        if single_categories:
            for category in fetcher.get_categories():
                with codecs.open(category.title() + u".txt", "w", 'utf-8') as outfile:
                    export_to_file(outfile, category.articles(), formatstring)
        else:
            collection = ArticleCollection()
            callbacks = ArticleIteratorCallbacks(article_callback=collection.cb_add_article)
            article_iterator = ArticleIterator(callbacks, fetcher.get_categories())
            article_iterator.iterate_categories()
            export_to_file(output_destination, collection.article_list, formatstring)
    finally:
        if opened_file is not None:
            opened_file.close()
def test_article_iterator_iterates_over_categories(self):
    """Expect one category callback (counter 0) for a category without articles."""
    empty_category = Mock()
    empty_category.articles.return_value = []
    mock_callbacks = Mock()

    iterator = ArticleIterator(mock_callbacks)
    iterator.categories = [empty_category]
    iterator.iterate_categories()

    mock_callbacks.category.assert_called_once_with(
        category=empty_category,
        counter=0,
        article_iterator=iterator,
    )
def test_article_iterator_returns_correct_counter(self):
    """Expect the category callback to receive counter=10 for ten articles."""
    on_category = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))

    category = Mock()
    # The same mock article repeated ten times; only the count matters here.
    category.articles.return_value = [Mock()] * 10
    iterator.categories = [category]

    iterator.iterate_categories()

    on_category.assert_called_once_with(
        category=category,
        counter=10,
        article_iterator=iterator,
    )
def test_article_iterator_with_multiple_categories_stops_at_limit(self):
    """Expect a single category callback when the first category already hits the limit of 10."""
    on_category = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))
    iterator.limit = 10

    category = Mock()
    category.articles.return_value = [Mock()] * 10
    # The same ten-article category is listed twice.
    iterator.categories = [category] * 2

    iterator.iterate_categories()

    on_category.assert_called_once_with(
        category=category,
        counter=10,
        article_iterator=iterator,
    )
def test_article_iterator_iterates_over_articles(self):
    """Expect the article callback for each article with counters 0 and 1."""
    mock_callbacks = Mock()
    iterator = ArticleIterator(mock_callbacks)

    first_article, second_article = Mock(), Mock()
    category = Mock()
    category.articles.return_value = [first_article, second_article]
    iterator.categories = [category]

    iterator.iterate_categories()

    for position, article in enumerate((first_article, second_article)):
        mock_callbacks.article.assert_any_call(
            article=article,
            category=category,
            counter=position,
            article_iterator=iterator,
        )
def test_excluded_articles_are_skipped(self):
    """Expect only the non-excluded article ("Bar") to reach the article callback."""
    on_article = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(article_callback=on_article))

    excluded_article = Mock()
    excluded_article.title.return_value = "Foo"
    kept_article = Mock()
    kept_article.title.return_value = "Bar"

    category = Mock()
    category.articles.return_value = [excluded_article, kept_article]

    iterator.categories = [category]
    iterator.excluded_articles = ["Foo"]
    iterator.iterate_categories()

    on_article.assert_called_once_with(
        article=kept_article,
        category=category,
        counter=0,
        article_iterator=iterator,
    )
def test_category_limit_is_respected_together_with_limit(self):
    """Expect 5 article callbacks and 2 category callbacks for limit=5 and a per-category limit of 3."""
    on_category = Mock()
    on_article = Mock()
    iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=on_category,
            article_callback=on_article,
        )
    )
    iterator.limit = 5
    iterator.articles_per_category_limit = 3

    category = Mock()
    category.articles.return_value = [Mock()] * 10
    # Four entries, all pointing at the same ten-article category.
    iterator.categories = [category] * 4

    iterator.iterate_categories()

    self.assertEqual(on_article.call_count, 5)
    self.assertEqual(on_category.call_count, 2)
def test_article_iterator_logs_every_n_articles(self):
    """Expect a "Fetching page" log message per article when log_every_n is 1."""
    on_log = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(logging_callback=on_log))
    iterator.log_every_n = 1

    foo_article = Mock()
    foo_article.title.return_value = "Foo"
    bar_article = Mock()
    bar_article.title.return_value = "Bar"

    category = Mock()
    category.articles.return_value = [foo_article, bar_article]
    iterator.categories = [category]

    iterator.iterate_categories()

    on_log.assert_any_call(u"Fetching page 0 (Foo)")
    on_log.assert_any_call(u"Fetching page 1 (Bar)")