Exemplo n.º 1
0
def main(*args):
    """Export category or article titles as formatted text lines.

    Arguments are first passed through ``pywikibot.handle_args``; the
    remaining ones are interpreted as follows (unknown ones are ignored):

    ``-out:categories``
        Write one ``<category title>.txt`` file per category, each holding
        that category's articles.
    ``-out:file``
        Write the combined listing to ``Denkmallistenliste.txt`` instead of
        standard output.
    ``-fmt:wiki``
        Format every entry as a wiki link (``[[...]]``).
    ``-fmt:url``
        Format every entry as a ``https://de.wikipedia.org/wiki/`` URL.
    ``-list-cat-only``
        Export the result of ``fetcher.get_categories(False)`` rather than
        articles (ignored when ``-out:categories`` is also given).
    """
    single_categories = False
    write_to_file = False
    formatstring = "{}\n"
    categories_only = False
    for arg in pywikibot.handle_args(list(args)):
        if arg == "-out:categories":
            single_categories = True
        elif arg == "-out:file":
            write_to_file = True
        elif arg == "-fmt:wiki":
            formatstring = "[[{}]]\n"
        elif arg == "-fmt:url":
            formatstring = "https://de.wikipedia.org/wiki/{}\n"
        elif arg == "-list-cat-only":
            categories_only = True

    # Open the destination only once, after all arguments are known, so that
    # exactly one handle exists and it can be closed reliably below.
    if write_to_file:
        output_destination = codecs.open("Denkmallistenliste.txt", "w", 'utf-8')
    else:
        output_destination = codecs.getwriter('utf8')(sys.stdout)

    try:
        site = pywikibot.Site()
        fetcher = CategoryFetcher(site)

        if categories_only and not single_categories:
            export_to_file(output_destination, fetcher.get_categories(False), formatstring)
            return

        if single_categories:
            for category in fetcher.get_categories():
                with codecs.open(category.title() + u".txt", "w", 'utf-8') as outfile:
                    export_to_file(outfile, category.articles(), formatstring)
        else:
            collection = ArticleCollection()
            callbacks = ArticleIteratorCallbacks(article_callback=collection.cb_add_article)
            article_iterator = ArticleIterator(callbacks, fetcher.get_categories())
            article_iterator.iterate_categories()
            export_to_file(output_destination, collection.article_list, formatstring)
    finally:
        # Only close what this function opened; sys.stdout must stay usable.
        if write_to_file:
            output_destination.close()
Exemplo n.º 2
0
def main(*args):
    """Run the template checker over all categories and publish a summary.

    Arguments not consumed by ``pywikibot.handle_args`` are offered to the
    ``ArticleIteratorArgumentParser`` first. Two further options are handled
    here:

    ``-outputpage:<title>``
        Stored on the checker bot as ``outputpage``; the summary is saved to
        ``<title>/Zusammenfassung``.
    ``-exclude-articles:<title>``
        Wiki page from which the excluded articles are loaded.

    The summary is produced only when the iterator still covers the complete
    category list; without an output page it is written via
    ``pywikibot.output`` together with the config table.
    """
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)

    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    checker_bot = CheckerBot(checker, site)

    all_categories = fetcher.get_categories()
    article_iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=checker_bot.cb_store_category_result,
            article_callback=checker_bot.cb_check_article,
            logging_callback=pywikibot.log,
        ),
        categories=all_categories,
    )
    parser = ArticleIteratorArgumentParser(article_iterator, fetcher)

    outputpage_prefix = "-outputpage:"
    exclude_prefix = "-exclude-articles:"
    for argument in pywikibot.handle_args(list(args)):
        if parser.check_argument(argument):
            continue
        if argument.startswith(outputpage_prefix):
            checker_bot.outputpage = argument[len(outputpage_prefix):]
        elif argument.startswith(exclude_prefix):
            exclusion_page = pywikibot.Page(site, argument[len(exclude_prefix):])
            article_iterator.excluded_articles = load_excluded_articles_from_wiki(exclusion_page)

    article_iterator.iterate_categories()

    # Don't update summary page if only single categories were crawled
    if article_iterator.categories != all_categories:
        return

    summary = checker_bot.generate_summary_page()
    if not checker_bot.outputpage:
        # No target page configured: print the report instead of saving it.
        pywikibot.output(u"Zusammenfassung")
        pywikibot.output(u"===============")
        pywikibot.output(summary)
        pywikibot.output(checker_bot.generate_config_table())
        return
    checker_bot.save_wikipage(summary, checker_bot.outputpage + u"/Zusammenfassung")
Exemplo n.º 3
0
def main(*args):
    """Run the Commons bot over the media of one category.

    Arguments not consumed by ``pywikibot.handle_args`` are interpreted as
    follows (unknown ones are ignored):

    ``-category:<name>``
        Category to process (default: "Images from Wiki Loves Monuments 2015
        in Germany").
    ``-start-at:<YYYY-MM-DD>``
        Start date; midnight UTC of that day is used as start time. The
        default is the value returned by ``first_day_of_month()``.
    ``-sleep-seconds:<n>``
        Positive integer stored as ``commons_bot.sleep_seconds``; values
        that are not greater than zero are ignored. A non-numeric value
        raises ``ValueError``.
    ``-once``
        Call ``commons_bot.run_once`` instead of
        ``commons_bot.run_continuously``.
    ``-local-media``
        Look the category up on the configured wiki instead of Commons.

    Every other argument is offered to ``ArticleIteratorArgumentParser``.
    """
    wikipedia_site = pywikibot.Site()  # Use the site configured in params/user-config
    commons_site = pywikibot.Site("commons", "commons")
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    commons_bot = CommonsBot(wikipedia_site, checker)
    callbacks = ArticleIteratorCallbacks(
        logging_callback=pywikibot.log,
        article_callback=commons_bot.cb_check_article
    )
    article_iterator = ArticleIterator(callbacks)
    article_iterator.log_every_n = 1
    parser = ArticleIteratorArgumentParser(article_iterator, None)
    run_cmd = commons_bot.run_continuously
    category_name = u"Images from Wiki Loves Monuments 2015 in Germany"
    start_time = first_day_of_month()

    category_prefix = "-category:"
    start_prefix = "-start-at:"
    sleep_prefix = "-sleep-seconds:"
    # Pass a list, as the sibling scripts do, rather than the raw *args tuple.
    for argument in pywikibot.handle_args(list(args)):
        if argument.startswith(category_prefix):
            category_name = argument[len(category_prefix):]
        elif parser.check_argument(argument):
            continue
        elif argument.startswith(start_prefix):
            # Zero-padded hour: "T0:00:00Z" is not valid ISO 8601 and is
            # rejected by strict parsers.
            start_time_iso = argument[len(start_prefix):] + "T00:00:00Z"
            start_time = pywikibot.Timestamp.fromISOformat(start_time_iso)
        elif argument.startswith(sleep_prefix):
            # Parse once; non-positive values leave the bot's default alone.
            sleep_seconds = int(argument[len(sleep_prefix):])
            if sleep_seconds > 0:
                commons_bot.sleep_seconds = sleep_seconds
        elif argument == "-once":
            run_cmd = commons_bot.run_once
        elif argument == "-local-media":
            commons_site = wikipedia_site
    category = pywikibot.Category(commons_site, category_name)
    run_cmd(article_iterator, start_time, category)
 def test_article_iterator_iterates_over_categories(self):
     """The category callback is invoked once for a category without articles."""
     empty_category = Mock()
     empty_category.articles.return_value = []
     callback_spy = Mock()

     iterator = ArticleIterator(callback_spy)
     iterator.categories = [empty_category]
     iterator.iterate_categories()

     callback_spy.category.assert_called_once_with(
         category=empty_category, counter=0, article_iterator=iterator
     )
 def test_article_iterator_returns_correct_counter(self):
     """The category callback receives counter=10 for a ten-article category."""
     on_category = Mock()
     ten_articles = 10 * [Mock()]
     category = Mock()
     category.articles.return_value = ten_articles

     iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))
     iterator.categories = [category]
     iterator.iterate_categories()

     on_category.assert_called_once_with(
         category=category, counter=10, article_iterator=iterator
     )
 def test_article_iterator_with_multiple_categories_stops_at_limit(self):
     """With limit=10 and two ten-article categories only one category callback fires."""
     on_category = Mock()
     category = Mock()
     category.articles.return_value = 10 * [Mock()]

     iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))
     iterator.limit = 10
     # The same category twice: the second pass must not trigger the callback.
     iterator.categories = [category] * 2
     iterator.iterate_categories()

     on_category.assert_called_once_with(
         category=category, counter=10, article_iterator=iterator
     )
 def test_article_iterator_iterates_over_articles(self):
     """The article callback is invoked for each article with counters 0 and 1."""
     first_article, second_article = Mock(), Mock()
     category = Mock()
     category.articles.return_value = [first_article, second_article]
     callback_spy = Mock()

     iterator = ArticleIterator(callback_spy)
     iterator.categories = [category]
     iterator.iterate_categories()

     expected_calls = ((first_article, 0), (second_article, 1))
     for expected_article, expected_counter in expected_calls:
         callback_spy.article.assert_any_call(
             article=expected_article,
             category=category,
             counter=expected_counter,
             article_iterator=iterator,
         )
 def test_excluded_articles_are_skipped(self):
     """Only the article whose title is not excluded reaches the article callback."""
     excluded_article = Mock()
     excluded_article.title.return_value = "Foo"
     kept_article = Mock()
     kept_article.title.return_value = "Bar"
     category = Mock()
     category.articles.return_value = [excluded_article, kept_article]
     on_article = Mock()

     iterator = ArticleIterator(ArticleIteratorCallbacks(article_callback=on_article))
     iterator.categories = [category]
     iterator.excluded_articles = ["Foo"]
     iterator.iterate_categories()

     on_article.assert_called_once_with(
         article=kept_article, category=category, counter=0, article_iterator=iterator
     )
 def test_article_iterator_logs_every_n_articles(self):
     """With log_every_n=1 a "Fetching page" message is logged for both articles."""
     foo_article = Mock()
     foo_article.title.return_value = "Foo"
     bar_article = Mock()
     bar_article.title.return_value = "Bar"
     category = Mock()
     category.articles.return_value = [foo_article, bar_article]
     on_log = Mock()

     iterator = ArticleIterator(ArticleIteratorCallbacks(logging_callback=on_log))
     iterator.log_every_n = 1
     iterator.categories = [category]
     iterator.iterate_categories()

     on_log.assert_any_call(u"Fetching page 0 (Foo)")
     on_log.assert_any_call(u"Fetching page 1 (Bar)")
 def test_category_limit_is_respected_together_with_limit(self):
     """limit=5 with a per-category limit of 3 yields 5 article and 2 category callbacks."""
     on_category = Mock()
     on_article = Mock()
     category = Mock()
     category.articles.return_value = 10 * [Mock()]

     iterator = ArticleIterator(
         ArticleIteratorCallbacks(
             category_callback=on_category,
             article_callback=on_article,
         )
     )
     iterator.limit = 5
     iterator.articles_per_category_limit = 3
     # Four passes over the same category are offered to the iterator.
     iterator.categories = [category] * 4
     iterator.iterate_categories()

     self.assertEqual(on_article.call_count, 5)
     self.assertEqual(on_category.call_count, 2)