def test_process_next_previous_process_two(self):
    """Run one DEALTask on two pages and check what accumulates in ``task.data``."""
    article_text = """{{REDaten |VG=Bla |NF=Blub }} {{REAutor|Autor.}}"""
    nothing_changed = {"success": True, "changed": False}
    with mock.patch(BASE_TASK_PYWIKIBOT_PAGE, new_callable=mock.MagicMock) as patched_page:
        # first page: the patched ``exists`` answers True, then False
        self.page_mock.text = article_text
        self.page_mock.title_str = "Re:Title"
        patched_page.return_value.exists.side_effect = [True, False]
        first_page = RePage(self.page_mock)
        task = DEALTask(None, self.logger)
        compare(nothing_changed, task.run(first_page))
        compare([("Blub", "Title")], task.data)

        # second page: the answers are flipped to False, then True
        self.page_mock.text = article_text
        self.page_mock.title_str = "Re:Title2"
        patched_page.return_value.exists.side_effect = [False, True]
        second_page = RePage(self.page_mock)
        compare(nothing_changed, task.run(second_page))
        compare([("Blub", "Title"), ("Bla", "Title2")], task.data)
def test_wrong_structure_corrupt_template(self):
    """A page whose opening or closing template is not terminated must raise ReDatenException."""
    corrupt_texts = (
        "{{REDaten}}\ntext0\n{{REAutor|Autor1.",
        "{{REDaten\ntext0\n{{REAutor|Autor1.}}",
    )
    for corrupt_text in corrupt_texts:
        self.text_mock.return_value = corrupt_text
        self.assertRaises(ReDatenException, RePage, self.page_mock)
def test_page_no_lock(self):
    """With an empty protection mapping, saving a modified page must not raise."""
    self.page_mock.protection.return_value = {}
    self.text_mock.return_value = ARTICLE_TEMPLATE
    page = RePage(self.page_mock)
    first_article = page[0]
    first_article.text = "bla"
    page.save("reason")
def test_add_error_cat_no_dublicate_category(self):
    """Adding a category that is already on the page must not add it a second time."""
    category_link = "[[Kategorie:Name_of_Cat]]"
    self.text_mock.return_value = ARTICLE_TEMPLATE + "\n" + category_link
    page = RePage(self.page_mock)
    page.add_error_category("Name_of_Cat")
    compare(2, len(page))
    compare(category_link, page[1])
def test_save_because_of_changes(self):
    """Saving a page read from compact wikitext writes ARTICLE_TEMPLATE and saves exactly once."""
    save_reason = "reason"
    self.text_mock.return_value = "{{REDaten}}\ntext\n{{REAutor|Autor.}}"
    page = RePage(self.page_mock)
    page.save(save_reason)
    self.text_mock.assert_called_with(ARTICLE_TEMPLATE)
    self.page_mock.save.assert_called_once_with(botflag=True, summary=save_reason)
def test_remove_error_cat_other_cat_exists(self):
    """Removing one error category leaves the other category line in place."""
    removed_line = "[[Kategorie:Name_of_Cat]]<!--note-->"
    kept_line = "[[Kategorie:Other_Cat]]<!--note-->"
    self.text_mock.return_value = "\n".join((ARTICLE_TEMPLATE, removed_line, kept_line))
    page = RePage(self.page_mock)
    page.remove_error_category("Name_of_Cat")
    compare(2, len(page))
    compare(kept_line, page[1])
def test_clean_article_list(self):
    """``clean_articles`` drops an element that was set to the empty string."""
    self.text_mock.return_value = ARTICLE_TEMPLATE + "tada." + ARTICLE_TEMPLATE
    re_page = RePage(self.page_mock)
    # two articles with the free text "tada." between them
    self.assertEqual(3, len(re_page))
    re_page[1] = ""
    re_page.clean_articles()
    self.assertEqual(2, len(re_page))
    # assertNotIn reports both operands on failure, unlike assertFalse(x in y)
    self.assertNotIn("tada", str(re_page))
def _save_re_page(self, re_page: RePage, list_of_done_tasks: List[str]):
    """
    Save ``re_page`` with a summary that names the tasks which touched it.

    Does nothing when ``self.debug`` is set. A ``ReDatenException`` raised by
    the save is logged as an error and not re-raised.

    :param re_page: page to save
    :param list_of_done_tasks: task names to list in the save summary
    """
    if self.debug:
        return
    task_names = ", ".join(list_of_done_tasks)
    save_message = f"ReScanner hat folgende Aufgaben bearbeitet: {task_names}"
    self.logger.debug(save_message)
    try:
        re_page.save(save_message)
    except ReDatenException:
        self.logger.error("RePage can't be saved.")
def test_add_error_cat_with_already_there(self):
    """Adding a category with a note that is already present leaves the category block unchanged."""
    category_block = "[[Kategorie:Name_of_Cat]]<!--note-->" \
                     "\n[[Kategorie:Other_Cat]]<!--other_error-->"
    self.text_mock.return_value = f"{ARTICLE_TEMPLATE}\n{category_block}"
    page = RePage(self.page_mock)
    page.add_error_category("Name_of_Cat", "note")
    compare(2, len(page))
    compare(category_block, page[1])
def test_page_is_locked_detect_it(self):
    """Saving raises ReDatenException when ``protection()`` reports sysop-only edit and move."""
    sysop_forever = ("sysop", "infinity")
    self.page_mock.protection.return_value = {"edit": sysop_forever, "move": sysop_forever}
    self.text_mock.return_value = ARTICLE_TEMPLATE
    page = RePage(self.page_mock)
    first_article = page[0]
    first_article.text = "bla"
    self.assertRaises(ReDatenException, page.save, "reason")
def test_page_is_locked(self):
    """A LockedPageError raised by the underlying page save surfaces as ReDatenException."""

    def raise_locked(summary, botflag):
        # parameter names must match the keywords the save is called with
        raise pywikibot.exceptions.LockedPageError(self.page_mock)

    self.text_mock.return_value = ARTICLE_TEMPLATE
    self.page_mock.save.side_effect = raise_locked
    page = RePage(self.page_mock)
    first_article = page[0]
    first_article.text = "bla"
    with self.assertRaises(ReDatenException):
        page.save("reason")
def test_process_two_tasks_alter_one(self):
    """One task instance reports ``changed`` for the first page but not for the second."""
    page_texts = (
        "{{REDaten}}\ntext\n{{REAutor|Autor.}}",
        "{{REDaten}}\nother stuff\n{{REAutor|Autor.}}",
    )
    pages = []
    for page_text in page_texts:
        self.page_mock.text = page_text
        pages.append(RePage(self.page_mock))
    with LogCapture(), self.ALNAAltereNotAllTask(None, self.logger) as task:
        for page, expect_changed in zip(pages, (True, False)):
            compare({"success": True, "changed": expect_changed}, task.run(page))
def task(self) -> bool:
    """
    Process the compiled lemma list with all activated tasks.

    For every lemma a ``RePage`` is created. A ``ReDatenException`` during
    creation is logged, reported to the ``ERROTask`` and the lemma is removed
    from ``self.data``; a pywikibot timeout is logged and the lemma skipped.
    Each active task is then applied to the page. The page is saved (outside
    debug mode) when at least one task reported work and the page is
    writable. The loop stops early once the watchdog fires.

    :return: always ``True``
    """
    active_tasks = self._activate_tasks()
    error_task = ERROTask(wiki=self.wiki, debug=self.debug, logger=self.logger)
    lemma_list = self.compile_lemma_list()
    self.logger.info("Start processing the lemmas.")
    processed_lemmas = 0
    # Count from 1 so ``idx`` is the number of lemmas handled so far; a
    # zero-based index under-reports the watchdog summary below by one.
    for idx, lemma in enumerate(lemma_list, start=1):
        self.logger.debug(
            f"Process [https://de.wikisource.org/wiki/{lemma} {lemma}]")
        list_of_done_tasks = []
        try:
            re_page = RePage(pywikibot.Page(self.wiki, lemma))
        except ReDatenException:
            # keep only the last traceback line (exception type and message)
            error = traceback.format_exc().splitlines()[-1]
            self.logger.error(
                f"The initiation of [[{lemma}]] went wrong: {error}")
            error_task.append_error(lemma, error)
            # remove Key from database if it was saved before
            with suppress(KeyError):
                del self.data[lemma]
            continue
        except pywikibot.exceptions.TimeoutError:
            self.logger.error(f"Timeout at lemma ({lemma}) creation")
            continue
        # the page already differs from its source after parsing
        if re_page.has_changed():
            list_of_done_tasks.append("BASE")
        for active_task in active_tasks:
            processed_task = self._process_task(active_task, re_page, lemma)
            if processed_task:
                list_of_done_tasks.append(processed_task)
        if list_of_done_tasks and re_page.is_writable:
            processed_lemmas += 1
            if not self.debug:
                self._save_re_page(re_page, list_of_done_tasks)
        self._add_lemma_to_data(lemma)
        if self._watchdog():
            self.logger.info(
                f"{idx} Lemmas processed, {processed_lemmas} changed.")
            self.logger.info(
                f"Oldest processed item: {datetime.now() - self.get_oldest_datetime()}"
            )
            break
    for active_task in active_tasks:
        active_task.finish_task()
    error_task.finish_task()
    return True
def test_lemma(self):
    """The three lemma properties are derived from the page title ``RE:Page``."""
    self.text_mock.return_value = ARTICLE_TEMPLATE
    self.page_mock.title.return_value = "RE:Page"
    page = RePage(self.page_mock)
    expectations = (
        ("RE:Page", "lemma"),
        ("Page", "lemma_without_prefix"),
        ("[[RE:Page|Page]]", "lemma_as_link"),
    )
    for expected, attribute in expectations:
        compare(expected, getattr(page, attribute))
def test_back_to_str_combined_with_additional_text(self):
    """``str(RePage)`` puts free text and articles on separate lines."""
    self.text_mock.return_value = \
        "1{{REDaten}}\ntext\n{{REAutor|Autor.}}2{{REDaten}}\ntext1\n{{REAutor|Autor1.}}3"
    second_article = ARTICLE_TEMPLATE.replace("text", "text1").replace("Autor.", "Autor1.")
    expected = "\n".join(["1", ARTICLE_TEMPLATE, "2", second_article, "3"])
    self.assertEqual(expected, str(RePage(self.page_mock)))
def test_process_task_alter_text(self):
    """Running MINIAlterTask reports both ``success`` and ``changed``."""
    self.page_mock.text = "{{REDaten}}\ntext\n{{REAutor|Autor.}}"
    page = RePage(self.page_mock)
    with self.MINIAlterTask(None, self.logger) as task:
        outcome = task.run(page)
        for key in ("success", "changed"):
            self.assertTrue(outcome[key])
def test_fetch_from_properties_self_append(self):
    """A page with two articles updates both the lemma and its self-supplement entry."""
    expected_main = (
        ("wp_link", "w:de:Aal_wp_link"),
        ("ws_link", "s:de:Aal_ws_link"),
        ("sort_key", "Aal"),
        ("redirect", True),
        ("previous", "Lemma Previous"),
        ("next", "Lemma Next"),
    )
    expected_supplement = (
        ("previous", "Lemma Previous2"),
        ("next", "Lemma Next2"),
    )
    with LogCapture():
        copy_tst_data("I_1_self_append", "I_1")
        self.page_mock.title_str = "RE:Aal"
        self.page_mock.text = """{{REDaten |BAND=I,1 |VORGÄNGER=Lemma Previous |NACHFOLGER=Lemma Next |WP=Aal_wp_link |WS=Aal_ws_link |SORTIERUNG=Aal |VERWEIS=ON }} text. {{REAutor|OFF}} {{REDaten |BAND=I,1 |VORGÄNGER=Lemma Previous2 |NACHFOLGER=Lemma Next2 }} text. {{REAutor|OFF}}"""
        task = SCANTask(None, self.logger)
        task.re_page = RePage(self.page_mock)
        task._process_from_article_list()

        post_lemma = task.registers["I,1"].get_lemma_by_name("Aal")
        for key, expected in expected_main:
            compare(expected, post_lemma.lemma_dict[key])

        post_lemma_append = task.registers["I,1"].get_lemma_by_name("Aal", self_supplement=True)
        for key, expected in expected_supplement:
            compare(expected, post_lemma_append.lemma_dict[key])
def test_fetch_from_properties(self):
    """All template properties of a single article end up in the register lemma."""
    expected_values = (
        ("wp_link", "w:de:Aal_wp_link"),
        ("ws_link", "s:de:Aal_ws_link"),
        ("sort_key", "Aal"),
        ("proof_read", 2),
        ("redirect", True),
        ("previous", "Lemma Previous"),
        ("next", "Lemma Next"),
        ("short_description", "Short Description"),
        ("no_creative_height", True),
    )
    with LogCapture():
        self.page_mock.title_str = "RE:Aal"
        self.page_mock.text = """{{REDaten |BAND=I,1 |VORGÄNGER=Lemma Previous |NACHFOLGER=Lemma Next |WP=Aal_wp_link |WS=Aal_ws_link |SORTIERUNG=Aal |VERWEIS=ON |KORREKTURSTAND=korrigiert |KURZTEXT=Short Description |KEINE_SCHÖPFUNGSHÖHE=ON }} text. {{REAutor|OFF}}"""
        task = SCANTask(None, self.logger)
        task.re_page = RePage(self.page_mock)
        task._process_from_article_list()
        post_lemma = task.registers["I,1"].get_lemma_by_name("Aal")
        for key, expected in expected_values:
            compare(expected, post_lemma.lemma_dict[key])
def test_develop(self):
    """
    Development helper: run DATATask against a page of the German Wikisource.

    Builds a real ``pywikibot.Site`` and prints the page's Wikidata item
    as JSON for manual inspection before running the task.
    """
    ws_wiki = pywikibot.Site(code="de", fam="wikisource", user="******")
    # "RE:Aba 1" is another page with an existing wikidata item
    lemma = pywikibot.Page(ws_wiki, "RE:Wilhelm Kroll †")  # existing wikidata_item
    print(json.dumps(lemma.data_item().toJSON(), indent=2))
    re_value = DATATask(ws_wiki, self.logger, True).run(RePage(lemma))
    # expected value first, matching the other compare() calls in the suite
    compare(True, re_value["success"])
def test_hash(self):
    """The page hash changes on article text, on a property value and on an appended article."""
    self.text_mock.return_value = ARTICLE_TEMPLATE
    page = RePage(self.page_mock)
    first_article = page[0]

    # changing the article text
    hash_before = hash(page)
    first_article.text = "bada"
    self.assertNotEqual(hash_before, hash(page))

    # changing a template property
    hash_before = hash(page)
    first_article["BAND"].value = "tada"
    self.assertNotEqual(hash_before, hash(page))

    # appending a further article
    hash_before = hash(page)
    page.append(Article.from_text("{{REAbschnitt}}\ntext\n{{REAutor|Some Author.}}"))
    self.assertNotEqual(hash_before, hash(page))
def test_execute_with_exception_altered(self):
    """EXCEAlteredTask reports no success but still reports the page as changed."""
    self.page_mock.text = "{{REDaten}}\ntext\n{{REAutor|Autor.}}"
    page = RePage(self.page_mock)
    with LogCapture(), self.EXCEAlteredTask(None, self.logger) as task:
        outcome = task.run(page)
        self.assertFalse(outcome["success"])
        self.assertTrue(outcome["changed"])
def test_proof_read(self, text, result):
    """``_fetch_proof_read`` on the first article of ``text`` must return ``result``."""
    self.page_mock.title_str = "RE:Aal"
    self.page_mock.text = text
    page = RePage(self.page_mock)
    first_article = page.splitted_article_list[0]
    scan_task = SCANTask(None, self.logger)
    scan_task.re_page = page
    fetched = scan_task._fetch_proof_read(first_article)
    compare(result, fetched)
def test_sortkey(self):
    """``_fetch_sort_key`` returns the SORTIERUNG value, or lists ``sort_key`` when it is absent."""
    cases = (
        ("""{{REDaten |BAND=I,1 |SORTIERUNG=Abalas limen }} text. {{REAutor|OFF}}""",
         ({"sort_key": "Abalas limen"}, [])),
        ("""{{REDaten |BAND=I,1 }} text. {{REAutor|OFF}}""",
         ({}, ["sort_key"])),
    )
    for page_text, expected in cases:
        self.page_mock.text = page_text
        article = RePage(self.page_mock).splitted_article_list[0]
        compare(expected, SCANTask._fetch_sort_key(article))
def test_pages(self, text, expect):
    """``_fetch_pages`` on the first article of ``text`` must return ``expect``."""
    scan_task = SCANTask(None, self.logger)
    self.page_mock.title_str = "RE:Aal"
    self.page_mock.text = text
    scan_task.re_page = page = RePage(self.page_mock)
    first_article = page.splitted_article_list[0]
    fetched = scan_task._fetch_pages(first_article)
    compare(expect, fetched)
def test_double_article(self):
    """Two consecutive articles on one page are split and keep their own text."""
    self.text_mock.return_value = "\n".join((
        "{{REDaten}}\ntext0\n{{REAutor|Autor0.}}",
        "{{REDaten}}\ntext1\n{{REAutor|Autor1.}}",
    ))
    page = RePage(self.page_mock)
    first_article, second_article = page[0], page[1]
    self.assertEqual("text0", first_article.text)
    self.assertEqual("text1", second_article.text)
def test_existing_short_description_to_lemma(self):
    """A KURZTEXT that is already set is left untouched by KURZTask."""
    self.page_mock.text = """{{REDaten |KURZTEXT=Test}} {{REAutor|Autor.}}"""
    self.page_mock.title_str = "Re:Aachen"
    page = RePage(self.page_mock)
    kurz_task = KURZTask(None, self.logger)
    expected_result = {"success": True, "changed": False}
    compare(expected_result, kurz_task.run(page))
    compare("Test", page.first_article["KURZTEXT"].value)
def test_existing_verweis_dont_add(self):
    """KURZTask leaves KURZTEXT empty for an article with VERWEIS=ON."""
    self.page_mock.text = """{{REDaten |VERWEIS=ON}} {{REAutor|Autor.}}"""
    self.page_mock.title_str = "Re:Aachen"
    page = RePage(self.page_mock)
    kurz_task = KURZTask(None, self.logger)
    expected_result = {"success": True, "changed": False}
    compare(expected_result, kurz_task.run(page))
    compare("", page.first_article["KURZTEXT"].value)
def task(self):  # pragma: no cover
    """
    Convert the text of every page found by ``search_pages`` and save it.

    A ``ReDatenException`` or ``ValueError`` raised while converting, parsing
    or saving a page is recorded on an ``ERROTask`` and processing continues
    with the next page. Nothing is saved in debug mode.

    :return: ``True`` when a second ``search_pages`` call finds nothing,
        ``False`` otherwise
    """
    error_task = ERROTask(wiki=self.wiki, debug=False, logger=self.logger)
    for lemma in self.search_pages():
        title = lemma["title"]
        page = Page(self.wiki, title)
        temp_text = page.text
        try:
            page.text = self.convert_all(temp_text)
            re_page = RePage(page)
            if not self.debug:
                re_page.save("Entfernen veralteter Vorlagen.")
        except (ReDatenException, ValueError):
            # keep only the last traceback line (exception type and message)
            error = traceback.format_exc().splitlines()[-1]
            # record the failure the same way the scanner's task() does
            error_task.append_error(title, error)
    error_task.finish_task()
    # search again: any remaining hit means the job is not finished
    return not self.search_pages()
def test_get_wd_sitelink(self):
    """Fetch the wp, ws and wd links for "RE:Demetrios 79" from a real ``pywikibot.Site``."""
    ws_wiki = pywikibot.Site(code="de", fam="wikisource", user="******")
    self.task.re_page = RePage(pywikibot.Page(ws_wiki, "RE:Demetrios 79"))
    checks = (
        (({'wp_link': 'w:en:Demetrius the Chronographer'}, []), "_fetch_wp_link"),
        (({'ws_link': 's:de:Apokryphen/Demetrius der Chronograph'}, []), "_fetch_ws_link"),
        (({'wd_link': 'd:Q3705296'}, []), "_fetch_wd_link"),
    )
    for expected, fetcher_name in checks:
        fetch = getattr(self.task, fetcher_name)
        compare(expected, fetch(self.task.re_page.splitted_article_list[0]))
def _create_mock_page(text: str = None, title: str = None):
    """
    Build a ``RePage`` around a ``MagicMock`` that stands in for a wiki page.

    :param text: value returned by the mocked ``text`` property; the property
        is left unconfigured when ``None``
    :param title: value returned by calling the mocked ``title``; left
        unconfigured when ``None``
    :return: the ``RePage`` created from the mock
    """
    mock_item = MagicMock()
    mock_type = type(mock_item)
    # Compare against None rather than truthiness so that an explicitly
    # supplied empty string is configured instead of silently ignored.
    if text is not None:
        mock_type.text = PropertyMock(return_value=text)
    if title is not None:
        mock_type.title = Mock(return_value=title)
    return RePage(mock_item)