def task(self) -> bool:
    """Run all active tasks over the compiled lemma list.

    For every lemma a ``RePage`` is created. If that raises
    ``ReDatenException`` the error is logged, handed to the error task and
    the lemma is dropped from ``self.data``; on a pywikibot timeout the
    lemma is only logged. In both cases the lemma is skipped. Otherwise
    each active task is run through ``self._process_task`` and the page is
    saved (unless ``self.debug`` is set) when at least one task reported
    work and the page is writable. The loop stops early as soon as
    ``self._watchdog()`` returns a truthy value.

    Returns:
        Always ``True``.
    """
    active_tasks = self._activate_tasks()
    error_task = ERROTask(wiki=self.wiki, debug=self.debug, logger=self.logger)
    lemma_list = self.compile_lemma_list()
    self.logger.info("Start processing the lemmas.")
    processed_lemmas = 0
    # Count from 1: the summary logged on watchdog exit is written after the
    # current lemma was handled, so a zero-based index under-reports by one
    # (and could even be smaller than the number of changed lemmas).
    for lemma_count, lemma in enumerate(lemma_list, start=1):
        self.logger.debug(
            f"Process [https://de.wikisource.org/wiki/{lemma} {lemma}]")
        list_of_done_tasks = []
        try:
            re_page = RePage(pywikibot.Page(self.wiki, lemma))
        except ReDatenException:
            # Only the last traceback line is kept for the log and the
            # error task.
            error = traceback.format_exc().splitlines()[-1]
            self.logger.error(
                f"The initiation of [[{lemma}]] went wrong: {error}")
            error_task.append_error(lemma, error)
            # Remove the key from the database if it was saved before.
            with suppress(KeyError):
                del self.data[lemma]
            continue
        except pywikibot.exceptions.TimeoutError:
            self.logger.error(f"Timeout at lemma ({lemma}) creation")
            continue
        # "BASE" marks that the page already differs before any task ran.
        if re_page.has_changed():
            list_of_done_tasks.append("BASE")
        for active_task in active_tasks:
            processed_task = self._process_task(active_task, re_page, lemma)
            if processed_task:
                list_of_done_tasks.append(processed_task)
        if list_of_done_tasks and re_page.is_writable:
            processed_lemmas += 1
            # In debug mode the lemma is counted but nothing is saved.
            if not self.debug:
                self._save_re_page(re_page, list_of_done_tasks)
        self._add_lemma_to_data(lemma)
        if self._watchdog():
            self.logger.info(
                f"{lemma_count} Lemmas processed, {processed_lemmas} changed.")
            self.logger.info(
                f"Oldest processed item: {datetime.now() - self.get_oldest_datetime()}"
            )
            break
    # Finish every task, regardless of how the loop ended.
    for active_task in active_tasks:
        active_task.finish_task()
    error_task.finish_task()
    return True
def test_has_not_changed(self):
    # A page whose text is ARTICLE_TEMPLATE must not be reported as changed.
    self.text_mock.return_value = ARTICLE_TEMPLATE
    self.assertFalse(RePage(self.page_mock).has_changed())
def test_has_changed(self):
    # A page built from this minimal article text must be reported as changed.
    self.text_mock.return_value = "{{REDaten}}text{{REAutor|Autor.}}"
    page_under_test = RePage(self.page_mock)
    changed = page_under_test.has_changed()
    self.assertTrue(changed)