def test_index(self):
    """Exercise the getter, deleter and setter of ProofreadPage.index."""
    # Case 1: page linked from a single Index page.
    proofread_page = ProofreadPage(self.site, self.valid['title'])
    expected_index = IndexPage(self.site, self.valid['index'])

    # Getter.
    self.assertEqual(proofread_page.index, expected_index)

    # Deleter removes the cached '_index' attribute.
    del proofread_page.index
    self.assertFalse(hasattr(proofread_page, '_index'))

    # Setter refuses a value of the wrong type.
    with self.assertRaises(TypeError):
        proofread_page.index = 'invalid index'

    # Setter accepts an IndexPage.
    proofread_page.index = expected_index
    self.assertEqual(proofread_page.index, expected_index)

    # Case 2: page linked from more than one Index page.
    multilinked = self.existing_multilinked
    proofread_page = ProofreadPage(self.site, multilinked['title'])
    first_index = IndexPage(self.site, multilinked['index_1'])
    second_index = IndexPage(self.site, multilinked['index_2'])
    self.assertEqual(proofread_page.index, first_index)
    self.assertNotEqual(proofread_page.index, second_index)
    self.assertEqual(proofread_page._index, (first_index, [second_index]))

    # Case 3: page without any Index page.
    proofread_page = ProofreadPage(self.site,
                                   self.existing_unlinked['title'])
    self.assertIsNone(proofread_page.index)
    self.assertEqual(proofread_page._index, (None, []))
def test_url_image(self):
    """Check ProofreadPage.url_image for linked and unlinked pages."""
    # Samples whose scan image url must match the stored fixture value.
    for sample in (self.valid, self.valid_redlink):
        linked_page = ProofreadPage(self.site, sample['title'])
        self.assertEqual(linked_page.url_image, sample['url_image'])

    # Reading the property on the unlinked page must raise ValueError.
    unlinked_page = ProofreadPage(self.site,
                                  self.existing_unlinked['title'])
    with self.assertRaises(ValueError):
        unlinked_page.url_image
def test_valid_parsing(self):
    """Compare the parsed page attributes with the fixture values."""
    parsed_page = ProofreadPage(self.site, self.valid['title'])
    # Each attribute name doubles as the key of its expected value.
    for attribute in ('ql', 'user', 'header', 'footer'):
        self.assertEqual(getattr(parsed_page, attribute),
                         self.valid[attribute])
def test_applicable_quality_level(self):
    """Check content model and quality level of a proofread page."""
    proofread_page = ProofreadPage(
        self.get_site(),
        'Page:Popular Science Monthly Volume 49.djvu/1')
    self.assertEqual(proofread_page.content_model, 'proofread-page')
    self.assertEqual(proofread_page.quality_level, 0)
def process_pages(self, temp_data):
    """Record quality level and references for every lemma.

    For each entry of ``self.lemmas`` the title is matched against
    ``self.regex_page``; group 1 is used as the year and group 2 as the
    page. The quality level (key ``"q"``) and, when present, the result
    of ``search_for_refs`` (key ``"r"``) are stored in
    ``self.data["pages"][year][page]``, and the page is appended to
    ``temp_data[year]``.

    Any exception raised while handling a lemma is logged as an error
    and processing continues with the next lemma.

    :param temp_data: mapping from year to a list of processed pages;
        it is extended in place.
    """
    for position, lemma in enumerate(self.lemmas, start=1):
        try:
            match = self.regex_page.search(lemma["title"])
            year = match.group(1)
            page = match.group(2)
            # Create the per-year container on first use.
            self.data["pages"].setdefault(year, {})
            proofread_lemma = ProofreadPage(self.wiki,
                                            f"Seite:{lemma['title']}")
            if self.debug:
                self.logger.debug(
                    f"{position}/{len(self.lemmas)} Page {page}({year}) "
                    f"has quality level {proofread_lemma.quality_level} "
                    f"_ Seite:{lemma['title']}")
            ref = search_for_refs(proofread_lemma.text)
            entry = {"q": int(proofread_lemma.quality_level)}
            if ref:
                self.logger.debug(
                    f"There are refs ({ref}) @ {year}, {page}")
                entry["r"] = ref
            self.data["pages"][year][page] = entry
            temp_data.setdefault(year, []).append(page)
        except Exception as error:  # pylint: disable=broad-except
            # Best effort: one broken lemma must not stop the whole run.
            self.logger.error(
                f"wasn't able to process {lemma['title']}, error: {error}")
def test_preload_from_not_existing_page(self):
    """Check the text generated for a page that does not exist."""
    expected_text = (
        '<noinclude><pagequality level="1" user="" />'
        '<div class="pagetext">\n\n\n</noinclude>'
        '<noinclude><references/></div></noinclude>'
    )
    missing_page = ProofreadPage(self.site, 'dummy test page')
    self.assertEqual(missing_page.text, expected_text)
def test_preload_from_empty_text(self):
    """Test the text composed after assigning an empty text to the page.

    The expected text carries the name of the logged in user in the
    ``user`` attribute of the ``pagequality`` tag.
    """
    page = ProofreadPage(self.site, 'dummy test page')
    page.text = ''
    # The template needs a '%s' placeholder: applying '%' to a string
    # without any conversion specifier raises TypeError ("not all
    # arguments converted during string formatting").
    expected_text = (
        '<noinclude><pagequality level="1" user="%s" />'
        '<div class="pagetext">\n\n\n</noinclude>'
        '<noinclude></div></noinclude>'
    ) % self.site.username()
    self.assertEqual(page.text, expected_text)
def gen(self):
    """Yield one ProofreadPage per number given by page_number_gen().

    Each yielded page gets the number it was built from stored in its
    ``page_number`` attribute.
    """
    for number in self.page_number_gen():
        proofread_page = ProofreadPage(
            self._index.site,
            '{prefix}/{number}'.format(prefix=self._prefix, number=number))
        # Remember page number in djvu file.
        proofread_page.page_number = number
        yield proofread_page
def test_valid_link_source(self):
    """Build a ProofreadPage from a valid Link and compare both."""
    link = pywikibot.Link(
        self.valid['title'],
        source=self.site,
        default_namespace=self.site.proofread_page_ns)
    proofread_page = ProofreadPage(link)
    # Title (without namespace) and namespace must match the Link.
    self.assertEqual(proofread_page.title(with_ns=False), link.title)
    self.assertEqual(proofread_page.namespace(), link.namespace)
def test_invalid_not_existing_page_source(self):
    """Build a ProofreadPage from a Page moved into the Page namespace."""
    original = pywikibot.Page(self.site,
                              self.not_existing_invalid['title'])
    # Namespace is forced: rebuild the page from its bare title inside
    # the site's proofread page namespace.
    bare_title = original.title(with_ns=False)
    forced = pywikibot.Page(self.site, bare_title,
                            ns=self.site.proofread_page_ns)
    proofread_page = ProofreadPage(forced)
    self.assertEqual(proofread_page.title(), forced.title())
def test_parse_title(self, key):
    """Check the base, extension and number parsed from the title."""
    site_data = self.sites[key]
    page_title = site_data['title']
    expected_base, expected_ext, expected_num = site_data['tuple']
    proofread_page = ProofreadPage(self.site, page_title)
    self.assertEqual(proofread_page._base, expected_base)
    self.assertEqual(proofread_page._base_ext, expected_ext)
    self.assertEqual(proofread_page._num, expected_num)
def test_preload_from_empty_text(self):
    """Check the text composed after assigning an empty text."""
    proofread_page = ProofreadPage(self.site, 'Page:dummy test page')
    proofread_page.text = ''
    # The expected wrapper depends on whether the header has a div.
    has_div = proofread_page._full_header._has_div
    class_pagetext, div = self.class_pagetext_fmt[has_div]
    self.assertEqual(
        proofread_page.text,
        self.fmt.format(user=self.site.username(),
                        class_pagetext=class_pagetext,
                        references='',
                        div_end=div))
def test_preload_from_not_existing_page(self):
    """Check the text generated for a page that does not exist."""
    proofread_page = ProofreadPage(self.site, 'Page:dummy test page')
    # The text is read first so that page._full_header is instantiated;
    # it is needed below to prepare the expected result.
    proofread_page.text
    has_div = proofread_page._full_header._has_div
    class_pagetext, div = self.class_pagetext_fmt[has_div]
    self.assertEqual(
        proofread_page.text,
        self.fmt.format(user=self.site.username(),
                        class_pagetext=class_pagetext,
                        references='<references/>',
                        div_end=div))
def setUpClass(cls):
    """Derive the 'get_page' dataset from 'get_number' for each site."""
    super(TestIndexPageMappings, cls).setUpClass()
    for site_name, definition in cls.sites.items():
        site = cls.get_site(name=site_name)
        title_template = definition['page']

        # 'get_page' mirrors 'get_number': a list of [label, set] pairs,
        # holding pages where 'get_number' holds page numbers.
        pages_by_label = definition['get_page'] = []
        for label, page_numbers in definition['get_number']:
            pages = set(ProofreadPage(site, title_template.format(number))
                        for number in page_numbers)
            pages_by_label.append([label, pages])
def test_get_labels(self, key):
    """Check IndexPage label lookup by page number and by page."""
    site_data = self.sites[key]
    number, title_number, expected_label = site_data['get_label']
    index = IndexPage(self.site, site_data['index'])
    page = ProofreadPage(self.site,
                         site_data['page'].format(title_number))

    # Lookup by page number; an unknown number raises KeyError.
    self.assertEqual(index.get_label_from_page_number(number),
                     expected_label)
    with self.assertRaises(KeyError):
        index.get_label_from_page_number(-1)

    # Lookup by page; an unknown page raises KeyError.
    self.assertEqual(index.get_label_from_page(page), expected_label)
    with self.assertRaises(KeyError):
        index.get_label_from_page(None)
def test_page_gen(self, key):
    """Check range validation and quality filtering of page_gen()."""
    site_data = self.sites[key]
    # The label part of the fixture is not needed by this test.
    number, title_number, _ = site_data['get_label']
    index = IndexPage(self.site, site_data['index'])
    page = ProofreadPage(self.site,
                         site_data['page'].format(title_number))

    # Invalid start/end limits are rejected with ValueError.
    for start, end in ((-1, 2), (1, -1), (2, 1)):
        self.assertRaises(ValueError, index.page_gen, start, end)

    # With all five levels of range(5) allowed the page is returned.
    all_levels = index.page_gen(number, number, filter_ql=range(5))
    self.assertEqual(list(all_levels), [page])

    # Restricted to level 0 nothing is returned.
    level_zero = index.page_gen(number, number, filter_ql=[0])
    self.assertEqual(list(level_zero), [])
def test_json_format(self):
    """Test conversion to json format.

    The latest revision of the page is requested from the API with
    content format 'application/json' and compared, after JSON
    decoding, with the output of ``page._page_to_json()``.
    """
    page = ProofreadPage(self.site, self.valid['title'])

    rvargs = {'rvprop': 'ids|flags|timestamp|user|comment|content',
              'rvcontentformat': 'application/json',
              'titles': page,
              }

    rvgen = self.site._generator(api.PropertyGenerator,
                                 type_arg='info|revisions',
                                 total=1, **rvargs)
    rvgen.set_maximum_items(-1)  # suppress use of rvlimit parameter

    try:
        pagedict = next(iter(rvgen))
        loaded_text = pagedict.get('revisions')[0].get('*')
    except (StopIteration, TypeError, KeyError, ValueError,
            IndexError) as error:
        # Without the reference text there is nothing to compare with.
        # Fail explicitly rather than leaving 'loaded_text' unbound,
        # which would surface later as an unrelated NameError.
        self.fail('Unable to load the page content in JSON format: {!r}'
                  .format(error))

    page_text = page._page_to_json()
    self.assertEqual(json.loads(page_text), json.loads(loaded_text))
def setUp(self):
    """Create the page under test, then run the parent set up."""
    page_title = 'Page:Popular Science Monthly Volume 1.djvu/12'
    self._page = ProofreadPage(self.site, page_title)
    super(TestLoadRevisionsCachingProofreadPage, self).setUp()
def setUp(self):
    """Create the page under test, then run the parent set up."""
    page_title = 'Page:Popular Science Monthly Volume 1.djvu/12'
    self._page = ProofreadPage(self.site, page_title)
    super(TestBasePageMethodsProofreadPage, self).setUp()
def setUp(self):
    """Create the page from the class data, then run the parent set up."""
    self.page = ProofreadPage(self.get_site(), self.data['title'])
    super(TestPageOCR, self).setUp()
def test_invalid_existing_page_source(self):
    """Check that an invalid existing Page source raises ValueError."""
    invalid_source = pywikibot.Page(self.site,
                                    self.existing_invalid['title'])
    self.assertRaises(ValueError, ProofreadPage, invalid_source)
def test_invalid_not_existing_page_source_wrong_ns(self):
    """Check that a Page built from the 'title1' fixture is rejected."""
    invalid_source = pywikibot.Page(self.site,
                                    self.not_existing_invalid['title1'])
    self.assertRaises(ValueError, ProofreadPage, invalid_source)
def test_decompose_recompose_text(self):
    """Test ProofreadPage page decomposing/composing text.

    The text exposed by ProofreadPage must be identical to the text of
    the plain Page with the same title.
    """
    page = ProofreadPage(self.site, self.valid['title'])
    plain_text = pywikibot.Page(self.site, self.valid['title']).text
    # Use a unittest assertion instead of a bare 'assert': the latter is
    # removed when Python runs with -O, silently skipping the check.
    self.assertTrue(page.text)
    self.assertEqual(plain_text, page.text)
def test_invalid_link_source(self):
    """Check that an invalid Link source raises ValueError."""
    invalid_link = pywikibot.Link(self.not_existing_invalid['title'],
                                  source=self.site)
    self.assertRaises(ValueError, ProofreadPage, invalid_link)
def test_div_in_footer(self):
    """Check that the parsed footer ends with a closing div tag."""
    proofread_page = ProofreadPage(self.site, self.div_in_footer['title'])
    footer = proofread_page.footer
    self.assertTrue(footer.endswith('</div>'))
def test_invalid_site_source(self):
    """Check that building a page on this site raises UnknownExtension."""
    self.assertRaises(pywikibot.UnknownExtension,
                      ProofreadPage, self.site, 'title')
def setUpClass(cls):
    """Create the index, the listed pages and the missing page."""
    super(TestIndexPageMappingsRedlinks, cls).setUpClass()
    cls.index = IndexPage(cls.site, cls.index_name)
    # One ProofreadPage per configured page name, in the same order.
    proofread_pages = []
    for page_name in cls.page_names:
        proofread_pages.append(ProofreadPage(cls.site, page_name))
    cls.pages = proofread_pages
    cls.missing = ProofreadPage(cls.site, cls.missing_name)
def test_valid_site_source(self):
    """Check the namespace of a page built from a site and a title."""
    proofread_page = ProofreadPage(self.site, 'Page:dummy test page')
    expected_namespace = self.site.proofread_page_ns
    self.assertEqual(proofread_page.namespace(), expected_namespace)