Exemplos de ProofreadPage._do_hocr em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: pywikibot.proofreadpage

Classe / Tipo: ProofreadPage

Método / Função: _do_hocr

Exemplos em hotexamples.com: 2

ProofreadPage._do_hocr em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de pywikibot.proofreadpage.ProofreadPage._do_hocr em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

ProofreadPage(28)

_do_hocr(2)

_do_ocr(2)

index(2)

ocr(2)

text(2)

title(2)

_page_to_json(1)

namespace(1)

page_number(1)

Métodos Frequentes

ProofreadPage (28)

_do_hocr (2)

_do_ocr (2)

index (2)

ocr (2)

text (2)

title (2)

_page_to_json (1)

namespace (1)

page_number (1)

Exemplo n.º 1

0

Exibir arquivo

class TestPageOCR(TestCase):

    """Check the OCR helpers of ProofreadPage against one reference page.

    The expected values in ``data`` are ``(error, text)`` pairs, one per
    OCR backend exercised below, plus the title of the page under test.
    """

    family = 'wikisource'
    code = 'en'

    cached = True

    data = {
        'title': 'Page:Popular Science Monthly Volume 1.djvu/10',
        'hocr': (
            False,
            'ENTERED, according to Act of Congress, in the '
            'year 1872,\nBY D. APPLETON & CO.,\nIn the Ofﬁce '
            'of the Librarian of Congress, at '
            'Washington.\n\n',
        ),
        'ocr': (
            False,
            'lam-mam, according to Act of Congress, in the '
            'year 157-2,\nBY D. APPLEION Av CO.,\nIn the '
            'Of\ufb01ce or the Librarian of '
            'Congress, at Washington.\n\n',
        ),
        'googleOCR': (
            False,
            'ENTERED, according to Act of Congress, in '
            'the year 1572,\nBY D. APPLETON & CO.\n'
            'In the Office of the Librarian of '
            'Congress, at Washington.\n4 334\n',
        ),
    }

    def setUp(self):
        """Build the ProofreadPage under test, then run the base setUp."""
        self.page = ProofreadPage(self.get_site(), self.data['title'])
        super(TestPageOCR, self).setUp()

    def test_ocr_exceptions(self):
        """page.ocr() must raise TypeError for an unknown ocr_tool."""
        with self.assertRaises(TypeError):
            self.page.ocr(ocr_tool='dummy')

    def test_do_hocr(self):
        """page._do_hocr() must return the reference (error, text) pair."""
        got_error, got_text = self.page._do_hocr()
        want_error, want_text = self.data['hocr']
        self.assertEqual(got_error, want_error)
        self.assertEqual(got_text, want_text)

    def test_do_ocr_phetools_raw_request(self):
        """A raw request to the phetools endpoint must answer with 200."""
        uri = ''.join((
            'https://tools.wmflabs.org/phetools/ocr.php?cmd=ocr',
            '&url=https://upload.wikimedia.org/wikipedia/commons/',
            'thumb/a/ac/Popular_Science_Monthly_Volume_1.djvu/',
            'page10-1024px-Popular_Science_Monthly_Volume_1.djvu.jpg',
            '&lang=en&user=None',
        ))
        self.assertEqual(http.fetch(uri).status, 200)

    def test_do_ocr_phetools(self):
        """page._do_ocr(ocr_tool='phetools') must match the reference."""
        got_error, got_text = self.page._do_ocr(ocr_tool='phetools')
        want_error, want_text = self.data['ocr']
        self.assertEqual(got_error, want_error)
        self.assertEqual(got_text, want_text)

    def test_do_ocr_googleocr(self):
        """page._do_ocr(ocr_tool='googleOCR') must match the reference."""
        got_error, got_text = self.page._do_ocr(ocr_tool='googleOCR')
        want_error, want_text = self.data['googleOCR']
        self.assertEqual(got_error, want_error)
        self.assertEqual(got_text, want_text)

    def test_ocr_googleocr(self):
        """page.ocr(ocr_tool='googleOCR') must return the reference text."""
        got_text = self.page.ocr(ocr_tool='googleOCR')
        # Only the text half of the reference pair is compared here.
        _, want_text = self.data['googleOCR']
        self.assertEqual(got_text, want_text)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: proofreadpage_tests.py Projeto: xaster-Kies/pywikibot

class TestPageOCR(BS4TestCase):

    """Check the OCR helpers of ProofreadPage against one reference page.

    The expected values in ``data`` are ``(error, text)`` pairs, one per
    OCR backend exercised below, plus the title of the page under test.
    OCR text is compared by similarity ratio rather than for equality.
    """

    family = 'wikisource'
    code = 'en'

    cached = True

    data = {
        'title': 'Page:Popular Science Monthly Volume 1.djvu/10',
        'hocr': (
            False,
            'ENTERED, according to Act of Congress, in the '
            'year 1872,\nBY D. APPLETON & CO.,\nIn the Ofﬁce '
            'of the Librarian of Congress, at '
            'Washington.\n\n',
        ),
        'ocr': (
            False,
            'EsTEnen, according to Act of Congress, in the '
            'year 1872,\nBy D. APPLETON & CO.,\nIn the '
            'Office of the Librarian of Congress, at '
            'Washington.\n\u000c',
        ),
        'googleOCR': (
            False,
            'ENTERED, according to Act of Congress, in '
            'the year 1572,\nBY D. APPLETON & CO.\n'
            'In the Office of the Librarian of '
            'Congress, at Washington.\n4 334\n',
        ),
    }

    def _assert_similar(self, text, ref_text):
        """Assert that *text* and *ref_text* have a match ratio above 0.9."""
        matcher = difflib.SequenceMatcher(None, text, ref_text)
        self.assertGreater(matcher.ratio(), 0.9)

    def setUp(self):
        """Build the ProofreadPage under test, then run the base setUp."""
        self.page = ProofreadPage(self.get_site(), self.data['title'])
        super().setUp()

    def test_ocr_exceptions(self):
        """page.ocr() must raise TypeError for an unknown ocr_tool."""
        with self.assertRaises(TypeError):
            self.page.ocr(ocr_tool='dummy')

    def test_do_hocr(self):
        """page._do_hocr() must be close to the reference text."""
        got_error, got_text = self.page._do_hocr()
        if got_error:
            # On error the second element is used as the skip reason.
            self.skipTest(got_text)
        want_error, want_text = self.data['hocr']
        self.assertEqual(got_error, want_error)
        self._assert_similar(got_text, want_text)

    def test_do_ocr_phetools(self):
        """page._do_ocr(ocr_tool='phetools') must be close to the reference."""
        got_error, got_text = self.page._do_ocr(ocr_tool='phetools')
        want_error, want_text = self.data['ocr']
        if got_error:
            # On error the second element is used as the skip reason.
            self.skipTest(got_text)
        self.assertEqual(got_error, want_error)
        self._assert_similar(got_text, want_text)

    def test_do_ocr_googleocr(self):
        """page._do_ocr(ocr_tool='googleOCR') must be close to the reference."""
        got_error, got_text = self.page._do_ocr(ocr_tool='googleOCR')
        if got_error:
            # On error the second element is used as the skip reason.
            self.skipTest(got_text)
        want_error, want_text = self.data['googleOCR']
        self.assertEqual(got_error, want_error)
        self._assert_similar(got_text, want_text)

    def test_ocr_googleocr(self):
        """page.ocr(ocr_tool='googleOCR') returns close text or ValueError."""
        try:
            got_text = self.page.ocr(ocr_tool='googleOCR')
        except Exception as exc:
            # Any exception other than ValueError is reported as a failure.
            self.assertIsInstance(exc, ValueError)
            return
        _, want_text = self.data['googleOCR']
        self._assert_similar(got_text, want_text)