Пример #1
0
 def test_bmp(self):
     """Parse the sample BMP image and verify the archive file and its text.

     The parser is fed ``simple.bmp`` with the ``image/bmp`` MIME type; the
     test then checks that ``archive_path`` points at an existing file and
     that the extracted text contains the expected sentence
     (case-insensitively).
     """
     parser = RasterisedDocumentParser(None)
     sample = os.path.join(self.SAMPLE_FILES, "simple.bmp")
     parser.parse(sample, "image/bmp")

     self.assertTrue(os.path.isfile(parser.archive_path))
     # assertIn reports both operands on failure, unlike assertTrue(a in b),
     # which only says "False is not true".
     self.assertIn("this is a test document", parser.get_text().lower())
Пример #2
0
 def test_skip_noarchive_notext(self):
     """Parse the image-only multi-page PDF and verify archive and page text.

     NOTE(review): the test name suggests a specific OCR mode is configured
     (e.g. via a settings override), but no such override is visible on this
     method here -- confirm against the full file.
     """
     parser = RasterisedDocumentParser(None)
     sample = os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf")
     parser.parse(sample, "application/pdf")

     # The original asserted os.path.join(parser.archive_path), which merely
     # returns the (truthy) path string and so never checked anything.
     # Check that the archive file really exists, as the sibling tests do.
     self.assertTrue(os.path.isfile(parser.archive_path))
     self.assertContainsStrings(
         parser.get_text().lower(), ["page 1", "page 2", "page 3"]
     )
Пример #3
0
 def test_multi_page_pages_force(self):
     """Parse the digital multi-page PDF; expect an archive file and all pages.

     Verifies that ``archive_path`` refers to an existing file and that the
     lowercased extracted text contains "page 1", "page 2" and "page 3".
     """
     parser = RasterisedDocumentParser(None)
     pdf_path = os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf")
     parser.parse(pdf_path, "application/pdf")

     archive_exists = os.path.isfile(parser.archive_path)
     self.assertTrue(archive_exists)

     expected_pages = ["page 1", "page 2", "page 3"]
     extracted = parser.get_text().lower()
     self.assertContainsStrings(extracted, expected_pages)
Пример #4
0
 def test_multi_page_analog_pages_redo(self):
     """Parse the image-only multi-page PDF; expect only pages 1 and 2 in text.

     Verifies that the archive file exists, that the lowercased extracted
     text contains "page 1" and "page 2", and that it does not contain
     "page 3".

     NOTE(review): the missing third page presumably comes from a page-limit
     setting applied outside this method (not visible here) -- confirm.
     """
     parser = RasterisedDocumentParser(None)
     sample = os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf")
     parser.parse(sample, "application/pdf")

     self.assertTrue(os.path.isfile(parser.archive_path))

     # Lower-case the text once and reuse it for both checks.
     text = parser.get_text().lower()
     self.assertContainsStrings(text, ["page 1", "page 2"])
     # assertNotIn shows the offending text on failure, unlike
     # assertFalse(a in b).
     self.assertNotIn("page 3", text)
Пример #5
0
    def test_with_form_force(self):
        """Parse the sample PDF containing a form and check its extracted text.

        The extracted text must contain both the form prompt and the
        document's descriptive sentence (case-sensitive match).
        """
        parser = RasterisedDocumentParser(None)
        form_pdf = os.path.join(self.SAMPLE_FILES, "with-form.pdf")
        parser.parse(form_pdf, "application/pdf")

        expected_fragments = [
            "Please enter your name in here:",
            "This is a PDF document with a form.",
        ]
        self.assertContainsStrings(parser.get_text(), expected_fragments)