def test_for_loop(self):
    # A plain for loop over the PDF object must visit both pages of
    # two_page.pdf; the text of each page is accumulated and checked.
    document = pdftotext.PDF(get_file("two_page.pdf"))
    combined_text = ""
    for page_text in document:
        combined_text += page_text
    self.assertIn("one", combined_text)
    self.assertIn("two", combined_text)
def test_read_landscape_0(self):
    # Page 0 of landscape_0.pdf must contain each of the markers a-d.
    document = pdftotext.PDF(get_file("landscape_0.pdf"))
    first_page = document[0]
    for marker in ("a", "b", "c", "d"):
        self.assertIn(marker, first_page)
def test_read_portrait(self):
    # Page 0 of portrait.pdf must contain each of the markers a-d.
    document = pdftotext.PDF(get_file("portrait.pdf"))
    first_page = document[0]
    expected_markers = ["a", "b", "c", "d"]
    for marker in expected_markers:
        self.assertIn(marker, first_page)
def test_read_landscape_90(self):
    # Page 0 of landscape_90.pdf, opened with the raw layout and latin1
    # encoding, must contain each of the markers a-d.
    document = pdftotext.PDF(
        get_file("landscape_90.pdf"),
        layout=pdftotext.TextLayout.raw,
        encoding="latin1",
    )
    first_page = document[0]
    for marker in ("a", "b", "c", "d"):
        self.assertIn(marker, first_page)
def test_raw_vs_not(self):
    # For table.pdf the default layout and the raw layout must yield
    # different text for page 0.
    fixture_name = "table.pdf"
    default_document = pdftotext.PDF(get_file(fixture_name))
    raw_document = pdftotext.PDF(
        get_file(fixture_name),
        layout=pdftotext.TextLayout.raw,
    )
    default_text = default_document[0]
    raw_text = raw_document[0]
    self.assertNotEqual(default_text, raw_text)
def test_iter(self):
    # Materialising the PDF with list() must give one string per page;
    # surrounding whitespace is stripped before comparing.
    document = pdftotext.PDF(get_file("two_page.pdf"))
    pages = list(document)
    stripped_pages = [page_text.strip() for page_text in pages]
    self.assertEqual(stripped_pages, ["one.", "two."])
def test_read(self):
    # Indexing page 0 of abcde.pdf must return text containing "abcde".
    document = pdftotext.PDF(get_file("abcde.pdf"))
    first_page = document[0]
    self.assertIn("abcde", first_page)
def test_init_invalid_pdf_file(self):
    # Constructing a PDF from a binary stream whose content is b"wrong"
    # is expected to raise TypeError.
    not_a_pdf = io.BytesIO(b"wrong")
    self.assertRaises(TypeError, pdftotext.PDF, not_a_pdf)
def test_init_corrupt_pdf_file(self):
    # Opening corrupt.pdf is expected to raise ValueError. The fixture is
    # fetched inside the context, so the whole sequence is covered.
    with self.assertRaises(ValueError):
        corrupt_file = get_file("corrupt.pdf")
        pdftotext.PDF(corrupt_file)
def test_double_init_success(self):
    # Calling __init__ a second time on an existing PDF object must
    # succeed; afterwards the object reports a length of 1.
    document = pdftotext.PDF(get_file("abcde.pdf"))
    document.__init__(get_file("blank.pdf"))
    page_count = len(document)
    self.assertEqual(page_count, 1)
def test_init_file_in_text_mode(self):
    # Passing a text-mode stream (StringIO) instead of a binary one is
    # expected to raise TypeError.
    text_stream = io.StringIO(u"wrong")
    self.assertRaises(TypeError, pdftotext.PDF, text_stream)
def test_read_corrupt_page(self):
    # Reading page 0 of corrupt_page.pdf is expected to raise IndexError.
    # Opening the file happens inside the context as well, so an
    # IndexError raised at either step satisfies the assertion.
    with self.assertRaises(IndexError):
        corrupt_file = get_file("corrupt_page.pdf")
        document = pdftotext.PDF(corrupt_file)
        document[0]
def test_pdf_read_wrong_arg_type(self):
    # Subscripting a PDF with a string instead of an integer is expected
    # to raise TypeError.
    document = pdftotext.PDF(get_file("blank.pdf"))
    bad_index = "wrong"
    with self.assertRaises(TypeError):
        document[bad_index]
def test_read_page_two(self):
    # Index 1 of two_page.pdf must return text containing "two".
    document = pdftotext.PDF(get_file("two_page.pdf"))
    second_page = document[1]
    self.assertIn("two", second_page)
def test_list_last_element(self):
    # Negative indexing must work: index -1 of two_page.pdf must return
    # text containing "two".
    document = pdftotext.PDF(get_file("two_page.pdf"))
    last_page = document[-1]
    self.assertIn("two", last_page)
def test_list_invalid_element(self):
    # Index 2 is requested from two_page.pdf and is expected to raise
    # IndexError.
    document = pdftotext.PDF(get_file("two_page.pdf"))
    out_of_range = 2
    with self.assertRaises(IndexError):
        document[out_of_range]
def test_list_length(self):
    # len() of the PDF built from two_page.pdf must be 2.
    document = pdftotext.PDF(get_file("two_page.pdf"))
    page_count = len(document)
    self.assertEqual(page_count, 2)
def test_length_one(self):
    # len() of the PDF built from blank.pdf must be 1.
    document = pdftotext.PDF(get_file("blank.pdf"))
    page_count = len(document)
    self.assertEqual(page_count, 1)
def test_raw_is_not_default(self):
    # Opening table.pdf with no layout argument must give the same page 0
    # text as opening it with the physical layout explicitly requested.
    fixture_name = "table.pdf"
    default_document = pdftotext.PDF(get_file(fixture_name))
    physical_document = pdftotext.PDF(
        get_file(fixture_name),
        layout=pdftotext.TextLayout.physical,
    )
    default_text = default_document[0]
    physical_text = physical_document[0]
    self.assertEqual(default_text, physical_text)
def test_pdf_read_invalid_page_number(self):
    # Index 100 is requested from blank.pdf and is expected to raise
    # IndexError.
    document = pdftotext.PDF(get_file("blank.pdf"))
    far_out_of_range = 100
    with self.assertRaises(IndexError):
        document[far_out_of_range]