def testPdfParserPhysical(self):
    """Check the label of the first physical-structure entry.

    A PdfParser is built for ``pdf_file_name`` (addressed through a
    ``file://`` URL); the first entry returned by
    ``get_physical_structure()`` must carry the file name as its label.
    """
    parser = PdfParser(pdf_file_name, "file://%s" % pdf_file_name,
                       pdf_file_name)
    structure = parser.get_physical_structure()
    first_label = structure[0]['label']
    failure_msg = "Physical Structure missmatch: %s != %s" % (
        first_label, pdf_file_name)
    self.assertEqual(first_label, pdf_file_name, failure_msg)
def testPdfParserLogical(self):
    """Check that the first logical-structure entry is 'Introduction'.

    The label of the first item returned by ``get_logical_structure()``
    is compared against the expected section title.
    """
    expected_label = "Introduction"
    source_url = "file://%s" % pdf_file_name
    parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
    toc = parser.get_logical_structure()
    actual_label = toc[0]["label"]
    failure_msg = "TOC is not well detected: %s != %s" % (
        actual_label, expected_label)
    self.assertEqual(actual_label, expected_label, failure_msg)
def testPdfParserLogical(self):
    """Verify the first logical-structure label of the sample PDF.

    The first entry returned by ``get_logical_structure()`` is expected
    to be labelled 'Introduction'.
    """
    wanted = 'Introduction'
    parser = PdfParser(
        pdf_file_name,
        "file://%s" % pdf_file_name,
        pdf_file_name,
    )
    entries = parser.get_logical_structure()
    found = entries[0]['label']
    self.assertEqual(
        found,
        wanted,
        "TOC is not well detected: %s != %s" % (found, wanted),
    )
def testPdfParserMeta(self):
    """Check the title reported in the PDF metadata.

    The ``title`` entry of ``get_metadata()`` must equal the expected
    document title.
    """
    expected_title = u'Multivio: Project description'
    source_url = "file://%s" % pdf_file_name
    parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
    metadata = parser.get_metadata()
    actual_title = metadata['title']
    failure_msg = "Metadata has not been correctly detected %s != %s" % (
        actual_title, expected_title)
    self.assertEqual(actual_title, expected_title, failure_msg)
def testPdfParserMetaFileSize(self):
    """Check the file size reported in the PDF metadata.

    The ``fileSize`` entry of ``get_metadata()`` is compared with the
    size that ``os.path.getsize`` reports for the same file.
    """
    source_url = "file://%s" % pdf_file_name
    parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
    metadata = parser.get_metadata()
    reported_size = metadata["fileSize"]
    size_on_disk = os.path.getsize(pdf_file_name)
    failure_msg = "File size has not been correctly detected %s != %s" % (
        reported_size, size_on_disk)
    self.assertEqual(reported_size, size_on_disk, failure_msg)
def testPdfParserMetaFileSize(self):
    """Verify that the metadata file size matches the file on disk.

    ``get_metadata()['fileSize']`` must equal the value returned by
    ``os.path.getsize`` for ``pdf_file_name``.
    """
    parser = PdfParser(
        pdf_file_name,
        "file://%s" % pdf_file_name,
        pdf_file_name,
    )
    parsed_size = parser.get_metadata()['fileSize']
    actual_size = os.path.getsize(pdf_file_name)
    self.assertEqual(
        parsed_size,
        actual_size,
        "File size has not been correctly detected %s != %s"
        % (parsed_size, actual_size),
    )
def testPdfParserPhysical(self):
    """Verify the first physical-structure label of the sample PDF.

    The first entry returned by ``get_physical_structure()`` is expected
    to be labelled with ``pdf_file_name``.
    """
    parser = PdfParser(
        pdf_file_name,
        "file://%s" % pdf_file_name,
        pdf_file_name,
    )
    entries = parser.get_physical_structure()
    found = entries[0]["label"]
    self.assertEqual(
        found,
        pdf_file_name,
        "Physical Structure missmatch: %s != %s" % (found, pdf_file_name),
    )
def testPdfParserMeta(self):
    """Verify the document title found in the PDF metadata.

    ``get_metadata()['title']`` must equal the expected title string.
    """
    wanted = u"Multivio: Project description"
    parser = PdfParser(
        pdf_file_name,
        "file://%s" % pdf_file_name,
        pdf_file_name,
    )
    found = parser.get_metadata()["title"]
    self.assertEqual(
        found,
        wanted,
        "Metadata has not been correctly detected %s != %s"
        % (found, wanted),
    )
def testPdfParserLogicalSegfault(self): """Get Pdf logical structure for a malformed pdf: http://www.tendancesit.com/pdf/19.pdf.""" file_name = "examples/toc_segfault.pdf" try: file(file_name) except IOError: raise Exception( "Please go the examples directory and run: get_examples.sh") pdf_parser = PdfParser(file_name, "file://%s" % file_name, file_name) logic = pdf_parser.get_logical_structure() first_section = logic[0]['label'] first_section_ref = 'Tendances IT 19 WEB.pdf' self.assertEqual (first_section, first_section_ref, "TOC is not well "\ "detected: %s != %s" %(first_section, first_section_ref))
def testPdfParserLogicalSegfault(self): """Get Pdf logical structure for a malformed pdf: http://www.tendancesit.com/pdf/19.pdf.""" file_name = "examples/toc_segfault.pdf" try: file(file_name) except IOError: raise Exception("Please go the examples directory and run: get_examples.sh") pdf_parser = PdfParser(file_name, "file://%s" % file_name, file_name) logic = pdf_parser.get_logical_structure() first_section = logic[0]["label"] first_section_ref = "Tendances IT 19 WEB.pdf" self.assertEqual( first_section, first_section_ref, "TOC is not well " "detected: %s != %s" % (first_section, first_section_ref), )
def testPdfParser(self):
    """Check that a PdfParser instance can be created.

    The parser is built for ``pdf_file_name`` (addressed through a
    ``file://`` URL) and the resulting object must be truthy.
    """
    pdf_parser = PdfParser(pdf_file_name, "file://%s" % pdf_file_name,
                           pdf_file_name)
    # ``assertTrue`` is the supported spelling of the deprecated
    # ``assert_`` alias, which newer unittest versions no longer provide.
    self.assertTrue(pdf_parser, "Can not create simple Parser Object")