class JobRunnerTestCase(unittest.TestCase):
    """Test case exercising MetadataExtractor against PDF fixtures on disk."""

    def setUp(self):
        """Create a fresh MetadataExtractor before every test."""
        self.metadata_extractor = MetadataExtractor()

    def test_extract_data(self):
        """extract() must yield non-None 'metadata' and 'text' entries."""
        extracted = self.metadata_extractor.extract('./pdf/realPdf.pdf')
        # Same checks, same order: 'metadata' first, then 'text'.
        for required_key in ('metadata', 'text'):
            self.assertIsNotNone(extracted[required_key])

    def test_get_pdf_index_info(self):
        """get_index_info() must expose the expected author and some content."""
        extractor = self.metadata_extractor
        extracted = extractor.extract('./pdf/M13_LAMAISON.pdf')
        index_info = extractor.get_index_info(extracted)

        self.assertEqual(index_info['Author'], "Maxime")
        # NOTE(review): the second argument of assertIsNotNone is only the
        # failure message, so "bla bla" is not compared with the content.
        self.assertIsNotNone(index_info['contenu'], "bla bla")
def index_pdf(pdf_path, mongo_host="localhost", mongo_port=27017):
    """Extract the index information of a PDF and push it through MongoApi.

    Args:
        pdf_path: Path of the PDF; passed unchanged to
            MetadataExtractor.extract.
        mongo_host: Host handed to MongoApi. Defaults to "localhost", the
            value that used to be hard-coded.
        mongo_port: Port handed to MongoApi. Defaults to 27017, the value
            that used to be hard-coded.

    Any exception raised by the extractor or by MongoApi propagates to the
    caller; nothing is returned.
    """
    metadata_extractor = MetadataExtractor()
    # Host/port are parameters so callers can target a non-local server
    # without editing this function; defaults keep the old behaviour.
    mongo_api = MongoApi(mongo_host, mongo_port)

    info = metadata_extractor.extract(pdf_path)
    index_info = metadata_extractor.get_index_info(info)
    mongo_api.push(index_info)
def setUp(self):
    """Attach a newly built MetadataExtractor to ``self.metadata_extractor``."""
    extractor = MetadataExtractor()
    self.metadata_extractor = extractor