Exemplo n.º 1
0
class JobRunnerTestCase(unittest.TestCase):
    """Exercise ``MetadataExtractor`` against sample PDF files.

    The PDF paths are relative (``./pdf/...``), so they are resolved against
    the current working directory of the test run.
    """

    def setUp(self):
        """Create a fresh ``MetadataExtractor`` before every test."""
        self.metadata_extractor = MetadataExtractor()

    def test_extract_data(self):
        """``extract`` returns non-None ``metadata`` and ``text`` entries."""
        info = self.metadata_extractor.extract('./pdf/realPdf.pdf')
        self.assertIsNotNone(info['metadata'])
        self.assertIsNotNone(info['text'])

    def test_get_pdf_index_info(self):
        """``get_index_info`` reports the author and a non-None ``contenu``."""
        info = self.metadata_extractor.extract('./pdf/M13_LAMAISON.pdf')
        pdf_index_info = self.metadata_extractor.get_index_info(info)
        self.assertEqual(pdf_index_info['Author'], "Maxime")
        # The second argument of assertIsNotNone is the failure message, not
        # an expected value, so make it describe what went wrong.
        self.assertIsNotNone(
            pdf_index_info['contenu'],
            "index info 'contenu' should not be None",
        )
Exemplo n.º 2
0
def index_pdf(pdf_path, mongo_host="localhost", mongo_port=27017):
    """Extract the index information of a PDF and push it through ``MongoApi``.

    Args:
        pdf_path: Path of the PDF, passed to ``MetadataExtractor.extract``.
        mongo_host: Host given to ``MongoApi``. Defaults to ``"localhost"``.
        mongo_port: Port given to ``MongoApi``. Defaults to ``27017``.
    """
    metadata_extractor = MetadataExtractor()
    # Host and port used to be hard-coded; the defaults keep the old behaviour.
    mongo_api = MongoApi(mongo_host, mongo_port)
    info = metadata_extractor.extract(pdf_path)
    index_info = metadata_extractor.get_index_info(info)
    mongo_api.push(index_info)
Exemplo n.º 3
0
 def setUp(self):
     """Give each test its own ``MetadataExtractor`` instance."""
     extractor = MetadataExtractor()
     self.metadata_extractor = extractor