def test_image_support(self):
    """Check that ``pdf_extraction`` returns images when ``images=True``.

    The result must contain 'text', 'metadata' and 'images' entries;
    'images' must be a non-empty list whose first element is a PIL JPEG
    image file object.
    """
    results = pdf_extraction(PDF, images=True)
    assert 'text' in results
    assert 'metadata' in results
    assert 'images' in results

    images = results['images']
    assert isinstance(images, list)
    # Fail with a readable assertion rather than an IndexError when the
    # extraction yields no images at all.
    assert images, "expected at least one extracted image"
    assert isinstance(images[0], PIL.JpegImagePlugin.JpegImageFile)
Пример #2
0
 def test_url_support(self):
     """Run ``pdf_extraction`` on a PDF given by URL and check the result.

     The result must expose a 'text' entry holding a string and a
     'metadata' entry holding a dict.
     """
     pdf_url = "https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"
     extracted = pdf_extraction(pdf_url)

     returned_keys = extracted.keys()
     for expected_key in ('text', 'metadata'):
         assert expected_key in returned_keys

     text_value = extracted.get('text')
     assert isinstance(text_value, six.string_types)
     metadata_value = extracted.get('metadata')
     assert isinstance(metadata_value, dict)
Пример #3
0
 def test_image_support(self):
     """Check that ``pdf_extraction`` returns images when ``images=True``.

     The result must contain 'text', 'metadata' and 'images' entries;
     'images' must be a non-empty list whose first element is a
     ``PpmImageFile``.
     """
     results = pdf_extraction(PDF, images=True)
     assert 'text' in results
     assert 'metadata' in results
     assert 'images' in results

     images = results['images']
     assert isinstance(images, list)
     # Fail with a readable assertion rather than an IndexError when the
     # extraction yields no images at all.
     assert images, "expected at least one extracted image"
     assert isinstance(images[0], PpmImageFile)
 def test_url_support(self):
     """Verify extraction from a remote PDF addressed by URL.

     Asserts that 'text' and 'metadata' are present in the result and
     that they are, respectively, a string and a dict.
     """
     remote_pdf = "https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"
     response = pdf_extraction(remote_pdf)
     available = response.keys()
     assert 'text' in available
     assert 'metadata' in available
     # Type checks run in the same order as the presence checks above.
     expected_types = (('text', six.string_types), ('metadata', dict))
     for field, field_type in expected_types:
         assert isinstance(response.get(field), field_type)
 def test_image_support(self):
     """Check image extraction together with ``raw_text=True``.

     The result must contain "raw_text", "metadata" and "images" entries;
     "images" must be a non-empty list whose first element is a PIL JPEG
     image file object.
     """
     results = pdf_extraction(PDF, raw_text=True, images=True)
     assert "raw_text" in results
     assert "metadata" in results
     assert "images" in results

     images = results["images"]
     assert isinstance(images, list)
     # Fail with a readable assertion rather than an IndexError when the
     # extraction yields no images at all.
     assert images, "expected at least one extracted image"
     assert isinstance(images[0], PIL.JpegImagePlugin.JpegImageFile)
Пример #6
0
 def test_table_support(self):
     """Check that ``tables=True`` adds a list of tables to the result.

     The result must contain 'text', 'metadata' and 'tables' entries, and
     'tables' must be a list.
     """
     extracted = pdf_extraction(PDF, tables=True)
     returned_keys = extracted.keys()
     for expected_key in ('text', 'metadata', 'tables'):
         assert expected_key in returned_keys
     tables = extracted.get('tables')
     assert isinstance(tables, list)
Пример #7
0
 def test_pdf_extraction(self):
     """Check the default output of ``pdf_extraction`` for a single PDF.

     The result must contain 'text' (a string) and 'metadata' (a dict).
     """
     extracted = pdf_extraction(PDF)

     returned_keys = extracted.keys()
     for expected_key in ('text', 'metadata'):
         assert expected_key in returned_keys

     text_value = extracted.get('text')
     assert isinstance(text_value, six.string_types)
     metadata_value = extracted.get('metadata')
     assert isinstance(metadata_value, dict)
 def test_table_support(self):
     """Verify that requesting tables yields a 'tables' list in the result.

     Also asserts that the 'text' and 'metadata' entries are present.
     """
     response = pdf_extraction(PDF, tables=True)
     available = response.keys()
     assert 'text' in available
     assert 'metadata' in available
     assert 'tables' in available
     extracted_tables = response.get('tables')
     assert isinstance(extracted_tables, list)
 def test_pdf_extraction(self):
     """Verify the basic result shape of ``pdf_extraction(PDF)``.

     Asserts that 'text' and 'metadata' are present and that they are,
     respectively, a string and a dict.
     """
     response = pdf_extraction(PDF)
     available = response.keys()
     assert 'text' in available
     assert 'metadata' in available
     # Type checks run in the same order as the presence checks above.
     expected_types = (('text', six.string_types), ('metadata', dict))
     for field, field_type in expected_types:
         assert isinstance(response.get(field), field_type)
Пример #10
0
# Extract the contents of data/resumePDF.pdf with indico's pdf_extraction,
# print the result as JSON and store the same JSON in data/resume.txt.
import json
import os

import indicoio

# NOTE(review): an API key was committed to this file. The INDICO_API_KEY
# environment variable now takes precedence; the literal is kept only as a
# fallback so existing runs keep working, and it should be rotated and
# removed from source control.
indicoio.config.api_key = os.environ.get(
    'INDICO_API_KEY', '27df1eee04c5b65fb3113e9458d1d701')

# Use the real module path. The original passed the string literal
# '__file__' to realpath, which resolves against the current working
# directory rather than this script's location.
fileDir = os.path.dirname(os.path.realpath(__file__))

# Anchor the input to the same directory as the output file.
resume = os.path.join(fileDir, "data/resumePDF.pdf")
extraction_json = json.dumps(indicoio.pdf_extraction(resume))
print(extraction_json)

# The original opened this file for writing but never wrote to it or closed
# it, leaving an empty file and a leaked handle. Presumably the extraction
# output was meant to be stored here -- TODO confirm the intended content.
with open(os.path.join(fileDir, "data/resume.txt"), 'w') as fileResumeTxt:
    fileResumeTxt.write(extraction_json)
 def test_table_support(self):
     """Check table extraction together with ``raw_text=True``.

     The result must contain "raw_text", "metadata" and "tables" entries,
     and "tables" must be a list.
     """
     extracted = pdf_extraction(PDF, raw_text=True, tables=True)
     returned_keys = extracted.keys()
     for expected_key in ("raw_text", "metadata", "tables"):
         assert expected_key in returned_keys
     tables = extracted.get("tables")
     assert isinstance(tables, list)
 def test_pdf_extraction_batch(self):
     """Check that passing a list of PDFs returns a list of results."""
     batch = [PDF]
     batch_results = pdf_extraction(batch)
     assert isinstance(batch_results, list)
 def test_pdf_extraction(self):
     """Check the result of ``pdf_extraction`` when ``raw_text=True``.

     The result must contain "raw_text" (a string) and "metadata" (a dict).
     """
     extracted = pdf_extraction(PDF, raw_text=True)

     returned_keys = extracted.keys()
     for expected_key in ("raw_text", "metadata"):
         assert expected_key in returned_keys

     raw_text_value = extracted.get("raw_text")
     assert isinstance(raw_text_value, six.string_types)
     metadata_value = extracted.get("metadata")
     assert isinstance(metadata_value, dict)