Пример #1
0
 def getPDFInfoForTestString(self, filename):
     """Return the extracted content of the leading pages of a PDF file.

     The file is opened in binary mode and initialised with an empty
     password.  Each page is run through a ``PDFPageInterpreter`` backed by
     a ``TagExtractor2Memory`` device (codec ``utf-8``), and whatever
     ``process_page_to_mem`` returns for each page is concatenated in page
     order.  At most the first 11 pages (indices 0 through 10) are
     processed.

     Args:
         filename: Path of the PDF file to read.

     Returns:
         The concatenated per-page output.  Documents with fewer than 11
         pages yield the output of all their pages; a document with no
         pages yields an empty string.

     Raises:
         PDFTextExtractionNotAllowed: If the document reports that it is
             not extractable.
     """
     # Index of the last page to process (inclusive), i.e. 11 pages total.
     last_page_index = 10

     # The context manager guarantees the file handle is released on every
     # exit path, including the "not extractable" error below.
     with open(filename, 'rb') as fp:
         parser = PDFParser(fp)
         doc = PDFDocument()
         # Parser and document must be linked both ways before initialising.
         parser.set_document(doc)
         doc.set_parser(parser)
         doc.initialize('')
         if not doc.is_extractable:
             raise PDFTextExtractionNotAllowed

         rsrcmgr = PDFResourceManager()
         device = TagExtractor2Memory(rsrcmgr, codec='utf-8')
         interpreter = PDFPageInterpreter(rsrcmgr, device)

         page_chunks = []
         for i, page in enumerate(doc.get_pages()):
             page_chunks.append(interpreter.process_page_to_mem(page))
             # Stop right after the last wanted page so the page iterator
             # is never advanced further than necessary.
             if i == last_page_index:
                 break

         # Returning here, rather than only inside the loop, makes short
         # documents return their content instead of falling through to None.
         return ''.join(page_chunks)