示例#1
0
def testScannedPdfPageExtraction(self):
    """Split the scanned sample PDF and check that pages 1-5 are written.

    Runs PdfSeparate on ``tests/sample-scanned.pdf`` with ``self.outdir``
    as the target, then asserts that each expected page file exists there.
    """
    splitter = PdfSeparate("tests/sample-scanned.pdf", self.outdir)
    splitter.extractPages()

    # Checked in page order; the first missing file fails the test.
    for pageFile in ("1.pdf", "2.pdf", "3.pdf", "4.pdf", "5.pdf"):
        pagePath = os.path.join(self.outdir, pageFile)
        self.assertTrue(os.path.isfile(pagePath))
def testScannedPdfPageExtraction(self):
    """Verify that separating the scanned sample yields page files 1-5.

    PdfSeparate is pointed at ``tests/sample-scanned.pdf`` and
    ``self.outdir``; afterwards every expected page file must be a
    regular file inside ``self.outdir``.
    """
    def pageExists(fileName):
        # True when the named page file is present in the output directory.
        return os.path.isfile(os.path.join(self.outdir, fileName))

    PdfSeparate('tests/sample-scanned.pdf', self.outdir).extractPages()

    expectedPages = ["1.pdf", "2.pdf", "3.pdf", "4.pdf", "5.pdf"]
    for fileName in expectedPages:
        self.assertTrue(pageExists(fileName))
示例#3
0
 def separatePdfPages(self):
     """Split ``self.filePath`` into pages under ``<self.outputDir>/pages``.

     PdfTkSeparate is tried first and its ``extractPages()`` result is
     logged. When that result is not 0, PdfSeparate is run with the same
     input file and target directory.
     """
     pagesDir = os.path.join(self.outputDir, 'pages')

     self.logger.info('Calling PdfTkseparate: Separating pdf to pages at %s', pagesDir)
     tkStatus = PdfTkSeparate(self.filePath, pagesDir).extractPages()
     self.logger.info('PdfTkseparate Status: %s', tkStatus)

     if tkStatus == 0:
         # First splitter reported status 0; no fallback needed.
         return

     self.logger.info('Calling Pdfseparate: Separating pdf to pages at %s', pagesDir)
     fallbackSplitter = PdfSeparate(self.filePath, pagesDir)
     fallbackSplitter.extractPages()
 def testScannedPdfPageForUnauthorisec(self):
     """Check that bogus ABBYY credentials are rejected with HTTP 401.

     The one-page scanned sample is split into ``self.indir`` first. Text
     extraction is then attempted with the credentials
     ``nouser``/``nopassword`` and must raise ``HTTPError`` with code 401
     and reason ``"Unauthorized"``.

     A plain ``try/except HTTPError`` would let the test pass when no
     exception is raised at all; ``assertRaises`` makes the missing
     exception a failure.
     """
     pdfSeparate = PdfSeparate("tests/sample-scanned-1.pdf", self.indir)
     pdfSeparate.extractPages()
     self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

     # All three calls stay inside the context: it is not visible here
     # which of them actually issues the HTTP request.
     with self.assertRaises(HTTPError) as raised:
         abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
         abbyyPdf.setApplicationCredentials("nouser", "nopassword")
         abbyyPdf.processPdfPage(1)

     self.assertEqual(raised.exception.code, 401)
     self.assertEqual(raised.exception.reason, "Unauthorized")
 def testScannedPdfPageForUnauthorisec(self):
     """Verify that invalid ABBYY credentials produce an HTTP 401 error.

     After splitting ``tests/sample-scanned-1.pdf`` into ``self.indir``,
     page 1 is processed with the credentials ``nouser``/``nopassword``.
     The test requires an ``HTTPError`` whose ``code`` is 401 and whose
     ``reason`` is ``"Unauthorized"``.

     ``assertRaises`` is used so the test fails when no ``HTTPError`` is
     raised; a bare ``try/except`` around the calls would pass silently in
     that case.
     """
     pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
     pdfSeparate.extractPages()
     self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

     # Construction and credential setup remain inside the context because
     # the call that performs the network request is not visible here.
     with self.assertRaises(HTTPError) as ctx:
         abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1,
                                          "english")
         abbyyPdf.setApplicationCredentials('nouser', 'nopassword')
         abbyyPdf.processPdfPage(1)

     error = ctx.exception
     self.assertEqual(error.code, 401)
     self.assertEqual(error.reason, "Unauthorized")
    def testScanned44PdfPageForNetwork(self):
        """Extract text from the 44-page scanned sample via ABBYY.

        The sample is split into ``self.indir``; then all 44 pages are sent
        through AbbyyPdfTextExtractor using the ``appid``/``password``
        values from the ``abbyy`` section of ``self.configParser``. The
        first and last text files must exist in ``self.outdir`` afterwards.

        If the extraction itself raises (for example missing configuration
        or an unreachable service), the test is skipped instead of failed.
        The output assertions sit outside the ``try`` so that a blanket
        ``except Exception`` cannot swallow their ``AssertionError``.
        """
        pdfSeparate = PdfSeparate('tests/sample-scanned-44pages.pdf', self.indir)
        pdfSeparate.extractPages()
        self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

        try:
            abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 44, "english")
            abbyyPdf.setApplicationCredentials(
                self.configParser.get('abbyy', 'appid'),
                self.configParser.get('abbyy', 'password'))
            abbyyPdf.extractPages()
        except Exception as exc:
            # Best effort: report a skip rather than a silent pass.
            # NOTE(review): assumes self is a unittest.TestCase (skipTest).
            self.skipTest("ABBYY extraction could not run: %s" % (exc,))

        self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))
        self.assertTrue(os.path.isfile(os.path.join(self.outdir, "44.txt")))
    def testScannedPdfPage(self):
        """Extract text from page 1 of the one-page scanned sample via ABBYY.

        The sample is split into ``self.indir``; page 1 is then processed
        with AbbyyPdfTextExtractor using the ``appid``/``password`` values
        from the ``abbyy`` section of ``self.configParser``, and ``1.txt``
        must exist in ``self.outdir`` afterwards.

        If the processing step raises (for example missing configuration or
        an unreachable service), the test is skipped instead of failed. The
        output assertion sits outside the ``try`` so that a blanket
        ``except Exception`` cannot swallow its ``AssertionError``.
        """
        pdfSeparate = PdfSeparate("tests/sample-scanned-1.pdf", self.indir)
        pdfSeparate.extractPages()
        self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

        try:
            abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
            abbyyPdf.setApplicationCredentials(
                self.configParser.get("abbyy", "appid"), self.configParser.get("abbyy", "password")
            )
            abbyyPdf.processPdfPage(1)
        except Exception as exc:
            # Best effort: report a skip rather than a silent pass.
            # NOTE(review): assumes self is a unittest.TestCase (skipTest).
            self.skipTest("ABBYY extraction could not run: %s" % (exc,))

        self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))
    def testScannedPdfPage(self):
        """Run ABBYY text extraction on page 1 of the scanned sample.

        ``tests/sample-scanned-1.pdf`` is split into ``self.indir``, then
        page 1 is processed with credentials read from the ``abbyy``
        section of ``self.configParser``. The test requires ``1.txt`` to
        exist in ``self.outdir`` once processing has succeeded.

        Errors raised while talking to the service cause a skip, not a
        failure. The result check is deliberately placed after the
        ``try`` block: inside it, ``except Exception`` would also catch
        the ``AssertionError`` and the test could never fail.
        """
        pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
        pdfSeparate.extractPages()
        self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

        try:
            abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1,
                                             "english")
            abbyyPdf.setApplicationCredentials(
                self.configParser.get('abbyy', 'appid'),
                self.configParser.get('abbyy', 'password'))
            abbyyPdf.processPdfPage(1)
        except Exception as exc:
            # Keep the best-effort intent, but make it visible as a skip.
            # NOTE(review): assumes self is a unittest.TestCase (skipTest).
            self.skipTest("ABBYY extraction could not run: %s" % (exc,))

        self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))