Python PdfFileReader.convert示例

def _render_pdf_as_jpeg(pdf_path):
    """Open *pdf_path* with ``wi`` at resolution 450 and convert it to JPEG."""
    return wi(filename=pdf_path, resolution=450).convert('jpeg')


def _ocr_rendered_pages(pdfImage, save_path=None):
    """Run OCR over every page of a rendered document.

    Each entry of ``pdfImage.sequence`` is encoded to a JPEG blob, opened
    with ``Image.open`` and passed to ``pytesseract.image_to_string``.

    Args:
        pdfImage: Rendered document exposing ``.sequence`` (the value
            returned by ``_render_pdf_as_jpeg``).
        save_path: When not ``None``, every page image is saved to this one
            path, so a later page overwrites an earlier one.

    Returns:
        ``str()`` of the list holding one OCR string per page.
    """
    page_blobs = []
    for img in pdfImage.sequence:
        imgPage = wi(image=img)
        try:
            page_blobs.append(imgPage.make_blob('jpeg'))
        except Exception as e:
            # Best effort: a page that cannot be encoded is reported and
            # skipped instead of aborting the whole document.
            print(e)

    page_texts = []
    for blob in page_blobs:
        im = Image.open(io.BytesIO(blob))
        try:
            page_texts.append(pytesseract.image_to_string(im, lang='eng'))
            if save_path is not None:
                im.save(save_path)
        finally:
            im.close()
    return str(page_texts)


def correctOrientationProc(orientation_FileName):
    """Find a rotation of a PDF's first page whose OCR text has a keyword.

    The file ``orientation_FileName`` inside ``splitFolderPath`` is rendered
    and OCR'd. Unless the page's ``/Rotate`` entry is 180 or 270, a copy
    rotated by 270 degrees is also OCR'd, and it replaces the original text
    when it matches at least as many entries of ``orientation_keyword``.
    While no keyword is found, the page is rotated a further 90 degrees,
    written to ``Rotated/<stem>_rotated_<n>.pdf`` and tried again, for at
    most four passes of the loop.

    On the first pass that finds a keyword, the matching file (the saved
    image when ``typeOfOutput != 1``, otherwise the PDF) is copied to
    ``orientationFolderPath`` and handed to ``classification_ExtractedData``
    together with the OCR text.

    Side effects: creates the ``Rotated`` folder if missing; writes rotated
    PDFs and, when ``typeOfOutput != 1``, page images; rebinds the module
    globals ``Rotated_FolderPath``, ``imagePath``, ``imagePath1``,
    ``imageName``, ``imageName1``, ``extractData`` and ``f``; appends opened
    file objects to ``fileHandles``.

    Args:
        orientation_FileName: File name (not a path) of the PDF to examine.

    Returns:
        None.
    """
    global Rotated_FolderPath
    global imagePath
    global imagePath1
    global imageName
    global imageName1
    global extractData
    global f

    Rotated_FolderPath = "Rotated"
    if not os.path.exists(Rotated_FolderPath):
        os.makedirs(Rotated_FolderPath)

    tryingCount = 1
    temp_filename = orientation_FileName.rsplit(".", 1)
    base_name = temp_filename[0]

    while tryingCount <= 4:
        if tryingCount == 1:
            orient_FilePath = os.path.join(splitFolderPath,
                                           orientation_FileName)
            try:
                pdfImage = _render_pdf_as_jpeg(orient_FilePath)
            except Exception:
                # One retry after a pause; a second failure propagates.
                time.sleep(15)
                pdfImage = _render_pdf_as_jpeg(orient_FilePath)

            save_path = None
            if not typeOfOutput == 1:
                imageName = base_name + desiredExtensionOutput
                imagePath = os.path.join(desiredOutputPath, imageName)
                save_path = imagePath
            extractData = _ocr_rendered_pages(pdfImage, save_path)
            # str(list) shows newlines as the two characters "\n".
            extractData = extractData.replace('\\n', ' ')
            extractData = extractData.lower()

            f = open(orient_FilePath, "rb")
            pdf = PdfFileReader(f)
            pageObj = pdf.getPage(0)
            OrientationDegrees = pageObj.get('/Rotate')

            # Every /Rotate value other than 180 and 270 (including a
            # missing entry) triggers the forced 270-degree comparison.
            skip_forced_rotation = (OrientationDegrees == 180
                                    or OrientationDegrees == 270)
            if not skip_forced_rotation:
                pdf_writer = PdfFileWriter()
                imageName1 = base_name + "_rotated_1" + desiredExtensionOutput
                filename = os.path.join(Rotated_FolderPath,
                                        base_name + "_rotated_1.pdf")
                # NOTE: this mutates pageObj; the rotation stays applied
                # even when the rotated text is rejected below.
                pageObj.rotateClockwise(270)
                pdf_writer.addPage(pageObj)
                with open(filename, 'wb') as newFile:
                    fileHandles.append(newFile)
                    pdf_writer.write(newFile)

                try:
                    pdfImage = _render_pdf_as_jpeg(filename)
                except Exception:
                    time.sleep(15)
                    pdfImage = _render_pdf_as_jpeg(filename)

                save_path = None
                if not typeOfOutput == 1:
                    imagePath1 = os.path.join(desiredOutputPath, imageName1)
                    save_path = imagePath1
                extractData1 = _ocr_rendered_pages(pdfImage, save_path)
                extractData1 = extractData1.replace('\\n', ' ')
                extractData1 = extractData1.lower()

                mainCount = sum(1 for keyword in orientation_keyword
                                if keyword.lower() in extractData)
                rotatedCount = sum(1 for keyword in orientation_keyword
                                   if keyword.lower() in extractData1)
                if rotatedCount >= mainCount:
                    # Ties favour the rotated copy; continue as attempt 2.
                    tryingCount = 2
                    extractData = extractData1
                    print("Forcefully rotating the page by 270 degrees")
        else:
            # Re-read the PDF written at the end of the previous pass.
            rotated_stem = base_name + "_rotated_" + str(tryingCount - 1)
            imageName1 = rotated_stem + desiredExtensionOutput
            filename = os.path.join(Rotated_FolderPath, rotated_stem + ".pdf")
            try:
                pdfImage = _render_pdf_as_jpeg(filename)
            except Exception:
                # Up to three more attempts, pausing 5 s before each and
                # another 5 s after each failure.
                # NOTE(review): if all of them fail, execution continues
                # with the pdfImage rendered on an earlier pass -- confirm
                # whether this should raise instead.
                for _ in range(3):
                    time.sleep(5)
                    try:
                        pdfImage = _render_pdf_as_jpeg(filename)
                        break
                    except Exception:
                        time.sleep(5)

            save_path = None
            if not typeOfOutput == 1:
                imagePath1 = os.path.join(desiredOutputPath, imageName1)
                save_path = imagePath1
            extractData = _ocr_rendered_pages(pdfImage, save_path)

        filename1 = base_name + "_rotated_" + str(tryingCount - 1) + ".pdf"
        extractData = extractData.replace("\\n", " ")
        searchable_text = extractData.lower()
        keywordFound = 0
        if any(keyword.lower() in searchable_text
               for keyword in orientation_keyword):
            keywordFound = 1

        if keywordFound == 0:
            pageObj.rotateClockwise(90)
            output = PdfFileWriter()
            output.addPage(pageObj)
            # Name the file from the parameter's stem so the next pass,
            # which opens "<stem>_rotated_<n>.pdf", finds it. (The original
            # read an undefined name here.)
            filename1 = base_name + "_rotated_" + str(tryingCount) + ".pdf"
            filename = os.path.join(Rotated_FolderPath, filename1)
            with open(filename, 'wb') as newFile:
                output.write(newFile)
            f.close()
            f = open(filename, "rb")
            fileHandles.append(f)
            pdf = PdfFileReader(f)
            pageObj = pdf.getPage(0)

            tryingCount = tryingCount + 1

        # NOTE(review): tryingCount never exceeds 5 inside this loop, so the
        # "== 6" tests cannot be true; when no pass finds a keyword the
        # loop ends without classifying anything and leaves f open.
        if keywordFound == 1 or tryingCount == 6:
            if tryingCount == 1 or tryingCount == 6:
                # The unrotated original is the accepted orientation.
                if not typeOfOutput == 1:
                    shutil.copy(imagePath, orientationFolderPath)
                    classification_ExtractedData(extractData, imagePath,
                                                 imageName)
                else:
                    shutil.copy(orient_FilePath, orientationFolderPath)
                    classification_ExtractedData(extractData,
                                                 orient_FilePath,
                                                 orientation_FileName)
            else:
                # A rotated copy is the accepted orientation.
                if not typeOfOutput == 1:
                    shutil.copy(imagePath1, orientationFolderPath)
                    classification_ExtractedData(extractData, imagePath1,
                                                 imageName1)
                else:
                    shutil.copy(filename, orientationFolderPath)
                    classification_ExtractedData(extractData, filename,
                                                 filename1)
            f.close()
            break