def _pdf_to_jpeg_blobs(pdf_path, retries, delay):
    """Render ``pdf_path`` with ``wi(..., resolution=450)`` and return JPEG blobs.

    The open/convert step is retried up to ``retries`` extra times, sleeping
    ``delay`` seconds between attempts; the last error is re-raised once the
    retries are exhausted so the caller never continues with a missing or
    stale rendering. A page whose blob cannot be produced is reported with
    ``print`` and skipped.
    """
    attempt = 0
    while True:
        try:
            pdf = wi(filename=pdf_path, resolution=450)
            pdfImage = pdf.convert('jpeg')
            break
        except Exception:
            if attempt >= retries:
                raise
            attempt += 1
            time.sleep(delay)

    blobs = []
    for img in pdfImage.sequence:
        imgPage = wi(image=img)
        try:
            blobs.append(imgPage.make_blob('jpeg'))
        except Exception as e:
            print(e)
    return blobs


def _ocr_jpeg_blobs(blobs, save_path=None):
    """OCR every blob with pytesseract and return the list of page texts.

    When ``save_path`` is given each page image is also saved to that path
    (later pages overwrite earlier ones). Every image is closed, even if OCR
    or saving raises.
    """
    texts = []
    for blob in blobs:
        im = Image.open(io.BytesIO(blob))
        try:
            texts.append(pytesseract.image_to_string(im, lang='eng'))
            if save_path is not None:
                im.save(save_path)
        finally:
            im.close()
    return texts


def _count_orientation_keywords(text):
    """Return how many entries of ``orientation_keyword`` occur in ``text`` (case-insensitive)."""
    lowered = text.lower()
    return sum(1 for keyword in orientation_keyword if keyword.lower() in lowered)


def correctOrientationProc(orientation_FileName):
    """Find a rotation of a PDF's first page whose OCR text contains a keyword.

    The file ``orientation_FileName`` is read from ``splitFolderPath``,
    rendered to JPEG and OCR'd. Unless the page's ``/Rotate`` entry is 180 or
    270, a copy rotated by 270 degrees is written to the ``Rotated`` folder
    and OCR'd as well; it replaces the original when it matches at least as
    many ``orientation_keyword`` entries. While no keyword is found, the page
    is rotated a further 90 degrees, written to
    ``Rotated/<stem>_rotated_<n>.pdf`` and re-examined, for at most four
    passes of the loop.

    When a keyword is found, the matching file (the rendered image when
    ``typeOfOutput`` is not 1, otherwise the PDF) is copied to
    ``orientationFolderPath`` and handed to ``classification_ExtractedData``.

    Args:
        orientation_FileName: File name (with extension) of the PDF inside
            ``splitFolderPath``.

    Returns:
        None. Results are communicated through the copied file, the call to
        ``classification_ExtractedData`` and the module globals
        ``Rotated_FolderPath``, ``imagePath``, ``imagePath1``, ``imageName``,
        ``imageName1``, ``extractData`` and ``f``.

    Raises:
        Exception: whatever the renderer raises once its retries are used up,
            plus any error from file I/O, OCR or the PDF library.
    """
    global Rotated_FolderPath, imagePath, imagePath1, imageName, imageName1
    global extractData, f

    Rotated_FolderPath = "Rotated"
    if not os.path.exists(Rotated_FolderPath):
        os.makedirs(Rotated_FolderPath)

    save_images = not typeOfOutput == 1
    temp_filename = orientation_FileName.rsplit(".", 1)
    base_name = temp_filename[0]
    orient_FilePath = os.path.join(splitFolderPath, orientation_FileName)

    tryingCount = 1
    while tryingCount <= 4:
        if tryingCount == 1:
            # First pass: OCR the file exactly as it was delivered.
            if save_images:
                imageName = base_name + desiredExtensionOutput
                imagePath = os.path.join(desiredOutputPath, imageName)
            blobs = _pdf_to_jpeg_blobs(orient_FilePath, retries=1, delay=15)
            texts = _ocr_jpeg_blobs(blobs, imagePath if save_images else None)
            # str(list) escapes newlines as the two characters "\n"; strip those.
            extractData = str(texts).replace('\\n', ' ').lower()

            f = open(orient_FilePath, "rb")
            pdf = PdfFileReader(f)
            pageObj = pdf.getPage(0)
            OrientationDegrees = pageObj.get('/Rotate')

            # Pages flagged 180/270 skip the forced rotation; every other
            # value (0, 90, missing, anything else) gets a 270-degree trial.
            if OrientationDegrees not in (180, 270):
                imageName1 = base_name + "_rotated_1" + desiredExtensionOutput
                filename = os.path.join(Rotated_FolderPath,
                                        base_name + "_rotated_1.pdf")
                pageObj.rotateClockwise(270)
                pdf_writer = PdfFileWriter()
                pdf_writer.addPage(pageObj)
                with open(filename, 'wb') as newFile:
                    # Kept for compatibility: the handle is still registered
                    # in the shared list, as the original did.
                    fileHandles.append(newFile)
                    pdf_writer.write(newFile)

                if save_images:
                    imagePath1 = os.path.join(desiredOutputPath, imageName1)
                blobs = _pdf_to_jpeg_blobs(filename, retries=1, delay=15)
                texts = _ocr_jpeg_blobs(blobs,
                                        imagePath1 if save_images else None)
                extractData1 = str(texts).replace('\\n', ' ').lower()

                mainCount = _count_orientation_keywords(extractData)
                rotatedCount = _count_orientation_keywords(extractData1)
                if rotatedCount >= mainCount:
                    # Ties favour the rotated copy, including 0 == 0.
                    tryingCount = 2
                    extractData = extractData1
                    print("Forcefully rotating the page by 270 degrees")
        else:
            # Later passes: OCR the rotated PDF written by the previous pass.
            rotated_stem = base_name + "_rotated_" + str(tryingCount - 1)
            imageName1 = rotated_stem + desiredExtensionOutput
            filename = os.path.join(Rotated_FolderPath, rotated_stem + ".pdf")
            if save_images:
                imagePath1 = os.path.join(desiredOutputPath, imageName1)
            # The freshly written file may not be readable immediately, so
            # allow a few retries before giving up with the real error.
            blobs = _pdf_to_jpeg_blobs(filename, retries=3, delay=10)
            texts = _ocr_jpeg_blobs(blobs, imagePath1 if save_images else None)
            extractData = str(texts)

        filename1 = base_name + "_rotated_" + str(tryingCount - 1) + ".pdf"
        extractData = extractData.replace("\\n", " ")
        keywordFound = _count_orientation_keywords(extractData) > 0

        if not keywordFound:
            # Rotate a further quarter turn and persist it for the next pass.
            pageObj.rotateClockwise(90)
            output = PdfFileWriter()
            output.addPage(pageObj)
            # The original split an undefined name (``orient_fileName``) here;
            # the stem of the parameter is what the next pass reads back.
            filename1 = base_name + "_rotated_" + str(tryingCount) + ".pdf"
            filename = os.path.join(Rotated_FolderPath, filename1)
            with open(filename, 'wb') as newFile:
                output.write(newFile)
            f.close()
            f = open(filename, "rb")
            fileHandles.append(f)
            pdf = PdfFileReader(f)
            pageObj = pdf.getPage(0)
            tryingCount = tryingCount + 1

        # NOTE(review): ``tryingCount == 6`` cannot occur while the loop bound
        # is ``<= 4`` (the counter never exceeds 5), so a file with no keyword
        # in any orientation is never copied or classified. If a fall-back to
        # the unrotated file was intended, it has to be added deliberately.
        if keywordFound or tryingCount == 6:
            if tryingCount == 1 or tryingCount == 6:
                # The unrotated original is the one to keep.
                if save_images:
                    shutil.copy(imagePath, orientationFolderPath)
                    classification_ExtractedData(extractData, imagePath,
                                                 imageName)
                else:
                    shutil.copy(orient_FilePath, orientationFolderPath)
                    classification_ExtractedData(extractData, orient_FilePath,
                                                 orientation_FileName)
            else:
                # A rotated copy is the one to keep.
                if save_images:
                    shutil.copy(imagePath1, orientationFolderPath)
                    classification_ExtractedData(extractData, imagePath1,
                                                 imageName1)
                else:
                    shutil.copy(filename, orientationFolderPath)
                    classification_ExtractedData(extractData, filename,
                                                 filename1)
            f.close()
            break
    else:
        # Every pass failed to find a keyword: release the last PDF handle
        # instead of leaving it open.
        f.close()