def main(argv):
    """Split a PDF into per-visualization folders of page JPEGs.

    argv layout: [script, pdf, num_vis, ini1, fin1, ..., iniN, finN].
    Prints a usage message and returns None on malformed arguments.
    """
    usage = ('usage:\n python pdf_2jpeg.py <pdf> <numero de visualizaçoes> '
             '<pagina inicial 1> <pagina final1> ... '
             '<pagina inicial N> <pagina final N>')
    # Guard clauses replace the original nested if/else (same messages).
    if len(argv) < 4:
        print(usage)
        return
    num_vis = int(argv[2])
    if len(argv) != num_vis * 2 + 3:
        print(usage)
        return
    filename = argv[1]
    ini = []
    fin = []
    it = 3
    for _ in range(num_vis):
        ini.append(int(argv[it]))
        fin.append(int(argv[it + 1]))
        it += 2
    # BUG FIX: str.strip('.pdf') removes any of the characters '.', 'p',
    # 'd', 'f' from BOTH ends (e.g. 'pdf_doc.pdf' -> '_doc'); cut the
    # extension off the end instead.
    base = filename[:-4] if filename.endswith('.pdf') else filename
    directory = base + '/images'
    pages = convert_from_path(filename, 500)
    if not os.path.exists(directory):
        os.makedirs(directory)
    for i in range(num_vis):
        current_dir = directory + '/vis-' + str(i)
        if not os.path.exists(current_dir):
            os.makedirs(current_dir)
        # NOTE(review): range() excludes the final page fin[i] -- confirm
        # whether the end page is meant to be inclusive.
        for j in range(ini[i], fin[i]):
            pages[j].save(current_dir + '/out' + str(j).zfill(2) + '.jpg',
                          'JPEG')
    return
def pdf2png(self, pdfPath, out_file):
    """Render each page of a PDF to PNG, then resize it to 1.6x A4.

    out_file must contain a '%d' placeholder for the page index; one
    file is written (and resized in place) per page.
    """
    if "%d" not in out_file:
        # Multiple output files require the page-index placeholder.
        raise ValueError("複数ファイル対応のため、'%d'必須です")
    # Render and persist each page, then reload through PIL to resize.
    for page_index, rendered in enumerate(convert_from_path(pdfPath)):
        destination = out_file % (page_index,)
        rendered.save(destination, "png")
        reloaded = Image.open(destination)
        target = int(A4[0] * 1.6), int(A4[1] * 1.6)  # 1.6x A4
        reloaded.resize(target, Image.LANCZOS).save(destination)
# OCR every PDF under the target directory: render each page at 500 DPI
# and write one text file per page next to the source name.
import pytesseract
from pdf2image import convert_from_path
import glob

for pdf_path in glob.glob(r"yourPath\*.pdf"):
    rendered_pages = convert_from_path(pdf_path, 500)
    for pageNum, page_image in enumerate(rendered_pages):
        recognized = pytesseract.image_to_string(page_image, lang='eng')
        with open(f'{pdf_path[:-4]}_page{pageNum}.txt', 'w') as out_handle:
            out_handle.write(recognized)
def pdf_to_img(pdf_file):
    """Render every page of *pdf_file*; returns the list of PIL images."""
    rendered = pdf2image.convert_from_path(pdf_file)
    return rendered
def convertPDF(input_path, output_path, temp_path):
    """Convert a PDF into a text file via per-page image OCR.

    input_path:  pathlib.Path to the source PDF.
    output_path: directory receiving <stem>.txt (opened in append mode).
    temp_path:   prefix for intermediate page JPEGs; assumed to end with
                 a path separator -- TODO confirm with callers.
    """
    file_name = input_path.stem
    PDF_file = str(input_path)
    # --- PDF -> one JPEG per page ---
    pages = convert_from_path(PDF_file)
    image_dir = temp_path
    image_counter = 1
    for page in pages:
        imagefilename = (image_dir + file_name + "_page_" +
                         str(image_counter) + ".jpg")
        page.save(imagefilename, 'JPEG')
        image_counter = image_counter + 1
    # --- OCR each page image, accumulating into one text file ---
    filelimit = image_counter - 1  # number of pages written above
    outfile = Path(output_path, file_name + ".txt")
    # BUG FIX: the original used open()/close() without try/finally, so
    # an OCR error leaked the handle; the context manager closes it.
    with open(str(outfile), "a") as f:
        for i in range(1, filelimit + 1):
            imagefilename = image_dir + file_name + "_page_" + str(i) + ".jpg"
            text = str(pytesseract.image_to_string(Image.open(imagefilename)))
            # Rejoin words that were hyphenated across line breaks.
            text = text.replace('-\n', '')
            f.write(text)
def test_conversion_from_path(self):
    """A single-page PDF converts to exactly one image."""
    started = time.time()
    converted = convert_from_path('./tests/test.pdf')
    self.assertTrue(len(converted) == 1)
    elapsed = time.time() - started
    print('test_conversion_from_path: {} sec'.format(elapsed))
def test_empty_if_file_not_found(self):
    """Converting a nonexistent path must raise."""
    started = time.time()
    with self.assertRaises(Exception):
        convert_from_path('./tests/totally_a_real_file_in_folder.xyz')
    elapsed = time.time() - started
    print('test_empty_if_file_not_found: {} sec'.format(elapsed))
r"C:\Axis AI Challenge @ Akash_Abhishek\INPUT FILES") if each.endswith('.jpeg') ] results_tiff = [ each for each in os.listdir( r"C:\Axis AI Challenge @ Akash_Abhishek\INPUT FILES") if each.endswith('.tiff') ] results_tif = [ each for each in os.listdir( r"C:\Axis AI Challenge @ Akash_Abhishek\INPUT FILES") if each.endswith('.tif') ] pdf = [ convert_from_path(in_file, 500) for in_file in glob.glob( r"C:\Axis AI Challenge @ Akash_Abhishek\INPUT FILES\*.pdf") ] total_no_pdf = len(pdf) for i in range(0, len(pdf)): pages = pdf[i] name = results_pdf[i] name_list = os.path.splitext(name)[0] file_name.append(name_list) for j in range(0, len(pages)): page = pages[j] page.save( r"C:\Axis AI Challenge @ Akash_Abhishek\PROCESSED FILES 1\{}\PDF_{} page_{}.jpg" .format(i + 1, i + 1, j + 1))
pdf2convert = filename else: print('There are more than two pdf files in the directory') time.sleep(delay_b4_exit) exit() return pdf2convert spare_files = [os.path.basename(__file__)] pdf2use = get_pdf_to_use(dir2use) if pdf2use == None: print('There are no pdf files in the current directory') time.sleep(delay_b4_exit) exit() spare_files.append(pdf2use) query = input("All the files in {} will be deleted except {}. Type yes to proceed and any other key to exit\n".format(dir2use, spare_files)) if query != 'yes': exit() clean_dir(dir2use, spare_files=spare_files) print('converting {} to images '.format(pdf2use)) output_file_name = str(pdf2use).replace('.pdf', '') pdf2image.convert_from_path(os.path.join(dir2use, pdf2use), output_folder=dir2use, output_file=output_file_name, fmt='jpeg') print('conversion successfull') time.sleep(delay_b4_exit)
# OCR every file under FOLDER: render PDF pages into a throwaway temp
# directory, binarize each page with Otsu thresholding, and print the
# recognized text.
import tempfile
from os.path import isfile, join
from os import listdir
import cv2
import pytesseract
from pdf2image import convert_from_path

FOLDER = 'samples/'
ONLY_FILES = [name for name in listdir(FOLDER) if isfile(join(FOLDER, name))]

for current_file in ONLY_FILES:
    full_file = FOLDER + current_file
    # Page images are written into a temp dir that vanishes afterwards.
    with tempfile.TemporaryDirectory() as path:
        pages = convert_from_path(full_file, fmt='jpeg', output_folder=path)
        for page in pages:
            loaded = cv2.imread(page.filename)
            grayscale = cv2.cvtColor(loaded, cv2.COLOR_BGR2GRAY)
            # Otsu picks the binarization threshold automatically.
            grayscale = cv2.threshold(
                grayscale, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
            print(pytesseract.image_to_string(grayscale))
def main(_argv):
    """Detect logos on rendered PDF labels with YOLOv3 and reconcile them
    against per-product approval spreadsheets.

    For each of FLAGS.count test PDFs: render page 1 to JPEG, run YOLO
    detection, read the matching .xlsx, color each approval cell
    (red/green/blue) by comparing detected logos to the sheet, append
    rows for logos missing from the sheet, and save the workbook.
    NOTE(review): indentation reconstructed from a collapsed source --
    verify block nesting against the original file.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    yolo = YoloV3(classes=total_number_of_logos)  # number of classes/logos, needs to be updated if another logo is added
    yolo.load_weights('./weights/yolov3-custom.tf').expect_partial()  # file path to weights
    class_names = [c.strip() for c in open('./data/labels/custom.names').readlines()]  # file path to classes list, needs to be updated if another logo is added
    if FLAGS.count:
        count = FLAGS.count
    excel = []
    images = []
    # Render page 1 of each test PDF and record the paired xlsx/jpg paths.
    for i in range(count):
        con = convert_from_path('data/pdf/test (' + str(i+1) + ').pdf',
                                output_folder='data/images', fmt="jpg",
                                single_file=True,
                                output_file='test (' + str(i+1) + ')')
        excel.append('data/excel/test (' + str(i+1) + ').xlsx')
        images.append('data/images/test (' + str(i+1) + ').jpg')
    raw_images = []
    for image in images:
        img_raw = tf.image.decode_image(open(image, 'rb').read(), channels=3)
        raw_images.append(img_raw)
    i = 0  # index number for main loop
    logos = []  # list of detected logos for each image
    approvals = []  # list of excel data for each image
    for raw_img in raw_images:
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, 416)  # image size
        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))
        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite('./detections/detection (' + str(i+1) + ').jpg', img)  # image output
        # LABEL EXTRACTION: collect unique detected logo names as
        # [name, matched_flag] pairs.
        temp_names = []  # temporary list for each image's logo detections
        for j in range(nums[0]):
            repeat = True
            temp_pair = []  # temporary list for each logo and its status
            if (j > 0):
                for k in range(len(temp_names)):
                    if (class_names[int(classes[0][j])] == temp_names[k][0]):
                        repeat = False
                        break
            if (repeat):  # if not a repeated logo, update main logo list
                temp_pair.append(class_names[int(classes[0][j])])  # append logo
                temp_pair.append(False)  # append status
                temp_names.append(temp_pair)  # append pair
        logos.append(temp_names)  # append names list to main logo list
        # EXCEL EXTRACTION: columns 4/5 of each data row plus a default
        # fill color (red) -- rows start at 2 (row 1 is the header).
        wb = load_workbook(excel[i])
        sheet = wb.active
        rows = sheet.max_row
        temp_sheet = []  # temporary list for each image's excel data
        for j in range(rows-1):
            temp_rows = []  # temporary list for each row's excel data
            temp_rows.append(str(sheet.cell(row=j+2, column=4).value).upper().strip())
            temp_rows.append(str(sheet.cell(row=j+2, column=5).value).upper().strip())
            temp_rows.append("00FF0000")  # Red by default
            temp_sheet.append(temp_rows)
        approvals.append(temp_sheet)  # append sheet list to main approvals list
        # EXCEL TRANSLATION: map the country/requirement text through the
        # extola lookup table to a list of expected logo names.
        for j in range(len(approvals[i])):
            if (approvals[i][j][0] in extola):
                temp_trans = extola[approvals[i][j][0]]
            else:
                temp_trans = ["NAL"]  # No Associated Logo
            approvals[i][j][0] = temp_trans
        # EXCEL COMPARED TO LABEL
        # "APPROVAL STATUS"            "On label"  "Not on label"
        # "APPROVED"                   "Green"     "Red"
        # "NO REQUIREMENTS"            "Red"       "Green"
        # "APPROVAL NOT APPLICABLE"    "Red"       "Green"
        # "APPROVAL NOT REQUIRED"      "Red"       "Green"
        # "CONTACT CISCO PARTNER/IOR"  "Red"       "Green"
        # "NOT APPROVED"               "Red"       "Green"
        # "PENDING"                    "Red"       "Green"
        # "RENEWAL IN PROGESS"         "Red"       "Green"
        # "NONE"/"UNKNOWN"             "Red"       "Red"
        #
        # "00FF0000" (Red)   needs attention
        # "0000FF00" (Green) good to go
        # "000000FF" (Blue)  missing logo
        for j in range(len(approvals[i])):
            flag = True
            k = 0
            temp_count = 0
            while (flag):
                if (k == len(logos[i])):  # logo not on label
                    flag = False
                    if (approvals[i][j][1] == "APPROVED"):
                        approvals[i][j][2] = "00FF0000"  # Red
                    elif ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00"  # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000"  # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"
                elif (approvals[i][j][0][0] == "NAL"):  # no logo to detect
                    flag = False
                    if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00"  # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000"  # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"
                else:  # continue or logo on label
                    for X in range(len(approvals[i][j][0])):
                        if (approvals[i][j][0][X] == logos[i][k][0]):  # logo on label
                            logos[i][k][1] = True
                            temp_count+=1
                            if (temp_count == len(approvals[i][j][0])):
                                flag = False
                                if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")):
                                    approvals[i][j][2] = "00FF0000"  # Red
                                elif ((temp_count == len(approvals[i][j][0]))and(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                                    approvals[i][j][2] = "0000FF00"  # Green
                                elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                                    approvals[i][j][2] = "00FF0000"  # Red
                                    sheet.cell(row=j+2, column=5).value = "Unknown"
                    k+=1
            sheet.cell(row=j+2, column=5).fill = PatternFill(start_color=approvals[i][j][2], end_color=approvals[i][j][2], fill_type='solid')
        # LABEL COMPARED TO EXCEL: any detected logo never matched above
        # gets appended as a new row, flagged blue.
        new_row=1
        for j in range(len(logos[i])):
            if (logos[i][j][1] == False):  # not on excel so add it in a new row
                sheet.cell(row=new_row+rows, column=1).value = str(sheet.cell(row=rows, column=1).value)  #1 Product Name
                sheet.cell(row=new_row+rows, column=3).value = str(sheet.cell(row=rows, column=3).value)  #3 Desc
                sheet.cell(row=new_row+rows, column=4).value = logos[i][j][0]  #4 Country
                sheet.cell(row=new_row+rows, column=5).value = "Unknown"  #5 Approval Status
                sheet.cell(row=new_row+rows, column=5).fill = PatternFill(start_color="000000FF", end_color="000000FF", fill_type='solid')  #5 Blue
                for k in range(5):
                    sheet.cell(row=new_row+rows, column=k+6).value = str(sheet.cell(row=rows, column=k+6).value)  #6-10
                new_row+=1
        wb.save(excel[i])
        i+=1
    # DISPLAY: per image, print the detected logos (L) and the
    # translated expected logos from the sheet (E).
    for j in range(i):
        print("\nL" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(logos[j])):
            temp_print.append(logos[j][k][0])
        print(temp_print, "\nE" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(approvals[j])):
            temp_print.append(approvals[j][k][0])
        print(temp_print)
        print("")
def add_timestamp_filename(filename):
    """Insert the current POSIX timestamp (digits only) before the extension.

    'report.pdf' -> 'report1700000000123456.pdf'.  Splits on the LAST dot,
    so dotted stems like 'a.b.pdf' keep their full base name (the original
    split on the first dot and silently dropped everything in between).
    A name with no dot gets the timestamp appended.
    """
    digits = str(datetime.now().timestamp()).replace(".", "")
    stem, dot, file_ext = filename.rpartition(".")
    if not dot:
        # No extension at all (the original raised IndexError here).
        return filename + digits
    return stem + digits + "." + file_ext


from pdf2image import convert_from_path

# Rasterize p75.pdf at 500 DPI (Windows poppler binaries) and write one
# PNG per page: image0.png, image1.png, ...
images = convert_from_path("p75.pdf", 500, poppler_path=r'D:\poppler-0.68.0\bin')
for i, image in enumerate(images):
    fname = 'image' + str(i) + '.png'
    image.save(fname, "PNG")
file)) and filename != '.DS_Store': i += 1 print("Processing " + filename + " " + str(i) + " of " + str(noFiles)) pdf_file = open(sys.argv[1] + '/' + filename, 'rb') read_pdf = PyPDF2.PdfFileReader(pdf_file) number_of_pages = read_pdf.getNumPages() print(number_of_pages) for x in range(number_of_pages): print("Processing Page" + str(x) + " of " + str(number_of_pages)) page = read_pdf.getPage(x) print(filename) basename = os.path.basename(sys.argv[1] + '/' + filename) new_name, _ = os.path.splitext(basename) d = sys.argv[1] + '/images/' # nn = d+new_name + "_pg_" + str(x) nn = d + new_name print(sys.argv[1] + "/" + filename) print(nn) images = convert_from_path(sys.argv[1] + "/" + filename) finalFilename = d + new_name + ".jpg" for image in images: image.save(finalFilename, 'JPEG') # text = textract.process(sys.argv[1]+"/"+filename, method='tesseract', language='eng') # # f= open(d+new_name + "_pg_" + str(x) + ".txt", "wb") # f= open(d+new_name + ".txt", "wb") # f.write(text) # f.close
# Walk a directory tree and, for every PDF found, render pages 7 and 9
# at 500 DPI into the PDF's own directory under predictable names.
import os
from pdf2image import convert_from_path

rootdir = '/home/cso/Downloads/customer_pdfs'
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        full_path = os.path.join(subdir, file)
        if not full_path.endswith('.pdf'):
            continue
        stem = file.replace('.pdf', '')
        converted_name = os.path.join(subdir, '%s_conv.jpg' % (stem,))
        converted_name_2 = os.path.join(subdir, '%s_conv_2.jpg' % (stem,))
        page_seven = convert_from_path(full_path, 500, output_folder=subdir,
                                       fmt='jpg', first_page=7, last_page=7)
        page_nine = convert_from_path(full_path, 500, output_folder=subdir,
                                      fmt='jpg', first_page=9, last_page=9)
        # convert_from_path wrote temp-named files; move them into place.
        os.rename(page_seven[0].filename, converted_name)
        os.rename(page_nine[0].filename, converted_name_2)
lc_year = year_ago.strftime("%y") month = year_ago.strftime("%m") day = year_ago.strftime("%d") aux=['E','F','M','A','Y','J','L','G','S','O','N','D'] letter_month=aux[int(month)-1] except: sys.exit() #LaVanguardia try: url = f'http://hemeroteca-paginas.lavanguardia.com/LVE05/PUB/{year}/{month}/{day}/LVG{year}{month}{day}0011LB.pdf' wget.download(url, 'temporal/lavanguardia.pdf') pages = convert_from_path('temporal/lavanguardia.pdf', 500) time.sleep(3) for page in pages: page.save('temporal/lavanguardia.jpg', 'JPEG') time.sleep(3) except: os.system("cp fail.jpg temporal/lavanguardia.jpg") #ABC try: url = f'https://static.abc.es/media/hemeroteca/{year}/{month}/{day}/abc-madrid-{year}{month}{day}-1-t6w--620x846.jpg' wget.download(url, 'temporal/abc.jpg') time.sleep(3) except: os.system("cp fail.jpg temporal/abc.jpg")
def ocr(PDF_file, outfile, output):
    """OCR a planning-application PDF and extract applicant/agent/material
    details into a summary text file.

    PDF_file: source PDF path.
    outfile:  path receiving the raw OCR text (appended to).
    output:   path receiving the formatted summary.
    NOTE(review): indentation reconstructed from a collapsed source --
    verify nesting against the original file.
    """
    # --- Render pages, denoise with Otsu thresholding, OCR, append text.
    # BUG FIX: file handles now use context managers instead of leaking
    # on an OCR error.
    with open(outfile, "a") as f:
        pages = convert_from_path(PDF_file, 500)
        for page in pages:
            filename = "page.png"
            page.save(filename, 'JPEG')
            cv_img = cv2.imread(filename, 0)
            blur = cv2.GaussianBlur(cv_img, (5, 5), 0)
            ret3, th3 = cv2.threshold(blur, 0, 255,
                                      cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            cv2.imwrite('denoised.png', th3)
            text = str(pytesseract.image_to_string(Image.open('denoised.png')))
            text = text.replace('-\n', '')  # rejoin hyphenated line breaks
            f.write(text)
    with open(outfile, "r") as f:
        searchlines = f.readlines()
    searchlines = list(map(lambda s: s.strip(), searchlines))
    # BUG FIX: the original removed items from `searchlines` while
    # iterating it, which skips consecutive blank lines; filter instead.
    searchlines = [line for line in searchlines if line != '']
    # --- Applicant block: 8 lines starting at the section header.
    applicant_details = searchlines[searchlines.index(
        "2. Applicant Details"):searchlines.index("2. Applicant Details") + 8]
    applicant_name = applicant_details[1:4][0].split(
        'Title')[1] + applicant_details[1:4][1].split(
        'First name')[1] + applicant_details[1:4][2].split('Surname')[1]
    applicant_name = applicant_name.strip()
    applicant_company_name = applicant_details[4].split('Company name')[1]
    applicant_company_name = applicant_company_name.strip()
    applicant_address = applicant_details[5].split(
        'Address line 1 ')[1] + applicant_details[6].split('Address line 2')[
        1] + applicant_details[7].split('Address line 3')[1]
    applicant_address = applicant_address.strip()
    # --- Inline materials descriptions ("header: value" lines), if any.
    subs = 'Description of proposed materials and finishes'
    res = [line for line in searchlines if subs in line]
    l2 = [line.split(': ')[1] for line in res]
    l3 = []
    if l2 == []:
        # No inline materials: agent block is 11 lines and materials come
        # from the "Walls - Materials" table (odd-indexed entries).
        agent_details = searchlines[searchlines.index(
            "3. Agent Details"):searchlines.index("3. Agent Details") + 11]
        agent_name = agent_details[1:4][0].split(
            'Title')[1] + agent_details[1:4][1].split(
            'First name')[1] + agent_details[1:4][2].split('Surname')[1]
        agent_name = agent_name.strip()
        agent_company_name = agent_details[7]
        agent_company_name = agent_company_name.strip()
        agent_address = agent_details[8] + ',' + agent_details[
            9] + ',' + agent_details[10].split('Address line 3')[1]
        agent_address = agent_address.strip()
        l3.append(
            searchlines[searchlines.index("Walls - Materials"):searchlines.
                        index("Walls - Materials") + 8])
        l4 = []
        for idx in range(len(l3[0])):
            if idx % 2 == 1:
                l4.append(l3[0][idx])
        with open(output, "w") as f:
            f.write("1. Applicant details \n" + "Name : " + applicant_name +
                    '\n' + "Company : " + applicant_company_name + '\n' +
                    "Address : " + applicant_address + '\n' + '\n' +
                    "2. Agent details\n" + "Name : " + agent_name + '\n' +
                    "Company : " + agent_company_name + '\n' + "Address : " +
                    agent_address + '\n' + '\n' + "3.Materials : " + l4[0] +
                    "," + l4[1] + "," + l4[2] + "," + l4[3])
    else:
        # Inline materials present: agent block is 8 labelled lines.
        agent_details = searchlines[searchlines.index(
            "3. Agent Details"):searchlines.index("3. Agent Details") + 8]
        agent_name = agent_details[1:4][0].split(
            'Title')[1] + agent_details[1:4][1].split(
            'First name')[1] + agent_details[1:4][2].split('Surname')[1]
        agent_name = agent_name.strip()
        agent_company_name = agent_details[4].split('Company name')[1]
        agent_company_name = agent_company_name.strip()
        agent_address = agent_details[5].split(
            'Address line 1')[1] + ',' + agent_details[6].split(
            'Address line 2')[1] + ',' + agent_details[7].split(
            'Address line 3')[1]
        agent_address = agent_address.strip()
        with open(output, "w") as f:
            f.write("1. Applicant details \n" + "Name : " + applicant_name +
                    '\n' + "Company : " + applicant_company_name + '\n' +
                    "Address : " + applicant_address + '\n' + '\n' +
                    "2. Agent details\n" + "Name : " + agent_name + '\n' +
                    "Company : " + agent_company_name + '\n' + "Address : " +
                    agent_address + '\n' + '\n' + "3.Materials : " + l2[0] +
                    "," + l2[1] + "," + l2[2] + "," + l2[3])
# coding: utf-8
# Notebook-style script: rasterize a PDF at 500 DPI and OCR the result.
from pdf2image import convert_from_path

pages = convert_from_path('pdf1.pdf', 500)

# NOTE(review): every page is saved to the same 'out.jpg', so only the
# final page survives for the OCR step -- confirm this is intended.
for page in pages:
    page.save('out.jpg', 'JPEG')

from PIL import Image
import pytesseract

text = pytesseract.image_to_string(Image.open('out.jpg'))
text
# Preview (and optionally save) per-page PNG names for every PDF in a
# fixed directory, rendering at 96 DPI.
import os
from pdf2image import convert_from_path
import PIL

# 4896*6336
pdf_dir = r"C:\Users\randy\Downloads\the_way_to_train"
save_dir = r'C:\Users\randy\Downloads\the_way_to_train\PDF2IMG'
os.chdir(pdf_dir)
for pdf_file in os.listdir(pdf_dir):
    if pdf_file.endswith(".pdf"):
        pages = convert_from_path(pdf_file, dpi=96)
        pdf_file = pdf_file[:-4]
        # BUG FIX: pages.index(page) was O(n) per page (O(n^2) total) and
        # returns the FIRST match, so duplicate pages got the same number;
        # enumerate gives the true page index.
        for page_number, page in enumerate(pages):
            print("%s/%s-page%d.png" % (save_dir, pdf_file, page_number))
            # page.save("%s/%s-page%d.png" % (save_dir, pdf_file, page_number), "png")
def test_conversion_from_path_241(self):  # pragma: no cover
    """A 241-page PDF converts to 241 images; report per-page time."""
    started = time.time()
    converted = convert_from_path('./tests/test_241.pdf')
    self.assertTrue(len(converted) == 241)
    per_page = (time.time() - started) / 241.
    print('test_conversion_from_path_241: {} sec'.format(per_page))
def pdf2img(self, pdf_path):
    """Render every page of *pdf_path*; returns the list of PIL images."""
    rendered = pdf2image.convert_from_path(pdf_path)
    return rendered
def test_empty_if_corrupted_pdf(self):
    """A corrupted PDF must raise during conversion."""
    started = time.time()
    with self.assertRaises(Exception):
        convert_from_path('./tests/test_corrupted.pdf')
    elapsed = time.time() - started
    print('test_empty_if_corrupted_pdf: {} sec'.format(elapsed))
def process_score_image_request():
    """Flask view: run Detectron2 document-layout analysis on an uploaded
    PDF and push the per-page structure (titles, text, tables, figures)
    into Neo4j.

    Expects a POST with a 'file' upload; always renders index.html.
    NOTE(review): indentation reconstructed from a collapsed source --
    verify block nesting (especially whether the model setup belongs
    inside the allowed_file branch) against the original file.
    """
    if request.method == "POST":
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        # create config
        cfg = get_cfg()
        # below path applies to current installation location of Detectron2
        cfgFile = "DLA_mask_rcnn_X_101_32x8d_FPN_3x.yaml"
        cfg.merge_from_file(cfgFile)
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
        cfg.MODEL.WEIGHTS = "model_final_trimmed.pth"
        cfg.MODEL.DEVICE = "cpu"  # we use a CPU Detectron copy
        # boxes = outputs['instances'].pred_boxes.tensor.cpu().numpy()[0]
        classes = ['text', 'title', 'list', 'table', 'figure']
        default_predictor = detectron2.engine.defaults.DefaultPredictor(
            cfg)
        # Render the uploaded PDF one JPEG per page at 200 DPI.
        pages = convert_from_path("/uploads/" + filename, dpi=200, fmt='jpeg')
        for idx, p in enumerate(pages):
            print(idx)  # pages
            # PIL gives RGB; reverse the channel axis to BGR for the model.
            im = np.array(p)[:, :, ::-1]
            predictions = default_predictor(im)
            instances = predictions["instances"].to('cpu')
            MetadataCatalog.get(cfg.DATASETS.TEST[0]).thing_classes = [
                'text', 'title', 'list', 'table', 'figure'
            ]
            pred_classes = instances.pred_classes
            labels = [classes[i] for i in pred_classes]
            # Build [{label: [count, [contents...]]}] with one entry per
            # distinct label (the set() pass deduplicates).
            label_count = [{i: labels.count(i)} for i in labels]
            label_count = [
                dict(y) for y in set(
                    tuple(x.items()) for x in label_count)
            ]
            label_count = [{
                k: [v, []]
            } for label in label_count for k, v in label.items()]
            print(label_count)
            page_label_count = {f"page {idx}": label_count}
            print(page_label_count)
            # print(label_count)
            def add_content(content):
                # NOTE(review): closure reads the loop variable `label`
                # late-bound from the bbox loop below -- it files
                # `content` under whatever label is current at call time.
                for i in label_count:
                    for k, v in i.items():
                        if k == label:
                            v[1].append(content)
                            return True
            boxes = instances.pred_boxes
            if isinstance(boxes, detectron2.structures.boxes.Boxes):
                boxes = boxes.tensor.numpy()
            else:
                boxes = np.asarray(boxes)
            from PIL import Image
            import math
            table = []
            list_ = []
            text = []
            title = []
            # content = [table]
            for label, bbox in zip(labels, boxes):
                # getting prediction bboxes from model outputs
                x2 = math.ceil(bbox[0])
                x1 = math.ceil(bbox[1])
                y2 = math.ceil(bbox[2])
                y1 = math.ceil(bbox[3])
                crop_img = im[x1:y1, x2:y2]
                # Skip slivers too small to OCR.
                if len(crop_img) <= 8:
                    continue
                if label == "table":
                    print(label)
                    # add_content(img_(crop_img[ : , : , -1]))
                elif label == "list":
                    add_content(extract_from_images(crop_img))
                elif label == "title":
                    add_content(extract_from_images(crop_img))
                elif label != "figure":
                    add_content(extract_from_images(crop_img))
            # print(page_label_count)
            # print(page_label_count)
            # Persist the page structure to Neo4j, one relationship per
            # label kind carrying its count and extracted content.
            for k, v in page_label_count.items():
                # sendToNeo4j("MERGE (d:Document)-[:Page]->(p: Page {page_num: $k})", k=k)
                for i in v:
                    for l, m in i.items():
                        # print(m)
                        if l == 'figure':
                            sendToNeo4j(
                                "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Figure_count {figure: $m}]->(f:Figure {figure: 'figure'})",
                                m=m[0], page=k)
                        if l == 'text':
                            sendToNeo4j(
                                "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Paragraph_count {text: $m}]->(pa:Paragraph {text: $text})",
                                m=m[0], page=k, text=m[1])
                        if l == 'title':
                            sendToNeo4j(
                                "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Title_count {title: $m}]->(t:Title {title: $title})",
                                m=m[0], page=k, title=m[1])
                        if l == 'table':
                            sendToNeo4j(
                                "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Table_count {table: $m}]->(ta:Table {table: $table})",
                                m=m[0], page=k, table=m[1])
                        if l == 'form':
                            sendToNeo4j(
                                "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Form_count {form: $m}]->(fo:Form {form: $form})",
                                m=m[0], page=k, form=m[1])
            # sendToNeo4j('MERGE(p:Page{page:$page_label_count.keys()[0]', keys=page_label_count.keys()[0])
    return render_template('index.html')
def post(self):
    """REST endpoint: download an invoice (PNG or PDF), extract page
    images, detect the carrier template via Google Vision OCR, and
    dispatch to the matching template processor.

    Expects JSON {'file_url', 'file_type'}; returns either the template
    processor's response dict or an {'msg': 'Error', ...} dict.  All
    extracted images are deleted before returning.
    NOTE(review): indentation reconstructed from a collapsed source --
    verify nesting against the original file.
    """
    ############ Get Input Params ################
    data = request.get_json()
    file_url = data['file_url']
    file_type = data['file_type'].lower()
    ### Check If PDF and Image Directory Exists ###
    if not (os.path.exists(DownloadDirectory)):
        os.mkdir(DownloadDirectory)
    if not (os.path.exists(ExtractedImageDirectory)):
        os.mkdir(ExtractedImageDirectory)
    ############# Download File ##################
    file_type = file_type.lower()
    # Random name avoids collisions between concurrent requests.
    random_integer = str(random.randint(100000, 999999))
    random_letters = ''.join(random.choice(letters) for i in range(5))
    random_combination = random_integer + random_letters
    FileName = "File_" + random_combination
    DownloadFilePath = DownloadDirectory + FileName
    ExtractedImageFileName = ExtractedImageDirectory + FileName
    ########### Check File Type ####################
    if file_type == "png":
        FileName = FileName + ".png"
    elif file_type == "pdf":
        FileName = FileName + ".pdf"
    else:
        return {
            'msg': 'Error',
            'description': 'Unsupported file extension.'
        }
    ########## Download File ###################
    try:
        response = requests.get(str(file_url))
    except:
        return {
            'msg': 'Error',
            'description':
            'Unable to download file. Please check the file url again.'
        }
    ############# Write downloaded file to local ##################
    try:
        with open(DownloadFilePath, 'wb') as f:
            f.write(response.content)
    except:
        return {
            'msg': 'Error',
            'description': 'Unable to save downloaded file.'
        }
    ############## Save Image from Downloaded File ###################
    ImageList = []
    if file_type == "png":
        ExtractedImageFilePath = ExtractedImageFileName + ".png"
        try:
            im = Image.open(DownloadFilePath)
        except:
            os.remove(DownloadFilePath)
            return {
                'msg': 'Error',
                'description': 'Unable to open downloaded file.'
            }
        try:
            im.save(ExtractedImageFilePath)
            ImageList.append(ExtractedImageFilePath)
            os.remove(DownloadFilePath)
        except:
            os.remove(DownloadFilePath)
            return {
                'msg': 'Error',
                'description': 'Unable to save image file.'
            }
    elif file_type == "pdf":
        try:
            print("--------Inside-------------------")
            images = convert_from_path(DownloadFilePath, dpi=500)
            print(len(images))
            for pagenum, image in enumerate(images):
                # 'LA' = grayscale + alpha, keeps OCR input consistent.
                image = image.convert('LA')
                ExtractedImageFilePath = ExtractedImageFileName + "_Page" + str(
                    pagenum + 1) + ".png"
                image.save(ExtractedImageFilePath)
                ImageList.append(ExtractedImageFilePath)
            os.remove(DownloadFilePath)
        except:
            os.remove(DownloadFilePath)
            return {
                'msg': 'Error',
                'description': 'Unable to convert pdf to image.'
            }
    else:
        return {'msg': 'Error', 'description': 'Unsupported File Format.'}
    ##### Check and compress File Size #######
    # Vision API limit: re-save with JPEG-style quality until under 9 MiB.
    for image in ImageList:
        while os.stat(image).st_size > 9437184:
            im = Image.open(image)
            im.save(image, optimize=True, quality=80)
    ########### Check For Templates ###########
    FirstImage = ImageList[0]
    try:
        with io.open(FirstImage, 'rb') as gen_image_file:
            content = gen_image_file.read()
    except:
        for image in ImageList:
            os.remove(image)
        return {
            'msg': 'Error',
            'description':
            'Unable to read extracted image for template detection.'
        }
    try:
        client = vision.ImageAnnotatorClient()
        image = vision.Image(content=content)
        response = client.text_detection(image=image)
    except Exception as e:
        print(e)
        for image in ImageList:
            os.remove(image)
        return {
            'msg': 'Error',
            'description': 'Unable to invoke Google vision api.'
        }
    ############ Create Dict for Vision API response ###########
    DictResponse = MessageToDict(response._pb)
    WholeContentDescription = DictResponse['textAnnotations'][0][
        'description'].lower()
    ################# Match Template ############################
    # A template matches only if ALL of its keywords appear in the OCR text.
    TemplateName = ""
    for templatename, keywords in Templates.items():
        matchfound = True
        for keyword in keywords:
            if keyword not in WholeContentDescription:
                matchfound = False
        if matchfound:
            TemplateName = templatename
            break
    if TemplateName == "":
        for image in ImageList:
            os.remove(image)
        return {
            'msg': 'Error',
            'description': 'Unable to find a matching Template.'
        }
    ############## Yang Ming Template #############################
    if TemplateName == "YangMingTemplate":
        try:
            response = ExtractDataForYongMingTemplate(DictResponse)
            if response == "missing keywords":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Google Vision API unable to find all the mandatory keywords for Yang Ming Invoice.'
                }
            else:
                for image in ImageList:
                    os.remove(image)
                return response
        except:
            for image in ImageList:
                os.remove(image)
            return {
                'msg': 'Error',
                'description':
                'Unknown issue occured. Please connect with system administrator with the input file.'
            }
    ############## Maersk Template #############################
    if TemplateName == "MaerskTemplate":
        try:
            response = ProcessMaerskInvoice(ImageList)
            if response == "invocation error":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description': 'Unable to Invoke Google Vision API'
                }
            elif response == "missing keywords":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Google Vision API unable to find all the mandatory keywords for Maersk Invoice.'
                }
            elif response == "unable to extract data from Google Vision API":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Unable to extract data from Google Vision API.'
                }
            else:
                for image in ImageList:
                    os.remove(image)
                return response
        # BUG FIX: was a bare `except:` followed by `print(e)` with `e`
        # unbound -- the NameError masked the real failure.  Bind the
        # exception as the OOCL branch below already does.
        except Exception as e:
            print(e)
            for image in ImageList:
                os.remove(image)
            return {
                'msg': 'Error',
                'description':
                'Unknown issue occured. Please connect with system administrator with the input file.'
            }
    ############## Evergreen Template #############################
    if TemplateName == "EvergreenTemplate":
        try:
            response = ProcessEvergreenInvoice(ImageList)
            if response == "invocation error":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description': 'Unable to Invoke Google Vision API'
                }
            elif response == "missing keywords":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Google Vision API unable to find all the mandatory keywords for Evergreen Invoice.'
                }
            elif response == "unable to extract data from Google Vision API":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Unable to extract data from Google Vision API.'
                }
            else:
                for image in ImageList:
                    os.remove(image)
                return response
        except:
            for image in ImageList:
                os.remove(image)
            return {
                'msg': 'Error',
                'description':
                'Unknown issue occured. Please connect with system administrator with the input file.'
            }
    ################## OOCL Template ##############################
    if TemplateName == "OOCLTemplate":
        try:
            response = ProcessOOCLInvoice(ImageList)
            if response == "invocation error":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description': 'Unable to Invoke Google Vision API'
                }
            elif response == "missing keywords":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Google Vision API unable to find all the mandatory keywords for Evergreen Invoice.'
                }
            elif response == "unable to extract data from Google Vision API":
                for image in ImageList:
                    os.remove(image)
                return {
                    'msg': 'Error',
                    'description':
                    'Unable to extract data from Google Vision API.'
                }
            else:
                for image in ImageList:
                    os.remove(image)
                return response
        except Exception as e:
            print(e)
            # raise e
            for image in ImageList:
                os.remove(image)
            return {
                'msg': 'Error',
                'description':
                'Unknown issue occured. Please connect with system administrator with the input file.'
            }
def pipeline(directory):
    """Parse every schematic image in *directory* into a graph, then stitch
    page-continuation graphs together.

    Args:
        directory: Folder of image files (single-page PDFs are rasterized).

    Returns:
        The composed graph, or the list of per-page graphs when the
        OUT/IN continuation nodes of adjacent pages do not line up
        (None when fewer than two pages were parsed).
    """
    global G

    graphs = []
    for name in os.listdir(directory):
        print("[*] Parsing image file {}".format(name))

        # Open image depending on image format; PDFs are rasterized first.
        # splitext is robust to filenames with several dots or none at all
        # (the original `name.split('.')[1]` broke on both).
        full_path = os.path.join(directory, name)
        if os.path.splitext(name)[1].lower() == ".pdf":
            img = convert_from_path(full_path)[0].convert('RGB')
        else:
            img = Image.open(full_path).convert('RGB')

        # Trim border
        img = np.array(img)
        border_image = trim_meta(img)

        # Grab ECU mappings and the line colors in image
        print("\t|---> [*] Extracting Text")
        ecu_map = extract_text(border_image)

        # Build relationships based off lines
        lines_img = line_filter(border_image, color='lines')
        lines_img = cv2.cvtColor(lines_img, cv2.COLOR_GRAY2RGB)
        print("\t|---> [*] Building Relationships")
        graph = build_relationships(lines_img, ecu_map.copy())
        graphs.append(graph.copy())
        G.clear()  # reset the module-level scratch graph between pages

    # Concatenate page continuation lines
    print("[*] Composing continued page graphs")
    F = None
    for i in range(0, len(graphs) - 1):
        print(5 * " " + "|---> Page {} <--> Page {}".format(i, i + 1))
        # `is not None` rather than `!= None`: graph types may overload __eq__.
        g, h = (F, graphs[i + 1]) if F is not None else (graphs[i], graphs[i + 1])
        out_nodes = [n for n in g.nodes if "OUT-" in n]
        next_in_nodes = [n for n in h.nodes if "IN-" in n]
        # next_in_nodes = next_in_nodes[:-1]
        if len(out_nodes) != len(next_in_nodes):
            print("")
            print(
                "[-] Length of OUT[{}] and IN[{}] nodes does not match".format(
                    len(out_nodes), len(next_in_nodes)))
            print(" returning all graphs")
            return graphs

        # Give matching OUT/IN nodes a shared name so nx.compose fuses them.
        new_names = [
            "CONNECT-JUNCT-{}-{}".format(i, n)
            for n in range(0, len(next_in_nodes))
        ]
        mapping_out = dict(zip(out_nodes, new_names))
        mapping_in = dict(zip(next_in_nodes, new_names))
        G_ = nx.relabel_nodes(g, mapping_out)
        H_ = nx.relabel_nodes(h, mapping_in)
        F = nx.compose(G_, H_)
    return F
# Convert the first page of every PDF in the current directory to a
# 299x299 JPEG thumbnail (same base name, .jpg extension).
import os
import tempfile
from pdf2image import convert_from_path
import cv2

files = filter(os.path.isfile, os.listdir(os.curdir))
for singlefile in files:
    # splitext handles names with no dot at all (the old split('.')[1]
    # raised IndexError) and names with several dots ("a.b.pdf").
    base, ext = os.path.splitext(singlefile)
    if ext.lower() == '.pdf':
        try:
            # Rasterize only the first page.
            images_from_path = convert_from_path(singlefile, last_page=1, first_page=0)
            base_filename = base + '.jpg'
            save_dir = os.getcwd()
            images_from_path[0].save(os.path.join(save_dir, base_filename), 'JPEG')
            # Re-open the saved JPEG and resize to a fixed 299x299.
            img = cv2.imread(base_filename)
            dim = (299, 299)
            resizeImg = cv2.resize(img, dim)
            cv2.imwrite(base_filename, resizeImg)
            #os.remove(singlefile)
        except Exception:
            # Best-effort batch job: report the failing file and move on.
            print(singlefile)
print("Done!")
def parseCheckYN(page):
    """OCR one survey page and return per-question checkbox pixel data.

    Args:
        page: File name of the survey PDF inside the batch directory.

    Returns:
        pandas.DataFrame with one row per question region: the raw
        greyscale pixel values plus question label, account number,
        business name, and the source survey file name.
    """
    # set path
    setpath = '/Users/natewagner/Documents/Surveys/batch1/'
    path = setpath + page

    # Convert to png and to greyscale / rotate
    images = convert_from_path(path)
    images_bw = images[0].convert('L')
    images_bw = images_bw.transpose(Image.ROTATE_270)

    # set path to pytesseract
    pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/4.1.1/bin/tesseract'

    # extract account number
    accnt_num_dims = (1000+155, 150-140, 1600-200, 75)
    accnt_number = pytesseract.image_to_string(images_bw.crop(accnt_num_dims))

    # extract company name
    comp_info_dims = (465+250, 1100, 1250+400, 1145)
    bus_name = pytesseract.image_to_string(images_bw.crop(comp_info_dims))

    # Crop boxes (left, upper, right, lower) for each yes/no question;
    # insertion order fixes the row order of the concatenated result.
    question_dims = {
        'question2': (150-40, 783+145, 290+10, 923+195),
        'question3': (135-30, 860+80, 275+20, 1000+130),
        'question4': (115, 1070, 305, 1070+190),
        'question8': (1060+300, 928-250, 1250+300, 1118-250),
        'question9': (1060+112, 928-180, 1250+112, 1118-180),
    }
    frames = [
        _question_frame(images_bw, dims, label, accnt_number, bus_name)
        for label, dims in question_dims.items()
    ]

    check_YN_data = pd.concat(frames)
    check_YN_data['survey'] = str(page)
    return check_YN_data


def _question_frame(img, dims, label, accnt_number, bus_name):
    """Crop one question box and flatten its pixels into a one-row frame."""
    pixels = list(img.crop(dims).getdata())
    df = pd.DataFrame(pixels).transpose()
    df['question'] = label
    df['accnt_num'] = accnt_number
    df['bus_name'] = bus_name
    return df
def __init__(self, pdf_path):
    """Rasterize every page of the PDF at *pdf_path* and keep the images."""
    self.images = convert_from_path(pdf_path)
# Import libraries from PIL import Image import pytesseract import sys from pdf2image import convert_from_path import os # Path of the pdf PDF_file = "/home/naresh/Tesseract/Languagefiles/Hindi/law_commisionofindia/law_commisionofindia.pdf" ''' Part #1 : Converting PDF to images ''' # Store all the pages of the PDF in a variable pages = convert_from_path(PDF_file, 500) # Counter to store images of each page of PDF to image image_counter = 1 for page in pages: filename = "/home/naresh/Tesseract/PDF_Images/Hindi/" + "law_commisionofindia" + str( image_counter) + ".jpg" # Save the image of the page in system page.save(filename, 'JPEG') # Increment the counter to update filename image_counter = image_counter + 1 '''
if r > 120 and r < 254 and g > 120 and g < 254 and b > 120 and b < 254: return True else: return False # Handling of images for removing the watermark def handle(imgs): for i in range(imgs.shape[0]): for j in range(imgs.shape[1]): if select_pixel(imgs[i][j][0], imgs[i][j][1], imgs[i][j][2]): imgs[i][j][0] = imgs[i][j][1] = imgs[i][j][2] = 255 return imgs images = convert_from_path(pdfFile) try: os.mkdir(dirname + '\img') except FileExistsError: print('Folder exist') index = 0 for img in images: index += 1 img = np.array(img) print(img.shape) img = handle(img) io.imsave(dirname + '\img\img' + str(index) + '.jpg', img) print(index) # Merging images to a sigle PDF
# Convert the PDF named by the first CLI argument into JPEG pages in ./pyout.
from pdf2image import convert_from_path
import sys, os

# Fail with a usage hint instead of an IndexError when the PDF path is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python {} <pdf>".format(sys.argv[0]))

os.makedirs("pyout", exist_ok=True)
convert_from_path(sys.argv[1], output_folder="pyout", fmt="jpeg")
def resume_generate_action(request):
    """Generate a resume for the logged-in user: HTML -> PDF -> JPEG preview,
    attach it to a matching (or new) Bio as the master resume, and render
    the management page.

    Args:
        request: Django request; POST carries education_level, job_type,
            positions[] and locations[].

    Returns:
        Rendered 'eyee_management.html' response with the user's bio list.
    """
    context = {}
    print("Entering generate_action")
    # Ensure the working directories exist (idempotent).
    os.makedirs(PDF_DIR, exist_ok=True)
    os.makedirs(JPG_DIR, exist_ok=True)

    # create html file
    html_filename = request.user.username + ".html"
    create_html(request, html_filename)

    # html to pdf
    pdf_filename = PDF_DIR + request.user.username + ".pdf"
    # path_wkthmltopdf = r'C:\Users\dell\Anaconda3\Lib\site-packages\wkhtmltopdf\bin\wkhtmltopdf.exe'
    # config = pdfkit.configuration(wkhtmltopdf=path_wkthmltopdf)
    # pdfkit.from_file(html_filename, pdf_filename, configuration=config)
    pdfkit.from_file(html_filename, pdf_filename)

    # pdf to image (first page only, 500 dpi)
    jpg_filename = JPG_DIR + request.user.username + ".jpg"
    pages = convert_from_path(pdf_filename, 500)
    pages[0].save(jpg_filename, 'JPEG')

    position_list = [Position.objects.get(text=t)
                     for t in request.POST.getlist('positions[]')]
    location_list = [Location.objects.get(text=t)
                     for t in request.POST.getlist('locations[]')]

    # Check whether bio already exists (reuse the single Employee query;
    # the original fetched the same Employee twice).
    employee = Employee.objects.get(user=request.user)
    bio_list = employee.bio_list
    new_bio = None
    for bio in bio_list.all():
        if bio.education_level != request.POST['education_level']:
            print("education_level")
            continue
        if bio.job_type != request.POST['job_type']:
            print("job_type")
            continue
        if list(bio.positions.all()) != position_list:
            print(bio.positions.all())
            print(position_list)
            print("positions")
            continue
        if list(bio.locations.all()) != location_list:
            print("locations")
            continue
        new_bio = bio
        break

    print(request.POST)
    if new_bio is None:
        print("Generating new bio")
        # If bio does not exist, create a new bio
        new_bio = Bio(user=request.user,
                      education_level=request.POST['education_level'],
                      job_type=request.POST['job_type'])
        new_bio.save()
        for position in position_list:
            new_bio.positions.add(position)
        for location in location_list:
            new_bio.locations.add(location)
        new_bio.save()
    else:
        print("bio already exist")
        # If bio already exists, set the previous master resume as slave
        for resume in new_bio.resume_list.all():
            if resume.master:
                resume.master = False
                resume.save()
        new_bio.save()

    # Construct new resume: save pdf and preview image into the model.
    # `with` guarantees the handles are closed even if .save() raises
    # (the original open()/close() pairs leaked on exception).
    new_resume = Resume(user=request.user, master=True)
    with open(pdf_filename, 'rb') as f:
        new_resume.file.save(pdf_filename, File(f))
    with open(jpg_filename, 'rb') as f:
        new_resume.picture.save(jpg_filename, File(f))

    # delete local files
    os.remove(html_filename)
    os.remove(pdf_filename)
    os.remove(jpg_filename)

    new_resume.save()
    new_bio.resume_list.add(new_resume)
    new_bio.save()
    if new_bio not in employee.bio_list.all():
        employee.bio_list.add(new_bio)
        employee.save()

    length = len(employee.bio_list.all())
    context = {
        'user': request.user,
        'bio_list': employee.bio_list.all(),
        'len': length
    }
    return render(request, 'eyee_management.html', context)
def evalvideo(net:Yolact, path:str):
    """Run YOLACT on a video stream (file or webcam) with live display.

    Frames are pipelined through transform -> network -> display-prep on a
    thread pool while a dedicated thread plays finished frames, so capture,
    inference and display overlap. Press Escape to quit; 'n'/'b' step the
    background slides forward/back when a PDF background is loaded.

    Args:
        net: Trained Yolact network (wrapped in CustomDataParallel below).
        path: Video file path, or a digit string naming a webcam index.
    """
    # If the path is a digit, parse it as a webcam index
    is_webcam = path.isdigit()
    if is_webcam:
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)

    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)

    # Request the configured capture resolution and an H.264 stream.
    vid.set(cv2.CAP_PROP_FRAME_HEIGHT, args.video_res[1])
    vid.set(cv2.CAP_PROP_FRAME_WIDTH, args.video_res[0])
    vid.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('H', '2', '6', '4'))
    W = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    H = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    print('capture video size: w{} h{}'.format(W, H))

    net = CustomDataParallel(net).cuda()
    transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
    frame_times = MovingAverage(100)
    fps = 0
    # Target per-frame period derived from the source FPS.
    # NOTE(review): an older comment referred to a 0.8 sleep-overhead
    # factor, but no such factor is applied here.
    frame_time_target = 1 / vid.get(cv2.CAP_PROP_FPS)
    running = True

    # Background-replacement state consumed by prep_frame/prep_display.
    slide_num = 0
    bg_imgs = []
    mask_alpha = args.mask_alpha
    bgvid = None
    if args.mask_bg_path is not None:
        if Path(args.mask_bg_path).suffix == '.pdf':
            # PDF background: rasterize every page as a "slide".
            pdfimages = pdf2image.convert_from_path(args.mask_bg_path)
            for pdfimage in pdfimages:
                cvimage = np.asarray(pdfimage)
                # PIL yields RGB; OpenCV expects BGR.
                cvimage = cv2.cvtColor(cvimage, cv2.COLOR_RGB2BGR)
                bg_img = cv2.resize(cvimage, (int(W), int(H)))
                bg_img_gpu = torch.from_numpy( bg_img / 255.0 ).cuda().float()
                bg_imgs.append(bg_img_gpu)
        elif Path(args.mask_bg_path).suffix == '.mp4':
            # Video background: keep the capture handle; frames pulled later.
            bg_imgs = []
            bgvid = cv2.VideoCapture(args.mask_bg_path)
            BG_W = bgvid.get(cv2.CAP_PROP_FRAME_WIDTH)
            BG_H = bgvid.get(cv2.CAP_PROP_FRAME_HEIGHT)
            print('background capture video size: w{} h{}'.format(BG_W, BG_H))
        else:
            # Single still-image background.
            bg_img = cv2.resize(cv2.imread(args.mask_bg_path), (int(W), int(H)))
            bg_img_gpu = torch.from_numpy( bg_img / 255.0 ).cuda().float()
            bg_imgs.append(bg_img_gpu)

    def cleanup_and_exit():
        # Tear down the pool, capture and windows, then exit the process.
        print()
        pool.terminate()
        vid.release()
        cv2.destroyAllWindows()
        exit()

    def get_next_frame(vid):
        # Grab one batch of raw frames from the capture.
        return [vid.read()[1] for _ in range(args.video_multiframe)]

    def rescale(frame):
        # Optionally shrink the frame into the bottom-left of a black
        # canvas of the original size (keeps the tensor shape constant).
        if args.rescale < 1.0:
            h, w = frame.shape[:2]
            new_img = np.zeros((h, w, 3), np.uint8)
            rescale_frame = cv2.resize(frame, dsize=(int(w * args.rescale), int(h * args.rescale)))
            new_img[h - int(h * args.rescale):h, 0:int(w * args.rescale)] = rescale_frame
            return new_img
        else:
            return frame

    def transform_frame(frames):
        # Stage 1: move frames to GPU and apply the base transform.
        with torch.no_grad():
            frames = [ torch.from_numpy( rescale(frame) ).cuda().float() for frame in frames]
            return frames, transform(torch.stack(frames, 0))

    def eval_network(inp):
        # Stage 2: run the detector on the transformed batch.
        with torch.no_grad():
            frames, imgs = inp
            return frames, net(imgs)

    def prep_frame(inp):
        # Stage 3: render detections (and background replacement) onto one frame.
        nonlocal slide_num, bg_imgs, mask_alpha, bgvid
        with torch.no_grad():
            frame, preds = inp
            return prep_display(preds, frame, None, None, undo_transform=False, class_color=True, mask_alpha=mask_alpha, bg_imgs=bg_imgs, slide_num=slide_num, bgvid=bgvid)

    frame_buffer = Queue()
    video_fps = 0

    # All this timing code is to keep playback smooth: the display thread
    # adapts its frame period to how full the frame buffer is.
    def play_video():
        nonlocal frame_buffer, running, video_fps, is_webcam, slide_num

        video_frame_times = MovingAverage(100)
        frame_time_stabilizer = frame_time_target
        last_time = None
        stabilizer_step = 0.0005

        while running:
            frame_time_start = time.time()

            if not frame_buffer.empty():
                next_time = time.time()
                if last_time is not None:
                    video_frame_times.add(next_time - last_time)
                    video_fps = 1 / video_frame_times.get_avg()
                cv2.imshow(path, frame_buffer.get())
                last_time = next_time

            key_press = cv2.waitKey(1) & 0xff
            if key_press == 27: # Press Escape to close
                running = False
            elif key_press == ord('n'):
                # Next background slide (PDF backgrounds only).
                if slide_num < len(bg_imgs) - 1:
                    slide_num = slide_num + 1
            elif key_press == ord('b'):
                # Previous background slide.
                if 0 < slide_num:
                    slide_num = slide_num - 1

            # Nudge the frame period: slow down when the buffer is starving,
            # speed up when it is overfull.
            buffer_size = frame_buffer.qsize()
            if buffer_size < args.video_multiframe:
                frame_time_stabilizer += stabilizer_step
            elif buffer_size > args.video_multiframe:
                frame_time_stabilizer -= stabilizer_step
                if frame_time_stabilizer < 0:
                    frame_time_stabilizer = 0

            new_target = frame_time_stabilizer if is_webcam else max(frame_time_stabilizer, frame_time_target)

            next_frame_target = max(2 * new_target - video_frame_times.get_avg(), 0)
            target_time = frame_time_start + next_frame_target - 0.001 # Let's just subtract a millisecond to be safe

            # This gives more accurate timing than if sleeping the whole amount at once
            while time.time() < target_time:
                time.sleep(0.001)

    # Pull element i out of a batched (frames, preds) pair as a 1-frame unit.
    extract_frame = lambda x, i: (x[0][i] if x[1][i] is None else x[0][i].to(x[1][i]['box'].device), [x[1][i]])

    # Prime the network on the first frame because I do some thread unsafe things otherwise
    print('Initializing model... ', end='')
    eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

    # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
    pool.apply_async(play_video)

    active_frames = []

    print()
    while vid.isOpened() and running:
        start_time = time.time()

        # Start loading the next frames from the disk
        next_frames = pool.apply_async(get_next_frame, args=(vid,))

        # For each frame in our active processing queue, dispatch a job
        # for that frame using the current function in the sequence
        for frame in active_frames:
            frame['value'] = pool.apply_async(sequence[frame['idx']], args=(frame['value'],))

        # For each frame whose job was the last in the sequence (i.e. for all final outputs)
        for frame in active_frames:
            if frame['idx'] == 0:
                frame_buffer.put(frame['value'].get())

        # Remove the finished frames from the processing queue
        active_frames = [x for x in active_frames if x['idx'] > 0]

        # Finish evaluating every frame in the processing queue and advanced their position in the sequence
        for frame in list(reversed(active_frames)):
            frame['value'] = frame['value'].get()
            frame['idx'] -= 1

            if frame['idx'] == 0:
                # Split this up into individual threads for prep_frame since it doesn't support batch size
                active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, args.video_multiframe)]
                frame['value'] = extract_frame(frame['value'], 0)

        # Finish loading in the next frames and add them to the processing queue
        active_frames.append({'value': next_frames.get(), 'idx': len(sequence)-1})

        # Compute FPS
        frame_times.add(time.time() - start_time)
        fps = args.video_multiframe / frame_times.get_avg()

        print('\rProcessing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d ' % (fps, video_fps, frame_buffer.qsize()), end='')

    cleanup_and_exit()