total_num_pages = len(pages) bar_widgets = [ progressbar.Bar(), progressbar.Counter(format='%(value)i/%(max_value)i') ] bar = progressbar.ProgressBar(max_value=total_num_pages, widgets=bar_widgets, redirect_stdout=True) bar.start() with open(input_filepath, "rb") as fp: rsrcmgr = PDFResourceManager(caching=True) device = TextBoxStripper(rsrcmgr, outfp) interpreter = PDFPageInterpreter(rsrcmgr, device) for (page_num, page) in PDFPage.get_pages2(fp, pages, password="", caching=True, check_extractable=True, fallback=False): try: #print("===== Page {}".format(page_num)) # Text box processing: device.text_boxes = [] device.tables = [] interpreter.process_page(page) device.drop_empty_textboxes() device.merge_textboxes() # Table processing device.build_tables() ## For now, we don't care about the title of the page. ## With table contents we have all the information #for text_box in device.text_boxes: