def test_add_underline_annotation(self):
    """Add an underline annotation to the first page and round-trip the PDF.

    Loads ``self.input_file``, appends the annotation, stores the document
    at ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # underline a fixed region of page 0
    first_page = document.get_page(0)
    first_page.append_underline_annotation(
        stroke_color=HexColor("CBEF43"),
        rectangle=Rectangle(
            Decimal(72.86), Decimal(486.82), Decimal(129), Decimal(13)
        ),
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_add_rubber_stamp_annotation(self):
    """Add a CONFIDENTIAL rubber-stamp annotation and round-trip the PDF.

    Loads ``self.input_file``, stamps the first page, stores the document at
    ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # stamp page 0
    stamp_box = Rectangle(Decimal(128), Decimal(56), Decimal(132), Decimal(58))
    document.get_page(0).append_stamp_annotation(
        name=RubberStampAnnotationIconType.CONFIDENTIAL,
        contents="Approved by Joris Schellekens",
        color=X11Color("Red"),
        rectangle=stamp_box,
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_write_document(self):
    """Write a one-page PDF holding a five-item ordered list and reload it.

    The document is stored as ``output.pdf`` inside ``self.output_dir``.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # build an empty single-page document
    document = Document()
    page = Page()
    document.append_page(page)

    # fill the ordered list, one paragraph per item
    ordered_list = OrderedList()
    for item_text in (
        "Lorem Ipsum Dolor Sit Amet Consectetur Nunc",
        "Ipsum",
        "Dolor",
        "Sit",
        "Amet",
    ):
        ordered_list.add(Paragraph(text=item_text))

    # place the list on the page
    page_layout = SingleColumnLayout(page)
    page_layout.add(ordered_list)

    destination = self.output_dir / ("output.pdf")

    # persist the document
    with open(destination, "wb") as sink_handle:
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_document(self):
    """Embed a text file in a PDF and verify it survives a save/load cycle.

    Loads ``self.input_file``, attaches ``the_raven.txt``, stores the
    document at ``self.output_file``, reloads it and asserts that exactly
    one embedded file with the expected name and content prefix is present.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        document = PDF.loads(source_handle)

    # attach the file
    raven_bytes = (
        b"Once upon a midnight dreary, while I pondered weak and weary "
        b"over many a quaint and curious volume of forgotten lore."
    )
    document.append_embedded_file("the_raven.txt", raven_bytes)

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # reload and inspect the attachments
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        document = PDF.loads(check_handle)

    attachments = document.get_embedded_files()
    assert len(attachments) == 1
    assert "the_raven.txt" in attachments
    assert b"Once upon a midnight" in attachments["the_raven.txt"]
def test_document(self, file):
    """Add a "Confidential" stamp annotation to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; its stem is used to derive the output
            name ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # stamp page 0
    # NOTE(review): the rectangle is passed as a plain tuple here, whereas
    # other tests in this file pass a Rectangle -- confirm which one the
    # API expects.
    stamp_box = (Decimal(128), Decimal(128), Decimal(32), Decimal(64))
    document.get_page(0).append_stamp_annotation(
        name="Confidential",
        contents="Approved by Joris Schellekens",
        color=X11Color("White"),
        rectangle=stamp_box,
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Add a watermark annotation to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # watermark page 0
    watermark_box = Rectangle(
        Decimal(128), Decimal(128), Decimal(64), Decimal(64)
    )
    document.get_page(0).append_watermark_annotation(
        contents="pText",
        rectangle=watermark_box,
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)
def test_document(self, file):
    """Add a circle annotation to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # draw the circle on page 0
    # NOTE(review): this call uses a tuple rectangle and the keywords
    # interior_color/color, while another circle test in this file uses a
    # Rectangle with stroke_color/fill_color -- confirm the current API.
    circle_box = (Decimal(128), Decimal(128), Decimal(64), Decimal(64))
    document.get_page(0).append_circle_annotation(
        rectangle=circle_box,
        interior_color=X11Color("Plum"),
        color=X11Color("Crimson"),
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_write_document(self):
    """Write a one-page PDF containing a single accented character and reload it.

    The document is stored as ``output.pdf`` inside ``self.output_dir``.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # build an empty single-page document
    document = Document()
    page = Page()
    document.append_page(page)

    # lay the chunk of text out directly inside a fixed box on the page
    accented_chunk = ChunkOfText(
        "é",
        font_size=Decimal(24),
    )
    accented_chunk.layout(
        page,
        Rectangle(Decimal(100), Decimal(600), Decimal(100), Decimal(100)),
    )

    destination = self.output_dir / "output.pdf"

    # persist the document
    with open(destination, "wb") as sink_handle:
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_document(self, file):
    """Add a sticky-note shaped polygon annotation to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # the polygon's points come from the line-art factory
    first_page = document.get_page(0)
    first_page.append_polygon_annotation(
        points=LineArtFactory.sticky_note(
            Rectangle(Decimal(128), Decimal(128), Decimal(64), Decimal(64))
        ),
        stroke_color=X11Color("PowderBlue"),
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Add a text annotation with a "Key" icon to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # annotate page 0
    note_box = Rectangle(Decimal(128), Decimal(128), Decimal(64), Decimal(64))
    document.get_page(0).append_text_annotation(
        contents="The quick brown fox ate the lazy mouse",
        rectangle=note_box,
        name_of_icon="Key",
        open=True,
        color=X11Color("Orange"),
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_add_circle_annotation(self):
    """Add a circle annotation to the first page and round-trip the PDF.

    Loads ``self.input_file``, appends the annotation, stores the document
    at ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # draw the circle on page 0
    first_page = document.get_page(0)
    first_page.append_circle_annotation(
        rectangle=Rectangle(
            Decimal(128), Decimal(128), Decimal(64), Decimal(64)
        ),
        stroke_color=X11Color("Plum"),
        fill_color=X11Color("Crimson"),
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Read, write and re-read ``file``, printing how long each phase took.

    Args:
        file: path of the input PDF; the copy is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # determine output location
    out_file = self.output_dir / (file.stem + "_out.pdf")

    # attempt to read PDF
    # perf_counter is a monotonic clock, so the measured interval cannot be
    # distorted by system clock adjustments
    started = time.perf_counter()
    with open(file, "rb") as in_file_handle:
        print("\treading (1) ..")
        doc = PDF.loads(in_file_handle)
    # %.3f instead of %d: integer formatting truncated every sub-second
    # measurement to 0
    print("time elapsed : %.3f" % (time.perf_counter() - started))

    # attempt to store PDF
    started = time.perf_counter()
    with open(out_file, "wb") as out_file_handle:
        print("\twriting ..")
        PDF.dumps(out_file_handle, doc)
    print("time elapsed : %.3f" % (time.perf_counter() - started))

    # attempt to re-open PDF
    started = time.perf_counter()
    with open(out_file, "rb") as in_file_handle:
        print("\treading (2) ..")
        PDF.loads(in_file_handle)
    print("time elapsed : %.3f" % (time.perf_counter() - started))

    return True
def test_add_link_annotation(self):
    """Add a link annotation targeting page 0 and round-trip the PDF.

    Loads ``self.input_file``, appends the annotation, stores the document
    at ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # add the link on page 0
    link_box = Rectangle(Decimal(128), Decimal(128), Decimal(64), Decimal(64))
    document.get_page(0).append_link_annotation(
        page=Decimal(0),
        destination_type=DestinationType.FIT,
        color=X11Color("Red"),
        rectangle=link_box,
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Remove the first page of ``file`` and round-trip the result.

    Documents that report exactly one page are left alone.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``None`` when the document has exactly one page, otherwise ``True``
        once the shortened document has been written and re-read.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # nothing to do for single-page documents
    page_count = int(document.get_document_info().get_number_of_pages())
    if page_count == 1:
        return

    # drop page 0
    document.pop_page(0)

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Add a redact annotation with overlay text to ``file`` and round-trip it.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # mark a fixed region of page 0 for redaction
    redaction_box = Rectangle(
        Decimal(72.86), Decimal(486.82), Decimal(129), Decimal(13)
    )
    document.get_page(0).append_redact_annotation(
        overlay_text="Lorem Ipsum",
        repeat_overlay_text=True,
        fill_color=X11Color("AliceBlue"),
        rectangle=redaction_box,
    )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def _test_document(self, file):
    """Concatenate ``file`` and ``self.input_file_b`` into one new PDF.

    Args:
        file: path of the first input PDF; the merged result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once the merged document has been written.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the first document
    with open(file, "rb") as first_handle:
        print("\treading (1) ..")
        first_document = PDF.loads(first_handle)

    # load the second document
    with open(self.input_file_b, "rb") as second_handle:
        print("\treading (2) ..")
        second_document = PDF.loads(second_handle)

    # append both, in order, to a fresh document
    merged = Document()
    for part in (first_document, second_document):
        merged.append_document(part)

    # persist the merged document
    with open(destination, "wb") as sink_handle:
        print("\twrite ..")
        PDF.dumps(sink_handle, merged)

    return True
def test_add_polygon_annotation(self):
    """Add a three-point polygon annotation and round-trip the PDF.

    Loads ``self.input_file``, appends the annotation to the first page,
    stores the document at ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # the polygon is given as explicit corner points
    corners = [
        (Decimal(72), Decimal(390)),
        (Decimal(242), Decimal(500)),
        (Decimal(156), Decimal(390)),
    ]
    document.get_page(0).append_polygon_annotation(
        points=corners,
        stroke_color=X11Color("Crimson"),
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_add_polyline_annotation_using_lineart_factory(self):
    """Add a droplet-shaped polyline annotation and round-trip the PDF.

    Loads ``self.input_file``, appends the annotation to the first page,
    stores the document at ``self.output_file`` and loads that file again.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the input document
    with open(self.input_file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # the polyline's points come from the line-art factory
    first_page = document.get_page(0)
    first_page.append_polyline_annotation(
        points=LineArtFactory.droplet(
            Rectangle(Decimal(100), Decimal(100), Decimal(100), Decimal(100))
        ),
        stroke_color=X11Color("Crimson"),
    )

    # persist the modified document
    with open(self.output_file, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(self.output_file, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def _test_document(self, file):
    """Place a square annotation in free space on the first page of ``file``.

    A ``FreeSpaceFinder`` is built for page 0. When ``self.in_debug`` is
    set, every falsy grid cell is outlined with a square annotation. A
    64x64 rectangle is then requested from the finder and, if one is
    returned, annotated on the page. The document is finally written to
    ``<stem>_out.pdf`` inside ``self.output_dir`` and re-read.

    Args:
        file: path of the input PDF.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # analyse the first page
    finder = FreeSpaceFinder(document.get_page(0))

    # debug aid: outline every grid cell whose value is falsy
    if self.in_debug:
        for column in range(0, len(finder.grid)):
            for row in range(0, len(finder.grid[column])):
                if not finder.grid[column][row]:
                    cell_size = Decimal(finder.grid_resolution)
                    cell_x = Decimal(column) * cell_size
                    cell_y = Decimal(row) * cell_size
                    document.get_page(0).append_square_annotation(
                        Rectangle(cell_x, cell_y, cell_size, cell_size),
                        stroke_color=X11Color("Salmon"),
                    )

    # ask the finder for room for a 64x64 box
    # NOTE(review): the requested y is twice the page height while x is half
    # the page width -- possibly meant to be h / 2; confirm the intent.
    page_width, page_height = document.get_page(0).get_page_info().get_size()
    wanted_box = Rectangle(
        Decimal(page_width / Decimal(2)),
        Decimal(page_height * Decimal(2)),
        Decimal(64),
        Decimal(64),
    )
    free_box = finder.find_free_space(wanted_box)

    # annotate the spot only when the finder returned one
    if free_box is not None:
        document.get_page(0).append_square_annotation(
            rectangle=free_box,
            stroke_color=HexColor("#F75C03"),
            fill_color=HexColor("#04A777"),
        )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_write_document(self):
    """Write a PDF with a 7x7 table of Lissajous line-art shapes and reload it.

    Each cell holds the shape produced by ``LineArtFactory.lissajours`` for
    the 1-based (row, column) pair; colours cycle along the diagonals. The
    document is stored as ``output.pdf`` inside ``self.output_dir``.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # build an empty single-page document
    document = Document()
    page = Page()
    document.append_page(page)

    page_layout = SingleColumnLayout(page)

    # heading
    heading = Paragraph(
        "Lissajours Line Art",
        font_size=Decimal(20),
        font_color=X11Color("Blue"),
    )
    page_layout.add(heading)

    # one fill colour per hue step, with a darker variant for the outline
    N = 7
    fill_palette = [
        HSVColor(Decimal(step / N), Decimal(1), Decimal(1))
        for step in range(0, N)
    ]
    stroke_palette = [HSVColor.darker(color) for color in fill_palette]

    # every shape is generated inside the same bounding box
    shape_box = Rectangle(Decimal(0), Decimal(0), Decimal(100), Decimal(100))

    # fill the table row by row
    grid = Table(number_of_rows=N, number_of_columns=N)
    for row in range(0, N):
        for col in range(0, N):
            palette_index = (row + col) % N
            grid.add(
                Shape(
                    LineArtFactory.lissajours(shape_box, row + 1, col + 1),
                    fill_color=fill_palette[palette_index],
                    stroke_color=stroke_palette[palette_index],
                    line_width=Decimal(2),
                )
            )
    grid.set_padding_on_all_cells(
        Decimal(10), Decimal(10), Decimal(10), Decimal(10)
    )
    page_layout.add(grid)

    destination = self.output_dir / ("output.pdf")

    # persist the document
    with open(destination, "wb") as sink_handle:
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_document(self, file):
    """Box every occurrence of one ground-truth word on the first page.

    The word is the sixth alphabetic token longer than five characters in
    ``<stem>.txt`` (looked up in ``self.input_dir``). The PDF is loaded with
    a ``RegularExpressionTextExtraction`` listener for that word, a square
    annotation is added per match on page 0, and the result is written to
    ``<stem>_<word>_out.pdf`` inside ``self.output_dir`` and re-read.

    Args:
        file: path of the input PDF.

    Returns:
        ``True``, either immediately when the ground truth has too few
        candidate words or after the round-trip has completed.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # read the ground-truth text that accompanies the PDF
    truth_path = self.input_dir / (file.stem + ".txt")
    with open(truth_path, "r") as truth_handle:
        truth_text = truth_handle.read()

    # candidate words: alphabetic runs of more than five characters
    candidates = [
        token for token in re.split("[^a-zA-Z]+", truth_text) if len(token) > 5
    ]
    if len(candidates) <= 5:
        # not enough material to pick the sixth word
        return True
    needle = candidates[5]

    destination = self.output_dir / (file.stem + "_" + needle + "_out.pdf")

    # load the PDF with the listener attached
    matcher = RegularExpressionTextExtraction(needle)
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle, [matcher])

    # draw a box around every match on page 0
    print(
        "\tAdding %d annotations"
        % len(matcher.get_matched_text_render_info_events_per_page(0))
    )
    for event in matcher.get_matched_text_render_info_events_per_page(0):
        line = event.get_baseline()
        match_box = Rectangle(
            Decimal(line.x0),
            Decimal(line.y0 - 2),
            Decimal(line.x1 - line.x0),
            Decimal(12),
        )
        document.get_page(0).append_square_annotation(
            rectangle=match_box,
            stroke_color=X11Color("Firebrick"),
        )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_write_document(self):
    """Write a PDF with a title, an explanatory paragraph and a nonogram.

    The nonogram is built from an image URL. The document is stored as
    ``output.pdf`` inside ``self.output_dir`` and then re-read.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # build an empty single-page document
    document = Document()
    page = Page()
    document.append_page(page)

    page_layout = SingleColumnLayout(page)

    # title
    title = Paragraph(
        "Nonogram", font_size=Decimal(20), font_color=X11Color("YellowGreen")
    )
    page_layout.add(title)

    # explanatory text
    # NOTE(review): the literal is kept on a single line exactly as it
    # appears in the source under review; any original line breaks inside
    # the triple-quoted string may have been lost -- confirm.
    explanation = Paragraph(
        """ Nonograms, also known as Paint by Numbers, Picross, Griddlers, Pic-a-Pix, and various other names, are picture logic puzzles in which cells in a grid must be colored or left blank according to numbers at the side of the grid to reveal a hidden picture. In this puzzle type, the numbers are a form of discrete tomography that measures how many unbroken lines of filled-in squares there are in any given row or column. For example, a clue of "4 8 3" would mean there are sets of four, eight, and three filled squares, in that order, with at least one blank square between successive sets. """,
        font_color=X11Color("SlateGray"),
        font_size=Decimal(8),
    )
    page_layout.add(explanation)

    # the puzzle itself
    # alternative image that was previously used:
    # "https://i.pinimg.com/originals/f8/23/88/f823882e7c5fa42790e78f43ecf7e8bf.jpg"
    puzzle = Nonogram(
        "https://cdn.shopify.com/s/files/1/2123/8425/products/166422700-LRG_242a4c8b-cad5-476e-afd1-c8b882d48fc2_530x.jpg"
    )
    page_layout.add(puzzle)

    destination = self.output_dir / ("output.pdf")

    # persist the document
    with open(destination, "wb") as sink_handle:
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_document(self, file):
    """Add one stamp annotation per known stamp name to ``file``.

    The stamps are stacked on page 0, each 34 units above the previous one.
    The result is written to ``<stem>_out.pdf`` inside ``self.output_dir``
    and re-read.

    Args:
        file: path of the input PDF.

    Returns:
        ``True`` once every step has completed without raising.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    destination = self.output_dir / (file.stem + "_out.pdf")

    # load the input document
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # one stamp per name, stacked vertically
    stamp_names = [
        "Approved",
        "Experimental",
        "NotApproved",
        "Asis",
        "Expired",
        "NotForPublicRelease",
        "Confidential",
        "Final",
        "Sold",
        "Departmental",
        "ForComment",
        "TopSecret",
        "Draft",
        "ForPublicRelease",
    ]
    for slot, stamp_name in enumerate(stamp_names):
        stamp_box = Rectangle(
            Decimal(128), Decimal(128 + slot * 34), Decimal(64), Decimal(32)
        )
        document.get_page(0).append_stamp_annotation(
            name=stamp_name,
            contents="Approved by Joris Schellekens",
            color=X11Color("White"),
            rectangle=stamp_box,
        )

    # persist the modified document
    with open(destination, "wb") as sink_handle:
        print("\twriting ..")
        PDF.dumps(sink_handle, document)

    # the stored file must be loadable again
    with open(destination, "rb") as check_handle:
        print("\treading (2) ..")
        PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Remove the first annotation from page 0 of ``file`` and store the result.

    Pages without an ``Annots`` entry, or with an empty one, are written
    back unchanged.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``True`` once the document has been written.
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # determine output location
    out_file = self.output_dir / (file.stem + "_out.pdf")

    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)

    # remove first annotation
    page = doc.get_page(0)
    if "Annots" in page:
        annots = page["Annots"]
        # an empty array has no first element to remove
        if annots:
            # presumably "P" is the annotation's reference back to its
            # page -- TODO confirm; it is cleared before the annotation is
            # dropped
            annots[0]["P"] = None
            # keep everything after the first element; the previous slice
            # [1:0] was always empty and therefore removed every annotation
            # NOTE(review): slicing yields a plain list, as it did before;
            # confirm the writer accepts a non-empty plain list here.
            page["Annots"] = annots[1:]

    # attempt to store PDF
    with open(out_file, "wb") as out_file_handle:
        print("\twriting ..")
        PDF.dumps(out_file_handle, doc)

    return True
def test_document(self, file):
    """Export the "[sS]orbitol" matches on page 0 of ``file`` as JSON.

    The PDF is loaded with a ``RegularExpressionTextExtraction`` listener;
    each match's text and integer baseline coordinates are written to
    ``<stem>.json`` inside ``self.output_dir``.

    Args:
        file: path of the input PDF.

    Returns:
        ``True`` once the JSON file has been written.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # load the PDF with the listener attached
    with open(file, "rb") as source_handle:
        matcher = RegularExpressionTextExtraction("[sS]orbitol")
        PDF.loads(source_handle, [matcher])

    # export matches
    json_path = self.output_dir / (file.stem + ".json")
    with open(json_path, "w") as json_handle:
        records = []
        for event in matcher.get_matched_text_render_info_events_per_page(0):
            records.append(
                {
                    "text": event.get_text(),
                    "x0": int(event.get_baseline().x0),
                    "y0": int(event.get_baseline().y0),
                    "x1": int(event.get_baseline().x1),
                    "y1": int(event.get_baseline().y1),
                }
            )
        json_handle.write(json.dumps(records, indent=4))

    return True
def _test_document(self, file):
    """Export up to 16 colours found on page 0 of ``file`` as JSON.

    The PDF is loaded with a ``ColorSpectrumExtraction`` listener; each
    reported (colour, count) pair is converted to plain numbers and the
    list is written to ``self.output_file``.

    Args:
        file: path of the input PDF.

    Returns:
        ``True`` once the JSON file has been written.
    """
    # the output file's directory must exist before we can write into it
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # load the PDF with the listener attached
    with open(file, "rb") as source_handle:
        spectrum = ColorSpectrumExtraction()
        PDF.loads(source_handle, [spectrum])

    # convert every (colour, count) pair to JSON-friendly numbers
    palette = [
        {
            "red": float(entry[0].red),
            "green": float(entry[0].green),
            "blue": float(entry[0].blue),
            "count": int(entry[1]),
        }
        for entry in spectrum.get_colors_per_page(0, limit=16)
    ]

    # write output
    with open(self.output_file, "w") as json_handle:
        json_handle.write(json.dumps(palette))

    return True
def test_document(self, file) -> bool:
    """Set the ``Producer`` entry of ``file``'s info dictionary and store it.

    An ``Info`` dictionary is created in the trailer when none exists.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``False`` when the loaded document has no ``XRef`` entry or the
        ``XRef`` has no ``Trailer``; ``True`` once the modified document has
        been written.
    """
    # create output directory if it does not exist yet, so that opening the
    # output file below cannot fail on a missing directory
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # read the document (the file is opened once; a second, nested open of
    # the same path served no purpose)
    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)

    if "XRef" not in doc:
        return False
    if "Trailer" not in doc["XRef"]:
        return False
    if "Info" not in doc["XRef"]["Trailer"]:
        doc["XRef"]["Trailer"][Name("Info")] = Dictionary()

    # change producer
    doc["XRef"]["Trailer"]["Info"]["Producer"] = String("pText")

    # determine output location
    out_file = self.output_dir / (file.stem + "_out.pdf")

    # PDF.dumps must receive the open handle, not the path
    with open(out_file, "wb") as out_file_handle:
        PDF.dumps(out_file_handle, doc)

    return True
def test_document(self, file):
    """Print every font name reported for page 0 of ``file``.

    Args:
        file: path of the input PDF, loaded with a ``FontExtraction``
            listener attached.

    Returns:
        ``True`` once all names have been printed.
    """
    # load the PDF with the listener attached
    with open(file, "rb") as source_handle:
        font_listener = FontExtraction()
        PDF.loads(source_handle, [font_listener])

    # one font name per line
    for font_name in font_listener.get_font_names_per_page(0):
        print(font_name)

    return True
def test_document(self, file) -> bool:
    """Set the ``Author`` entry of ``file``'s info dictionary and store it.

    An ``Info`` dictionary is created in the trailer when none exists.

    Args:
        file: path of the input PDF; the result is written to
            ``<stem>_out.pdf`` inside ``self.output_dir``.

    Returns:
        ``False`` when the loaded document has no ``XRef`` entry or the
        ``XRef`` has no ``Trailer``; ``True`` once the modified document has
        been written.
    """
    # the output directory must exist before we can write into it
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # load the input document
    with open(file, "rb") as source_handle:
        document = PDF.loads(source_handle)

    # bail out when the structure we want to edit is missing
    if "XRef" not in document:
        return False
    if "Trailer" not in document["XRef"]:
        return False

    # make sure there is an Info dictionary to write into
    if "Info" not in document["XRef"]["Trailer"]:
        document["XRef"]["Trailer"][Name("Info")] = Dictionary()

    # change author
    document["XRef"]["Trailer"]["Info"]["Author"] = String("Joris Schellekens")

    # persist the modified document
    destination = self.output_dir / (file.stem + "_out.pdf")
    with open(destination, "wb") as sink_handle:
        PDF.dumps(sink_handle, document)

    return True
def test_extract_all_text(self):
    """Print the text that ``SimpleTextExtraction`` reports for page 0.

    NOTE(review): the input path is a hard-coded absolute location, so this
    test can only run where that file exists.
    """
    corpus_file = Path("/home/joris/Code/pdf-corpus/0600.pdf")
    text_listener = SimpleTextExtraction()

    # load the PDF with the listener attached
    with open(corpus_file, "rb") as source_handle:
        PDF.loads(source_handle, [text_listener])

    print(text_listener.get_text(0))