def test_PDFWriter(test_output_dir):
    """Assemble a one-page "Hello World" document and write it to disk.

    The test builds a Type1 Helvetica font dictionary, a resources
    dictionary, and a content stream, wraps them in a 612 x 792 page, and
    writes ``bytes(pw)`` to ``text.pdf`` inside ``test_output_dir``.  There
    are no assertions; the test passes if nothing raises.

    Args:
        test_output_dir: Directory path that ``text.pdf`` is written into.
    """
    pw = pdf_write.PDFWriter()

    font_dict = pdf.PDFSimpleDict()
    font_dict["Type"] = "Font"
    font_dict["Subtype"] = "Type1"
    font_dict["Name"] = "F1"
    font_dict["BaseFont"] = "Helvetica"
    # The predefined PDF encoding is spelled "MacRomanEncoding"; the earlier
    # "MaxRomanEncoding" was a typo and is not a valid encoding name.
    font_dict["Encoding"] = "MacRomanEncoding"
    added_font = pw.add_pdf_object(font_dict.to_dict())

    resource_entries = pdf.PDFSimpleDict()
    proc_set = pw.add_pdf_object(pdf_write.ProcedureSet().object())
    resource_entries["ProcSet"] = proc_set
    resource_entries["Font"] = pdf.PDFDictionary(
        [(pdf.PDFName("F1"), added_font)]
    )
    resources = resource_entries.to_dict()

    # Content stream: select /F1 at size 24, move to (100, 100), show text.
    data = b"BT\n/F1 24 Tf\n100 100 Td\n(Hello World) Tj\nET"
    stream = pdf.PDFStream(
        [(pdf.PDFName("Length"), pdf.PDFNumeric(len(data)))], data
    )
    added_contents = pw.add_pdf_object(stream)

    page = pdf_write.Page(
        pdf_write.Rectangle(0, 0, 612, 792), resources, added_contents
    )
    pw.add_page(page)

    with open(os.path.join(test_output_dir, "text.pdf"), "wb") as f:
        f.write(bytes(pw))
def test_ParseName():
    """Check ``ParseName.parse`` against a table of byte inputs.

    Input without a leading ``/`` must give ``None``; every other case must
    give a ``(PDFName, int)`` tuple.
    """
    parser = pdf_parser.ParseName()

    assert parser.parse(b"asa") is None

    # (input bytes, expected name text, expected integer in the result).
    # In each case the integer equals the byte length of the name token
    # as written in the input (e.g. len(b"/Matt#20") == 8).
    cases = [
        (b"/Matt asa", "Matt", 5),
        (b"/Matt", "Matt", 5),
        (b"/Matt#20 asa", "Matt ", 8),
        (b"/Matt/Bob", "Matt", 5),
    ]
    for raw, name_text, number in cases:
        assert parser.parse(raw) == (pdf.PDFName(name_text), number)
def test_DocumentCatalog():
    """Check the keys and values of the dictionary built by ``DocumentCatalog``."""
    pages_id = pdf.PDFObjectId(5, 2)
    catalog = pdf_write.DocumentCatalog(pages_id)
    entries = catalog.object().data

    key_bytes = {bytes(key) for key in entries}
    assert key_bytes == {b"/Type", b"/Pages"}

    assert entries[pdf.PDFName("Type")] == pdf.PDFName("Catalog")
    # The pages entry must serialise as an indirect reference to object 5 2.
    assert bytes(entries[pdf.PDFName("Pages")]) == b"5 2 R"
def test_ArrayConsumer():
    """Exercise the consumer produced by ``ParseArray``.

    Covers the consumer's type, ``end`` on non-terminating and terminating
    input, and the array returned by ``build`` after one ``consume`` call.
    """
    consumer = pdf_parser.ParseArray().consumer()
    assert isinstance(consumer, pdf_parser.ArrayConsumer)

    consumer.consume(pdf.PDFName("Matt"))

    # Input that does not start with "]" gives None; "]" gives 1.
    assert consumer.end(b"123.2") is None
    assert consumer.end(b"]ahsgas") == 1

    built = consumer.build()
    assert built == pdf.PDFArray([pdf.PDFName("Matt")])
def test_PDFArray():
    """Check indexing, ``repr``, ``bytes`` and equality of ``PDFArray``."""
    empty = pdf.PDFArray()
    with pytest.raises(IndexError):
        empty[0]
    assert repr(empty) == "PDFArray([])"
    assert bytes(empty) == b"[]"
    assert empty == pdf.PDFArray()

    def make_pair():
        # Fresh two-element array so equality is tested across instances.
        return pdf.PDFArray([pdf.PDFName("Matt"), pdf.PDFNumeric(12.2)])

    filled = make_pair()
    assert repr(filled) == "PDFArray([PDFName(b'Matt'), PDFNumeric(12.2)])"
    assert bytes(filled) == b"[/Matt 12.2]"
    assert filled == make_pair()
def test_DictionaryConsumer():
    """Exercise the consumer produced by ``ParseDictionary``.

    Covers the consumer's type, ``end`` on non-terminating and terminating
    input, and lookup in the dictionary built from one key/value pair.
    """
    consumer = pdf_parser.ParseDictionary().consumer()
    assert isinstance(consumer, pdf_parser.DictionaryConsumer)

    # Input that does not start with ">>" gives None; ">>" gives 2.
    assert consumer.end(b"ajsdga") is None
    assert consumer.end(b">>ahsga") == 2

    # Feed a key followed by its value.
    consumer.consume(pdf.PDFName("Mat"))
    consumer.consume(pdf.PDFObjectId(12, 0))

    built = consumer.build()
    stored = built[pdf.PDFName("Mat")]
    assert bytes(stored) == b"12 0 R"
def test_PDFSimpleDict():
    """Check how ``PDFSimpleDict.to_dict`` converts plain Python values.

    A ``str`` must compare equal to a ``PDFName``, an ``int`` or ``float``
    to a ``PDFNumeric``, and a ``bool`` to a ``PDFBoolean``.
    """
    simple = pdf.PDFSimpleDict()
    simple["Filter"] = "FlateDecode"
    simple["BitsPerComponent"] = 8
    simple["Interpolate"] = True
    simple["matt"] = 1.23

    converted = simple.to_dict()

    def lookup(key):
        return converted[pdf.PDFName(key)]

    assert lookup("Filter") == pdf.PDFName("FlateDecode")
    assert lookup("BitsPerComponent") == pdf.PDFNumeric(8)
    assert lookup("Interpolate") == pdf.PDFBoolean(True)
    assert lookup("matt") == pdf.PDFNumeric(1.23)
def test_PDFStream():
    """Check contents, ``repr``, ``bytes`` and equality of ``PDFStream``."""

    def make_stream():
        # Two separately built streams are compared for equality below.
        return pdf.PDFStream(
            [(pdf.PDFName("Length"), pdf.PDFNumeric(10))], b"abcdfgiqsp"
        )

    first = make_stream()
    assert first.stream_contents == b"abcdfgiqsp"

    expected_repr = (
        "PDFDictionary({PDFName(b'Length'): PDFNumeric(10)}, stream length=10)"
    )
    assert repr(first) == expected_repr
    assert bytes(first) == b"<</Length 10>>\nstream\nabcdfgiqsp\nendstream"

    second = make_stream()
    assert first == second
def test_PDFDictionary():
    """Check lookup, ``repr``, ``bytes`` and item assignment on ``PDFDictionary``."""
    empty = pdf.PDFDictionary()
    with pytest.raises(KeyError):
        empty[pdf.PDFName("Mat")]
    assert repr(empty) == "PDFDictionary({})"
    assert bytes(empty) == b"<<>>"

    single = pdf.PDFDictionary([(pdf.PDFName("Bob"), pdf.PDFNumeric(12))])
    assert single[pdf.PDFName("Bob")] == pdf.PDFNumeric(12)
    assert repr(single) == "PDFDictionary({PDFName(b'Bob'): PDFNumeric(12)})"
    assert bytes(single) == b"<</Bob 12>>"

    # Assigning to an existing key must replace the value.
    single[pdf.PDFName("Bob")] = pdf.PDFNumeric(1)
    assert bytes(single) == b"<</Bob 1>>"
def test_PageTree(page1):
    """Check the dictionary built by ``PageTree`` after adding one page.

    Verifies the key set, the ``Type`` and ``Count`` entries, that the page's
    ``parent`` becomes the tree's object, and that the first ``Kids`` entry
    reflects a number later assigned to the page's object.

    Args:
        page1: A page instance supplied to the test; it is added to the tree.
    """
    pt = pdf_write.PageTree()
    pt.add_page(page1)
    pt_obj = pt.object()
    dic = pt_obj.data

    assert {bytes(x) for x in dic} == {b"/Type", b"/Kids", b"/Count"}
    assert dic[pdf.PDFName("Type")] == pdf.PDFName("Pages")
    # Identity check: the number must be exactly None before assignment,
    # not merely something that compares equal to None.
    assert dic[pdf.PDFName("Kids")][0].number is None
    assert dic[pdf.PDFName("Count")] == pdf.PDFNumeric(1)
    assert page1.parent is pt_obj

    # Numbering the page's object must be visible through the Kids entry.
    page1.object().number = 5
    assert dic[pdf.PDFName("Kids")][0].number == 5
def test_read_full_object(pdf1):
    """Read objects ``5 0`` and ``4 0`` via ``full_object_at`` and check them.

    Args:
        pdf1: Value passed straight to ``pdf_read.PDF``.
    """
    reader = pdf_read.PDF(pdf1)

    numeric_location = reader.object_lookup[pdf_read.PDFObjectId(5, 0)]
    numeric_obj = reader.full_object_at(numeric_location)
    assert repr(numeric_obj) == "PDFNumeric(33)"

    stream_location = reader.object_lookup[pdf_read.PDFObjectId(4, 0)]
    stream_obj = reader.full_object_at(stream_location)
    assert stream_obj[pdf.PDFName("Length")] == pdf.PDFNumeric(33)
    assert stream_obj.stream_contents.startswith(b"q\n1637")
def test_PDFName():
    """Check construction, equality, serialisation and hashing of ``PDFName``.

    Covers: the ``name`` attribute, ``repr``, equality between names built
    from ``bytes`` and ``str``, ``bytes`` output, rejection of a NUL
    character, ``#XX`` escaping, and that equal names collapse to one
    element in a set.
    """
    x = pdf.PDFName(b"Bob")
    assert x.name == b"Bob"
    assert repr(x) == "PDFName(b'Bob')"
    assert x == pdf.PDFName(b"Bob")
    assert x == pdf.PDFName("Bob")
    assert bytes(x) == b"/Bob"

    # Both statements stay inside the context manager so the test passes
    # whether the ValueError is raised on construction or on serialisation.
    with pytest.raises(ValueError):
        x = pdf.PDFName("ahdga\x00")
        bytes(x)

    x = pdf.PDFName(b"Bob\n T\xee")
    assert bytes(x) == b"/Bob#0A#20T#EE"

    # Equal names must hash alike and deduplicate. The previous bare
    # ``assert len({...})`` was always true for a non-empty set.
    assert len({pdf.PDFName("Masa"), pdf.PDFName("Masa")}) == 1
def test_Page(page1):
    """Check the dictionary built by a ``Page`` once its parent is set.

    Args:
        page1: A page instance supplied to the test.
    """
    page1.parent = pdf.PDFObjectId(2, 5)
    entries = page1.object().data

    expected_keys = {
        b"/Type",
        b"/Parent",
        b"/Resources",
        b"/MediaBox",
        b"/Contents",
    }
    assert {bytes(key) for key in entries} == expected_keys

    assert entries[pdf.PDFName("Type")] == pdf.PDFName("Page")

    # Remaining entries are compared through their serialised form.
    serialised = [
        ("Parent", b"2 5 R"),
        ("Resources", b"<</Matt 7>>"),
        ("MediaBox", b"[0 1 20 50]"),
        ("Contents", b"3 2 R"),
    ]
    for key, expected in serialised:
        assert bytes(entries[pdf.PDFName(key)]) == expected
def test_PDFParser_1():
    """Parse one dictionary from a byte stream and check every entry.

    The input holds a single dictionary containing a ``/Linearized`` key;
    iterating the parser must yield exactly that one object.
    """
    stream = io.BytesIO(
        b"\n<</Linearized 1/L 10171355/O 1489/E 14578/N 480/T 10169091/H [ 470 1215]>>\n"
    )
    parsed = list(pdf_parser.PDFParser(stream))
    assert len(parsed) == 1
    dictionary = parsed[0]

    numeric_entries = [
        ("Linearized", 1),
        ("L", 10171355),
        ("O", 1489),
        ("E", 14578),
        ("N", 480),
        ("T", 10169091),
    ]
    for key, value in numeric_entries:
        assert dictionary[pdf.PDFName(key)] == pdf.PDFNumeric(value)

    assert dictionary[pdf.PDFName("H")] == pdf.PDFArray(
        [pdf.PDFNumeric(470), pdf.PDFNumeric(1215)]
    )

    # Iterating the dictionary must yield PDFName keys only.
    seen_names = []
    for key in dictionary:
        assert isinstance(key, pdf.PDFName)
        seen_names.append(key.name)
    assert set(seen_names) == {
        b"Linearized", b"L", b"O", b"E", b"N", b"T", b"H"
    }
def test_trailer(pdf1):
    """Check the ``Size`` value and required keys of the parsed trailer.

    Args:
        pdf1: Value passed straight to ``pdf_read.PDF``.
    """
    trailer = pdf_read.PDF(pdf1).trailer
    assert trailer[pdf.PDFName("Size")] == pdf.PDFNumeric(18)
    for required in ("Info", "Root"):
        assert pdf.PDFName(required) in trailer
def test_read_object_1(pdf1):
    """Check that ``object_at(10)`` yields a single catalog dictionary.

    Args:
        pdf1: Value passed straight to ``pdf_read.PDF``.
    """
    reader = pdf_read.PDF(pdf1)
    found = reader.object_at(10)
    assert len(found) == 1

    assert found[0][pdf.PDFName("Pages")] == pdf.PDFObjectId(2, 0)
    assert found[0][pdf.PDFName("Type")] == pdf.PDFName("Catalog")
def test_ImageDictionary():
    """Check the stream object built by ``ImageDictionary``.

    An RGB 1024 x 768, 8-bit image is given filtered data with decode
    parameters plus one extra entry; the resulting dictionary keys, values
    and stream contents are then verified.
    """
    image = pdf_write.ImageDictionary(
        width=1024,
        height=768,
        colour_space=pdf_write.ColourSpaceRGB(),
        bits=8,
    )
    image.add_filtered_data("FlateDecode", b"1234", {"Predictor": 5})
    image.add_dictionary_entry("matt", 7)

    image_obj = image.object()
    entries = image_obj.data

    expected_keys = {
        b"/Subtype",
        b"/Filter",
        b"/Width",
        b"/Height",
        b"/ColorSpace",
        b"/BitsPerComponent",
        b"/Length",
        b"/Interpolate",
        b"/DecodeParms",
        b"/matt",
    }
    assert {bytes(key) for key in entries} == expected_keys

    def entry(key):
        return entries[pdf.PDFName(key)]

    assert entry("Subtype") == pdf.PDFName("Image")
    assert entry("Filter") == pdf.PDFName("FlateDecode")
    assert entry("Width") == pdf.PDFNumeric(1024)
    assert entry("Height") == pdf.PDFNumeric(768)
    assert entry("ColorSpace") == pdf.PDFName("DeviceRGB")
    assert entry("BitsPerComponent") == pdf.PDFNumeric(8)
    # Length must match the four bytes of filtered data supplied above.
    assert entry("Length") == pdf.PDFNumeric(4)
    assert entry("Interpolate") == pdf.PDFBoolean(True)
    assert entry("matt") == pdf.PDFNumeric(7)
    assert bytes(entry("DecodeParms")) == b"<</Predictor 5>>"

    assert image_obj.data.stream_contents == b"1234"
def test_InfoObject():
    """Check the keys and title of the dictionary built by ``InfoObject``."""
    info = pdf_write.InfoObject("My Title")
    entries = info.object().data

    assert {bytes(key) for key in entries} == {b"/Title", b"/CreationDate"}
    assert entries[pdf.PDFName("Title")] == pdf.PDFString("My Title")
def page1():
    """Return a ``Page`` with a small media box, one resource and fixed contents.

    The page uses rectangle ``(0, 1, 20, 50)``, a resources dictionary
    ``{/Matt: 7}`` and the contents object id ``3 2``.
    """
    # NOTE(review): tests in this file take ``page1`` as an argument, so this
    # is presumably registered as a pytest fixture; no decorator is visible
    # here -- confirm.
    return pdf_write.Page(
        pdf_write.Rectangle(0, 1, 20, 50),
        pdf.PDFDictionary([(pdf.PDFName("Matt"), pdf.PDFNumeric(7))]),
        pdf.PDFObjectId(3, 2),
    )