def test_rect_from_invalid():
    """Rectangle construction raises TypeError for unsuitable arguments."""
    # Each argument is built lazily so that, as before, it is constructed
    # inside the ``pytest.raises`` context together with the Rectangle call.
    invalid_argument_factories = (
        lambda: 'foo',
        lambda: Name.Foo,
        lambda: Array([1, 2]),
        lambda: Array(['one', 'two', 'three', 'four']),
    )
    for make_argument in invalid_argument_factories:
        with pytest.raises(TypeError):
            Rectangle(make_argument())
def test_repr_dict(self):
    """repr() of a Dictionary matches the expected text and round-trips via eval."""
    source = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Operator': Operator('q'),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    expected_repr = """\
        pikepdf.Dictionary({
            "/Array": [ 1, 2, Decimal('3.140000') ],
            "/Boolean": True,
            "/Dictionary": {
                "/Color": "Red"
            },
            "/Integer": 42,
            "/Operator": pikepdf.Operator("q"),
            "/Real": Decimal('42.42'),
            "/String": "hi"
        })
    """

    def squeeze(text):
        # Layout is irrelevant to the comparison; drop every whitespace run.
        return ''.join(text.split())

    assert squeeze(repr(source)) == squeeze(expected_repr)
    # The evaluated text is produced by the object under test, not by
    # external input.
    assert eval(repr(source)) == source
def test_repr_dict(self):
    """repr() of a Dictionary matches the expected text and round-trips via eval.

    The textual form of the real number inside ``/Array`` depends on the
    libqpdf version, so the expected text is assembled accordingly.
    """
    source = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Operator': Operator('q'),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
            '/None': None,
        }
    )

    # NOTE(review): if LooseVersion comes from distutils it is deprecated
    # (PEP 632) - confirm against the file's imports.
    qpdf_is_10_2_or_newer = LooseVersion(pikepdf.__libqpdf_version__) >= LooseVersion(
        '10.2.0'
    )
    short_pi = '3.14' if qpdf_is_10_2_or_newer else '3.140000'

    # %-formatting is kept because the template is full of literal braces.
    expected_repr = (
        """\
        pikepdf.Dictionary({
            "/Array": [ 1, 2, Decimal('%s') ],
            "/Boolean": True,
            "/Dictionary": {
                "/Color": "Red"
            },
            "/Integer": 42,
            "/None": None,
            "/Operator": pikepdf.Operator("q"),
            "/Real": Decimal('42.42'),
            "/String": "hi"
        })
    """
        % short_pi
    )

    def squeeze(text):
        # Layout is irrelevant to the comparison; drop every whitespace run.
        return ''.join(text.split())

    assert squeeze(repr(source)) == squeeze(expected_repr)
    # The evaluated text is produced by the object under test, not by
    # external input.
    assert eval(repr(source)) == source
def test_page_labels():
    """Page.label reflects the numbering rules stored in the /PageLabels tree."""
    page_count = 5
    pdf = Pdf.new()
    template = Dictionary(
        Type=Name.Page, MediaBox=[0, 0, 612, 792], Resources=Dictionary()
    )
    for index in range(page_count):
        pdf.pages.append(template)
        pdf.pages[index].Contents = Stream(
            pdf, b"BT (Page %s) Tj ET" % str(index).encode()
        )

    # Use lowercase roman numerals, until...
    roman_rule = Dictionary(S=Name.r)
    # ...pages are labelled 'Prefix-42', 'Prefix-43', ...
    prefixed_rule = Dictionary(S=Name.D, St=42, P='Prefix-')
    # /Nums alternates the index at which new label rules begin with the
    # rule dictionary that applies from that index onward.
    label_rules = Array([0, roman_rule, 2, prefixed_rule])
    pdf.Root.PageLabels = pdf.make_indirect(Dictionary(Nums=label_rules))

    expected_labels = ['i', 'ii', 'Prefix-42', 'Prefix-43', 'Prefix-44']
    for index, expected in enumerate(expected_labels):
        assert Page(pdf.pages[index]).label == expected
def make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: Optional[Union[PageLocation, str]] = None,
    **kwargs,
) -> Array:
    """
    Create a destination ``Array`` that refers to a page of a Pdf document.

    Arguments:
        pdf: PDF document object.
        page_num: Page number (zero-based).
        page_location: Optional page location, as a string or
            :class:`PageLocation`. When omitted (or otherwise falsy) the
            destination uses ``/Fit``.
        kwargs: Optional keyword arguments for the page location, e.g.
            ``top``. Any argument the chosen location expects that is not
            supplied is filled in as ``0``.

    Raises:
        ValueError: If ``page_location`` is a string that does not name a
            ``PageLocation`` member.
    """
    res = [pdf.pages[page_num]]
    if page_location:
        if isinstance(page_location, PageLocation):
            loc_key = page_location
            loc_str = loc_key.name
        else:
            loc_str = page_location
            try:
                loc_key = PageLocation[loc_str]
            except KeyError:
                # The enum lookup failure is an implementation detail; hide
                # it so callers see only the ValueError.
                raise ValueError(
                    f"Invalid or unsupported page location type {loc_str}"
                ) from None
        res.append(Name(f'/{loc_str}'))
        dest_arg_names = PAGE_LOCATION_ARGS.get(loc_key)
        if dest_arg_names:
            res.extend(kwargs.get(k, 0) for k in dest_arg_names)
    else:
        res.append(Name.Fit)
    return Array(res)
def _make_page_destination(
    pdf: Pdf,
    page_num: int,
    page_location: Optional[Union[PageLocation, str]] = None,
    **kwargs,
) -> Array:
    """Build a destination ``Array`` pointing at page ``page_num`` of ``pdf``.

    Keyword arguments whose value is ``None`` are treated as not supplied.
    Without a page location the destination is ``/Fit``; otherwise the
    location name is followed by the arguments that location expects, each
    defaulting to ``0`` when not supplied.

    Raises:
        ValueError: If ``page_location`` is a string that does not name a
            ``PageLocation`` member.
    """
    supplied = {name: value for name, value in kwargs.items() if value is not None}
    dest = [pdf.pages[page_num]]

    if not page_location:
        dest.append(Name.Fit)
        return Array(dest)

    if isinstance(page_location, PageLocation):
        location = page_location
        location_name = location.name
    else:
        location_name = page_location
        try:
            location = PageLocation[location_name]
        except KeyError:
            raise ValueError(
                f"Invalid or unsupported page location type {location_name}"
            ) from None

    dest.append(Name(f'/{location_name}'))
    expected_args = PAGE_LOCATION_ARGS.get(location)
    if expected_args:
        dest.extend(supplied.get(arg, 0) for arg in expected_args)
    return Array(dest)
def test_nametree_crud(outline):
    """Exercise lookup, deletion and insertion on the /Dests name tree."""
    tree = NameTree(outline.Root.Names.Dests)
    assert tree.obj == outline.Root.Names.Dests

    # Read access and membership
    assert '0' in tree
    assert isinstance(tree['0'], Object)
    assert 'foo' not in tree

    # Delete an existing entry, then put a new value under the same key
    assert '3' in tree
    del tree['3']
    assert '3' not in tree
    tree['3'] = Dictionary(Entry=3)
    assert tree['3'].Entry == 3

    # Insert under new keys: a pikepdf object first, then a plain Python int
    tree['newentry'] = Array([42])
    assert tree['newentry'] == Array([42])
    tree['py_newentry'] = 42
def test_dict_or_array_dict():
    """PdfImage exposes DecodeParms that were supplied as an array of dicts."""
    pdf = pikepdf.new()
    fax_parms = Dictionary(
        BlackIs1=False,
        Columns=16,
        K=-1,
    )
    image_stream = Stream(
        pdf,
        b'dummy',
        BitsPerComponent=1,
        ColorSpace=Name.DeviceGray,
        DecodeParms=Array([fax_parms]),
        Filter=Array([Name.CCITTFaxDecode]),
        Height=16,
        Width=16,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    image = pikepdf.PdfImage(image_stream)
    # Check that array of dict is unpacked properly
    first_parms = image.decode_parms[0]
    assert first_parms.K == -1
def test_copy():
    """copy() of a Dictionary yields an equal but distinct object."""
    original = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    duplicate = copy(original)

    assert duplicate == original
    assert duplicate is not original
    # The nested dictionary still compares equal through the copy
    assert duplicate['/Dictionary'] == original['/Dictionary']
def test_page_contents_add(graph, outdir):
    """Content streams can be added to a page; non-page objects are rejected."""
    rotation = PdfMatrix().rotated(45)
    # "q <matrix> cm" saves the graphics state and applies the matrix;
    # "Q" restores the saved state.
    opening = Stream(graph, b'q ' + rotation.encode() + b' cm')
    closing = Stream(graph, b'Q')

    # NOTE(review): the boolean presumably selects prepend (True) versus
    # append (False) - confirm against page_contents_add's documentation.
    graph.pages[0].page_contents_add(opening, True)
    graph.pages[0].page_contents_add(closing, False)
    graph.save(outdir / 'out.pdf')

    with pytest.raises(TypeError, match="Not a Page"):
        Array([42]).page_contents_add(opening)
def test_palette_nonrgb(base, hival, palette, expect_type):
    """PdfImage.palette reports the base type and raw palette of an Indexed image."""
    pdf = pikepdf.new()
    indexed_colorspace = Array([Name.Indexed, base, hival, palette])
    image_stream = Stream(
        pdf,
        b'\x00\x01\x02\x03' * 4,  # 16 one-byte samples: a single 16-pixel row
        BitsPerComponent=8,
        ColorSpace=indexed_colorspace,
        Width=16,
        Height=1,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    image = pikepdf.PdfImage(image_stream)
    assert image.palette == (expect_type, palette)
def test_page_boxes(graph_page):
    """Page boxes start out equal and can be reassigned independently.

    After the three boxes are given different values, every pair must
    compare unequal. The final assignment checks that a box also accepts a
    pikepdf ``Array`` rather than a Python list.
    """
    page = graph_page
    assert page.mediabox == page.cropbox == page.trimbox

    page.cropbox = [0, 0, page.mediabox[2] - 100, page.mediabox[3] - 100]
    page.mediabox = [
        page.mediabox[0] - 50,
        page.mediabox[1] - 50,
        page.mediabox[2] + 50,
        page.mediabox[3] + 50,
    ]
    # Computed from the already-enlarged mediabox
    page.trimbox = [50, 50, page.mediabox[2] - 50, page.mediabox[3] - 50]

    assert page.mediabox != page.cropbox
    assert page.cropbox != page.mediabox
    # trimbox was just assigned too: its lower-left (50, 50) differs from
    # the cropbox's (0, 0), and its upper-right is 50 short of the mediabox's.
    assert page.trimbox != page.cropbox
    assert page.trimbox != page.mediabox

    page.cropbox = Array([0, 0, 50, 50])
def set_pagelabels(doc, page_labels):
    """Write ``page_labels`` into ``doc`` as its /PageLabels number tree.

    Each label is a mapping with a 1-based ``'start'`` page and optional
    ``'style'``, ``'prefix'`` and ``'initial_count'`` entries. A style of
    ``'none'`` is treated the same as no style at all.
    """
    nums = []
    for label in page_labels:
        page_index = label['start'] - 1  # page index 1-based -> 0-based
        entry = {}
        if 'style' in label and label['style'] != 'none':
            entry['/S'] = Name('/' + label['style'])
        if 'prefix' in label:
            entry['/P'] = label['prefix']
        if 'initial_count' in label:
            entry['/St'] = label['initial_count']
        # /Nums is a flat list alternating page index and label dictionary
        nums.extend((page_index, Dictionary(entry)))

    doc.root[Name.PageLabels] = Dictionary({'/Nums': Array(nums)})
def test_palette_nonrgb(base, hival, bits, palette, expect_type, expect_mode):
    """Indexed images report their palette and mode, and can be extracted."""
    pdf = pikepdf.new()
    indexed_colorspace = Array([Name.Indexed, base, hival, palette])
    image_stream = Stream(
        pdf,
        b'\x00\x01\x02\x03' * 16,
        BitsPerComponent=bits,
        ColorSpace=indexed_colorspace,
        Width=16,
        Height=4,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    image = pikepdf.PdfImage(image_stream)

    assert image.palette == (expect_type, palette)
    # Extraction to an in-memory stream must complete without raising
    image.extract_to(stream=BytesIO())
    # To view images:
    # image.extract_to(fileprefix=f'palette_nonrgb_{expect_type}_{bits}')
    assert image.mode == expect_mode
def test_json():
    """Dictionary.to_json() produces JSON that parses to the expected values."""
    source = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    expected = {
        "/Array": [1, 2, 3.14],
        "/Boolean": True,
        "/Dictionary": {"/Color": "Red"},
        "/Integer": 42,
        "/Real": 42.42,
        "/String": "hi",
    }

    parsed = json.loads(source.to_json(False))
    assert parsed == expected
def test_rect_properties():
    """Rectangle exposes coordinates, size, corners, array form, repr and hash."""
    rect = Rectangle(1, 2, 101, 302)
    initial_values = (
        ('llx', 1.0),
        ('lly', 2.0),
        ('urx', 101.0),
        ('ury', 302.0),
        ('width', 100.0),
        ('height', 300.0),
    )
    for attribute, value in initial_values:
        assert getattr(rect, attribute) == value

    # Double every coordinate through its property setter
    for attribute in ('llx', 'lly', 'urx', 'ury'):
        setattr(rect, attribute, getattr(rect, attribute) * 2)

    assert rect.lower_left == (rect.llx, rect.lly)
    assert rect.lower_right == (rect.urx, rect.lly)
    assert rect.upper_right == (rect.urx, rect.ury)
    assert rect.upper_left == (rect.llx, rect.ury)

    doubled = list(map(Decimal, (2, 4, 202, 604)))
    assert rect.as_array() == Array(doubled)

    # repr() must evaluate back to an equal, equally-hashed Rectangle
    rebuilt = eval(repr(rect), {'pikepdf': pikepdf})  # pylint: disable=eval-used
    assert rebuilt == rect
    assert hash(rebuilt) == hash(rect)
def _remove_simple_filters(obj, filters):
    """Remove simple lossless compression where it appears.

    Filters that precede the first complex filter (DCT, JPX, JBIG2 or
    CCITT fax) are decoded; the complex filter and anything after it are
    left in place.

    Args:
        obj (pikepdf.Stream): the compressed object
        filters (list of str): all filters on the data

    Returns:
        tuple: ``(data, remaining_filters)`` - the stream bytes with the
        leading simple filters removed, and the filters still applied to
        those bytes.

    Raises:
        NotImplementedError: if more than one complex filter is present.
    """
    complex_names = {
        '/DCTDecode',
        '/JPXDecode',
        '/JBIG2Decode',
        '/CCITTFaxDecode',
    }
    complex_positions = [
        position for position, name in enumerate(filters) if name in complex_names
    ]

    if len(complex_positions) > 1:
        raise NotImplementedError(
            f"Object {obj.objgen} has compound complex filters: {filters}. "
            "We cannot decompress this."
        )

    if complex_positions:
        split_at = complex_positions[0]
        leading_simple = filters[:split_at]
        remaining = filters[split_at:]
    else:
        leading_simple = filters
        remaining = []

    if not leading_simple:
        # Nothing to strip: hand back the stream exactly as stored
        return obj.read_raw_bytes(), remaining

    # Temporarily advertise only the simple filters so that decoding stops
    # before the complex one; the original /Filter is always restored.
    saved_filter = obj.Filter
    try:
        obj.Filter = Array([Name(s) for s in leading_simple])
        decoded = obj.read_bytes(StreamDecodeLevel.specialized)
    finally:
        obj.Filter = saved_filter

    return decoded, remaining
def test_json():
    """Dictionary.to_json() produces JSON that parses to the expected values."""
    source = Dictionary(
        {
            '/Boolean': True,
            '/Integer': 42,
            '/Real': Decimal('42.42'),
            '/String': String('hi'),
            '/Array': Array([1, 2, 3.14]),
            '/Dictionary': Dictionary({'/Color': 'Red'}),
        }
    )
    expected = {
        "/Array": [1, 2, 3.14],
        "/Boolean": True,
        "/Dictionary": {"/Color": "Red"},
        "/Integer": 42,
        "/Real": 42.42,
        "/String": "hi",
    }

    raw_json = source.to_json(False)
    try:
        parsed = json.loads(raw_json)
    except TypeError:
        # Py3.5 shim: fall back to parsing the decoded text
        parsed = json.loads(raw_json.decode('utf-8'))

    assert parsed == expected
def test_array_not_hashable(self):
    """Using an Array as a dict key must raise TypeError."""
    with pytest.raises(TypeError):
        # Only the attempt to hash the key matters; the dict is discarded.
        {Array([3]): None}  # pylint: disable=expression-not-assigned
# NOTE: the two assertions below are the tail of a definition that starts
# before this excerpt; they are reproduced unchanged.
assert d2 is not d
assert d2['/Dictionary'] == d['/Dictionary']


def test_object_iteration(sandwich):
    # Iterating ``sandwich.objects`` must yield exactly as many items as its
    # reported length, and every Dictionary met along the way has a key.
    expected = len(sandwich.objects)
    loops = 0
    for obj in sandwich.objects:
        loops += 1
        if isinstance(obj, Dictionary):
            assert len(obj.keys()) >= 1
    assert expected == loops


@pytest.mark.parametrize(
    'obj', [Array([1]), Dictionary({'/A': 'b'}), Operator('q'), String('s')]
)
def test_object_isinstance(obj):
    # Each sample is an instance of one of the listed classes, of its own
    # reported type, and of the common Object base.
    assert isinstance(obj, (Array, Dictionary, Operator, String, Stream))
    assert isinstance(obj, type(obj))
    assert isinstance(obj, Object)


def test_stream_isinstance():
    # A Stream needs an owning Pdf, so it is checked separately from the
    # parametrized cases above.
    pdf = pikepdf.new()
    stream = Stream(pdf, b'xyz')
    assert isinstance(stream, Stream)
    assert isinstance(stream, Object)


# NOTE: the body of the following definition lies beyond this excerpt.
def test_object_classes():
def test_pages_wrong_type(fourpages):
    """Inserting something that is not a page raises TypeError."""
    # Factories keep construction of each bogus value inside the
    # ``pytest.raises`` context, exactly where the insert call happens.
    bogus_value_factories = (dict, lambda: Array([42]))
    for make_bogus in bogus_value_factories:
        with pytest.raises(TypeError):
            fourpages.pages.insert(3, make_bogus())
def test_array_of_array():
    """Array(existing_array) produces an equal but distinct Array."""
    original = Array([1, 2])
    duplicate = Array(original)

    assert original == duplicate
    assert original is not duplicate
def test_rect_creation():
    """A Rectangle can be built from a four-element Array."""
    rect = Rectangle(Array([1, 2, 3, 4]))
    assert rect.width == 2
def test_array_from_rect():
    """An Array can be built from a Rectangle."""
    rect = Rectangle(1, 2, 3, 4)
    converted = Array(rect)
    assert isinstance(converted, Array)
def test_separation():
    # Manually construct a 2"x1" document with a Separation
    # colorspace that defines a single "spot" color channel named
    # "LogoGreen". Define a conversion to standard CMYK that assigns
    # CMYK equivalents. Copied example from PDF RM.
    # LogoGreen is a teal-ish green. First panel is white to full green,
    # second is green to full white. RGB ~= (31, 202, 113)
    pdf = pikepdf.new()
    pdf.add_blank_page(page_size=(144, 72))

    # pikepdf does not interpret this - it is for the PDF viewer
    # Explanation:
    #   X is implicitly loaded to stack
    #   dup:      X X
    #   0.84 mul: X 0.84X
    #   exch:     0.84X X
    #   0.00:     0.84X X 0.00
    #   exch:     0.84X 0.00 X
    #   dup:      0.84X 0.00 X X
    #   0.44 mul: 0.84X 0.00 X 0.44X
    #   exch:     0.84X 0.00 0.44X X
    #   0.21 mul: 0.84X 0.00 0.44X 0.21X
    # X -> {0.84X, 0, 0.44X, 0.21X}
    tint_transform_logogreen_to_cmyk = b''' { dup 0.84 mul exch 0.00 exch dup 0.44 mul exch 0.21 mul } '''

    # Separation colorspace array: colorant name, alternate colorspace, and
    # the tint transform function stream.
    cs = Array(
        [
            Name.Separation,
            Name.LogoGreen,
            Name.DeviceCMYK,
            Stream(
                pdf,
                tint_transform_logogreen_to_cmyk,
                FunctionType=4,
                Domain=[0.0, 1.0],
                Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
            ),
        ]
    )

    def check_pim(imobj, idx):
        # Shared checks: the image is reported as Separation (not DeviceN),
        # its indexed flag equals ``idx``, repr() is non-empty, and
        # extraction is refused with HifiPrintImageNotTranscodableError.
        pim = pikepdf.PdfImage(imobj)
        assert pim.mode == 'Separation'
        assert pim.is_separation
        assert not pim.is_device_n
        assert pim.indexed == idx
        assert repr(pim)

        with pytest.raises(pikepdf.models.image.HifiPrintImageNotTranscodableError):
            pim.extract_to(stream=BytesIO())

    # 16x16 image, 8 bits per sample, directly in the Separation colorspace;
    # the sample data is the byte values 0..255 in order.
    imobj0 = Stream(
        pdf,
        bytes(range(0, 256)),
        BitsPerComponent=8,
        ColorSpace=cs,
        Width=16,
        Height=16,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    check_pim(imobj0, idx=False)

    # Same sample data, but through an Indexed colorspace whose lookup table
    # is the byte values 255..0, i.e. the tint ramp is inverted.
    imobj1 = Stream(
        pdf,
        bytes(range(0, 256)),
        BitsPerComponent=8,
        ColorSpace=Array([Name.Indexed, cs, 255, bytes(range(255, -1, -1))]),
        Width=16,
        Height=16,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    check_pim(imobj1, idx=True)

    # Content stream: scale by 72, paint Im0, shift one unit right, paint Im1.
    pdf.pages[0].Contents = Stream(
        pdf, b'72 0 0 72 0 0 cm /Im0 Do 1 0 0 1 1 0 cm /Im1 Do'
    )
    pdf.pages[0].Resources = Dictionary(XObject=Dictionary(Im0=imobj0, Im1=imobj1))
def test_array_not_hashable(self):
    """Using an Array as a dict key must raise TypeError."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        # Only the attempt to hash the key matters; the dict is discarded.
        {Array([3]): None}  # pylint: disable=expression-not-assigned
def test_wrap_array():
    """wrap_in_array() wraps a scalar and leaves an Array's contents as they are."""
    wrapped_name = Name('/Foo').wrap_in_array()
    assert wrapped_name == Array([Name('/Foo')])

    wrapped_array = Array([42]).wrap_in_array()
    assert wrapped_array == Array([42])
def test_len_array():
    """len() of an Array equals its number of elements, including when empty."""
    cases = (
        (Array([]), 0),
        (Array(), 0),
        (Array([3]), 1),
    )
    for array, expected_length in cases:
        assert len(array) == expected_length
# NOTE: the two assertions below are the tail of a definition that starts
# before this excerpt; they are reproduced unchanged.
assert d2 is not d
assert d2['/Dictionary'] == d['/Dictionary']


def test_object_iteration(sandwich):
    # Iterating ``sandwich.objects`` must yield exactly as many items as its
    # reported length, and every Dictionary met along the way has a key.
    expected = len(sandwich.objects)
    loops = 0
    for obj in sandwich.objects:
        loops += 1
        if isinstance(obj, Dictionary):
            assert len(obj.keys()) >= 1
    assert expected == loops


@pytest.mark.parametrize(
    'obj', [Array([1]), Dictionary({'/A': 'b'}), Operator('q'), String('s')])
def test_object_isinstance(obj):
    # Each sample is an instance of one of the listed classes, of its own
    # reported type, and of the common Object base.
    assert isinstance(obj, (Array, Dictionary, Operator, String, Stream))
    assert isinstance(obj, type(obj))
    assert isinstance(obj, Object)


def test_stream_isinstance():
    # A Stream needs an owning Pdf, so it is checked separately from the
    # parametrized cases above.
    pdf = pikepdf.new()
    stream = Stream(pdf, b'xyz')
    assert isinstance(stream, Stream)
    assert isinstance(stream, Object)
def test_devicen():
    # Manually construct a 2"x1" document with a DeviceN
    # colorspace that defines a single "spot" color channel named
    # "Black". Define a conversion to standard CMYK that assigns
    # C=0 M=0 Y=0 and lets black through. The result should appear as a
    # gradient from white (top left) to black (bottom right) in the
    # left cell, and black to white in the right cell.
    pdf = pikepdf.new()
    pdf.add_blank_page(page_size=(144, 72))

    # Postscript function to map X -> CMYK={0, 0, 0, X}
    # Explanation:
    #   X is implicitly on the stack
    #   0 0 0 <- load three zeros on to stack
    #   stack contains: X 0 0 0
    #   4 -1 roll <- roll the top 4 elements by -1, which rotates the
    #                bottom-most of them (X) up to the top
    #   stack contains: 0 0 0 X
    # pikepdf currently does not interpret tint transformation functions. This
    # is done so that the output test file can be checked in a PDF viewer.
    tint_transform_k_to_cmyk = b'{0 0 0 4 -1 roll}'

    # DeviceN colorspace array: list of colorant names, alternate
    # colorspace, and the tint transform function stream.
    cs = Array(
        [
            Name.DeviceN,
            Array([Name.Black]),
            Name.DeviceCMYK,
            Stream(
                pdf,
                tint_transform_k_to_cmyk,
                FunctionType=4,
                Domain=[0.0, 1.0],
                Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
            ),
        ]
    )

    def check_pim(imobj, idx):
        # Shared checks: the image is reported as DeviceN (not Separation),
        # its indexed flag equals ``idx``, repr() is non-empty, and
        # extraction is refused with HifiPrintImageNotTranscodableError.
        pim = pikepdf.PdfImage(imobj)
        assert pim.mode == 'DeviceN'
        assert pim.is_device_n
        assert not pim.is_separation
        assert pim.indexed == idx
        assert repr(pim)

        with pytest.raises(pikepdf.models.image.HifiPrintImageNotTranscodableError):
            pim.extract_to(stream=BytesIO())

    # 16x16 image, 8 bits per sample, directly in the DeviceN colorspace;
    # the sample data is the byte values 0..255 in order.
    imobj0 = Stream(
        pdf,
        bytes(range(0, 256)),
        BitsPerComponent=8,
        ColorSpace=cs,
        Width=16,
        Height=16,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    check_pim(imobj0, idx=False)

    # Same sample data, but through an Indexed colorspace whose lookup table
    # is the byte values 255..0, i.e. the ramp is inverted.
    imobj1 = Stream(
        pdf,
        bytes(range(0, 256)),
        BitsPerComponent=8,
        ColorSpace=Array([Name.Indexed, cs, 255, bytes(range(255, -1, -1))]),
        Width=16,
        Height=16,
        Type=Name.XObject,
        Subtype=Name.Image,
    )
    check_pim(imobj1, idx=True)

    # Content stream: scale by 72, paint Im0, shift one unit right, paint Im1.
    pdf.pages[0].Contents = Stream(
        pdf, b'72 0 0 72 0 0 cm /Im0 Do 1 0 0 1 1 0 cm /Im1 Do'
    )
    pdf.pages[0].Resources = Dictionary(XObject=Dictionary(Im0=imobj0, Im1=imobj1))