def test_xref_stream_parse_width_value_default_ix2():
    """Parse an xref stream whose third ``/W`` field width is zero.

    With ``/W = [1, 4, 0]`` every entry is five bytes long (type byte plus
    a four-byte second field) and carries no third field. The expected
    standard entry is built without an explicit ``generation``, so the
    parser's output must compare equal to ``XRefEntry``'s default there.
    """
    # no tail part
    hex_rows = ("0000000000", "0100000011")
    payload = b''.join(map(binascii.unhexlify, hex_rows))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 0)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): field_widths,
            generic.pdf_name('/Size'): 2,
        },
        stream_data=payload,
    )

    free_entry = XRefEntry(
        xref_type=XRefType.FREE, location=None, idnum=0, generation=0
    )
    standard_entry = XRefEntry(
        xref_type=XRefType.STANDARD, location=0x11, idnum=1
    )

    assert list(parse_xref_stream(stream_obj)) == [free_entry, standard_entry]
def test_xref_stream_parse_entry_types():
    """Parse an xref stream mixing free, regular, in-stream and undefined rows.

    Entries are seven bytes each (``/W = [1, 4, 2]``). Rows whose type byte
    is neither 0, 1 nor 2 are expected to be absent from the parser output,
    while the object numbers of the rows that follow still account for
    them (see the jump from ``idnum=6`` to ``idnum=8`` below).
    """

    def standard(location, idnum, **extra):
        # Expected entry for a regular (type 1) row.
        return XRefEntry(
            xref_type=XRefType.STANDARD, location=location, idnum=idnum,
            **extra
        )

    def in_obj_stream(container, index, idnum):
        # Expected entry for a row pointing into an object stream (type 2).
        return XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(container, index),
            idnum=idnum,
        )

    hex_rows = (
        "0000000000ffff",  # free
        "01000000110000",  # regular objects
        "01000000840000",
        "01000000bc0005",
        "01000001b40000",
        "01000002990000",
        "02000000030001",  # object in stream
        "03deadbeef1337",  # undefined (should be ignored)
        "02000000030002",  # object in stream
        "ffcafebabe0007",  # another undefined one
    )
    payload = b''.join(map(binascii.unhexlify, hex_rows))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): field_widths,
            generic.pdf_name('/Size'): 10,
        },
        stream_data=payload,
    )

    expected_out = [
        XRefEntry(
            xref_type=XRefType.FREE, location=None, idnum=0,
            generation=0xffff
        ),
        standard(0x11, 1),
        standard(0x84, 2),
        standard(0xbc, 3, generation=5),
        standard(0x1b4, 4),
        standard(0x299, 5),
        in_obj_stream(3, 1, idnum=6),
        # idnum jump because of undefined entry
        in_obj_stream(3, 2, idnum=8),
    ]

    parsed = list(parse_xref_stream(stream_obj))
    assert parsed == expected_out
def test_premature_xref_stream_end():
    """Reject an xref stream that holds fewer entries than ``/Size`` says.

    The payload contains two five-byte entries (``/W = [1, 2, 2]``) while
    ``/Size`` announces three, so consuming the parser must raise
    ``PdfReadError`` with a message matching ``'incomplete entry'``.
    """
    hex_rows = ("000000ffff", "0100110000")
    payload = b''.join(map(binascii.unhexlify, hex_rows))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 2, 2)]
    )
    stream_dict = {
        generic.pdf_name('/W'): field_widths,
        # one too many
        generic.pdf_name('/Size'): 3,
    }
    stream_obj = generic.StreamObject(
        dict_data=stream_dict, stream_data=payload
    )

    with pytest.raises(misc.PdfReadError, match='incomplete entry'):
        list(parse_xref_stream(stream_obj))
def test_xref_stream_trailing_data():
    """Reject an xref stream with extra bytes after the declared entries.

    Two seven-byte entries (``/W = [1, 4, 2]``, ``/Size = 2``) are followed
    by four surplus bytes; consuming the parser must raise ``PdfReadError``
    with a message matching ``'Trailing'``.
    """
    hex_rows = (
        "0000000000ffff",  # free
        "01000000110000",
        "deadbeef",
    )
    payload = b''.join(map(binascii.unhexlify, hex_rows))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 2)]
    )
    stream_dict = {
        generic.pdf_name('/W'): field_widths,
        generic.pdf_name('/Size'): 2,
    }
    stream_obj = generic.StreamObject(
        dict_data=stream_dict, stream_data=payload
    )

    with pytest.raises(misc.PdfReadError, match='Trailing'):
        list(parse_xref_stream(stream_obj))