Пример #1
0
def test_text_filter(resources, outdir):
    """Check that dropping ``Tj`` operators removes the extractable text.

    The first page's content stream is parsed, every ``Tj`` instruction is
    discarded, and the remaining instructions are written back as a new
    stream. ``pdftotext`` is run on the input and on the saved output to
    confirm that text was present before and is absent afterwards.

    Args:
        resources: Path-like directory containing the input test PDF
            (presumably a pytest fixture -- defined outside this block).
        outdir: Path-like directory where ``notext.pdf`` is written
            (presumably a pytest fixture -- defined outside this block).
    """
    input_pdf = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
    output_pdf = outdir / 'notext.pdf'

    # Ensure the test PDF has text we can find
    proc = run(['pdftotext', str(input_pdf), '-'],
               check=True,
               stdout=PIPE,
               encoding='utf-8')
    assert proc.stdout.strip() != '', "Need input test file that contains text"

    # Use the context manager so the PDF is closed even if an assertion or
    # exception interrupts the test, and before pdftotext reads the output.
    with Pdf.open(input_pdf) as pdf:
        page = pdf.pages[0]

        keep = []
        for operands, command in parse_content_stream(page):
            if command == Operator('Tj'):
                print("skipping Tj")
                continue
            keep.append((operands, command))

        new_stream = Stream(pdf, keep)
        print(new_stream.read_bytes())  # pylint: disable=no-member
        page['/Contents'] = new_stream
        page['/Rotate'] = 90

        # Pass static_id by keyword: it is self-describing and keeps working
        # with pikepdf releases where save() options are keyword-only.
        pdf.save(output_pdf, static_id=True)

    proc = run(
        ['pdftotext', str(output_pdf), '-'],
        check=True,
        stdout=PIPE,
        encoding='utf-8',
    )

    assert proc.stdout.strip() == '', "Expected text to be removed"
Пример #2
0
def test_stream_dict_oneshot():
    """Check that stream dictionary entries can be given at creation time.

    Three construction forms are exercised: ``Stream`` with keyword
    arguments, ``Stream`` with a mapping of ``/Name`` keys, and
    ``Pdf.make_stream`` with keyword arguments. Selected entries and one
    stream's data are then read back.
    """
    doc = pikepdf.new()

    via_kwargs = Stream(doc, b'12345', One=1, Two=2)
    via_mapping = Stream(doc, b'67890', {'/Three': 3, '/Four': 4})
    via_factory = doc.make_stream(b'abcdef', One=1, Two=2)

    # Entries supplied as keywords are readable as attributes.
    assert via_kwargs.One == 1
    assert via_kwargs.read_bytes() == b'12345'
    # Entries supplied through the mapping are readable the same way.
    assert via_mapping.Three == 3
    assert via_factory.One == 1
Пример #3
0
def test_data_decoding_errors(filter_: str, data: bytes, msg: str):
    """Check that reading an undecodable stream raises ``DataDecodingError``.

    Args:
        filter_: Name of the filter to declare on the stream (for example a
            ``/Name``-style string; values presumably come from a
            parametrize decorator outside this block).
        data: Raw bytes stored as the stream's data.
        msg: Pattern the raised error's message must match.
    """
    pdf = Pdf.new()
    stream = Stream(pdf, data, Filter=Name(filter_))

    # Constructing the stream must succeed; only reading should fail.
    with pytest.raises(DataDecodingError, match=msg):
        stream.read_bytes()