示例#1
0
def test_page_operations(pdf_path, password):
    """
    Smoke-test a chain of page operations on the first page of a PDF.

    Nothing is asserted here: the test passes as long as none of the calls
    raises. The results themselves are not inspected, so a more thorough
    version should compare the output against the expected one.

    ``pdf_path`` is either a string starting with "http", which is fetched
    through ``get_pdf_from_url`` and wrapped in a ``BytesIO``, or a path
    relative to ``RESOURCE_ROOT``. A truthy ``password`` is passed to
    ``reader.decrypt`` before the first page is accessed.
    """
    if not pdf_path.startswith("http"):
        source = os.path.join(RESOURCE_ROOT, pdf_path)
    else:
        # The last URL segment is handed to the download helper as its
        # second argument (presumably a file name; verify in the helper).
        file_name = pdf_path.split("/")[-1]
        source = BytesIO(get_pdf_from_url(pdf_path, file_name))
    reader = PdfReader(source)

    if password:
        reader.decrypt(password)

    first_page: PageObject = reader.pages[0]

    # First a Transformation object, then a raw 6-tuple.
    rotate_scale_shift = Transformation().rotate(90).scale(1).translate(1, 1)
    first_page.add_transformation(rotate_scale_shift, expand=True)
    first_page.add_transformation((1, 0, 0, 0, 0, 0))

    # Scaling and text extraction, with the content streams compressed
    # in between.
    first_page.scale(2, 2)
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.compress_content_streams()
    first_page.extract_text()

    # The same scaling and extraction once more, now after compression.
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.extract_text()
示例#2
0
def test_text_extraction_encrypted():
    """
    Check that text can be extracted from an encrypted PDF once decrypted.

    The reader must report the file as encrypted, and after ``decrypt`` the
    first page's text must begin with the expected "Lorem ipsum" phrase.
    """
    pdf_file = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    reader = PdfReader(pdf_file)
    assert reader.is_encrypted is True
    reader.decrypt("openpassword")

    # Newlines are removed and the ends trimmed before the prefix check.
    raw_text = reader.pages[0].extract_text()
    flattened = raw_text.replace("\n", "").strip()
    assert flattened.startswith("Lorem ipsum dolor sit amet")
示例#3
0
文件: bench.py 项目: mstamy2/PyPDF2
def page_ops(pdf_path, password):
    """
    Run a fixed sequence of page operations on the first page of a PDF.

    ``pdf_path`` is resolved relative to ``RESOURCE_ROOT``. A truthy
    ``password`` is passed to ``reader.decrypt`` before the page is
    accessed. Nothing is returned and no result is checked.
    """
    reader = PdfReader(os.path.join(RESOURCE_ROOT, pdf_path))
    if password:
        reader.decrypt(password)

    first_page = reader.pages[0]

    # Three rounds of "apply a transformation, then merge the page with
    # itself", each round using a different transformation chain.
    first_page.add_transformation(Transformation().rotate(90).scale(1.2))
    first_page.merge_page(first_page)

    first_page.add_transformation(
        Transformation().scale(1).translate(tx=1, ty=1)
    )
    first_page.merge_page(first_page)

    first_page.add_transformation(
        Transformation().rotate(90).scale(1).translate(tx=1, ty=1)
    )
    first_page.merge_page(first_page)

    # A raw 6-tuple instead of a Transformation object, followed by the
    # scaling, compression and text-extraction calls.
    first_page.add_transformation((1, 0, 0, 0, 0, 0))
    first_page.scale(2, 2)
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.compress_content_streams()
    first_page.extract_text()
示例#4
0
def test_decrypt_when_no_id():
    """
    Check decryption of an encrypted file whose trailer lacks the 'ID' value.

    See https://github.com/mstamy2/PyPDF2/issues/608
    """
    pdf_file = os.path.join(RESOURCE_ROOT, "encrypted_doc_no_id.pdf")
    expected_metadata = {"/Producer": "European Patent Office"}

    with open(pdf_file, "rb") as stream:
        reader = PdfReader(stream)
        # This file is decrypted with an empty password.
        reader.decrypt("")
        assert reader.metadata == expected_metadata
示例#5
0
def test_decrypt():
    """
    Check page count and metadata of a password-protected PDF after decrypt.

    The reader is expected to report ``is_encrypted`` as True both before
    and after ``decrypt`` is called.
    """
    expected_metadata = {
        "/CreationDate": "D:20220403203552+02'00'",
        "/Creator": "Writer",
        "/Producer": "LibreOffice 6.4",
    }
    pdf_file = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")

    with open(pdf_file, "rb") as stream:
        reader = PdfReader(stream)
        assert reader.is_encrypted is True

        reader.decrypt("openpassword")
        assert len(reader.pages) == 1
        # Decrypting does not flip the flag: the file is still reported
        # as encrypted.
        assert reader.is_encrypted is True

        # The metadata object is converted to a plain dict for comparison.
        assert dict(reader.metadata) == expected_metadata
示例#6
0
def test_compress_content_streams(pdf_path, password):
    """
    Compress the content streams of every page in the given PDF.

    ``pdf_path`` is passed to ``PdfReader`` as-is. A truthy ``password`` is
    passed to ``reader.decrypt`` first. Nothing is asserted; the test passes
    when no call raises.
    """
    reader = PdfReader(pdf_path)

    if password:
        reader.decrypt(password)

    for current_page in reader.pages:
        current_page.compress_content_streams()