Exemplo n.º 1
0
def save_page(filepath, page_number):
    """Extract one page of a PDF into its own file and undo text rotation.

    The page is written next to ``filepath`` as ``page-<page_number>.pdf``.
    If ``get_rotation`` reports a non-empty rotation for the extracted page,
    that file is renamed to ``p-<page_number>_rotated.pdf`` (same directory)
    and a counter-rotated copy is written back to
    ``page-<page_number>.pdf``.

    Args:
        filepath: Path of the source PDF.
        page_number: 1-based number of the page to extract.
    """
    outpath = os.path.join(os.path.dirname(filepath),
                           f"page-{page_number}.pdf")

    # The source stream is kept open until the writer has finished, since
    # the writer may still need to read page data from it; the ``with``
    # guarantees the handle is closed afterwards (it used to be leaked).
    with open(filepath, "rb") as source:
        infile = PdfFileReader(source, strict=False)
        # Same empty-password attempt the rotated branch below performs.
        if infile.isEncrypted:
            infile.decrypt("")
        page = infile.getPage(page_number - 1)
        outfile = PdfFileWriter()
        outfile.addPage(page)
        with open(outpath, "wb") as f:
            outfile.write(f)

    layout, __ = get_page_layout(outpath)
    # fix rotated PDF
    chars = get_text_objects(layout, ltype="char")
    horizontal_text = get_text_objects(layout, ltype="horizontal_text")
    vertical_text = get_text_objects(layout, ltype="vertical_text")
    rotation = get_rotation(chars, horizontal_text, vertical_text)
    if rotation == "":
        return

    # Rename only the file-name component: replacing "page" across the whole
    # path would also rewrite directory names that happen to contain it.
    out_dir, out_name = os.path.split(outpath)
    stem, fext = os.path.splitext(out_name)
    outpath_new = os.path.join(
        out_dir, "".join([stem.replace("page", "p"), "_rotated", fext])
    )
    os.rename(outpath, outpath_new)

    with open(outpath_new, "rb") as rotated:
        infile = PdfFileReader(rotated, strict=False)
        if infile.isEncrypted:
            infile.decrypt("")
        outfile = PdfFileWriter()
        p = infile.getPage(0)
        # Rotate in the opposite direction of the detected rotation.
        if rotation == "anticlockwise":
            p.rotateClockwise(90)
        elif rotation == "clockwise":
            p.rotateCounterClockwise(90)
        outfile.addPage(p)
        with open(outpath, "wb") as f:
            outfile.write(f)
Exemplo n.º 2
0
def save_page(filepath, page_number):
    """Extract one page of a PDF into its own file and undo text rotation.

    The page is written next to ``filepath`` as ``page-<page_number>.pdf``.
    If ``get_rotation`` reports a non-empty rotation for the extracted page,
    that file is renamed to ``p-<page_number>_rotated.pdf`` (same directory)
    and a counter-rotated copy is written back to
    ``page-<page_number>.pdf``.

    Args:
        filepath: Path of the source PDF.
        page_number: 1-based number of the page to extract.
    """
    outpath = os.path.join(os.path.dirname(filepath), 'page-{}.pdf'.format(page_number))

    # The source stream is kept open until the writer has finished, since
    # the writer may still need to read page data from it; the ``with``
    # guarantees the handle is closed afterwards (it used to be leaked).
    with open(filepath, 'rb') as source:
        infile = PdfFileReader(source, strict=False)
        # Same empty-password attempt the rotated branch below performs.
        if infile.isEncrypted:
            infile.decrypt('')
        page = infile.getPage(page_number - 1)
        outfile = PdfFileWriter()
        outfile.addPage(page)
        with open(outpath, 'wb') as f:
            outfile.write(f)

    layout, __ = get_page_layout(outpath)
    # fix rotated PDF
    chars = get_text_objects(layout, ltype="char")
    horizontal_text = get_text_objects(layout, ltype="horizontal_text")
    vertical_text = get_text_objects(layout, ltype="vertical_text")
    rotation = get_rotation(chars, horizontal_text, vertical_text)
    if rotation == '':
        return

    # Rename only the file-name component: replacing 'page' across the whole
    # path would also rewrite directory names that happen to contain it.
    out_dir, out_name = os.path.split(outpath)
    stem, fext = os.path.splitext(out_name)
    outpath_new = os.path.join(
        out_dir, ''.join([stem.replace('page', 'p'), '_rotated', fext])
    )
    os.rename(outpath, outpath_new)

    with open(outpath_new, 'rb') as rotated:
        infile = PdfFileReader(rotated, strict=False)
        if infile.isEncrypted:
            infile.decrypt('')
        outfile = PdfFileWriter()
        p = infile.getPage(0)
        # Rotate in the opposite direction of the detected rotation.
        if rotation == 'anticlockwise':
            p.rotateClockwise(90)
        elif rotation == 'clockwise':
            p.rotateCounterClockwise(90)
        outfile.addPage(p)
        with open(outpath, 'wb') as f:
            outfile.write(f)
Exemplo n.º 3
0
    def _generate_layout(self, filename, layout_kwargs):
        """Fill in the layout-derived attributes from an existing layout.

        Adapted from camelot's ``BaseParser._generate_layout()``, minus the
        layout generation itself: ``get_page_layout(filename,
        **layout_kwargs)`` is deliberately not called here because the
        layout is already available for other needs. ``self.layout`` and
        ``self.dimensions`` are only read, so they must be set before this
        method runs.

        Args:
            filename: Stored as ``self.filename``; its extension-less form
                becomes ``self.rootname``.
            layout_kwargs: Stored as ``self.layout_kwargs``; not otherwise
                used in this method.
        """
        self.filename = filename
        self.layout_kwargs = layout_kwargs

        # (attribute to set, ``ltype`` passed to get_text_objects), in the
        # order the attributes are populated.
        text_object_kinds = (
            ("images", "image"),
            ("horizontal_text", "horizontal_text"),
            ("vertical_text", "vertical_text"),
        )
        for attribute, ltype in text_object_kinds:
            setattr(self, attribute, get_text_objects(self.layout, ltype=ltype))

        width, height = self.dimensions
        self.pdf_width = width
        self.pdf_height = height

        self.rootname = os.path.splitext(self.filename)[0]