Пример #1
0
def merge_pdf(inputs):
    """Merge several PDF inputs into one temporary output file.

    Every input is first normalised with ``to_stream``.  ``simple_merge`` is
    tried on the normalised inputs; if it raises, each input is rewritten on
    its own through ``pdftk_merge`` (the "shaved" copies) and
    ``simple_merge`` is retried on those.  If that fails as well,
    ``pdftk_merge`` performs the final merge of the shaved copies.

    :param inputs: iterable of items accepted by ``to_stream``.
    :returns: ``None`` when ``inputs`` is empty.
        NOTE(review): the code visible here returns nothing after a
        successful merge, although the ``__main__`` caller calls
        ``.getvalue()`` on the result -- confirm whether the tail of this
        function is missing.
    :raises AssertionError: if any normalised input is falsy.
    """
    inputs = [to_stream(inp) for inp in inputs]
    assert all(inputs), 'No inputs? %r' % inputs
    if not inputs:
        return None
    with mktempfn('-M.pdf', 'm-') as out_fn:
        try:
            simple_merge(out_fn, inputs)
        except Exception as e:
            LOG.error('simple merge cannot merge %s: %s', inputs, e)
            # Values returned by pdftk_merge; they are handed to unlink()
            # below, so presumably they are file names -- TODO confirm.
            shaved = []
            try:
                for inp in inputs:
                    with mktempfn('-shaved.pdf', delete_asap=False) as shvd:
                        shaved.append(pdftk_merge(shvd, [inp]))
                try:
                    simple_merge(out_fn, [to_stream(fn) for fn in shaved])
                except Exception as shaved_err:
                    LOG.error('simple merge cannot merge shaved %s: %s',
                        shaved, shaved_err)
                    pdftk_merge(out_fn, shaved)
            finally:
                # Remove the intermediate copies even when the fallback
                # merge fails, so they do not pile up on errors.
                for shaved_fn in shaved:
                    unlink(shaved_fn)
Пример #2
0
def to_filename(stream, opened_ok=True):
    """Return the name of a file that holds the content of ``stream``.

    :param stream: one of
        * a string naming an existing file -- returned unchanged;
        * any other string -- treated as raw content and written to a new
          temporary file;
        * a file-like object with ``read`` -- copied to a new temporary
          file, unless ``opened_ok`` allows reusing its ``name``.
    :param opened_ok: when true, a stream whose ``name`` attribute refers to
        an existing file is not copied; that name is returned directly.
    :returns: a file name.  Temporary files are requested with
        ``delete_asap=False``; presumably the caller is then responsible for
        removing them -- TODO confirm against ``mktempfn``.
    """
    if isinstance(stream, basestring):
        if file_exists(stream):
            return stream
        with mktempfn('-TF.pdf', 'tof-', delete_asap=False) as fn:
            # Close the handle explicitly so the content is flushed to disk
            # before the name is handed back to the caller.
            with open(fn, 'wb') as fh:
                fh.write(stream)
            return fn
    if opened_ok and hasattr(stream, 'name') and file_exists(stream.name):
        return stream.name
    if hasattr(stream, 'seek'):
        # Rewind so the whole content is copied, not just the remainder.
        stream.seek(0, 0)
    else:
        LOG.warn('no seek on %r!', stream)
    with mktempfn('-TF.pdf', 'tof-', delete_asap=False) as fn:
        with open(fn, 'wb') as fh:
            # Copy in 64 KiB chunks to keep memory use bounded.
            while True:
                chunk = stream.read(65536)
                if not chunk:
                    break
                fh.write(chunk)
        return fn
Пример #3
0
def to_stream(stream):
    """Return a seekable stream for ``stream``, positioned at its start.

    :param stream: either an object with a ``seek`` method (used as is), the
        name of an existing file (opened through ``MMFile``), or raw content
        (written to a temporary file that is then opened through
        ``MMFile`` with ``delete_on_close=True``).
    :returns: the seekable stream, rewound to offset 0.  A warning is logged
        when the stream turns out to be empty.
    """
    if not hasattr(stream, 'seek'):
        if file_exists(stream):
            stream = MMFile(stream, 'rb', delete_on_close=False)
        else:
            with mktempfn('-TS.pdf', 'tos-') as fn:
                # Close the handle before re-opening the file for reading so
                # that all written content is guaranteed to be on disk.
                with open(fn, 'wb') as fh:
                    fh.write(stream)
                stream = MMFile(fn, 'rb', delete_on_close=True)
    # Seek to the end to learn the size, then rewind for the caller.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', repr(stream)[:100])
    stream.seek(0, 0)
    return stream
Пример #4
0
def simple_split(stream):
    """Yield each page of ``stream`` as a separate single-page PDF file.

    All pages are collected from ``get_pages`` up front.  Each page is then
    written with a fresh ``PdfFileWriter`` into a temporary file, and that
    file is yielded as an ``MMFile`` opened with ``delete_on_close=True``.

    :param stream: anything accepted by ``get_pages``.
    """
    label = repr(stream)[:100]
    all_pages = list(get_pages(stream))

    for single_page in all_pages:
        pdf_out = PdfFileWriter()
        pdf_out.addPage(single_page)
        # Wrap the write call with timeouter(5, ...); presumably a 5-second
        # limit on writing one page -- TODO confirm against timeouter.
        timed_write = timeouter(5, threaded=False)(pdf_out.write)
        with mktempfn('-SS.pdf', 'ss-') as page_fn:
            with open(page_fn, 'wb') as sink:
                timed_write(sink)
            LOG.debug('split_pdf(%s) yields %s', label, page_fn)
            yield MMFile(page_fn, 'rb', delete_on_close=True)
Пример #5
0
def get_pages(stream):
    """Yield the pages of a PDF one by one.

    :param stream: a seekable file-like object, or raw content (anything
        without a ``seek`` method), which is first written to a temporary
        file and reopened through ``MMFile``.
    :returns: a generator over ``PdfFileReader(stream).pages``.  Nothing is
        yielded (and a warning is logged) when the stream is empty.
    """
    stream_s = repr(stream)[:100]
    from pyPdf import PdfFileReader
    if not hasattr(stream, 'seek'):
        with mktempfn('-GP.pdf', 'gp-') as fn:
            with open(fn, 'wb') as fh:
                fh.write(stream)
            stream = MMFile(fn, 'rb', delete_on_close=True)
    # Seek to the end to detect an empty document before parsing it.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', stream_s)
        return
    stream.seek(0, 0)
    # Track the count separately from the loop index: the index is one less
    # than the number of pages and is never bound for a page-less document.
    page_count = 0
    for i, page in enumerate(PdfFileReader(stream).pages):
        LOG.debug('get_pages(%s) yields %d (%s)', stream_s, i,
            repr(page)[:100])
        page_count = i + 1
        yield page
    LOG.info('%r is %d pages', stream_s, page_count)
Пример #6
0
def clean_pdf(pdffn):
    """Round-trip a PDF through PostScript and return the result.

    The file is passed to ``pdftops`` with a temporary ``.ps`` target, and
    the value that call returns is passed to ``pstopdf`` with ``pdffn`` as
    the target.

    :param pdffn: the PDF to clean; also the target given to ``pstopdf``.
    :returns: whatever ``pstopdf`` returns.
    """
    LOG.info('cleaning %r', pdffn)
    with mktempfn('-pp.ps') as tmp_ps:
        ps_result = pdftops(tmp_ps, [pdffn])
        pdffn = pstopdf(pdffn, [ps_result])
    return pdffn
Пример #7
0
    else:
        LOG.warn('no seek on %r!', stream)
    with mktempfn('-TF.pdf', 'tof-', delete_asap=False) as fn:
        with open(fn, 'wb') as fh:
            while True:
                chunk = stream.read(65536)
                if not chunk:
                    break
                fh.write(chunk)
        return fn


# Command-line entry point.  The first argument selects the action
# ('merge' or 'split'); the remaining arguments are its inputs.
if '__main__' == __name__:
    logging.basicConfig(level=logging.DEBUG)
    todo, args = sys.argv[1], sys.argv[2:]
    if 'merge' == todo:
        # Merge all given inputs and write the result to standard output.
        sys.stdout.write(merge_pdf(args).getvalue())
    elif 'split' == todo:
        import zipfile
        # Pack every piece produced for the first input into a temporary zip
        # archive, then stream that archive to standard output.
        # NOTE(review): split_pdf is not defined in the visible part of the
        # file -- confirm it exists elsewhere.
        with mktempfn('.zip') as zfn:
            zfh = zipfile.ZipFile(zfn, 'w', zipfile.ZIP_DEFLATED)
            for fh in split_pdf(args[0]):
                zfh.writestr(os.path.basename(fh.name), fh.getvalue())
            zfh.close()
            # zfh is rebound here: from the ZipFile writer to a plain
            # binary reader over the finished archive.
            with open(zfn, 'rb') as zfh:
                # Copy in 64 KiB chunks until read() returns nothing.
                while True:
                    chunk = zfh.read(65536)
                    if not chunk:
                        break
                    sys.stdout.write(chunk)