Пример #1
0
def to_stream(stream):
    """Return a seekable stream for *stream*, rewound to its start.

    An object that already has a ``seek`` attribute is used as-is.
    Otherwise *stream* is either treated as a path (when ``file_exists``
    accepts it) and opened through ``MMFile``, or treated as raw content
    that is written to a temporary file which is then opened through
    ``MMFile`` with ``delete_on_close=True``.

    A zero-length stream is only logged as a warning; it is still
    returned to the caller.
    """
    if not hasattr(stream, 'seek'):
        if file_exists(stream):
            stream = MMFile(stream, 'rb', delete_on_close=False)
        else:
            with mktempfn('-TS.pdf', 'tos-') as fn:
                # Close (and so flush) the write handle before the file is
                # reopened for reading; a bare open().write() leaves that
                # to the garbage collector and may expose a truncated file.
                with open(fn, 'wb') as fh:
                    fh.write(stream)
                stream = MMFile(fn, 'rb', delete_on_close=True)
    # Seek to the end so that tell() reports the total size.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', repr(stream)[:100])
    stream.seek(0, 0)
    return stream
Пример #2
0
def get_pages(stream):
    """Yield the pages of the PDF held in *stream*, one at a time.

    *stream* is either an object with a ``seek`` attribute or raw PDF
    content; raw content is first written to a temporary file which is
    then opened through ``MMFile`` with ``delete_on_close=True``.

    A zero-length stream is logged as a warning and yields nothing.
    Once all pages have been consumed, the total page count is logged.
    """
    # Capture a short description up front: *stream* may be rebound below.
    stream_s = repr(stream)[:100]
    from pyPdf import PdfFileReader
    if not hasattr(stream, 'seek'):
        with mktempfn('-GP.pdf', 'gp-') as fn:
            with open(fn, 'wb') as fh:
                fh.write(stream)
            stream = MMFile(fn, 'rb', delete_on_close=True)
    # Seek to the end so that tell() reports the total size.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', stream_s)
        return
    stream.seek(0, 0)
    # Track the count separately from the loop index: the index is never
    # bound for a PDF without pages, and it is one less than the number
    # of pages otherwise.
    page_count = 0
    for i, page in enumerate(PdfFileReader(stream).pages):
        LOG.debug('get_pages(%s) yields %d (%s)', stream_s, i,
            repr(page)[:100])
        page_count = i + 1
        yield page
    LOG.info('%r is %d pages', stream_s, page_count)