Exemplo n.º 1
0
def GetPageText(pg):
    """Extract the text of page ``pg`` by way of a display list.

    The page is first run into a device wrapping a ``fitz.DisplayList``.
    That list is then run into a second device built from a
    ``fitz.TextSheet`` and a ``fitz.TextPage``, with the page bounds
    passed as the area argument.

    Args:
        pg: page object providing ``run(device, matrix)`` and ``bound()``.

    Returns:
        Whatever ``extractJSON()`` of the text page returns.
    """
    # Step 1: record the page, untransformed, into a display list.
    display_list = fitz.DisplayList()
    list_device = fitz.Device(display_list)
    pg.run(list_device, fitz.Identity)

    # Step 2: replay the display list into a device backed by the text
    # sheet / text page pair, using the page bounds as the area.
    text_sheet = fitz.TextSheet()
    text_page = fitz.TextPage()
    page_bounds = pg.bound()
    text_device = fitz.Device(text_sheet, text_page)
    display_list.run(text_device, fitz.Identity, page_bounds)

    return text_page.extractJSON()
Exemplo n.º 2
0
def pdf_show(datei, seite):
    """Render one page of ``PDFcfg.doc`` and write it out as a PNG.

    Args:
        datei: destination handed to ``writePNG`` (presumably a file
            name; confirm against the caller).
        seite: 1-based page number; converted with ``int()``.

    Returns:
        None.
    """
    # Callers count pages from 1, loadPage is given the index minus one.
    target_page = PDFcfg.doc.loadPage(int(seite) - 1)
    bounds = target_page.bound().round()  # page bounds as integer rectangle

    # Empty RGB pixmap of the page's size, cleared with 255 (white).
    canvas = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), bounds)
    canvas.clearWith(255)

    # Draw the page onto the pixmap through a device, then save it.
    target_page.run(fitz.Device(canvas), fitz.Identity)
    canvas.writePNG(datei)
Exemplo n.º 3
0
def pdf_show(pdf, page):
    """Render a page of ``pdf`` and return it as a ``wx`` bitmap.

    Args:
        pdf: document object providing ``loadPage``.
        page: 1-based page number.

    Returns:
        The bitmap created by ``wx.BitmapFromBufferRGBA``.
    """
    current = pdf.loadPage(page - 1)
    bounds = current.bound().round()  # page bounds as integer rectangle

    # Start from an RGB pixmap cleared with 255 (white) and draw the page
    # onto it through a device.
    canvas = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), bounds)
    canvas.clearWith(255)
    current.run(fitz.Device(canvas), fitz.Identity)

    # The pixel area comes back as a bytearray, while the wx call below
    # needs a string.
    pixels = str(canvas.samples)

    # If BitmapFromBufferRGBA causes trouble, an alternative is to drop
    # the alpha byte of every 4-byte pixel and use wx.BitmapFromBuffer
    # (which ignores transparency), along the lines of:
    #   rgb = "".join([pixels[4*i:4*i+3] for i in range(len(pixels)/4)])
    #   return wx.BitmapFromBuffer(bounds.width, bounds.height, rgb)
    return wx.BitmapFromBufferRGBA(bounds.width, bounds.height, pixels)
Exemplo n.º 4
0
# Build the transformation matrix from the command line:
# argv[3] is the zoom given in percent (it is divided by 100 below),
# argv[4] is the rotation (presumably in degrees -- TODO confirm).
zoom = int(sys.argv[3])
rotate = int(sys.argv[4])
trans = fitz.Matrix(zoom / 100.0, zoom / 100.0).preRotate(rotate)
'''
here we introduce the display list, which provides caching-mechanisms
to reduce parsing of a page.
first, we need to create a display list
hand it over to a list device
and then populate the display list by running the page through that device,
with transformation applied
'''
# NOTE: the bare string above is an expression statement used as a block
# comment, not a docstring.
# `page` is defined outside this snippet.
mediabox = page.rect
dl = fitz.DisplayList(mediabox)
dv = fitz.Device(dl)
page.run(dv, trans)
# take the page rectangle and apply the same transformation to it
rect = mediabox.transform(trans)

# create a pixmap with RGB as colorspace, bounded by `rect` rounded to an
# integer rectangle
pm = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), rect.round())
# clear it with 0xff (white)
pm.clearWith(0xff)

# fitz.Device(pm, None) is a device for drawing
# we run the display list above through this drawing device
# with the area provided; the identity matrix is passed here because
# `trans` was already applied when the display list was populated
dl.run(fitz.Device(pm, None), fitz.Identity, rect)

# the drawing device saves the result into the pixmap `pm`
Exemplo n.º 5
0
#!/usr/bin/env python
import fitz

# Open the PDF document and read its page count.
f = "sdw_2015_06.pdf"
d = fitz.Document(f)
seiten = d.pageCount

# Visit every page by 0-based index and run the display-list / text-device
# steps one at a time. An "ok" line is printed after each call, presumably
# so a failing step can be pinpointed from the output.
# NOTE(review): the loop body may continue beyond this excerpt.
for seite in range(seiten):
    print "=============== processing page", seite, " ==============="
    pg = d.loadPage(seite)
    # Record the page, untransformed, into a display list.
    dl = fitz.DisplayList()
    print "ok: dl = fitz.DisplayList()"
    dv = fitz.Device(dl)
    print "ok: dv = fitz.Device(dl)"
    pg.run(dv, fitz.Identity)
    print "ok: pg.run(dv, fitz.Identity)"
    # Create the text sheet / text page pair that backs the second device.
    ts = fitz.TextSheet()
    print "ok: ts = fitz.TextSheet()"
    tp = fitz.TextPage()
    print "ok: tp = fitz.TextPage()"
    # Run the display list into that device, passing the page bounds as
    # the area argument.
    rect = pg.bound()
    dl.run(fitz.Device(ts, tp), fitz.Identity, rect)
    print "ok: dl.run(fitz.Device(ts, tp), fitz.Identity, rect)"