Пример #1
0
def make_oval(f):
    """Render one frame: an oval drawn inside a quad on a scratch PDF page.

    A temporary one-page document (400 x 400) is created. The page rectangle,
    inset by 4 units on every side, is turned into a quad whose upper-left
    and lower-right corners are placed on the ul-lr diagonal according to
    ``f``: with ``f == 1`` both corners sit at their regular positions, with
    ``f == 0`` they have swapped places. The other two corners stay fixed.

    The oval gets a blue border of width 0.3 and the fill color
    ``(min(1, f), 0, min(1, max(1 - f, 0)))``.

    Args:
        f: interpolation factor applied to the diagonal and to the fill color.

    Returns:
        The result of ``getImageData("pgm")`` for the rendered page pixmap
        (no alpha). The document is closed before returning.
    """
    scratch = fitz.open()  # empty document, exists only for this frame
    page = scratch.newPage(width=400, height=400)
    frame = (page.rect + (4, 4, -4, -4)).quad  # inset page rect as a quad

    # Slide the two diagonal corners; ur and ll are taken over unchanged.
    upper_left = frame.lr + (frame.ul - frame.lr) * f
    lower_right = frame.ul + (frame.lr - frame.ul) * f
    oval_quad = fitz.Quad(upper_left, frame.ur, frame.ll, lower_right)

    # Fill color follows f: first component rises, third component falls.
    first = min(1, f)
    third = min(1, max(1 - f, 0))

    page.drawOval(
        oval_quad,
        color=(0, 0, 1),  # blue border
        fill=(first, 0, third),  # variable fill color
        width=0.3,  # border width
    )

    pixmap = page.getPixmap(alpha=False)
    scratch.close()  # the document is no longer needed once rendered
    return pixmap.getImageData("pgm")
Пример #2
0
def make_oval(f):
    """Render one frame: an oval inside a quad with sliding lower corners.

    A temporary one-page document (400 x 400) is created. The page rectangle,
    inset by 4 units on every side, is turned into a quad. Its two upper
    corners stay fixed; the two lower corners are placed on the lower edge
    at the fractions ``f`` and ``1 - f`` measured from the lower-left corner,
    so with ``f == 0`` they sit at their regular positions and with
    ``f == 1`` they have swapped places.

    The oval gets a blue border of width 0.3 and the fill color
    ``(c1, c1 * c3, c3)`` where ``c1 = min(1, f)`` and
    ``c3 = min(1, max(1 - f, 0))``.

    Args:
        f: interpolation factor applied to the lower edge and the fill color.

    Returns:
        The result of ``getImageData("ppm")`` for the rendered page pixmap
        (no alpha). The document is closed before returning.
    """
    scratch = fitz.open()  # empty document, exists only for this frame
    page = scratch.newPage(width=400, height=400)
    frame = (page.rect + (4, 4, -4, -4)).quad  # inset page rect as a quad

    # Both moving corners travel along the lower edge, starting at ll.
    lower_edge = frame.lr - frame.ll
    oval_quad = fitz.Quad(
        frame.ul,
        frame.ur,
        frame.ll + lower_edge * f,
        frame.ll + lower_edge * (1 - f),
    )

    # Fill color follows f; the middle component is the product of the
    # other two.
    first = min(1, f)
    third = min(1, max(1 - f, 0))
    fill_color = (first, first * third, third)

    page.drawOval(
        oval_quad,
        color=(0, 0, 1),  # blue border
        fill=fill_color,  # variable fill color
        width=0.3,  # border width
    )

    pixmap = page.getPixmap(alpha=False)
    scratch.close()  # the document is no longer needed once rendered
    return pixmap.getImageData("ppm")
Пример #3
0
 def __post_init__(self):
     """Derive ``rect``, ``quads`` and ``point`` from the raw fields.

     ``rect`` is built from ``topLeft`` and ``botRight``. If ``vertices`` is
     non-empty, every complete group of four consecutive entries becomes one
     ``fitz.Quad`` (left-over entries are ignored); otherwise ``quads`` is
     ``None``. ``point`` is the top-left corner of ``rect``.
     """
     self.rect = fitz.Rect(self.topLeft, self.botRight)

     corners = self.vertices
     if not corners:
         self.quads = None
     else:
         # Step through the start offsets of all complete 4-entry groups.
         complete = 4 * (len(corners) // 4)
         self.quads: List[fitz.Quad] = [
             fitz.Quad(*corners[start:start + 4])
             for start in range(0, complete, 4)
         ]

     self.point = self.rect.top_left
Пример #4
0
def _parse_highlight(annot: fitz.Annot, wordlist: List) -> str:
    """Collect the words that intersect the quads of an annotation.

    The annotation's ``vertices`` are consumed in complete groups of four.
    For each group the rectangle of the corresponding ``fitz.Quad`` is taken
    and every entry of ``wordlist`` whose rectangle (its first four items)
    intersects it contributes its fifth item (index 4) to that group's text.

    Args:
        annot: annotation providing ``vertices``.
        wordlist: sequence of word entries; items 0-3 are used as rectangle
            coordinates and item 4 as the word text.

    Returns:
        The per-group texts joined by single spaces; words inside a group
        are joined by single spaces as well.
    """
    vertices = annot.vertices
    per_quad_text = []
    for start in range(0, 4 * (len(vertices) // 4), 4):
        area = fitz.Quad(vertices[start : start + 4]).rect
        hits = (
            entry[4]
            for entry in wordlist
            if fitz.Rect(entry[:4]).intersects(area)
        )
        per_quad_text.append(" ".join(hits))
    return " ".join(per_quad_text)
Пример #5
0
def make_oval(i):
    """Render one frame: an oval inside a quad, with marked quad corners.

    A temporary one-page document (400 x 300) is created and the page
    rectangle, inset by 4 units on every side, is turned into a quad.
    ``f = i / 100.0`` selects which edge is distorted:

    * ``f >= 0``: the two lower corners are placed on the lower edge at the
      fractions ``f`` and ``1 - f`` (measured from the lower-left corner).
    * ``f < 0``: the two upper corners are placed on the upper edge at the
      fractions ``-f`` and ``1 + f`` (measured from the upper-left corner).

    The oval is drawn with a blue border of width 0.3 and a fill color that
    depends on the distortion. Each corner of the distorted quad is marked
    with a filled circle of radius 4 (ll and ul red, lr and ur blue).

    Args:
        i: frame parameter; divided by 100.0 to obtain the factor ``f``.

    Returns:
        The result of ``getImageData("ppm")`` for the rendered page pixmap
        (no alpha). The document is closed before returning.
    """
    red, blue = (1, 0, 0), (0, 0, 1)

    scratch = fitz.open()  # empty document, exists only for this frame
    page = scratch.newPage(width=400, height=300)
    frame = (page.rect + (+4, +4, -4, -4)).quad  # leave a border of 4 units

    f = i / 100.0
    # lower: shift applied to the lower corners, upper: to the upper corners
    lower, upper = (f, 0) if f >= 0 else (0, -f)

    top_edge = frame.ur - frame.ul
    bottom_edge = frame.lr - frame.ll
    oval_quad = fitz.Quad(
        frame.ul + top_edge * upper,
        frame.ul + top_edge * (1 - upper),
        frame.ll + bottom_edge * lower,
        frame.ll + bottom_edge * (1 - lower),
    )

    # Fill color: first component follows the active shift, third component
    # follows the larger of the two complements.
    fill_color = (
        min(1, max(upper, lower)),
        0,
        min(1, max(1 - lower, 1 - upper)),
    )

    shape = page.newShape()
    shape.drawOval(oval_quad)
    shape.finish(
        color=blue,  # blue border
        fill=fill_color,  # variable fill color
        width=0.3,  # border width
    )

    # Corner markers, drawn in the same order as listed.
    markers = (
        (oval_quad.ll, red),
        (oval_quad.lr, blue),
        (oval_quad.ul, red),
        (oval_quad.ur, blue),
    )
    for corner, marker_color in markers:
        shape.drawCircle(corner, 4)
        shape.finish(color=marker_color, fill=marker_color)
    shape.commit()

    pixmap = page.getPixmap(alpha=False)
    scratch.close()  # the document is no longer needed once rendered
    image = pixmap.getImageData("ppm")
    # Drop the PyMuPDF objects explicitly, in this order, before returning.
    del pixmap, page, shape
    return image
Пример #6
0
 def rect(self) -> fitz.Rect:
     """Return the rectangle associated with this annotation.

     The result depends on ``self.type_id``:

     * ``SQUARE``: the annotation's own rectangle.
     * ``INK`` / ``LINE``: a rectangle spanning from x = 0 to the ``x1`` of
       the page's mediabox, with the annotation's ``y0`` / ``y1``.
     * ``HIGHLIGHT`` / ``UNDERLINE`` / ``SQUIGGLY`` / ``STRIKEOUT``: the
       rectangle of a ``fitz.Quad`` built from the annotation's vertices.
     * anything else: an empty ``fitz.Rect()``.
     """
     kind = self.type_id

     if kind == SQUARE:
         return self.annot.rect

     if kind in [INK, LINE]:
         page_width = self.page.mediabox.x1
         own = self.annot.rect
         return fitz.Rect(0, own.y0, page_width, own.y1)

     if kind in [HIGHLIGHT, UNDERLINE, SQUIGGLY, STRIKEOUT]:
         # TODO (carried over from the original code, not further specified)
         # NOTE(review): the complete vertex list goes into one Quad; verify
         # this is intended for annotations with more than four vertices.
         return fitz.Quad(self.annot.vertices).rect

     return fitz.Rect()
Пример #7
0
 def _parse_highlight(self, annot, wordlist):
     """Return the text of all words intersecting the annotation's quads.

     ``annot.vertices`` is read in complete groups of four; each group is
     turned into a ``fitz.Quad`` and its rectangle is used as the search
     area. A word entry belongs to an area when the rectangle built from its
     first four items intersects it; its fifth item (index 4) is the text.

     Args:
         annot: annotation providing ``vertices``.
         wordlist: sequence of word entries (coordinates in items 0-3, text
             in item 4).

     Returns:
         One string: words of an area joined by spaces, areas joined by
         spaces in vertex order.
     """
     vertices = annot.vertices
     # One search rectangle per complete group of four vertices.
     areas = [
         fitz.Quad(vertices[k : k + 4]).rect
         for k in range(0, (len(vertices) // 4) * 4, 4)
     ]
     return " ".join(
         " ".join(
             entry[4]
             for entry in wordlist
             if fitz.Rect(entry[:4]).intersects(area)
         )
         for area in areas
     )
def add_highlights(page, highlights):
    """Add one highlight annotation to ``page`` per entry of ``highlights``.

    Each entry provides ``x`` and ``y`` (two values each) and ``color``.
    The y values are subtracted from the page rectangle's ``y1`` before use;
    per the original author's note, the Mendeley coordinate system seems to
    have its Y axis inverted compared to the PDF library, so the Y position
    is flipped.

    Args:
        page: page object offering ``rect`` and ``addHighlightAnnot``.
        highlights: iterable of highlight descriptions (``x``, ``y``,
            ``color``).

    Returns:
        None. The annotations are created and updated on ``page``.
    """
    for high in highlights:
        page_bottom = page.rect.y1  # reference for flipping the y values

        x_first, x_second = high.x[0], high.x[1]
        y_lower = page_bottom - high.y[0]
        y_upper = page_bottom - high.y[1]

        quad = pdf.Quad(
            pdf.Point(x_first, y_upper),   # upper left
            pdf.Point(x_second, y_upper),  # upper right
            pdf.Point(x_first, y_lower),   # lower left
            pdf.Point(x_second, y_lower),  # lower right
        )

        # Create the highlight, then overwrite both color entries.
        highlight = page.addHighlightAnnot(quad)
        palette = highlight.colors
        for key in ("fill_color", "stroke_color"):
            palette[key] = high.color
        highlight.setColors(palette)

        highlight.update()
Пример #9
0
def _check_contain(r_word, points):
    """Decide whether a word counts as lying inside a highlighted area.

    The rectangle of the quad given by `points` is intersected with
    `r_word`. The word counts as contained when the area of that
    intersection is at least `_threshold_intersection` times the area of
    the word's own rectangle.

    Args:
        r_word (fitz.Rect): rectangular area of a single word.
        points (list): list of points in the rectangular area of the
            given part of a highlight.

    Returns:
        bool: whether `r_word` is contained in the rectangular area.
    """
    # `intersect` changes the rectangle in place, so a fresh one is built
    # from `points` on every call.
    overlap = fitz.Quad(points).rect
    overlap.intersect(r_word)

    required_area = r_word.getArea() * _threshold_intersection
    return overlap.getArea() >= required_area
Пример #10
0
    sys.exit(1)

print("Opened {} with {} pages".format(file, len(doc)))

highlight_text = []
for page in doc:
    # list to store the co-ordinates of all highlights
    highlights = []

    # Walk the page's annotations, starting at `page.firstAnnot` and
    # following `.next` until it yields a falsy value. For every annotation
    # whose type code is 8, append the rectangle(s) derived from its
    # vertices to `highlights`.
    # NOTE(review): 8 is presumably the highlight annotation type - confirm
    # against PyMuPDF's annotation type constants.
    annot = page.firstAnnot
    while annot:
        if annot.type[0] == 8:
            all_coordinates = annot.vertices
            if len(all_coordinates) == 4:
                # exactly four vertices: treat them as a single quad
                highlight_coord = fitz.Quad(all_coordinates).rect
                highlights.append(highlight_coord)
            else:
                # any other count: cut the vertex list into consecutive
                # groups of four and handle each group as its own quad
                # NOTE(review): a trailing group with fewer than four
                # entries is passed to fitz.Quad unchanged.
                all_coordinates = [
                    all_coordinates[x:x + 4]
                    for x in range(0, len(all_coordinates), 4)
                ]
                for i in range(0, len(all_coordinates)):
                    coord = fitz.Quad(all_coordinates[i]).rect
                    highlights.append(coord)
        annot = annot.next

    all_words = page.getTextWords()

    # List to store all the highlighted texts
    for h in highlights: