def begin_page(self, page, ctm):
    """Start a new layout page sized from the transformed media box.

    Both corners of ``page.mediabox`` are mapped through ``ctm``.  The new
    ``LTPage`` is anchored at the origin and spans the absolute width and
    height of the transformed box; it is stored in ``self.cur_item``.
    """
    left, bottom, right, top = page.mediabox
    left, bottom = apply_matrix_pt(ctm, (left, bottom))
    right, top = apply_matrix_pt(ctm, (right, top))
    # The transform may flip the axes, hence the absolute extents.
    page_box = (0, 0, abs(left - right), abs(bottom - top))
    self.cur_item = LTPage(self.pageno, page_box)
def begin_page(self, page: PDFPage, ctm: Tuple[int, int, int, int, int, int]) -> None:
    """Start a new extended layout page sized from the transformed media box.

    Both corners of ``page.mediabox`` are mapped through ``ctm``.  The new
    ``LTPageExtended`` is anchored at the origin and spans the absolute
    width and height of the transformed box; it is stored in
    ``self.cur_item``.
    """
    left, bottom, right, top = page.mediabox
    left, bottom = apply_matrix_pt(ctm, (left, bottom))
    right, top = apply_matrix_pt(ctm, (right, top))
    # The transform may flip the axes, hence the absolute extents.
    page_box = (0, 0, abs(left - right), abs(bottom - top))
    self.cur_item = LTPageExtended(self.pageno, page_box)
def paint_single_path(self, gstate, stroke, fill, evenodd, path):
    """Convert a single path draw command into line and curve objects.

    ``path`` is a sequence of ``(operator, x0, y0, x1, y1, ...)`` tuples.
    Paths with fewer than two elements are ignored.  When the operator
    string matches ``self.line_only_shape`` and every segment is horizontal
    or vertical, one ``LTLine`` is added per segment (plus a closing line
    when the path ends with ``h``).  Every other path is added as a single
    ``LTCurve`` polyline.
    """
    if len(path) < 2:
        return
    shape = ''.join(x[0] for x in path)

    # Collect every coordinate pair of every element, mapped through the
    # current transformation matrix.
    pts = [
        apply_matrix_pt(self.ctm, (p[i], p[i + 1]))
        for p in path
        for i in range(1, len(p), 2)
    ]

    # Line mode
    if self.line_only_shape.match(shape):
        segments = list(zip(pts, pts[1:]))
        # A segment is sloped when both its x and y coordinates change.
        has_slope = any(
            start[0] != end[0] and start[1] != end[1]
            for start, end in segments
        )
        if not has_slope:
            for start, end in segments:
                self.cur_item.add(LTLine(gstate.linewidth, start, end))
            # Adding the closing line for a polygon, especially rectangles
            if shape.endswith('h'):
                self.cur_item.add(LTLine(gstate.linewidth, pts[0], pts[-1]))
            return

    # Add the curve as an arbitrary polyline (Bezier curve info is lost here)
    self.cur_item.add(LTCurve(gstate.linewidth, pts))
def paint_single_path(self, gstate, stroke, fill, evenodd, path):
    """Convert a single path draw command into line and curve objects.

    ``path`` is a sequence of ``(operator, x0, y0, x1, y1, ...)`` tuples.
    Paths with fewer than two elements are ignored.  When the operator
    string matches ``self.line_only_shape`` and no segment is sloped, one
    ``LTLine`` is added per segment (plus a closing line when the path ends
    with ``h``).  Every other path is added as a single ``LTCurve``
    polyline.
    """
    if len(path) < 2:
        return
    shape = ''.join(x[0] for x in path)

    # Map every coordinate pair of every element through the current
    # transformation matrix.
    pts = []
    for p in path:
        pts.extend(
            apply_matrix_pt(self.ctm, (p[i], p[i + 1]))
            for i in range(1, len(p), 2)
        )

    # Line mode
    if self.line_only_shape.match(shape):
        pairs = list(zip(pts, pts[1:]))
        # Axis-aligned means each segment keeps its x or its y unchanged.
        axis_aligned = all(a[0] == b[0] or a[1] == b[1] for a, b in pairs)
        if axis_aligned:
            for a, b in pairs:
                self.cur_item.add(LTLine(gstate.linewidth, a, b))
            # Adding the closing line for a polygon, especially rectangles
            if shape.endswith('h'):
                self.cur_item.add(LTLine(gstate.linewidth, pts[0], pts[-1]))
            return

    # Add the curve as an arbitrary polyline (Bezier curve info is lost here)
    self.cur_item.add(LTCurve(gstate.linewidth, pts))
def _paint_network(self, path):
    """Build an ``LTNetwork`` from ``path`` and add it to the current item.

    Each three-element path entry ``(state, x, y)`` becomes a ``Point`` at
    the transformed, rounded coordinates.  State ``'l'`` links the point to
    the previous one; other states only register the point.  A
    single-element ``('h',)`` entry closes the subpath by linking the
    current point back to the point of the most recent ``'m'`` entry.

    Raises:
        IndexError: if an entry is neither a three-element tuple nor
            ``('h',)``.
    """
    network = LTNetwork()
    previous_element = None
    subpath_start = None
    for segment in path:
        if len(segment) == 3:
            state, x, y = segment
            px, py = apply_matrix_pt(self.ctm, (x, y))
            element = Point((int_round(px), int_round(py)))
            if state == 'm':
                # Remember where the subpath began so 'h' can close to it.
                subpath_start = element
        elif len(segment) == 1 and segment[0] == 'h':
            if subpath_start is None:
                # Nothing has been started yet, so there is nothing to close.
                continue
            # Closing a path draws a line from the current point back to
            # the start of the subpath, not from the point to itself.
            element = subpath_start
            state = 'l'
        else:
            raise IndexError
        network.add_point(element)
        # state 'l' is to draw a line, which means that we have link.
        # Otherwise, we only have the point.
        if state == 'l':
            network.add_link(element, previous_element)
        previous_element = element
    self.cur_item.add(network)
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Turn a painted path into an ``LTLine``, rectangle or ``LTCurve``.

    * ``ml`` paths that are horizontal or vertical after transformation
      become an ``LTLine``.
    * ``mlllh`` paths whose transformed corners form an axis-aligned
      rectangle become an ``LLTRect``; a debug line describing the
      rectangle is also printed.
    * Anything else (including sloped lines and skewed quadrilaterals)
      becomes an ``LTCurve`` over all transformed points.
    """
    shape = ''.join(x[0] for x in path)
    if shape == 'ml':
        # horizontal/vertical line
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        if x0 == x1 or y0 == y1:
            self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
            return
    if shape == 'mlllh':
        # rectangle
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (_, x2, y2) = path[2]
        (_, x3, y3) = path[3]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        (x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
        (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
        if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
            # NOTE(review): ``LLTRect`` is kept as written; confirm it is
            # an intended class and not a typo for ``LTRect``.
            self.cur_item.add(LLTRect(gstate.linewidth, (x0, y0, x2, y2)))
            # Single pre-formatted string: prints the same space-separated
            # text under both the Python 2 statement and Python 3 function.
            print('%s %s %s %s %s %s' % (
                self.interpreter.scs, self.interpreter.ncs, gstate,
                stroke, fill, (x0, y0, x2, y2)))
            return
    # other shapes
    pts = []
    for p in path:
        for i in range(1, len(p), 2):
            pts.append(apply_matrix_pt(self.ctm, (p[i], p[i + 1])))
    self.cur_item.add(LTCurve(gstate.linewidth, pts))
    return
def newLTCharInit(self, matrix, font, fontsize, scaling, rise, text, textwidth, textdisp):
    """Initialise a character object and compute its bounding box.

    Stores the font, font size, text, matrix and font name on ``self``,
    derives the advance (``textwidth * fontsize * scaling``), builds the
    glyph rectangle in text space (vertical or horizontal layout depending
    on ``font.is_vertical()``), maps it through ``matrix`` and passes the
    normalised box to ``LTComponent.__init__``.  ``self.size`` is the box
    width for vertical fonts and the box height otherwise.
    """
    LTText.__init__(self)  # Patched in line
    self.font = font
    self.fontsize = fontsize
    self._text = text
    self.matrix = matrix
    self.fontname = font.fontname
    self.adv = textwidth * fontsize * scaling

    # compute the boundary rectangle.
    if font.is_vertical():
        # vertical
        glyph_width = font.get_width() * fontsize
        (disp_x, disp_y) = textdisp
        if disp_x is None:
            disp_x = glyph_width // 2
        else:
            disp_x = disp_x * fontsize * .001
        disp_y = (1000 - disp_y) * fontsize * .001
        origin_x = -disp_x
        origin_y = disp_y + rise
        lower_left = (origin_x, origin_y + self.adv)
        upper_right = (origin_x + glyph_width, origin_y)
    else:
        # horizontal
        glyph_height = font.get_height() * fontsize
        descent = font.get_descent() * fontsize
        base_y = descent + rise
        lower_left = (0, base_y)
        upper_right = (self.adv, base_y + glyph_height)

    (a, b, c, d, e, f) = self.matrix
    self.upright = (0 < a * d * scaling and b * c <= 0)

    (x0, y0) = apply_matrix_pt(self.matrix, lower_left)
    (x1, y1) = apply_matrix_pt(self.matrix, upper_right)
    # Normalise so (x0, y0) is the lower-left corner after transformation.
    if x0 > x1:
        x0, x1 = x1, x0
    if y0 > y1:
        y0, y1 = y1, y0
    LTComponent.__init__(self, (x0, y0, x1, y1))

    self.size = self.width if font.is_vertical() else self.height
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Add rulers and rectangles found in a painted path to the current item.

    * ``ml`` paths become an ``LTLine`` tagged ``'H'`` when horizontal or
      ``'V'`` when vertical after transformation; sloped lines are ignored.
    * ``mlllh`` paths become an ``LTRect`` when their four transformed
      corners form an axis-aligned rectangle.
    * All other paths are ignored.
    """
    shape = ''.join(x[0] for x in path)
    if shape == 'ml':
        # horizontal/vertical line
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        if y0 == y1:
            # horizontal ruler
            self.cur_item.add(LTLine(gstate.linewidth, 'H', (x0, y0, x1, y1)))
        elif x0 == x1:
            # vertical ruler
            self.cur_item.add(LTLine(gstate.linewidth, 'V', (x0, y0, x1, y1)))
    elif shape == 'mlllh':
        # rectangle
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (_, x2, y2) = path[2]
        (_, x3, y3) = path[3]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        (x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
        # Transform the fourth corner with its own y; using the already
        # transformed y2 here would corrupt the rectangle test below.
        (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
        if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
            self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
    return
def draw_cid(self, ts, cid, force_space=False):
    """Record one character code in the current text block and advance ``ts.Tm``.

    The text rendering matrix is built from the text state; if its second
    or third component is non-zero the character is skipped entirely.  The
    character (or a space when ``force_space`` is set or decoding fails) is
    appended to ``self.current_block`` when the font and size match, and
    otherwise starts a new block after the old one is pushed to
    ``self.blocks``.  Unless ``force_space`` is set, the text matrix is
    then translated by the glyph advance.
    """
    verbose("drawing cid: ", cid)
    Trm = utils.mult_matrix(
        (ts.Tfs * ts.Th, 0, 0, ts.Tfs, 0, ts.Trise), ts.Tm)
    if Trm[1] != 0 or Trm[2] != 0:
        return
    verbose('Trm', Trm)

    # Word spacing is read only for the space code or a forced space.
    Tw = ts.Tw if (cid == 32 or force_space) else 0

    # NOTE(review): the broad inner handler catches errors from
    # ``to_unichr`` first, so the outer ``PDFUnicodeNotDefined`` handler
    # looks effectively unreachable - confirm which behaviour is intended.
    try:
        if not force_space:
            try:
                unichar = ts.Tf.to_unichr(cid)
            except Exception as e:
                verbose(f"Failed to process {cid = }: {e}")
                unichar = ' '
        else:
            unichar = ' '
    except PDFUnicodeNotDefined:
        if not MISSING_CHAR:
            raise
        unichar = MISSING_CHAR

    (gx, gy) = utils.apply_matrix_pt(Trm, (0, 0))
    verbose("drawing unichar: '", unichar, "' @", gx, ",", gy)

    tfs = Trm[0]
    block = self.current_block
    if block is not None and block[0] == ts.Tf and block[1] == tfs:
        # Same font and size: extend the running block.
        block[4].append(unichar)
    else:
        # Font or size changed (or no block yet): flush and start afresh.
        if block is not None:
            self.blocks.append(block)
        self.current_block = (ts.Tf, tfs, gx, gy, [unichar])
    verbose('current block: ', self.current_block)
    verbose('blocks: ', self.blocks)

    if not force_space:
        w = ts.Tf.char_width(cid)
        if ts.Tf.is_vertical():
            tx = 0
            ty = self.new_ty(w, 0, ts.Tfs, ts.Tc, Tw)
        else:
            tx = self.new_tx(w, 0, ts.Tfs, ts.Tc, Tw, ts.Th)
            ty = 0
        ts.Tm = utils.translate_matrix(ts.Tm, (tx, ty))
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Turn a painted path into an ``LTLine``, ``LTRect`` or ``LTCurve``.

    * ``ml`` paths that are horizontal or vertical after transformation
      become an ``LTLine``.
    * ``mlllh`` paths whose transformed corners form an axis-aligned
      rectangle become an ``LTRect``; a ``Rectangle`` with the box origin,
      width and height is also appended to ``self.rectangles``.
    * Anything else (including sloped lines and skewed quadrilaterals)
      becomes an ``LTCurve`` over all transformed points.

    Stroke, fill, even-odd flag and the graphic state's colours are
    forwarded to every layout object.
    """
    def to_device(point):
        return utils.apply_matrix_pt(self.ctm, point)

    shape = ''.join(x[0] for x in path)

    if shape == 'ml':
        # horizontal/vertical line
        (_, sx, sy) = path[0]
        (_, ex, ey) = path[1]
        (sx, sy) = to_device((sx, sy))
        (ex, ey) = to_device((ex, ey))
        if sx == ex or sy == ey:
            self.cur_item.add(
                LTLine(gstate.linewidth, (sx, sy), (ex, ey),
                       stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
            return

    if shape == 'mlllh':
        # rectangle
        (_, ax, ay) = path[0]
        (_, bx, by) = path[1]
        (_, cx, cy) = path[2]
        (_, dx, dy) = path[3]
        (ax, ay) = to_device((ax, ay))
        (bx, by) = to_device((bx, by))
        (cx, cy) = to_device((cx, cy))
        (dx, dy) = to_device((dx, dy))
        clockwise = ax == bx and by == cy and cx == dx and dy == ay
        counter = ay == by and bx == cx and cy == dy and dx == ax
        if clockwise or counter:
            xs = [ax, bx, cx, dx]
            ys = [ay, by, cy, dy]
            left, right = min(xs), max(xs)
            low, high = min(ys), max(ys)
            self.rectangles.append(
                Rectangle(left, low, right - left, high - low))
            self.cur_item.add(
                LTRect(gstate.linewidth, (ax, ay, cx, cy),
                       stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
            return

    # other shapes
    pts = [
        to_device((p[i], p[i + 1]))
        for p in path
        for i in range(1, len(p), 2)
    ]
    self.cur_item.add(
        LTCurve(gstate.linewidth, pts,
                stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
def push_textbox(self, p, h, font, matrix):
    """Flush the pending text, if any, into ``self.text_boxes``.

    Does nothing when ``self.temp_text`` is ``None``.  Otherwise a
    ``TextBox`` is appended at ``self.init_p`` mapped through ``matrix``,
    with a width of the x distance from ``self.init_p`` to ``p`` scaled by
    ``matrix[0]`` and a height of ``h``; the pending text is then cleared.
    """
    if self.temp_text is None:
        return
    origin_x, origin_y = utils.apply_matrix_pt(matrix, self.init_p)
    box_width = (p[0] - self.init_p[0]) * matrix[0]
    box = TextBox(self.temp_text, origin_x, origin_y, box_width, h, font)
    self.text_boxes.append(box)
    self.temp_text = None
def paint_single_path(self, gstate, stroke, fill, evenodd, path):
    """Convert a single path draw command into line and curve objects.

    * ``mh`` paths are ignored.
    * ``ml`` paths that are horizontal or vertical after transformation
      become an ``LTLine``.
    * ``mlllh`` paths forming an axis-aligned rectangle become an
      ``LTRect``.
    * Any other path whose segments all run in the same horizontal or
      vertical direction is split into one ``LTLine`` per segment.
    * Everything else is added as a single ``LTCurve`` polyline.
    """
    shape = ''.join(x[0] for x in path)
    if shape == 'mh':
        return
    if shape == 'ml':
        # horizontal/vertical line
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        if x0 == x1 or y0 == y1:
            self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
            return
    if shape == 'mlllh':
        # rectangle
        (_, x0, y0) = path[0]
        (_, x1, y1) = path[1]
        (_, x2, y2) = path[2]
        (_, x3, y3) = path[3]
        (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
        (x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
        (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
        if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
            self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
            return

    # other shapes
    pts = [
        apply_matrix_pt(self.ctm, (p[i], p[i + 1]))
        for p in path
        for i in range(1, len(p), 2)
    ]
    segments = list(zip(pts, pts[1:]))

    # Determine whether all segments share one direction ('v' or 'h').
    direction = None
    for start, end in segments:
        if start[0] == end[0]:
            cur_dir = 'v'
        elif start[1] == end[1]:
            cur_dir = 'h'
        else:
            # Sloped segment: not a straight ruler.
            direction = None
            break
        if direction is None:
            direction = cur_dir
        elif direction != cur_dir:
            # Direction changes part-way through the path.
            direction = None
            break

    # If the entire segment has a consistent vertical/horizontal
    # direction, we split all draw commands into segments instead
    # of dumping the entire path as a curve
    if direction is not None:
        for start, end in segments:
            self.cur_item.add(LTLine(gstate.linewidth, start, end))
    else:
        # Add the curve as an arbitrary polyline (Bezier curve info is
        # lost here)
        self.cur_item.add(LTCurve(gstate.linewidth, pts))
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Add a dict describing the painted path to the current item.

    * ``ml`` paths that are horizontal or vertical after transformation
      yield a ``"line"`` item.
    * ``mlllh`` paths forming an axis-aligned rectangle yield a ``"rect"``
      item spanning the min/max of the four corners.
    * Anything else yields a ``"curve"`` item carrying the transformed
      points and the raw path.

    When ``self.interpreter.parse_styles`` is set, stroke/fill/dash
    details are merged into the item before it is added.
    """
    def emit(item):
        # Merge in style details on request, then hand the item over.
        if self.interpreter.parse_styles:
            item.update({
                "stroke_width": 0 if stroke is False else gstate.linewidth,
                "stroke": None if stroke is False else (gstate.scolor_hex or "#000000"),
                "fill": None if fill is False else (gstate.ncolor_hex or "#000000"),
                "dash": gstate.dash,
                "evenodd": evenodd,
            })
        self.cur_item.add(item)

    def to_device(point):
        return apply_matrix_pt(self.ctm, point)

    shape = ''.join(x[0] for x in path)

    if shape == 'ml':
        # horizontal/vertical line
        (_, sx, sy) = path[0]
        (_, ex, ey) = path[1]
        start = to_device((sx, sy))
        end = to_device((ex, ey))
        if start[0] == end[0] or start[1] == end[1]:
            left, bottom, right, top = get_bound([start, end])
            emit(add_wh({
                "x0": left,
                "y0": bottom,
                "x1": right,
                "y1": top,
                "object_type": "line",
            }))
            return

    if shape == 'mlllh':
        # rectangle
        (_, ax, ay) = path[0]
        (_, bx, by) = path[1]
        (_, cx, cy) = path[2]
        (_, dx, dy) = path[3]
        (ax, ay) = to_device((ax, ay))
        (bx, by) = to_device((bx, by))
        (cx, cy) = to_device((cx, cy))
        (dx, dy) = to_device((dx, dy))
        first_vertical = ax == bx and by == cy and cx == dx and dy == ay
        first_horizontal = ay == by and bx == cx and cy == dy and dx == ax
        if first_vertical or first_horizontal:
            emit(add_wh({
                "x0": min(ax, bx, cx, dx),
                "y0": min(ay, by, cy, dy),
                "x1": max(ax, bx, cx, dx),
                "y1": max(ay, by, cy, dy),
                "object_type": "rect",
            }))
            return

    # other shapes
    pts = [
        to_device((p[i], p[i + 1]))
        for p in path
        for i in range(1, len(p), 2)
    ]
    left, bottom, right, top = get_bound(pts)
    emit(add_wh({
        "x0": left,
        "y0": bottom,
        "x1": right,
        "y1": top,
        "points": pts,
        "path": path,
        "object_type": "curve"
    }))
def render_char(self, matrix, font, fontsize, scaling, rise, cid):
    """Add a ``"char"`` item for ``cid`` to the current item.

    The character code is decoded through ``font`` (falling back to
    ``self.handle_undefined_char`` on ``PDFUnicodeNotDefined``), its glyph
    box is built in text space, mapped through ``matrix`` and normalised,
    and a dict with the box, font name, size and text is added.  Fill and
    stroke colours are attached when ``self.interpreter.parse_styles`` is
    set.

    Returns:
        The advance of the glyph: ``textwidth * fontsize * scaling``.
    """
    try:
        text = font.to_unichr(cid)
        assert isinstance(text, six.text_type), text
    except PDFUnicodeNotDefined:
        text = self.handle_undefined_char(font, cid)
    textwidth = font.char_width(cid)
    textdisp = font.char_disp(cid)
    adv = textwidth * fontsize * scaling

    # compute the boundary rectangle.
    if font.is_vertical():
        # vertical
        width = font.get_width() * fontsize
        (vx, vy) = textdisp
        if vx is None:
            vx = width * 0.5
        else:
            vx = vx * fontsize * .001
        vy = (1000 - vy) * fontsize * .001
        tx = -vx
        ty = vy + rise
        # Use the local advance: it is never stored on ``self`` in this
        # function, so ``self.adv`` would not refer to this glyph's value.
        bll = (tx, ty + adv)
        bur = (tx + width, ty)
    else:
        # horizontal
        height = font.get_height() * fontsize
        descent = font.get_descent() * fontsize
        ty = descent + rise
        bll = (0, ty)
        bur = (adv, ty + height)

    (a, b, c, d, e, f) = matrix
    upright = (0 < a * d * scaling and b * c <= 0)
    (x0, y0) = apply_matrix_pt(matrix, bll)
    (x1, y1) = apply_matrix_pt(matrix, bur)
    # Normalise so (x0, y0) is the lower-left corner after transformation.
    if x1 < x0:
        (x0, x1) = (x1, x0)
    if y1 < y0:
        (y0, y1) = (y1, y0)

    if font.is_vertical():
        size = x1 - x0
    else:
        size = y1 - y0

    # cf. "Table 106 - Text rendering modes" of PDF 1.7 spec
    gs = self.interpreter.graphicstate
    r = self.interpreter.textstate.render
    fill = gs.ncolor_hex if r in (0, 2, 4, 6) else None
    stroke = gs.scolor_hex if r in (1, 2, 5, 6) else None

    item = add_wh({
        "x0": x0,
        "y0": y0,
        "x1": x1,
        "y1": y1,
        "fontname": font.fontname,
        "size": size,
        "text": text,
        "object_type": "char",
    })
    if self.interpreter.parse_styles:
        item.update({
            "fill": fill,
            "stroke": stroke,
        })
    self.cur_item.add(item)
    return adv