def test_font1():
    """Check text length computations and font round-tripping for Helvetica.

    The sum of the per-character lengths must match the overall text length,
    each per-character length must equal the scaled glyph advance, and a font
    rebuilt from the buffer must report the same valid code points.
    """
    size = 20
    sample = "PyMuPDF"
    helv = fitz.Font("helv")
    assert helv.name == "Helvetica"

    total = helv.text_length(sample, fontsize=size)
    widths = helv.char_lengths(sample, fontsize=size)
    assert len(sample) == len(widths)
    assert abs(sum(widths) - total) < fitz.EPSILON

    # The lengths were just asserted equal, so pairing them up is lossless.
    for width, char in zip(widths, sample):
        assert width == helv.glyph_advance(ord(char)) * size

    clone = fitz.Font(fontbuffer=helv.buffer)
    assert clone.valid_codepoints() == helv.valid_codepoints()
def generate_content_page(header_to_pagenumber, headers_and_subheaders, page_height, page_width):
    """
    Build a new document holding a Table of Contents.

    Every key of ``headers_and_subheaders`` is written as a header line and
    every item of its value as an indented sub-header line, followed by a dot
    connector and the page number looked up in ``header_to_pagenumber``.
    Returns the newly created document.
    """
    left_margin = 40
    top_margin = 60
    line_height = 15
    indent = 30
    number_offset = 500  # x-distance from the sub-header start to its page number
    lines_per_page = 45

    def y_of(line_index):
        # Vertical coordinate of the given line slot.
        return top_margin + line_index * line_height

    doc = fitz.open()
    page = doc.newPage(height=page_height, width=page_width)

    # Centered title: a full-width box spanning line slots 1 through 5.
    line_no = 1
    box_top_left = fitz.Point(0, y_of(line_no))
    line_no += 4
    box_bottom_right = fitz.Point(page_width, y_of(line_no))
    page.insertTextbox(
        fitz.Rect(box_top_left, box_bottom_right),
        "Table of Contents",
        fontsize=32,
        align=fitz.TEXT_ALIGN_CENTER,
    )
    line_no += 2

    # One TextWriter per page; it is flushed whenever a page is finished.
    writer = fitz.TextWriter(page.rect)

    for header, subheaders in headers_and_subheaders.items():
        writer.append(
            fitz.Point(left_margin, y_of(line_no)),
            header,
            fontsize=24,
            font=fitz.Font("Arial"),
        )
        line_no += 2

        for subheader in subheaders:
            row_y = y_of(line_no)
            writer.append(
                fitz.Point(indent + left_margin, row_y), subheader, fontsize=16
            )

            # Dots run from the end of the sub-header text to the page number.
            number_pos = fitz.Point(indent + left_margin + number_offset, row_y)
            add_dot_connector(writer, writer.lastPoint, number_pos)

            writer.append(
                number_pos, str(header_to_pagenumber[subheader]), fontsize=16
            )
            line_no += 1

            # Start a fresh page once the current one is nearly full.
            if line_no >= lines_per_page:
                writer.writeText(page)
                page = doc.newPage(height=page_height, width=page_width)
                writer = fitz.TextWriter(page.rect)
                line_no = 0

        line_no += 2  # gap before the next header

    writer.writeText(page)
    return doc
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the JSON font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames', which are
    defined outside this function.

    Args:
        doc: not used by this function.
        fname: path of the JSON file. It must contain a sequence of objects
            with the keys "oldfont" (an iterable of old font names) and
            "newfont" (a font name, a font file path, or "keep").

    Exits the interpreter via ``sys.exit`` if a replacement font cannot be
    created.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(fname) as fd:
        fontdicts = json.load(fd)

    for fontdict in fontdicts:
        oldfont = fontdict["oldfont"]
        newfont = fontdict["newfont"].strip()
        if newfont == "keep":  # ignore if not replaced
            continue

        # Anything that looks like a path is loaded as a font file,
        # everything else is passed on as a font name.
        is_fontfile = "." in newfont or "/" in newfont or "\\" in newfont
        try:
            if is_fontfile:
                font = fitz.Font(fontfile=newfont)
            else:
                font = fitz.Font(newfont)
        except Exception:
            # Only genuine errors are reported here; KeyboardInterrupt and
            # SystemExit keep propagating unchanged.
            sys.exit("Could not create font '%s'." % newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        for item in oldfont:
            new_fontnames[item] = new_fontname
        del font
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames', which are
    defined outside this function.

    Each non-empty line that does not start with "#" must have the form
    ``oldfont;newfont[;...]``. Lines whose new font is "keep" are ignored.

    Args:
        doc: not used by this function.
        fname: path of the replacement file.

    Raises:
        ValueError: if a data line contains no ";" separator.
    """
    # The context manager closes the file even if reading fails.
    with open(fname, "rb") as fd:
        raw_lines = fd.read().splitlines()

    for raw_line in raw_lines:
        line = raw_line.decode().strip()
        # Skip empty / whitespace-only lines and comments.
        if not line or line.startswith("#"):
            continue

        oldfont, newfont = (field.strip() for field in line.split(";")[:2])
        if newfont == "keep":  # ignore if not replaced
            continue

        # Anything that looks like a path is loaded as a font file,
        # everything else is passed on as a font name.
        if "." in newfont or "/" in newfont or "\\" in newfont:
            font = fitz.Font(fontfile=newfont)
        else:
            font = fitz.Font(newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        new_fontnames[oldfont] = new_fontname
        del font
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames', which are
    defined outside this function.

    Each data line must have the form ``xref;oldfont;newfont[;...]``. Lines
    starting with "#" are skipped, lines whose new font is "keep" are ignored,
    and reading stops at the first empty line or at end of file.

    Args:
        doc: not used by this function.
        fname: path of the replacement file.

    Raises:
        ValueError: if a data line has fewer than three ";"-separated fields.
    """
    # The context manager guarantees the file is closed on every exit path.
    with open(fname) as repl_file:
        for line in repl_file:
            if line == "\n":  # an empty line terminates the table
                break
            line = line.strip()
            if line.startswith("#"):
                continue

            # The first field (the xref) is not needed here.
            _, oldfont, newfont = line.split(";")[:3]
            if newfont == "keep":
                continue

            # Anything that looks like a path is loaded as a font file,
            # everything else is passed on as a font name.
            if "." in newfont or "/" in newfont or "\\" in newfont:
                font = fitz.Font(fontfile=newfont)
            else:
                font = fitz.Font(newfont)

            new_fontname = font.name
            font_subsets[new_fontname] = set()
            font_buffers[new_fontname] = font.buffer
            new_fontnames[oldfont] = new_fontname
            del font
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the JSON font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames', which are
    defined outside this function.

    Args:
        doc: not used by this function.
        fname: path of the JSON file. It must contain a sequence of objects
            with the string-valued keys "oldfont" and "newfont"; a "newfont"
            of "keep" means the old font is not replaced.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(fname) as fd:
        fontdicts = json.load(fd)

    for fontdict in fontdicts:
        oldfont = fontdict["oldfont"].strip()
        newfont = fontdict["newfont"].strip()
        if newfont == "keep":  # ignore if not replaced
            continue

        # Anything that looks like a path is loaded as a font file,
        # everything else is passed on as a font name.
        if "." in newfont or "/" in newfont or "\\" in newfont:
            font = fitz.Font(fontfile=newfont)
        else:
            font = fitz.Font(newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        new_fontnames[oldfont] = new_fontname
        del font
def _change_font_and_update_bbox(self, font_name: str):
    '''Switch to a new font and adapt font size and span/char bboxes to it.

    Generally used for spans with unnamed fonts, see this
    `issue <https://github.com/pymupdf/PyMuPDF/issues/642>`_.

    In the corner case of a PDF containing unnamed, non-embedded fonts, the
    span bbox extracted by ``PyMuPDF`` is not correct. ``PyMuPDF`` can replace
    such fonts with specified ones and extract a correct bbox from the updated
    PDF. Since the layout matters more here than the PDF itself, a default
    font is assigned to the span instead, and the updated bbox is estimated
    with ``fitz.TextWriter``.

    Args:
        font_name (str): Font name.
    '''
    self.font = font_name
    new_font = fitz.Font(font_name)

    # Shrink the font size if the text would be wider than the current bbox.
    text_width = new_font.text_length(self.text, fontsize=self.size)
    if text_width > self.bbox.width:
        self.size *= self.bbox.width / text_width

    # Estimate the rect the text occupies when written with a TextWriter,
    # starting at the origin (bottom-left point) of the first character.
    left, _, right, bottom = self.bbox
    writer = fitz.TextWriter((0, 0, right, bottom))
    text_rect, _ = writer.append(
        self.chars[0].origin,
        self.text,
        font=new_font,
        fontsize=self.size,
    )

    # Span bbox:
    # - horizontally, the original range is kept
    # - vertically, it is centered on the estimated rect, height = font size
    margin = (text_rect.height - self.size) / 2.0
    top = text_rect.y0 + margin
    bottom = text_rect.y1 - margin
    self.update_bbox((left, top, right, bottom))

    # Every contained char keeps its own x-range and takes the new y-range.
    for char in self.chars:
        char_left, _, char_right, _ = char.bbox
        char.update_bbox((char_left, top, char_right, bottom))
def test_textbox4():
    """Insert text via TextWriter, attached to an optional content group."""
    pdf = fitz.open()
    layer = pdf.add_ocg("ocg1")
    page = pdf.new_page()
    box = fitz.Rect(50, 50, 400, 600)

    writer = fitz.TextWriter(page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(
        box,
        text,
        align=fitz.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=fitz.Font("cour"),
        right_to_left=True,
    )
    morph = (box.tl, fitz.Matrix(1, 1))
    writer.write_text(page, oc=layer, morph=morph)

    # All text on the page must lie inside the box.
    everything = page.get_text()
    clipped = page.get_text(clip=box)
    assert everything == clipped
def test_textbox3():
    """Insert text via TextWriter using the "cjk" font, then scrub and subset."""
    pdf = fitz.open()
    page = pdf.new_page()
    cjk_font = fitz.Font("cjk")
    box = fitz.Rect(50, 50, 400, 400)

    writer = fitz.TextWriter(page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(
        box,
        text,
        align=fitz.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )
    morph = (box.tl, fitz.Matrix(1, 1))
    writer.write_text(page, morph=morph)

    # All text on the page must lie inside the box.
    everything = page.get_text()
    clipped = page.get_text(clip=box)
    assert everything == clipped

    pdf.scrub()
    pdf.subset_fonts()
def test_font2():
    """Font.text_length and fitz.get_text_length must agree for Helvetica."""
    sample = "PyMuPDF"
    via_font = fitz.Font("helv").text_length(sample)
    via_module = fitz.get_text_length(sample)
    assert via_font == via_module
# List the fonts referenced by the pages of the PDF named on the command line
# and write them to "<input file>-fontnames.csv", one line per font in the
# form "xref;fontname;keep; message".
infilename = sys.argv[1]
font_list = set()  # (xref, fontname, msg) tuples; the set drops duplicates
doc = fitz.open(infilename)
for i in range(len(doc)):
    for f in doc.getPageFontList(i, full=True):
        if f[-1] == 0:
            continue  # no support for text in XObjects
        xref = f[0]
        fontname = f[3]
        if f[1] == "n/a":
            msg = "not embedded"
        else:
            extr = doc.extractFont(xref)
            font = fitz.Font(fontbuffer=extr[-1])
            msg = make_msg(font)
        # Cut off everything up to and including the first "+"; names
        # without a "+" are kept whole because find() then yields -1.
        idx = fontname.find("+") + 1
        fontname = fontname[idx:]
        font_list.add((xref, fontname, msg))

font_list = sorted(font_list, key=lambda x: x[1])  # order by font name
outname = infilename + "-fontnames.csv"
msg1 = "keep"  # every font starts out marked as "keep"
# The context manager closes the output file even if a write fails.
with open(outname, "w") as out:
    for xref, fontname, msg in font_list:
        out.write("%i;%s;%s; %s\n" % (xref, fontname, msg1, msg))
textwriters = {} # contains one text writer per detected text color for block in blocks: for line in block["lines"]: wmode = line["wmode"] # writing mode (horizontal, vertical) wdir = list(line["dir"]) # writing direction markup_dir = 0 bidi_level = 0 # not used if wdir == [0, 1]: markup_dir = 4 for span in line["spans"]: new_fontname = get_new_fontname(span["font"]) if new_fontname is None: # do not replace this font continue font = fitz.Font(fontbuffer=font_buffers[new_fontname]) text = span["text"].replace(chr(0xFFFD), chr(0xB6)) # guard against non-utf8 characters textb = text.encode("utf8", errors="backslashreplace") text = textb.decode("utf8", errors="backslashreplace") span["text"] = text if wdir != [1, 0]: # special treatment for tilted text tilted_span(page, wdir, span, font) continue color = span["color"] # make or reuse textwriter for the color if color in textwriters.keys(): # already have a textwriter? tw = textwriters[color] # re-use it else: # make new tw = fitz.TextWriter(page.rect) # make text writer textwriters[color] = tw # store it for later use try:
def fitzfont(name):
    """Return a ``fitz.Font`` created from the result of ``myfont(name)``.

    PyMuPDF is imported inside the function, so it is only required when this
    function is actually called. The value returned by ``myfont(name)`` is
    handed to ``fitz.Font`` as its ``fontbuffer`` argument.

    Raises:
        ImportError: if the ``fitz`` module (PyMuPDF) cannot be imported; the
            original import error is attached as the cause.
    """
    try:
        import fitz
    except ImportError as exc:
        raise ImportError("Install PyMuPDF to use this method.") from exc
    return fitz.Font(fontbuffer=myfont(name))
print(fitz.__doc__) highlight = "this text is highlighted" underline = "this text is underlined" strikeout = "this text is striked out" squiggled = "this text is zigzag-underlined" red = (1, 0, 0) blue = (0, 0, 1) gold = (1, 1, 0) green = (0, 1, 0) displ = fitz.Rect(0, 50, 0, 50) r = fitz.Rect(72, 72, 220, 100) t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" font = fitz.Font("helv") # used by the TextWriter class doc = fitz.open() page = doc.newPage() page.setRotation(0) # following makes sure that TextWriter references the **unrotated** page rect # as everything else does ... page_rect = page.rect * page.derotationMatrix def print_descr(annot): """Print a short description to the right of the annot rect.""" rect = annot.rect page = annot.parent
import fitz, os thisdir = lambda f: os.path.join(os.path.dirname(__file__), f) thisfile = os.path.abspath(__file__) outfile = thisfile.replace(".py", ".pdf") font1 = fitz.Font("helv") font2 = fitz.Font("tiro") doc = fitz.open() page = doc.newPage() point = fitz.Point(50, 72) matrix = fitz.Matrix(-20) wrt1 = fitz.TextWriter(page.rect, color=(0, 0, 1)) wrt2 = fitz.TextWriter(page.rect, color=(1, 0, 0)) _, last = wrt1.append(point, "This text changes color,", font1, 11) _, last = wrt2.append(last, " font and fontsize", font2, 18) _, last = wrt1.append(last, " several", font1, 11) _, last = wrt2.append(last, " times!", font2, 24) # output both text writers on current page in arbitrary sequence wrt1.writeText(page, morph=(point, matrix)) # using the same morph parameter wrt2.writeText(page, morph=(point, matrix)) # also preserves the joint text. # make a new page page = doc.newPage() rect = wrt1.textRect | wrt2.textRect # join rect of blue and red text # make new rectangle from it, rotated by 90 degrees nrect = fitz.Rect( rect.tl, # same top-left, but width and height exchanged
import fitz outfile = os.path.abspath(__file__).replace(".py", ".pdf") doc = fitz.open() page = doc.newPage() page_rect = page.rect blue = (0, 0, 1) # color 1 red = (1, 0, 0) # This font will be used for Latin, Greek, Russian characters only. # CJK characters always are looked up in 'Doid Sans Fallback Regular'. font = fitz.Font(ordering=0) # results in fallback font for everything fsize = 11 # fontsize """ ------------------------------------------------------------------------------- Our text lines. We split them into words such that the first word of each line starts with a line break. Multiple spaces between words will be kept. Disclaimer: Non-English text pieces are arbitrary copies out of Wikipedia pages. I have no idea what they mean nor am I responsible for that content. ------------------------------------------------------------------------------- """ # Our text: a language mix. Font above (if different from fallback) will be # used for non-CJK characters. For CJK, the fallback is always used. text = """This is a text of mixed languages to demonstrate MuPDF's text output capabilities.
raise ValueError("Need PyMuPDF v.1.17.4 at least.") if len(sys.argv) != 2: startyear = fitz.getPDFnow()[2:6] # take current year else: startyear = sys.argv[1] if len(startyear) != 4 or not startyear.isnumeric(): raise ValueError("Start year must be 4 digits") suffix = "-%s.pdf" % startyear outfile = __file__.replace(".py", suffix) startyear = int(startyear) doc = fitz.open() # new empty PDF # font = fitz.Font("cour") # use the built-in font Courier font = fitz.Font("spacemo") # use Space Mono - a nicer mono-spaced font cal = calendar.LocaleTextCalendar(locale="de") # use your locale # cal = calendar.TextCalendar() # or stick with English page_rect = fitz.PaperRect("a4-l") # A4 landscape paper w = page_rect.width h = page_rect.height print_rect = page_rect + (36, 72, -36, -36) # fill this rectangle # one line in calendar output is at most 98 characters, so we calculate # the maximum possible fontsize cum grano salis as: char_width = font.glyph_advance(32) # character width of the font fontsize = print_rect.width / (char_width * 100)