def draw(self, txt, new_line=False):
    """Write the PDF operators that show ``txt`` in the current font.

    The font named by ``self.font_name`` is looked up with ``getFont`` and
    handled in one of three ways:

    * dynamic fonts (``font._dynamicFont``): ``txt`` is split with
      ``font.splitString`` into ``(subset, chunk)`` pairs and each chunk is
      shown after selecting the subset's internal font name;
    * multi-byte fonts (``font._multiByte``): the whole string is shown
      using ``font.formatForPdf``;
    * anything else: ``txt`` is split with ``pdfmetrics.unicode2T1`` across
      the font and its ``substitutionFonts``.

    Every font name used is recorded in ``doc.font_references`` and in
    ``self.page.font`` before its ``Tf``/``TL`` operators are written.

    :param txt: the text to show.
    :param new_line: when true, a ``T*`` operator is written after the text.
    """
    doc = self.page.document
    font = getFont(self.font_name)

    def internal_name(fnt):
        # Return the document-wide '/F<n>' name for fnt, allocating one
        # (and queueing the font in doc.delayedFonts) on first use.
        name = doc.fontMapping.get(fnt.fontName)
        if name is None:
            name = doc.fontMapping[fnt.fontName] = '/F{}'.format(
                len(doc.fontMapping) + 1)
            doc.delayedFonts.append(fnt)
        return name

    def select_font(name):
        # Register name with the document and the page, then write the
        # font-selection (Tf) and leading (TL) operators.
        if name not in doc.font_references:
            doc.font_references[name] = doc.ref()
        if name not in self.page.font:
            self.page.font[name] = doc.font_references[name]
        self.write("{} {} Tf {} TL\n".format(
            name, self.font_size, self.font_leading))

    if font._dynamicFont:
        for subset, chunk in font.splitString(txt, doc):
            # NOTE(review): self.font_subset is only read here, never
            # updated; confirm where it is maintained.
            if self.font_subset != subset:
                select_font(font.getSubsetInternalName(subset, doc))
            self.write("({}) Tj ".format(escapePDF(chunk)))
    elif font._multiByte:
        # The mapped name already starts with '/', so it is written as-is
        # (the previous "/{}" template produced '//F<n>').
        select_font(internal_name(font))
        # Use a real replacement field: "(%s)".format(...) wrote a literal
        # "%s" and discarded the text.
        self.write("({}) Tj ".format(font.formatForPdf(txt)))
    else:
        current = None
        fonts = [font] + font.substitutionFonts
        for fnt, chunk in pdfmetrics.unicode2T1(txt, fonts):
            if fnt != current:
                select_font(internal_name(fnt))
                current = fnt
            self.write("({}) Tj ".format(escapePDF(chunk)))

    if new_line:
        self.write('T*\n')
def formatForPdf(self, text):
    """Return ``text`` encoded as UTF-16 big-endian and PDF-escaped.

    Byte input (as judged by ``isBytes``) is first decoded as UTF-8.  The
    text is then encoded with the explicit big-endian UTF-16 codec, which
    writes no byte-order mark, and the resulting bytes are passed through
    ``escapePDF``.
    """
    import codecs

    if isBytes(text):
        text = text.decode('utf8')
    # The encoder returns (encoded_bytes, characters_consumed).
    utf16_bytes, _consumed = codecs.utf_16_be_encode(text)
    return escapePDF(utf16_bytes)
def formatForPdf(self, text):
    """Return ``text`` passed through ``escapePDF``, with no re-encoding."""
    return escapePDF(text)
from reportlab.pdfbase import pdfutils from reportlab.platypus.paragraph import Paragraph from reportlab.lib.styles import ParagraphStyle from reportlab.lib.rl_accel import escapePDF from reportlab.lib.utils import isUnicode from reportlab.graphics.shapes import Drawing, String, Ellipse import re import codecs textPat = re.compile(r'\([^(]*\)') #test sentences testCp1252 = b'copyright \xa9 trademark \x99 registered \xae ReportLab! Ol\xe9!' testUni = testCp1252.decode('cp1252') testUTF8 = testUni.encode('utf-8') # expected result is octal-escaped text in the PDF expectedCp1252 = escapePDF(testCp1252) def extractText(pdfOps): """Utility to rip out the PDF text within a block of PDF operators. PDF will show a string draw as something like "(Hello World) Tj" i.e. text is in curved brackets. Crude and dirty, probably fails on escaped brackets. """ found = textPat.findall(pdfOps) #chop off '(' and ')' return [x[1:-1] for x in found] def subsetToUnicode(ttf, subsetCodeStr):
def char2glyph(c, last=False):
    """Return the ``(<char>) 0 get /<glyph> put`` entry for character code ``c``.

    ``c`` is converted with ``chr`` and escaped with ``escapePDF``; the
    glyph name comes from ``glyphName(c)``.  The entry is prefixed with
    ``'dup '`` unless ``last`` is true.
    """
    prefix = '' if last else 'dup '
    return '%s(%s) 0 get /%s put' % (prefix, escapePDF(chr(c)), glyphName(c))
from reportlab.pdfbase import pdfutils from reportlab.platypus.paragraph import Paragraph from reportlab.lib.styles import ParagraphStyle from reportlab.lib.rl_accel import escapePDF from reportlab.lib.utils import isUnicode from reportlab.graphics.shapes import Drawing, String, Ellipse import re import codecs textPat = re.compile(r'\([^(]*\)') #test sentences testCp1252 = b'copyright \xa9 trademark \x99 registered \xae ReportLab! Ol\xe9!' testUni = testCp1252.decode('cp1252') testUTF8 = testUni.encode('utf-8') # expected result is octal-escaped text in the PDF expectedCp1252 = escapePDF(testCp1252) def extractText(pdfOps): """Utility to rip out the PDF text within a block of PDF operators. PDF will show a string draw as something like "(Hello World) Tj" i.e. text is in curved brackets. Crude and dirty, probably fails on escaped brackets. """ found = textPat.findall(pdfOps) #chop off '(' and ')' return [x[1:-1] for x in found] def subsetToUnicode(ttf, subsetCodeStr): """Return unicode string represented by given subsetCode string as found when TrueType font rendered to PDF, ttf must be the font