def wordSplit(word, maxWidths, fontName, fontSize, encoding='utf8'):
    """Break a word that contains no spaces into pieces fitting maxWidths.

    Intended as a wrapper for language- and user-specific splitting
    algorithms; it is only called after ordinary space-based line breaking
    (which covers western languages) has dealt with everything else.
    Courier characters are 6 points wide at size 10, hence the doctests.

    >>> wordSplit('HelloWorld', 30, 'Courier', 10)
    [[0.0, 'Hello'], [0.0, 'World']]
    >>> wordSplit('HelloWorld', 31, 'Courier', 10)
    [[1.0, 'Hello'], [1.0, 'World']]
    """
    wasBytes = not isUnicode(word)
    uword = word.decode(encoding) if wasBytes else word
    charWidths = getCharWidths(uword, fontName, fontSize)
    lines = dumbSplit(uword, charWidths, maxWidths)
    if wasBytes:
        # the caller handed us bytes, so give bytes back
        lines = [[extraSpace, text.encode(encoding)] for extraSpace, text in lines]
    return lines
def reset(self):
    """restore the cipher to it's start state"""
    # Key-scheduling setup: k repeats the key bytes (mod 256) out to 256
    # entries, sbox starts as the identity permutation 0..255.
    key = self._key
    if isUnicode(key):
        key = key.encode("utf8")
    sbox = list(range(256))
    lk = len(key)
    if isPy3:
        k = [key[i % lk] % 256 for i in range(256)]
    else:
        k = [ord(key[i % lk]) % 256 for i in range(256)]
    # RC4 key-scheduling algorithm: swap sbox entries under control of k.
    j = 0
    for i in range(256):
        j = (j + sbox[i] + k[i]) % 256
        sbox[i], sbox[j] = sbox[j], sbox[i]
    self._sbox, self._i, self._j = sbox, 0, 0
def testUtf8FileName(self):
    # exercise saving a canvas whose filename contains a non-ASCII char
    name = outputfile('test_pdfbase_utf8_filename')
    if not isUnicode(name):
        name = name.decode('utf8')
    name += u'_portr\xe4t.pdf'
    canv = Canvas(name)
    canv.drawString(100, 700, u'Filename=' + name)
    canv.save()
def process(datafile, notes=0, handout=0, printout=0, cols=0, verbose=0, outDir=None, datafilename=None, fx=1):
    "Process one PythonPoint source file."
    if not hasattr(datafile, "read"):
        # a path string was passed: remember it and open the file ourselves
        if not datafilename: datafilename = datafile
        datafile = open(datafile,'rb')
    else:
        # a file-like object was passed
        if not datafilename: datafilename = "PseudoFile"
    rawdata = datafile.read()
    if not isUnicode(rawdata):
        # sniff the encoding declared in the <?xml ...?> prolog (if any)
        # and try it first, then fall back to utf8 / latin-1
        # NOTE(review): _re_match with a str pattern on bytes data would
        # fail on Python 3 -- presumably _re_match handles this; verify.
        encs = ['utf8','iso-8859-1']
        m=_re_match(r'^\s*(<\?xml[^>]*\?>)',rawdata)
        if m:
            m1=_re_match(r"""^.*\sencoding\s*=\s*("[^"]*"|'[^']*')""",m.group(1))
            if m1:
                enc = m1.group(1)[1:-1]
                if enc:
                    # move the declared encoding to the front of the list
                    if enc in encs: encs.remove(enc)
                    encs.insert(0,enc)
        for enc in encs:
            try:
                udata = rawdata.decode(enc)
                break
            except:
                pass
        else:
            raise ValueError('cannot decode input data')
    else:
        udata = rawdata
    rawdata = udata
    #if pyRXP present, use it to check and get line numbers for errors...
    validate(rawdata)
    return _process(rawdata, datafilename, notes, handout, printout, cols, verbose, outDir, fx)
def pygments2xpre(s, language="python"):
    "Return markup suitable for XPreformatted"
    try:
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
    except ImportError:
        # pygments not installed: hand the text back unchanged
        return s
    from pygments.lexers import get_lexer_by_name

    rconv = lambda x: x
    if isPy3:
        out = getStringIO()
    else:
        # on py2 pygments wants bytes; convert back to unicode afterwards
        if isUnicode(s):
            s = asBytes(s)
        rconv = asUnicode
        out = getBytesIO()
    fmt = HtmlFormatter()
    highlight(s, get_lexer_by_name(language), fmt, out)
    # keep only the colour part of each style rule
    styles = []
    for cls, (style, ttype, level) in fmt.class2style.items():
        if cls and style and style.startswith('color:'):
            styles.append((cls, style.split(';')[0].split(':')[1].strip()))
    return rconv(_2xpre(out.getvalue(), styles))
def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
    "Calculate text width"
    if not isUnicode(text):
        text = text.decode(encoding or 'utf-8')
    widthOf = self.face.charWidths.get
    missing = self.face.defaultWidth
    total = 0
    for ch in text:
        total += widthOf(ord(ch), missing)
    return 0.001 * size * total
def instanceStringWidthT1(self, text, size, encoding='utf8'):
    """This is the "purist" approach to width"""
    if not isUnicode(text):
        text = text.decode(encoding)
    total = 0
    # accumulate widths per (font, run) pair from the T1 substitution split
    for f, run in unicode2T1(text, [self] + self.substitutionFonts):
        widths = f.widths
        for code in map(ord, run):
            total += widths[code]
    return total * 0.001 * size
def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
    "Calculate text width"
    if not isUnicode(text):
        text = text.decode(encoding or 'utf-8')
    face = self.face
    getWidth, fallback = face.charWidths.get, face.defaultWidth
    return 0.001 * size * sum(getWidth(ord(c), fallback) for c in text)
def testUtf8FileName(self):
    # round-trip a PDF whose filename carries a non-ASCII character
    base = outputfile('test_pdfbase_utf8_filename')
    if not isUnicode(base):
        base = base.decode('utf8')
    base += u'_portr\xe4t.pdf'
    doc = Canvas(base)
    doc.drawString(100, 700, u'Filename=' + base)
    doc.save()
def unicode2T1(utext,fonts):
    '''return a list of (font,string) pairs representing the unicode text'''
    R = []
    font, fonts = fonts[0], fonts[1:]
    enc = font.encName
    if 'UCS-2' in enc:
        enc = 'UTF16'
    while utext:
        try:
            if isUnicode(utext):
                s = utext.encode(enc)
            else:
                s = utext
            R.append((font,s))
            break
        except UnicodeEncodeError as e:
            # e.args[2:4] delimit the unencodable slice [i0:il);
            # everything before i0 encoded cleanly in this font
            i0, il = e.args[2:4]
            if i0:
                R.append((font,utext[:i0].encode(enc)))
            if fonts:
                # try the remaining substitution fonts on the bad slice
                R.extend(unicode2T1(utext[i0:il],fonts))
            else:
                # no fallback left: emit notdef glyphs for the bad slice
                R.append((font._notdefFont,font._notdefChar*(il-i0)))
            utext = utext[il:]
    return R
def _issueT1String(self,fontObj,x,y,s):
    # Emit PostScript 'show' operations for s starting at (x, y),
    # switching among substitution fonts as needed for glyph coverage.
    fc = fontObj
    code_append = self.code_append
    fontSize = self._fontSize
    fontsUsed = self._fontsUsed
    escape = self._escape
    if not isUnicode(s):
        try:
            s = s.decode('utf8')
        except UnicodeDecodeError as e:
            # re-raise with a snippet of context around the bad bytes
            i,j = e.args[2:4]
            raise UnicodeDecodeError(*(e.args[:4]+('%s\n%s-->%s<--%s' % (e.args[4],s[i-10:i],s[i:j],s[j:j+10]),)))
    for f, t in unicode2T1(s,[fontObj]+fontObj.substitutionFonts):
        if f!=fc:
            # font change: issue findfont/scalefont/setfont once
            psName = asNative(f.face.name)
            code_append('(%s) findfont %s scalefont setfont' % (psName,fp_str(fontSize)))
            if psName not in fontsUsed:
                fontsUsed.append(psName)
            fc = f
        code_append('%s m (%s) show ' % (fp_str(x,y),escape(t)))
        # advance x by the width of the run just drawn
        x += f.stringWidth(t.decode(f.encName),fontSize)
    if fontObj!=fc:
        # restore the originally requested font
        self._font = None
        self.setFont(fontObj.face.name,fontSize)
def _issueT1String(self, fontObj, x, y, s):
    # Render s as one or more PostScript 'show' commands, changing font
    # whenever the T1 substitution machinery picks a different face.
    fc = fontObj
    code_append = self.code_append
    fontSize = self._fontSize
    fontsUsed = self._fontsUsed
    escape = self._escape
    if not isUnicode(s):
        try:
            s = s.decode('utf8')
        except UnicodeDecodeError as e:
            # augment the error with surrounding context for debugging
            i, j = e.args[2:4]
            raise UnicodeDecodeError(
                *(e.args[:4] +
                  ('%s\n%s-->%s<--%s' %
                   (e.args[4], s[i - 10:i], s[i:j], s[j:j + 10]), )))
    for f, t in unicode2T1(s, [fontObj] + fontObj.substitutionFonts):
        if f != fc:
            # emit a font switch and record the face for the %%DocumentFonts
            psName = asNative(f.face.name)
            code_append('(%s) findfont %s scalefont setfont' %
                        (psName, fp_str(fontSize)))
            if psName not in fontsUsed:
                fontsUsed.append(psName)
            fc = f
        code_append('%s m (%s) show ' % (fp_str(x, y), escape(t)))
        # move the pen right by the width of this run
        x += f.stringWidth(t.decode(f.encName), fontSize)
    if fontObj != fc:
        # put the requested font back for subsequent output
        self._font = None
        self.setFont(fontObj.face.name, fontSize)
def unicode2T1(utext, fonts):
    '''return a list of (font,string) pairs representing the unicode text'''
    result = []
    font, fallbacks = fonts[0], fonts[1:]
    enc = font.encName
    if 'UCS-2' in enc:
        enc = 'UTF16'
    while utext:
        try:
            encoded = utext.encode(enc) if isUnicode(utext) else utext
            result.append((font, encoded))
            break
        except UnicodeEncodeError as e:
            # the slice [start:end) could not be encoded in this font
            start, end = e.args[2:4]
            if start:
                result.append((font, utext[:start].encode(enc)))
            if fallbacks:
                # recurse over the substitution fonts for the bad slice
                result.extend(unicode2T1(utext[start:end], fallbacks))
            else:
                # out of fallbacks: substitute notdef glyphs
                result.append((font._notdefFont, font._notdefChar * (end - start)))
            utext = utext[end:]
    return result
def writeXML(tree):
    "Convert to a string. No auto-indenting provided yet"
    if isUnicode(tree):
        return tree
    else:
        (tagName, attrs, children, spare) = tree
        chunks = []
        # bug fix: open the tag with its name; the original formatted the
        # whole tuple ('<%s ' % tree), which raises TypeError because the
        # tuple is treated as multiple % arguments
        chunks.append(u'<%s ' % tagName)
def resolve(self, text, enc='utf8'):
    # run text through the parser; return the same type that came in
    self._output = []
    self.reset()
    if isUnicode(text):
        enc = None
    else:
        text = text.decode(enc)
    self.feed(nakedAmpFix(text).replace(u'<br/>', u'<br />'))
    resolved = u''.join(self._output)
    return resolved.encode(enc) if enc else resolved
def resolve(self, text, enc='utf8'):
    # parse text, returning bytes when bytes came in, unicode otherwise
    self._output = []
    self.reset()
    wasBytes = not isUnicode(text)
    if wasBytes:
        text = text.decode(enc)
    else:
        enc = None
    self.feed(nakedAmpFix(text).replace(u'<br/>', u'<br />'))
    joined = u''.join(self._output)
    if enc:
        return joined.encode(enc)
    return joined
def asUnicode(self, markup):
    """convert to unicode"""
    #TODO
    if isUnicode(markup):
        return markup
    try:
        return markup.decode('utf8', 'strict')
    except UnicodeDecodeError:
        # assume windows encoding
        return markup.decode('cp1252', 'replace')
def drawString(self, x, y, text, _fontInfo=None, text_anchor='left'):
    """Draw text at (x, y), honouring text_anchor and an optional
    (fontName, fontSize) override in _fontInfo.  The graphics-state font
    is restored before returning."""
    gs = self._gs
    gs_fontSize = gs.fontSize
    gs_fontName = gs.fontName
    if _fontInfo and _fontInfo != (gs_fontSize, gs_fontName):
        fontName, fontSize = _fontInfo
        _setFont(gs, fontName, fontSize)
    else:
        fontName = gs_fontName
        fontSize = gs_fontSize
    try:
        # bug fix: the tuple listed 'end' twice and omitted 'numeric',
        # so numeric anchoring never shifted x
        if text_anchor in ('end', 'middle', 'numeric'):
            textLen = stringWidth(text, fontName, fontSize)
            if text_anchor == 'end':
                x -= textLen
            elif text_anchor == 'middle':
                x -= textLen / 2.
            elif text_anchor == 'numeric':
                x -= numericXShift(text_anchor, text, textLen, fontName, fontSize)
        if self._backend == 'rlPyCairo':
            gs.drawString(x, y, text)
        else:
            font = getFont(fontName)
            if font._dynamicFont:
                # dynamic fonts handle their own encoding
                gs.drawString(x, y, text)
            else:
                fc = font
                if not isUnicode(text):
                    try:
                        text = text.decode('utf8')
                    except UnicodeDecodeError as e:
                        # re-raise with context around the offending bytes
                        i, j = e.args[2:4]
                        raise UnicodeDecodeError(
                            *(e.args[:4] +
                              ('%s\n%s-->%s<--%s' %
                               (e.args[4], text[i - 10:i], text[i:j],
                                text[j:j + 10]), )))
                # split into runs drawable by the font / its substitutes
                FT = unicode2T1(text, [font] + font.substitutionFonts)
                n = len(FT)
                nm1 = n - 1
                for i in range(n):
                    f, t = FT[i]
                    if f != fc:
                        _setFont(gs, f.fontName, fontSize)
                        fc = f
                    gs.drawString(x, y, t)
                    if i != nm1:
                        # advance past the run just drawn
                        x += f.stringWidth(t.decode(f.encName), fontSize)
    finally:
        # always restore the caller's font
        gs.setFont(gs_fontName, gs_fontSize)
def _AsciiHexEncode(input):
    """Encodes input using ASCII-Hex coding.

    This is a verbose encoding used for binary data within a PDF file.
    One byte binary becomes two bytes of ASCII. Helper function used by images."""
    if isUnicode(input):
        input = input.encode('utf-8')
    # two hex digits per byte, plus the PDF ASCIIHex terminator
    return binascii.b2a_hex(input) + b'>'
def write(self, u):
    """Append u to the buffer, decoding utf-8 bytes to text first."""
    if isUnicode(u):
        self.append(u)
        return
    if not isBytes(u):
        raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
    try:
        decoded = u.decode('utf-8')
    except:
        et, ev, tb = sys.exc_info()
        ev = str(ev)
        del et, tb
        raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u, ev))
    self.append(decoded)
def splitString(self, text, doc, encoding='utf-8'):
    """Splits text into a number of chunks, each of which belongs to a single
    subset.  Returns a list of tuples (subset, string).  Use subset numbers
    with getSubsetInternalName.  Doc is needed for distinguishing subsets when
    building different documents at the same time."""
    asciiReadable = self._asciiReadable
    try:
        state = self.state[doc]
    except KeyError:
        # first use of this doc: start a fresh subset-assignment state
        state = self.state[doc] = TTFont.State(asciiReadable)
    curSet = -1
    cur = []
    results = []
    if not isUnicode(text):
        text = text.decode('utf-8')  # encoding defaults to utf-8
    assignments = state.assignments
    subsets = state.subsets
    for code in map(ord, text):
        if code in assignments:
            n = assignments[code]
        else:
            if state.frozen:
                raise pdfdoc.PDFError(
                    "Font %s is already frozen, cannot add new character U+%04X"
                    % (self.fontName, code))
            n = state.nextCode
            if n & 0xFF == 32:
                # make code 32 always be a space character
                if n != 32: subsets[n >> 8].append(32)
                state.nextCode += 1
                n = state.nextCode
            state.nextCode += 1
            assignments[code] = n
            if n > 32:
                # starting a new 256-slot subset when the low byte wraps
                if not (n & 0xFF): subsets.append([])
                subsets[n >> 8].append(code)
            else:
                subsets[0][n] = code
        if (n >> 8) != curSet:
            # crossed a subset boundary: flush the accumulated run
            if cur:
                results.append(
                    (curSet, bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
            curSet = (n >> 8)
            cur = []
        cur.append(n & 0xFF)
    if cur:
        results.append(
            (curSet, bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
    return results
def _AsciiHexDecode(input):
    """Decodes input using ASCII-Hex coding.

    Not used except to provide a test of the inverse function."""
    if not isUnicode(input):
        input = input.decode('utf-8')
    # drop all whitespace before validating the stream
    stripped = ''.join(input.split())
    assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
    stripped = stripped[:-1]  # chop off terminator
    assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
    pairs = (stripped[i:i + 2] for i in range(0, len(stripped), 2))
    return ''.join(chr(int(pair, 16)) for pair in pairs)
def __init__(self, value='Hello World', **kw):
    self.value = isUnicodeOrQRList.normalize(value)
    for attr, val in kw.items():
        setattr(self, attr, val)
    level = getattr(qrencoder.QRErrorCorrectLevel, self.barLevel)
    # stash the encoder directly in __dict__ to bypass property machinery
    self.__dict__['qr'] = qrencoder.QRCode(self.qrVersion, level)
    value = self.value
    if isUnicode(value):
        self.addData(value)
    elif value:
        for chunk in value:
            self.addData(chunk)
def normalizeName(self, name):
    # decode bytes names by trying utf8 then latin1
    if not isUnicode(name):
        decoded = None
        for enc in ('utf8', 'latin1'):
            try:
                decoded = asUnicode(name, enc)
                break
            except:
                continue
        if decoded is None:
            raise ValueError('Cannot normalize name %r' % name)
        name = decoded
    normalized = name.strip().lower()
    # optionally collapse internal whitespace with a configured joiner
    nns = getattr(self, 'normalizeNameSpaces', None)
    if isStr(nns):
        normalized = nns.join(filter(None, normalized.split()))
    return normalized
def parse_catalog(filename):
    """Validate and parse XML.

    This will complain if invalid  We fully parse the XML and turn into
    Python variables, so that any encoding issues are confronted here
    rather than in the template
    """
    xml = open(filename).read()
    if isUnicode(xml):
        xml = xml.encode('utf8')  #required for python 2.7 & >=3.3
    p = pyRXPU.Parser()
    tree = p.parse(xml)
    tagTree = TagWrapper(tree)
    # model numbers whose price is replaced by a "request a quote" message
    request_a_quote = [109, 110, 4121, 4122, 4123]
    # we now need to de-duplicate; the query returns multiple rows with different images
    # in them. if id is same, assume it's the same product.
    ids_seen = set()
    products = []
    for prodTag in tagTree:
        id = int(str(prodTag.ProductId1))  #extract tag content
        if id in ids_seen:
            continue
        else:
            ids_seen.add(id)
        prod = Product()
        prod.id = id
        prod.modelNumber = int(str(prodTag.ModelNumber))
        prod.archived = (str(prodTag.Archived) == 'true')
        prod.name = fix(prodTag.ModelName)
        prod.summary = fix(prodTag.Summary)
        prod.description = fix(prodTag.Description)
        #originally the images came from a remote site. We have stashed them in
        #the img/ subdirectory, so just chop off the final part of the path.
        #asNative required for python 2.7 & >=3.3
        prod.image = os.path.split(asNative(fix(
            prodTag.ImageUrl)))[-1].replace(' ', '')
        if prod.modelNumber in request_a_quote:
            prod.price = "Call us on 01635 246830 for a quote"
        else:
            # drop the last two characters of the cost (trailing cents/format)
            prod.price = '£' + str(
                prodTag.UnitCost)[0:len(str(prodTag.UnitCost)) - 2]
        if prod.archived:
            pass
        else:
            products.append(prod)
    products.sort(key=lambda x: x.modelNumber)
    return products
def splitString(self, text, doc, encoding='utf-8'):
    """Splits text into a number of chunks, each of which belongs to a single
    subset.  Returns a list of tuples (subset, string).  Use subset numbers
    with getSubsetInternalName.  Doc is needed for distinguishing subsets when
    building different documents at the same time."""
    asciiReadable = self._asciiReadable
    try:
        state = self.state[doc]
    except KeyError:
        # no state yet for this document: create one
        state = self.state[doc] = TTFont.State(asciiReadable)
    curSet = -1
    cur = []
    results = []
    if not isUnicode(text):
        text = text.decode('utf-8')     # encoding defaults to utf-8
    assignments = state.assignments
    subsets = state.subsets
    for code in map(ord,text):
        if code in assignments:
            n = assignments[code]
        else:
            if state.frozen:
                raise pdfdoc.PDFError("Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code))
            n = state.nextCode
            if n&0xFF==32:
                # make code 32 always be a space character
                if n!=32: subsets[n >> 8].append(32)
                state.nextCode += 1
                n = state.nextCode
            state.nextCode += 1
            assignments[code] = n
            if n>32:
                # low byte wrapped to 0: open a brand-new subset
                if not(n&0xFF): subsets.append([])
                subsets[n >> 8].append(code)
            else:
                subsets[0][n] = code
        if (n >> 8) != curSet:
            # subset changed: emit the run gathered so far
            if cur:
                results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
            curSet = (n >> 8)
            cur = []
        cur.append(n & 0xFF)
    if cur:
        results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
    return results
def reset(self):
    '''restore the cipher to it's start state'''
    # repeat the key bytes out to 256 entries, each reduced mod 256
    key = self._key
    if isUnicode(key):
        key = key.encode('utf8')
    lk = len(key)
    k = [key[i % lk] % 256 for i in range(256)]
    sbox = list(range(256))
    # RC4 key-scheduling: permute sbox under control of k
    j = 0
    for i in range(256):
        j = (j + sbox[i] + k[i]) % 256
        sbox[i], sbox[j] = sbox[j], sbox[i]
    self._sbox, self._i, self._j = sbox, 0, 0
def drawString(self, x, y, text, _fontInfo=None):
    # Draw text at (x, y), using the _fontInfo override when given,
    # splitting into substitution-font runs for non-dynamic fonts.
    gs = self._gs
    if _fontInfo:
        fontName, fontSize = _fontInfo
    else:
        fontSize = gs.fontSize
        fontName = gs.fontName
    try:
        # NOTE(review): gfont is never used below; getFont may have
        # caching side effects, so the call is retained as-is
        gfont = getFont(gs.fontName)
    except:
        gfont = None
    font = getFont(fontName)
    if font._dynamicFont:
        gs.drawString(x, y, text)
    else:
        fc = font
        if not isUnicode(text):
            try:
                text = text.decode("utf8")
            except UnicodeDecodeError as e:
                # re-raise with surrounding context for easier debugging
                i, j = e.args[2:4]
                raise UnicodeDecodeError(
                    *(
                        e.args[:4]
                        + ("%s\n%s-->%s<--%s" % (e.args[4], text[i - 10 : i], text[i:j], text[j : j + 10]),)
                    )
                )
        # split text into (font, run) pairs covering all glyphs
        FT = unicode2T1(text, [font] + font.substitutionFonts)
        n = len(FT)
        nm1 = n - 1
        for i in range(n):
            f, t = FT[i]
            if f != fc:
                _setFont(gs, f.fontName, fontSize)
                fc = f
            gs.drawString(x, y, t)
            if i != nm1:
                # advance x past the run just drawn
                x += f.stringWidth(t.decode(f.encName), fontSize)
        if font != fc:
            # restore the requested font after substitution switches
            _setFont(gs, fontName, fontSize)
def drawString(self, x, y, text, _fontInfo=None):
    # Render text at (x, y); _fontInfo optionally overrides the graphics
    # state's (fontName, fontSize) for this call only.
    gs = self._gs
    if _fontInfo:
        fontName, fontSize = _fontInfo
    else:
        fontSize = gs.fontSize
        fontName = gs.fontName
    try:
        # NOTE(review): gfont is unused afterwards; kept because getFont
        # may populate a font cache as a side effect
        gfont = getFont(gs.fontName)
    except:
        gfont = None
    font = getFont(fontName)
    if font._dynamicFont:
        # dynamic fonts render directly
        gs.drawString(x, y, text)
    else:
        fc = font
        if not isUnicode(text):
            try:
                text = text.decode('utf8')
            except UnicodeDecodeError as e:
                # include a window of text around the bad bytes
                i, j = e.args[2:4]
                raise UnicodeDecodeError(
                    *(e.args[:4] +
                      ('%s\n%s-->%s<--%s' %
                       (e.args[4], text[i - 10:i], text[i:j],
                        text[j:j + 10]), )))
        FT = unicode2T1(text, [font] + font.substitutionFonts)
        n = len(FT)
        nm1 = n - 1
        for i in range(n):
            f, t = FT[i]
            if f != fc:
                # switch to the substitution font for this run
                _setFont(gs, f.fontName, fontSize)
                fc = f
            gs.drawString(x, y, t)
            if i != nm1:
                x += f.stringWidth(t.decode(f.encName), fontSize)
        if font != fc:
            _setFont(gs, fontName, fontSize)
def process(datafile, notes=0, handout=0, printout=0, cols=0, verbose=0, outDir=None, datafilename=None, fx=1):
    "Process one PythonPoint source file."
    if not hasattr(datafile, "read"):
        # a filename was passed: open it ourselves and remember the name
        if not datafilename: datafilename = datafile
        datafile = open(datafile,'rb')
    else:
        # a readable object was passed
        if not datafilename: datafilename = "PseudoFile"
    rawdata = datafile.read()
    if not isUnicode(rawdata):
        # try the encoding declared in the <?xml ?> prolog first,
        # then utf8, then latin-1
        encs = ['utf8','iso-8859-1']
        m=_re_match(r'^\s*(<\?xml[^>]*\?>)',rawdata)
        if m:
            m1=_re_match(r"""^.*\sencoding\s*=\s*("[^"]*"|'[^']*')""",m.group(1))
            if m1:
                enc = m1.group(1)[1:-1]
                if enc:
                    if enc in encs: encs.remove(enc)
                    encs.insert(0,enc)
        for enc in encs:
            try:
                udata = rawdata.decode(enc)
                break
            except:
                pass
        else:
            raise ValueError('cannot decode input data')
    else:
        udata = rawdata
    # py2 validators want utf8 bytes, py3 wants text
    if isPy3:
        rawdata = udata
    else:
        rawdata = udata.encode('utf8')
    #if pyRXP present, use it to check and get line numbers for errors...
    validate(rawdata)
    return _process(rawdata, datafilename, notes, handout, printout, cols, verbose, outDir, fx)
def tt2xml(tt):
    '''convert tuple tree form to unicode xml'''
    if tt is None: return ''
    if isBytes(tt): return tt2xml(tt.decode('utf8'))
    if isUnicode(tt): return escape(tt)
    if isinstance(tt,list):
        # a list of nodes: concatenate their serializations
        return ''.join(tt2xml(x) for x in tt)
    if isinstance(tt,tuple):
        # (tagName, attrDict, children, extra) node
        tag = tt[0].decode('utf8')
        L=['<'+tag].append
        C = tt[2]
        if tt[1]:
            for k,v in tt[1].items():
                # NOTE(review): .decode on a % result only works on
                # Python 2 byte strings -- confirm this path is py2-only
                L((' %s=%s' % (k,quoteattr(v))).decode('utf8'))
        if C is not None:
            L('>')
            L(tt2xml(C))
            L('</'+tag+'>')
        else:
            # childless node: self-closing tag
            L('/>')
        return ''.join(L.__self__)
    raise ValueError('Invalid value %r passed to tt2xml' % tt)
def __getitem__(self, x):
    # wrap plain string children in FakingStr; anything that fails the
    # lookup yields an empty FakingStr of matching str type
    try:
        item = TagWrapper.__getitem__(self, x)
        if isinstance(item, strTypes):
            item = FakingStr(item)
        return item
    except:
        return FakingStr(u'' if isUnicode(self.tagName) else '')
def dumbSplit(word, widths, maxWidths):
    """This function attempts to fit as many characters as possible into the available
    space, cutting "like a knife" between characters.  This would do for Chinese.
    It returns a list of [extraSpace, text] items where text is a Unicode string,
    and extraSpace is the points of unused space available on the line. This is a
    structure which is fairly easy to display, and supports 'backtracking'
    approaches after the fact.

    Test cases assume each character is ten points wide...

    >>> dumbSplit(u'Hello', [10]*5, 60)
    [[10, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 50)
    [[0, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 40)
    [[0, u'Hell'], [30, u'o']]
    """
    _more = """
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'\u65e5\u672c\u8a9e\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'\u65e5\u672c\u8a9e', u'\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01')
    """
    if not isinstance(maxWidths,(list,tuple)): maxWidths = [maxWidths]
    assert isUnicode(word)
    lines = []
    i = widthUsed = lineStartPos = 0
    maxWidth = maxWidths[0]
    nW = len(word)
    while i<nW:
        w = widths[i]
        c = word[i]
        widthUsed += w
        i += 1
        if widthUsed > maxWidth + _FUZZ and widthUsed>0:
            # the line overflowed: decide where to cut
            extraSpace = maxWidth - widthUsed
            if ord(c)<0x3000:
                # we appear to be inside a non-Asian script section.
                # (this is a very crude test but quick to compute).
                # This is likely to be quite rare so the speed of the
                # code below is hopefully not a big issue.  The main
                # situation requiring this is that a document title
                # with an english product name in it got cut.

                # we count back and look for
                #  - a space-like character
                #  - reversion to Kanji (which would be a good split point)
                #  - in the worst case, roughly half way back along the line
                limitCheck = (lineStartPos+i)>>1        #(arbitrary taste issue)
                for j in range(i-1,limitCheck,-1):
                    cj = word[j]
                    if category(cj)=='Zs' or ord(cj)>=0x3000:
                        k = j+1
                        if k<i:
                            j = k+1
                            extraSpace += sum(widths[j:i])
                            w = widths[k]
                            c = word[k]
                            i = j
                        break
                #end of English-within-Asian special case

            #we are pushing this character back, but
            #the most important of the Japanese typography rules
            #if this character cannot start a line, wrap it up to this line so it hangs
            #in the right margin. We won't do two or more though - that's unlikely and
            #would result in growing ugliness.
            #and increase the extra space
            #bug fix contributed by Alexander Vasilenko <*****@*****.**>
            if c not in ALL_CANNOT_START and i>lineStartPos+1:
                #otherwise we need to push the character back
                #the i>lineStart+1 condition ensures progress
                i -= 1
                extraSpace += w
            #lines.append([maxWidth-sum(widths[lineStartPos:i]), word[lineStartPos:i].strip()])
            lines.append([extraSpace, word[lineStartPos:i].strip()])
            try:
                maxWidth = maxWidths[len(lines)]
            except IndexError:
                maxWidth = maxWidths[-1]  # use the last one
            lineStartPos = i
            widthUsed = 0
    #any characters left?
    if widthUsed > 0:
        lines.append([maxWidth - widthUsed, word[lineStartPos:]])
    return lines
def _test(self, x):
    # accept unicode text or any iterable consisting solely of QR objects
    if isUnicode(x):
        return True
    return all(isinstance(v, qrencoder.QR) for v in x)
def _processLine(line, sep=',', conv=0):
    """Split one CSV-style line into a list of fields.

    Works on unicode or bytes input by selecting matching sentinel
    literals.  Quoted fields may contain the separator and doubled
    quotes; when conv is true, fields are converted to int/float where
    possible.
    """
    if isUnicode(line):
        space = u' '
        dquot = u'"'
        empty = u''
        speol = u' \r\n'
        sep = asUnicode(sep)
    else:
        space = b' '
        dquot = b'"'
        empty = b''
        speol = b' \r\n'
        sep = asBytes(sep)
    fields = []
    p = 0
    ll = len(line)
    ls = len(sep)
    line += space
    # trim trailing spaces / EOL characters
    # bug fix: use one-character slices throughout -- indexing bytes on
    # Python 3 yields ints, so line[p] == space was always False for
    # bytes input while the rest of the function already sliced
    while (ll > 0 and (line[ll-1:ll] in speol)):
        ll -= 1
    while p < ll:
        #Skip unquoted space at the start of a field
        while p < ll and line[p:p+1] == space:
            p += 1
        field = empty
        ql = 0  # non-zero while inside a quoted section
        while p < ll:
            #Skip unquoted space at the end of a field
            if ql == 0 and line[p:p+1] == space:
                q = p
                while q < ll and line[q:q+1] == space:
                    q += 1
                if q >= ll:
                    break
                elif line[q:q+ls] == sep:
                    p = q
            if ql == 0 and line[p:p+ls] == sep:
                break
            elif line[p:p+1] == dquot:
                if ql == 0:
                    ql = 1
                elif line[p+1:p+2] == dquot:
                    # doubled quote inside a quoted field -> literal quote
                    field += dquot
                    p += 1
                else:
                    ql = 0
            else:
                field += line[p:p+1]
            p += 1
        p += ls
        if conv:
            try:
                fields.append(int(field))
            except ValueError:
                try:
                    fields.append(float(field))
                except ValueError:
                    fields.append(field)
        else:
            fields.append(field)
    if line[ll-ls:ll] == sep:
        fields.append(empty)  #extra field when there's a separator at the end
    return fields
def asciiBase85Encode(input):
    """Encodes input using ASCII-Base85 coding.

    This is a compact encoding used for binary data within
    a PDF file.  Four bytes of binary data become five bytes of
    ASCII.  This is the default method used for encoding images."""
    # on py2, or for unicode input, elements index as 1-char strings
    # and need ord(); py3 bytes index directly as ints
    doOrd = not isPy3 or isUnicode(input)
    # special rules apply if not a multiple of four bytes.
    whole_word_count, remainder_size = divmod(len(input), 4)
    cut = 4 * whole_word_count
    body, lastbit = input[0:cut], input[cut:]

    out = [].append
    for i in range(whole_word_count):
        offset = i * 4
        b1 = body[offset]
        b2 = body[offset + 1]
        b3 = body[offset + 2]
        b4 = body[offset + 3]
        if doOrd:
            b1 = ord(b1)
            b2 = ord(b2)
            b3 = ord(b3)
            b4 = ord(b4)
        # pack four bytes into one 32-bit big-endian number
        if b1 < 128:
            num = (((((b1 << 8) | b2) << 8) | b3) << 8) | b4
        else:
            num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
        if num == 0:
            #special case
            out('z')
        else:
            #solve for five base-85 numbers
            temp, c5 = divmod(num, 85)
            temp, c4 = divmod(temp, 85)
            temp, c3 = divmod(temp, 85)
            c1, c2 = divmod(temp, 85)
            assert ((85**4) * c1) + ((85**3) * c2) + (
                (85**2) * c3) + (85 * c4) + c5 == num, 'dodgy code!'
            out(chr(c1 + 33))
            out(chr(c2 + 33))
            out(chr(c3 + 33))
            out(chr(c4 + 33))
            out(chr(c5 + 33))

    # now we do the final bit at the end.  I repeated this separately as
    # the loop above is the time-critical part of a script, whereas this
    # happens only once at the end.

    #encode however many bytes we have as usual
    if remainder_size > 0:
        # pad the tail to four bytes with NULs before encoding
        lastbit += (4 - len(lastbit)) * ('\0' if doOrd else b'\000')
        b1 = lastbit[0]
        b2 = lastbit[1]
        b3 = lastbit[2]
        b4 = lastbit[3]
        if doOrd:
            b1 = ord(b1)
            b2 = ord(b2)
            b3 = ord(b3)
            b4 = ord(b4)
        num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
        #solve for c1..c5
        temp, c5 = divmod(num, 85)
        temp, c4 = divmod(temp, 85)
        temp, c3 = divmod(temp, 85)
        c1, c2 = divmod(temp, 85)
        #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % (
        #    b1,b2,b3,b4,num,c1,c2,c3,c4,c5)
        lastword = chr(c1 + 33) + chr(c2 + 33) + chr(c3 + 33) + chr(
            c4 + 33) + chr(c5 + 33)
        #write out most of the bytes.
        out(lastword[0:remainder_size + 1])

    #terminator code for ascii 85
    out('~>')
    return ''.join(out.__self__)
def instanceStringWidthT1(self, text, size, encoding='utf8'):
    """This is the "purist" approach to width"""
    if not isUnicode(text):
        text = text.decode(encoding)
    total = 0
    for f, run in unicode2T1(text, [self] + self.substitutionFonts):
        # run is already a sequence of glyph codes for font f
        total += sum(map(f.widths.__getitem__, run))
    return total * 0.001 * size
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.

    Returns a single tag-tuple in most cases, but a list of them if
    processing a loop node.
    """
    from reportPackages.rlextra.radxml import xmlutils
    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes th value of name
    #tags might be nested in a loop, and if so then
    #each dictionary must be a fresh copy rather than
    # a pointer to the same dict
    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value, str) and value[0:1] == '$':
            # NOTE: eval/exec throughout this function assume the template
            # source is trusted -- never feed it untrusted input
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            # bind the loop variable in the namespace for each iteration
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            # the attribute dictionary of each shold be a copy, not
            # a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet
    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            value = eval(valueStr, nameSpace)
        except SyntaxError:
            #must be a string
            value = valueStr
        nameSpace[name] = value
        return None
    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'):
            code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None
    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText, strTypes), "expr can only contain strings"
        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding', u'utf8')
        exprValue = eval(exprText, nameSpace)
        # normalize the result to unicode text
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)
        if escape in (u'CDATA', u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            if escape == u'CDATA':
                return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]
    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr', u'')
        if exprText:
            expr = eval(exprText, nameSpace)
        selected = None
        for child in children:
            if isinstance(child, tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition  # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    # bug fix: the format string had one %s for two
                    # arguments, which raised TypeError instead of the
                    # intended message
                    raise ValueError('%s tag may only contain these tags: %s' % (TAG_SWITCH, ', '.join(TAG_CASE + TAG_DEFAULT)))
        return selected
    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None
    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)
        #load module, import and create it
        if caller == 'rml':
            from reportPackages.rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()
        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace
        docletObj.setData(dataObj)
        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj
        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)
    else:
        # unknown tag: recurse into the children and rebuild the node
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)
def asciiBase85Decode(input):
    """Decodes input using ASCII-Base85 coding.

    This is not normally used - Acrobat Reader decodes for you -
    but a round trip is essential for testing.

    Accepts str or bytes.  Mirroring the historical behaviour of this
    module, a unicode input yields a latin-1 encoded bytes result and
    a bytes input yields a native str result.

    Raises AssertionError if the stream lacks the '~>' terminator or
    if, after expanding 'z' groups, the length mod 5 is 1 - a final
    group of a single character can never be produced by the encoder,
    so such a stream is corrupt.
    """
    #strip all whitespace
    stripped = ''.join(asNative(input).split())
    #check end
    assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    stripped = stripped[:-2]  #chop off terminator

    #may have 'z' in it which complicates matters - expand them
    stripped = stripped.replace('z', '!!!!!')
    # special rules apply if not a multiple of five bytes.
    whole_word_count, remainder_size = divmod(len(stripped), 5)
    #a remainder of one is impossible for a valid encoder output; the
    #previous code had this check commented out and silently dropped
    #the stray character, losing data - reject the stream instead.
    assert remainder_size != 1, 'invalid Ascii 85 stream!'
    cut = 5 * whole_word_count
    body, lastbit = stripped[0:cut], stripped[cut:]

    out = [].append
    for i in range(whole_word_count):
        offset = i * 5
        c1 = ord(body[offset]) - 33
        c2 = ord(body[offset+1]) - 33
        c3 = ord(body[offset+2]) - 33
        c4 = ord(body[offset+3]) - 33
        c5 = ord(body[offset+4]) - 33

        #five base-85 digits -> one 32-bit number -> four bytes
        num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5

        temp, b4 = divmod(num, 256)
        temp, b3 = divmod(temp, 256)
        b1, b2 = divmod(temp, 256)

        assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'

        out(chr(b1))
        out(chr(b2))
        out(chr(b3))
        out(chr(b4))

    #decode however many bytes we have as usual
    if remainder_size > 0:
        while len(lastbit) < 5:
            lastbit = lastbit + '!'
        c1 = ord(lastbit[0]) - 33
        c2 = ord(lastbit[1]) - 33
        c3 = ord(lastbit[2]) - 33
        c4 = ord(lastbit[3]) - 33
        c5 = ord(lastbit[4]) - 33
        #the encoding loses data by rounding the number down when it
        #truncates the final group; the added constant restores the
        #low bits so the surviving bytes divide out correctly
        num = (((85*c1+c2)*85+c3)*85+c4)*85 + (c5
                + (0, 0, 0xFFFFFF, 0xFFFF, 0xFF)[remainder_size])
        temp, b4 = divmod(num, 256)
        temp, b3 = divmod(temp, 256)
        b1, b2 = divmod(temp, 256)
        assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        #only remainder_size-1 of the four bytes carry real data
        if remainder_size == 2:
            lastword = chr(b1)
        elif remainder_size == 3:
            lastword = chr(b1) + chr(b2)
        elif remainder_size == 4:
            lastword = chr(b1) + chr(b2) + chr(b3)
        else:
            lastword = ''
        out(lastword)

    r = ''.join(out.__self__)
    return r.encode('latin1') if isUnicode(input) else r
def __getitem__(self, x):
    """Item access that wraps any string result in a FakingStr.

    Non-string values pass through unchanged.  Any failure during the
    lookup or wrapping falls back to an empty FakingStr (unicode or
    bytes to match the type of self.tagName) so that chained template
    accesses degrade gracefully instead of raising.
    """
    try:
        v = TagWrapper.__getitem__(self, x)
        return FakingStr(v) if isinstance(v, strTypes) else v
    except Exception:
        #was a bare `except:` - narrowed so KeyboardInterrupt and
        #SystemExit are no longer swallowed by the fallback
        return FakingStr(u'' if isUnicode(self.tagName) else '')
def asciiBase85Encode(input):
    """Encodes input using ASCII-Base85 coding.

    This is a compact encoding used for binary data within a PDF
    file.  Four bytes of binary data become five bytes of ASCII.
    This is the default method used for encoding images.

    Works on bytes or (unicode) str; on str input each character is
    converted via ord() before packing.
    """
    #bytes under py3 already index to ints; everything else needs ord()
    needOrd = not isPy3 or isUnicode(input)

    #split into whole 4-byte groups plus a trailing partial group
    fullGroups, tailLen = divmod(len(input), 4)
    boundary = 4 * fullGroups
    groups, tail = input[:boundary], input[boundary:]

    pieces = [].append
    for g in range(fullGroups):
        base = g * 4
        q1, q2, q3, q4 = groups[base], groups[base+1], groups[base+2], groups[base+3]
        if needOrd:
            q1, q2, q3, q4 = ord(q1), ord(q2), ord(q3), ord(q4)
        #shift form is a fast path when the result fits comfortably
        if q1 < 128:
            value = (((((q1 << 8) | q2) << 8) | q3) << 8) | q4
        else:
            value = 16777216 * q1 + 65536 * q2 + 256 * q3 + q4
        if value == 0:
            #special case: four zero bytes collapse to a single 'z'
            pieces('z')
            continue
        #solve for five base-85 digits
        rest, e5 = divmod(value, 85)
        rest, e4 = divmod(rest, 85)
        rest, e3 = divmod(rest, 85)
        e1, e2 = divmod(rest, 85)
        assert ((85**4) * e1) + ((85**3) * e2) + ((85**2) * e3) + (85*e4) + e5 == value, 'dodgy code!'
        pieces(chr(e1+33))
        pieces(chr(e2+33))
        pieces(chr(e3+33))
        pieces(chr(e4+33))
        pieces(chr(e5+33))

    #the final partial group is handled separately: pad with zero
    #bytes, encode in full, then emit only tailLen+1 characters
    if tailLen > 0:
        tail += (4 - len(tail)) * ('\0' if needOrd else b'\000')
        q1, q2, q3, q4 = tail[0], tail[1], tail[2], tail[3]
        if needOrd:
            q1, q2, q3, q4 = ord(q1), ord(q2), ord(q3), ord(q4)
        value = 16777216 * q1 + 65536 * q2 + 256 * q3 + q4
        rest, e5 = divmod(value, 85)
        rest, e4 = divmod(rest, 85)
        rest, e3 = divmod(rest, 85)
        e1, e2 = divmod(rest, 85)
        encoded = chr(e1+33) + chr(e2+33) + chr(e3+33) + chr(e4+33) + chr(e5+33)
        pieces(encoded[0:tailLen + 1])

    #terminator code for ascii 85
    pieces('~>')
    return ''.join(pieces.__self__)
def drawImage(self, image, filename, x, y, width=None, height=None, mask=None, preserveAspectRatio=False, anchor='c'): self._currentPageHasImages = 1 #imagename, use it s = '%s%s' % (filename, mask) if isUnicode(s): s = s.encode('utf-8') name = _digester(s) # in the pdf document, this will be prefixed with something to # say it is an XObject. Does it exist yet? regName = self._doc.getXObjectName(name) imgObj = self._doc.idToObject.get(regName, None) if not imgObj: #first time seen, create and register the PDFImageXobject imgObj = pdfdoc.PDFImageXObject(name, mask=mask) ext = os.path.splitext(filename)[1].lower() if not (ext in ('.jpg', '.jpeg') and imgObj.loadImageFromJPEG(image)): if rl_config.useA85: imgObj.loadImageFromA85(image) else: imgObj.loadImageFromRaw(image) imgObj.name = name self._setXObjects(imgObj) self._doc.Reference(imgObj, regName) self._doc.addForm(name, imgObj) smask = getattr(imgObj, '_smask', None) if smask: #set up the softmask obtained above mRegName = self._doc.getXObjectName(smask.name) mImgObj = self._doc.idToObject.get(mRegName, None) if not mImgObj: self._setXObjects(smask) imgObj.smask = self._doc.Reference(smask, mRegName) else: imgObj.smask = pdfdoc.PDFObjectReference(mRegName) del imgObj._smask # ensure we have a size, as PDF will make it 1x1 pixel otherwise! x, y, width, height, scaled = aspectRatioFix(preserveAspectRatio, anchor, x, y, width, height, imgObj.width, imgObj.height) # scale and draw self.saveState() self.translate(x, y) self.scale(width, height) self._code.append("/%s Do" % regName) self.restoreState() # track what's been used on this page self._formsinuse.append(name) return (imgObj.width, imgObj.height)
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.

    ``tree`` is a 4-tuple ``(tagName, attrs, children, extraStuff)``.
    Returns a single tag-tuple in most cases, but a list of them if
    processing a loop node, and None for tags consumed here (assign,
    script, acquire).

    SECURITY NOTE: this function eval()s/exec()s attribute values and
    tag bodies in ``nameSpace`` - templates must come from a trusted
    source.
    """
    from rlextra.radxml import xmlutils

    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name
    #tags might be nested in a loop, and if so then
    #each dictionary must be a fresh copy rather than
    #a pointer to the same dict
    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value, str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop variable in the namespace for this iteration
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            # the attribute dictionary of each should be a copy, not
            # a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet
    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            #NOTE(review): evaluated WITHOUT nameSpace, unlike every
            #other eval in this function, so only literals/builtins
            #resolve here - presumably deliberate, confirm before
            #changing
            value = eval(valueStr)
        except SyntaxError:
            #must be a string
            value = valueStr
        nameSpace[name] = value
        return None
    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'):
            code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None
    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText, strTypes), "expr can only contain strings"
        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding', u'utf8')
        exprValue = eval(exprText, nameSpace)
        #normalise the result to unicode before any escaping
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)
        if escape in (u'CDATA', u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            #CDATA returns as-is; CDATAESCAPE falls through and gets
            #xmlEscape applied to the wrapped value below
            if escape == u'CDATA':
                return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]
    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #false condition falls through, returning None (tag dropped)
    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr', u'')
        if exprText:
            expr = eval(exprText, nameSpace)
        selected = None
        for child in children:
            if isinstance(child, tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition  # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #fixed: the format string had one %s for a
                    #two-element tuple, which raised TypeError
                    #instead of the intended ValueError message
                    raise ValueError('%s tag may only contain these tags: %s' % (
                        TAG_SWITCH, ', '.join(TAG_CASE+TAG_DEFAULT)))
        return selected
    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None
    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)
        #load module, import and create it
        if caller == 'rml':
            from rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()
        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace
        docletObj.setData(dataObj)
        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj
        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)
    else:
        #ordinary tag: recurse into children and rebuild the tuple
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)