def do_BDC(self, tag, props):
    """Handle the BDC operator (begin marked content with properties).

    The parent interpreter is invoked first; afterwards one marker is pushed
    on the internal stack describing what was opened:

    * ``"MCID"``       - content that carries an MCID; a ``MarkedContent``
      record is created and appended to the list of marked contents,
    * ``"Pagination"`` - an ``Artifact`` whose ``Type`` is ``Pagination``,
      opened while no pagination content is already open,
    * ``"Artifact"``   - any other ``Artifact``,
    * ``"BDC"``        - every other marked content.

    All property lookups go through ``self.getValue`` so that ``props`` does
    not have to be a dictionary itself.
    """
    super(TagInterpreter, self).do_BDC(tag, props)
    mcid = self.getValue(props, "MCID")
    if mcid is not None:
        # Marked content carrying an MCID.  Nested content inherits the
        # pagination/artifact status of any enclosing marked content.
        is_pagination = (self.__pagination > 0 or
                         literal_name(self.getValue(props, "Type")) == "Pagination")
        is_artifact = self.__artifact > 0 or literal_name(tag) == "Artifact"
        # The current font and size are attached right away: the content may
        # start with text without selecting a font first, in which case it
        # uses the font that was set before the content began.
        bdc = MarkedContent(mcid, tag, self.__page, is_pagination, is_artifact,
                            [self.__aktfont, self.__aktfontsize])
        self.__mc = True
        bdc.initialized = True
        self.__bdcs.append(bdc)
        self.__stack.append("MCID")
    elif (literal_name(tag) == "Artifact"
          and literal_name(self.getValue(props, "Type")) == "Pagination"
          and not self.__pagination):
        # This marked content is pagination.  Marked contents can be nested,
        # so content with an MCID found inside it is treated as pagination
        # too; that is detected through the self.__pagination counter.
        self.__pagination += 1
        self.__stack.append("Pagination")
    elif literal_name(tag) == "Artifact":
        # As above, except the content is an artifact, not pagination.
        self.__artifact += 1
        self.__stack.append("Artifact")
    else:
        # Any other marked content.
        self.__stack.append("BDC")
    return
def get_colorspace(spec):
    """Return the colour space object described by *spec*.

    *spec* is either a bare name or a list whose first element is the name.
    ``ICCBased`` and ``DeviceN`` lists with at least two elements produce a
    new ``PDFColorSpace``; every other name is looked up in
    ``PREDEFINED_COLORSPACE`` (an unknown name raises ``KeyError``).
    """
    is_array = isinstance(spec, list)
    if is_array:
        name = literal_name(spec[0])
    else:
        name = literal_name(spec)
    has_params = is_array and 2 <= len(spec)
    if has_params and name == 'ICCBased':
        # Component count comes from the N entry of the ICC profile stream.
        return PDFColorSpace(name, stream_value(spec[1]).dic['N'])
    if has_params and name == 'DeviceN':
        # One component per colourant name listed in the second element.
        return PDFColorSpace(name, len(list_value(spec[1])))
    return PREDEFINED_COLORSPACE[name]
def do_Do(self, xobjid):
    """Handle the Do operator: render a form XObject and count its keywords.

    The body largely mirrors the ancestor's implementation.  Only form
    XObjects that have a ``BBox`` are processed; any other XObject type is
    ignored.  An unknown XObject id raises ``PDFInterpreterError`` when
    ``STRICT`` is set and is silently skipped otherwise.
    """
    xobjid = literal_name(xobjid)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError:
        if STRICT:
            raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
        return
    if self.debug:
        logging.info("Processing xobj: %r" % xobj)
    subtype = xobj.get("Subtype")
    if not (subtype is LITERAL_FORM and "BBox" in xobj):
        # Ignored xobject type.
        return
    child = self.dup()
    child.is_first_level_call = None
    bbox = list_value(xobj["BBox"])
    matrix = list_value(xobj.get("Matrix", MATRIX_IDENTITY))
    # According to PDF reference 1.7 section 4.9.1, XObjects in earlier PDFs
    # (prior to v1.2) use the page's Resources entry instead of having their
    # own Resources entry.
    resources = dict_value(xobj.get("Resources")) or self.resources.copy()
    self.device.begin_figure(xobjid, bbox, matrix)
    child.render_contents(resources, [xobj],
                          ctm=mult_matrix(matrix, self.ctm))
    self.device.end_figure(xobjid)
    # Fold the keywords found inside the form into this interpreter's total.
    self.keyword_count += child.keyword_count
    print("Included %i keywords" % child.keyword_count)
    return
def do_Do(self, xobjid):
    """Handle the Do operator: paint the named XObject.

    Form XObjects with a ``BBox`` are rendered through a duplicated
    interpreter; image XObjects with ``Width`` and ``Height`` are handed to
    the device.  Other XObject types are ignored.  An unknown id raises
    ``PDFInterpreterError`` when ``STRICT`` is set, otherwise it is skipped.
    """
    xobjid = literal_name(xobjid)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError:
        if STRICT:
            raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
        return
    if 1 <= self.debug:
        print >>stderr, 'Processing xobj: %r' % xobj
    attrs = xobj.dic
    subtype = attrs.get('Subtype')
    if subtype is LITERAL_FORM and 'BBox' in attrs:
        sub_interpreter = self.dup()
        bbox = list_value(attrs['BBox'])
        matrix = list_value(attrs.get('Matrix', MATRIX_IDENTITY))
        self.device.begin_figure(xobjid, bbox, matrix)
        sub_interpreter.render_contents(dict_value(attrs.get('Resources')),
                                        [xobj],
                                        ctm=mult_matrix(matrix, self.ctm))
        self.device.end_figure(xobjid)
    elif subtype is LITERAL_IMAGE and 'Width' in attrs and 'Height' in attrs:
        # Images are drawn into a unit square figure.
        self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY)
        size = (attrs['Width'], attrs['Height'])
        self.device.render_image(xobj, size)
        self.device.end_figure(xobjid)
    # Any other xobject type is unsupported and skipped.
    return
def initialize(self, password=''):
    """Authenticate *password* and prepare RC4 decryption for the document.

    For an unencrypted document every permission flag is set to True and the
    document is marked ready.  Otherwise the Standard security handler
    parameters are read, the permission flags are derived from ``P``, the
    encryption key is computed (Algorithm 3.2) and verified against ``U``
    (Algorithm 3.4 for revision 2, Algorithm 3.5 for revision 3).

    Raises:
        PDFEncryptionError: unknown filter, algorithm version or revision.
        PDFNotImplementedError: revision 4 encryption.
        PDFPasswordIncorrect: the password does not verify.
    """
    if not self.encryption:
        self.is_printable = self.is_modifiable = self.is_extractable = True
        self.ready = True
        return
    (docid, param) = self.encryption
    if literal_name(param['Filter']) != 'Standard':
        raise PDFEncryptionError('Unknown filter: param=%r' % param)
    V = int_value(param.get('V', 0))
    if V not in (1, 2):
        raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
    length = int_value(param.get('Length', 40))  # Key length (bits)
    O = str_value(param['O'])
    R = int_value(param['R'])  # Revision
    # Revisions below 2 have no verification branch further down, so they
    # are rejected here together with revisions of 5 and above.
    if R < 2 or 5 <= R:
        raise PDFEncryptionError('Unknown revision: %r' % R)
    U = str_value(param['U'])
    P = int_value(param['P'])
    self.is_printable = bool(P & 4)
    self.is_modifiable = bool(P & 8)
    self.is_extractable = bool(P & 16)
    keylen = length // 8  # key length in bytes
    # Algorithm 3.2
    password = (password + self.PASSWORD_PADDING)[:32]  # step 1
    digest = md5.md5(password)  # step 2
    digest.update(O)  # step 3
    digest.update(struct.pack('<l', P))  # step 4
    digest.update(docid[0])  # step 5
    if 4 <= R:
        # step 6
        raise PDFNotImplementedError(
            'Revision 4 encryption is currently unsupported')
    if 3 <= R:
        # step 8
        for _ in xrange(50):
            digest = md5.md5(digest.digest()[:keylen])
    key = digest.digest()[:keylen]
    if R == 2:
        # Algorithm 3.4: the whole 32-byte value must match.
        u1 = Arcfour(key).process(password)
        is_authenticated = (u1 == U)
    else:
        # Algorithm 3.5 (R == 3)
        digest = md5.md5(self.PASSWORD_PADDING)  # step 2
        digest.update(docid[0])  # step 3
        x = Arcfour(key).process(digest.digest()[:16])  # step 4
        for i in xrange(1, 19 + 1):
            k = ''.join(chr(ord(c) ^ i) for c in key)
            x = Arcfour(k).process(x)
        u1 = x + x  # 32 bytes total
        # Only the first 16 bytes are compared for this revision.
        is_authenticated = (u1[:16] == U[:16])
    if not is_authenticated:
        raise PDFPasswordIncorrect
    self.decrypt_key = key
    self.decipher = self.decrypt_rc4  # XXX may be AES
    self.ready = True
    return
def initialize(self, password=''):
    """Check *password* against the document and set up RC4 decryption.

    Without an encryption dictionary the document is simply marked ready
    with all permissions granted.  With one, the Standard security handler
    entries are read, permissions are taken from the ``P`` bit field, the
    key is derived (Algorithm 3.2) and the user password entry ``U`` is
    verified (Algorithm 3.4 for revision 2, Algorithm 3.5 for revision 3).

    Raises:
        PDFEncryptionError: unknown filter, algorithm version or revision.
        PDFNotImplementedError: revision 4 encryption.
        PDFPasswordIncorrect: the password does not verify.
    """
    if not self.encryption:
        self.is_printable = self.is_modifiable = self.is_extractable = True
        self.ready = True
        return
    (docid, param) = self.encryption
    if literal_name(param['Filter']) != 'Standard':
        raise PDFEncryptionError('Unknown filter: param=%r' % param)
    V = int_value(param.get('V', 0))
    if not (V == 1 or V == 2):
        raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
    length = int_value(param.get('Length', 40))  # Key length (bits)
    O = str_value(param['O'])
    R = int_value(param['R'])  # Revision
    # Neither verification algorithm below applies to a revision under 2,
    # so such values are reported as unknown instead of falling through.
    if R < 2 or 5 <= R:
        raise PDFEncryptionError('Unknown revision: %r' % R)
    U = str_value(param['U'])
    P = int_value(param['P'])
    self.is_printable = bool(P & 4)
    self.is_modifiable = bool(P & 8)
    self.is_extractable = bool(P & 16)
    nbytes = length // 8  # key length in bytes
    # Algorithm 3.2
    password = (password+self.PASSWORD_PADDING)[:32]  # step 1
    md = md5.md5(password)  # step 2
    md.update(O)  # step 3
    md.update(struct.pack('<l', P))  # step 4
    md.update(docid[0])  # step 5
    if 4 <= R:
        # step 6
        raise PDFNotImplementedError('Revision 4 encryption is currently unsupported')
    if 3 <= R:
        # step 8
        for _ in xrange(50):
            md = md5.md5(md.digest()[:nbytes])
    key = md.digest()[:nbytes]
    if R == 2:
        # Algorithm 3.4: compare the full value.
        is_authenticated = (Arcfour(key).process(password) == U)
    else:
        # Algorithm 3.5 (R == 3)
        md = md5.md5(self.PASSWORD_PADDING)  # step 2
        md.update(docid[0])  # step 3
        x = Arcfour(key).process(md.digest()[:16])  # step 4
        for i in xrange(1, 19+1):
            k = ''.join(chr(ord(c) ^ i) for c in key)
            x = Arcfour(k).process(x)
        u1 = x+x  # 32 bytes total
        # Only the first 16 bytes are significant for this revision.
        is_authenticated = (u1[:16] == U[:16])
    if not is_authenticated:
        raise PDFPasswordIncorrect
    self.decrypt_key = key
    self.decipher = self.decrypt_rc4  # XXX may be AES
    self.ready = True
    return
def do_Tf(self, fontid, fontsize):
    """Handle the Tf operator: select the text font and font size.

    If *fontid* is not in the font map, ``PDFInterpreterError`` is raised
    when ``STRICT`` is set; otherwise the text state is left untouched.
    """
    try:
        font = self.fontmap[literal_name(fontid)]
        self.textstate.font = font
    except KeyError:
        if not STRICT:
            return
        raise PDFInterpreterError('Undefined Font id: %r' % fontid)
    self.textstate.fontsize = fontsize
    return
def do_BMC(self, tag):
    """Handle the BMC operator (begin marked content without properties).

    After delegating to the parent interpreter, pushes ``"BMCArtifact"`` and
    increments the artifact counter when *tag* is ``Artifact``; otherwise
    pushes a plain ``"BMC"`` marker.
    """
    super(TagInterpreter, self).do_BMC(tag)
    if literal_name(tag) != "Artifact":
        self.__stack.append("BMC")
        return
    self.__stack.append("BMCArtifact")
    self.__artifact += 1
    return
def do_Tf(self, fontid, fontsize):
    """Handle the Tf operator: store the selected font and size in ``mpts``.

    Raises ``PDFInterpreterError`` when a ``KeyError`` occurs while looking
    up or recording the font.
    """
    verbose_operator("PDF OPERATOR Tf: fontid=", fontid,
                     ", fontsize=", fontsize)
    try:
        font_key = literal_name(fontid)
        self.mpts.Tf = self.fontmap[font_key]
        verbose_operator("font=", self.mpts.Tf.fontname)
        self.mpts.Tfs = fontsize
    except KeyError:
        raise PDFInterpreterError('Undefined Font id: %r' % fontid)
def __init__(self, rsrc, spec):
    """Read the base font name, CID system info and CMap from *spec*.

    A missing ``BaseFont`` or ``Encoding`` entry raises ``PDFFontError``
    when ``STRICT`` is set and falls back to ``'unknown'`` otherwise.  A
    CMap that cannot be found is reported as ``PDFFontError``.
    """
    try:
        basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if STRICT:
            raise PDFFontError('BaseFont is missing')
        basefont = 'unknown'
    self.basefont = basefont

    sysinfo = dict_value(spec.get('CIDSystemInfo', {}))
    self.cidsysteminfo = sysinfo
    registry = sysinfo.get('Registry', 'unknown')
    ordering = sysinfo.get('Ordering', 'unknown')
    self.cidcoding = '%s-%s' % (registry, ordering)

    try:
        name = literal_name(spec['Encoding'])
    except KeyError:
        if STRICT:
            raise PDFFontError('Encoding is unspecified')
        name = 'unknown'
    try:
        self.cmap = rsrc.get_cmap(name, strict=STRICT)
    except CMapDB.CMapNotFound as e:
        raise PDFFontError(e)
def __init__(self, descriptor, widths, spec):
    """Set up the character encoding and optional ToUnicode map of a font.

    The encoding is given in *spec* either as the name of a built-in
    encoding or as a dictionary describing differences from a base
    encoding; the standard encoding is used when neither is present.  If
    *spec* has a ``ToUnicode`` stream it is parsed into ``self.ucs2_cmap``.
    Finally the generic ``PDFFont`` initialisation runs with *descriptor*
    and *widths*.
    """
    if 'Encoding' in spec:
        encoding = resolve1(spec['Encoding'])
    else:
        encoding = LITERAL_STANDARD_ENCODING
    if isinstance(encoding, dict):
        name = literal_name(encoding.get('BaseEncoding',
                                         LITERAL_STANDARD_ENCODING))
        # Differences is optional: default to an empty list so list_value is
        # never handed None for an otherwise valid encoding dictionary.
        diff = list_value(encoding.get('Differences', []))
        self.encoding = EncodingDB.get_encoding(name, diff)
    else:
        self.encoding = EncodingDB.get_encoding(literal_name(encoding))
    self.ucs2_cmap = None
    if 'ToUnicode' in spec:
        strm = stream_value(spec['ToUnicode'])
        self.ucs2_cmap = CMap()
        CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
    PDFFont.__init__(self, descriptor, widths)
    return
def __init__(self, rsrc, spec):
    """Initialise the CID font's base name, coding string and CMap.

    ``BaseFont`` and ``Encoding`` are required only when ``STRICT`` is set
    (``PDFFontError`` otherwise replaced by the value ``'unknown'``).  The
    coding string is ``"<Registry>-<Ordering>"`` taken from
    ``CIDSystemInfo``.  ``CMapDB.CMapNotFound`` is re-raised as
    ``PDFFontError``.
    """
    try:
        self.basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if STRICT:
            raise PDFFontError('BaseFont is missing')
        self.basefont = 'unknown'

    info = dict_value(spec.get('CIDSystemInfo', {}))
    self.cidsysteminfo = info
    coding_parts = (info.get('Registry', 'unknown'),
                    info.get('Ordering', 'unknown'))
    self.cidcoding = '%s-%s' % coding_parts

    try:
        cmap_name = literal_name(spec['Encoding'])
    except KeyError:
        if STRICT:
            raise PDFFontError('Encoding is unspecified')
        cmap_name = 'unknown'

    try:
        self.cmap = rsrc.get_cmap(cmap_name, strict=STRICT)
    except CMapDB.CMapNotFound as e:
        raise PDFFontError(e)
def __init__(self, descriptor, widths, default_width=None):
    """Store the font descriptor values shared by all font types.

    Args:
        descriptor: font descriptor mapping (read with ``.get``).
        widths: character widths, stored as given.
        default_width: width for characters missing from *widths*; when
            falsy, the descriptor's ``MissingWidth`` (default 0) is used.
    """
    self.descriptor = descriptor
    self.widths = widths
    self.fontname = descriptor.get('FontName', 'unknown')
    if isinstance(self.fontname, PSLiteral):
        self.fontname = literal_name(self.fontname)
    self.ascent = num_value(descriptor.get('Ascent', 0))
    self.descent = num_value(descriptor.get('Descent', 0))
    # MissingWidth goes through num_value like the other numeric descriptor
    # entries, so it is not stored as a raw, unprocessed object.
    self.default_width = default_width or num_value(
        descriptor.get('MissingWidth', 0))
    self.leading = num_value(descriptor.get('Leading', 0))
    self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
    # Horizontal/vertical scale factors applied to glyph metrics.
    self.hscale = self.vscale = .001
    return
def __init__(self, descriptor, widths, default_width=None):
    """Record descriptor-derived metrics common to every font.

    Args:
        descriptor: font descriptor mapping (read with ``.get``).
        widths: character widths, stored unchanged.
        default_width: fallback character width; if falsy the descriptor's
            ``MissingWidth`` entry (default 0) is used instead.
    """
    self.descriptor = descriptor
    self.widths = widths
    self.fontname = descriptor.get('FontName', 'unknown')
    if isinstance(self.fontname, PSLiteral):
        self.fontname = literal_name(self.fontname)
    self.ascent = num_value(descriptor.get('Ascent', 0))
    self.descent = num_value(descriptor.get('Descent', 0))
    # Read MissingWidth with num_value, consistently with Ascent, Descent
    # and Leading, rather than keeping the raw descriptor entry.
    missing_width = num_value(descriptor.get('MissingWidth', 0))
    self.default_width = default_width or missing_width
    self.leading = num_value(descriptor.get('Leading', 0))
    self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
    # Horizontal/vertical scale factors applied to glyph metrics.
    self.hscale = self.vscale = .001
    return
def __init__(self, descriptor, widths, spec):
    """Initialise encoding and ToUnicode mapping for a simple font.

    Font encoding is specified in *spec* either by the name of a built-in
    encoding or by a dictionary that describes the differences from a base
    encoding.  Without an ``Encoding`` entry the standard encoding is used.
    A ``ToUnicode`` stream, when present, is parsed into ``self.ucs2_cmap``
    (otherwise that attribute is ``None``).  ``PDFFont.__init__`` is called
    last with *descriptor* and *widths*.
    """
    if 'Encoding' in spec:
        encoding = resolve1(spec['Encoding'])
    else:
        encoding = LITERAL_STANDARD_ENCODING
    if isinstance(encoding, dict):
        base = encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)
        name = literal_name(base)
        # An encoding dictionary need not contain Differences; pass an empty
        # list in that case instead of None.
        diff = list_value(encoding.get('Differences', []))
        self.encoding = EncodingDB.get_encoding(name, diff)
    else:
        self.encoding = EncodingDB.get_encoding(literal_name(encoding))
    self.ucs2_cmap = None
    if 'ToUnicode' in spec:
        strm = stream_value(spec['ToUnicode'])
        self.ucs2_cmap = CMap()
        CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
    PDFFont.__init__(self, descriptor, widths)
    return
def get_font(self, objid, spec):
    """Return the font object for *spec*, reusing a cached one if possible.

    Args:
        objid: object id used as the cache key; a falsy value disables
            both cache lookup and cache storage.
        spec: font dictionary.

    The font class is chosen from ``spec['Subtype']``.  When
    ``settings.STRICT`` is set, a wrong ``Type``, a missing ``Subtype`` or
    an unknown subtype raises ``PDFFontError``; otherwise a Type1 font is
    assumed.  Type0 fonts are built from their first descendant font, with
    ``Encoding``/``ToUnicode`` copied over from the parent.
    """
    if objid and objid in self._cached_fonts:
        return self._cached_fonts[objid]
    if settings.STRICT:
        if spec['Type'] is not LITERAL_FONT:
            raise PDFFontError('Type is not /Font')
    # Create a Font object.
    if 'Subtype' in spec:
        subtype = literal_name(spec['Subtype'])
    else:
        if settings.STRICT:
            raise PDFFontError('Font Subtype is not specified.')
        subtype = 'Type1'
    if subtype in ('Type1', 'MMType1'):
        # Type1 Font
        font = PDFType1Font(self, spec)
    elif subtype == 'TrueType':
        # TrueType Font
        font = PDFTrueTypeFont(self, spec)
    elif subtype == 'Type3':
        # Type3 Font
        font = PDFType3Font(self, spec)
    elif subtype in ('CIDFontType0', 'CIDFontType2'):
        # CID Font - ensure references nested inside CIDSystemInfo have been
        # resolved.  The entry may be absent or itself an indirect
        # reference; in both cases it is left for PDFCIDFont to deal with.
        cidsysteminfo = spec.get('CIDSystemInfo')
        if isinstance(cidsysteminfo, dict):
            for k in list(cidsysteminfo):
                if isinstance(cidsysteminfo[k], PDFObjRef):
                    cidsysteminfo[k] = cidsysteminfo[k].resolve()
        font = PDFCIDFont(self, spec)
    elif subtype == 'Type0':
        # Type0 Font
        dfonts = list_value(spec['DescendantFonts'])
        assert dfonts
        subspec = dict_value(dfonts[0]).copy()
        for k in ('Encoding', 'ToUnicode'):
            if k in spec:
                subspec[k] = resolve1(spec[k])
        # objid=None: the descendant font is not cached on its own.
        font = self.get_font(None, subspec)
    else:
        if settings.STRICT:
            raise PDFFontError('Invalid Font spec: %r' % spec)
        font = PDFType1Font(self, spec)
    if objid and self.caching:
        self._cached_fonts[objid] = font
    return font
def __init__(self, rsrc, spec):
    """Initialise a Type1 font from *spec*.

    Metrics come from ``FontMetricsDB`` when it knows the base font;
    otherwise the descriptor and widths are taken from *spec* itself, with
    widths keyed by character code starting at ``FirstChar``.  A missing
    ``BaseFont`` raises ``PDFFontError`` only when ``STRICT`` is set.
    """
    try:
        basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if STRICT:
            raise PDFFontError('BaseFont is missing')
        basefont = 'unknown'
    self.basefont = basefont

    try:
        (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
    except KeyError:
        descriptor = dict_value(spec.get('FontDescriptor', {}))
        firstchar = int_value(spec.get('FirstChar', 0))
        # The value is not used below; the read is kept so LastChar still
        # passes through int_value exactly as before.
        lastchar = int_value(spec.get('LastChar', 255))
        width_list = list_value(spec.get('Widths', [0]*256))
        widths = dict((firstchar + offset, width)
                      for (offset, width) in enumerate(width_list))
    PDFSimpleFont.__init__(self, descriptor, widths, spec)
    return
def __init__(self, rsrc, spec):
    """Set up a Type1 font described by *spec*.

    ``FontMetricsDB`` is consulted first for the base font; on ``KeyError``
    the descriptor and per-character widths are read from *spec*, the
    widths being indexed from ``FirstChar`` upwards.  ``PDFFontError`` is
    raised for a missing ``BaseFont`` only when ``STRICT`` is set.
    """
    try:
        self.basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if STRICT:
            raise PDFFontError('BaseFont is missing')
        self.basefont = 'unknown'

    try:
        metrics = FontMetricsDB.get_metrics(self.basefont)
        (descriptor, widths) = metrics
    except KeyError:
        descriptor = dict_value(spec.get('FontDescriptor', {}))
        firstchar = int_value(spec.get('FirstChar', 0))
        # Result unused below; the call is retained unchanged.
        lastchar = int_value(spec.get('LastChar', 255))
        raw_widths = list_value(spec.get('Widths', [0] * 256))
        widths = dict((index + firstchar, w)
                      for (index, w) in enumerate(raw_widths))
    PDFSimpleFont.__init__(self, descriptor, widths, spec)
    return
def load_fields_from_pdf(field, T=''):
    """Recursively load form fields.

    Args:
        field: field dictionary (read with ``.get``).
        T: fully qualified name of the parent field, ``''`` at the top.

    Returns:
        For a named field with ``Kids``: a list with the result for each
        kid.  Otherwise a ``(name, value)`` tuple where the value is the
        ``AS`` entry if it resolves to something, else the ``V`` entry;
        ``PSLiteral`` values are converted to their string name.
    """
    kids = field.get('Kids', None)
    own_name = field.get('T')
    if own_name is None:
        full_name = T
    elif T != '':
        # Prefix with the parent's name.
        full_name = T + '.' + own_name
    else:
        full_name = own_name

    if kids and full_name:
        return [load_fields_from_pdf(resolve1(kid), full_name)
                for kid in kids]

    # Some field types, like signatures, need extra resolving.
    value = resolve1(field.get('AS'))
    if value is None:
        value = resolve1(field.get('V'))
    # A PSLiteral is turned into a plain string via literal_name.
    if isinstance(value, PSLiteral):
        return (full_name, literal_name(value))
    return (full_name, resolve1(value))
def do_keyword(self, pos, token):
    """Process a keyword token, assembling inline images (BI ... ID ... EI).

    ``BI`` opens an 'inline' context.  ``ID`` closes it, builds a
    dictionary from the collected key/value operands, reads the inline
    image data and pushes a ``PDFStream`` followed by the ``EI`` keyword.
    A ``PSTypeError`` during that step is re-raised only when ``STRICT`` is
    set.  Every other keyword is pushed back unchanged.
    """
    if token is self.KEYWORD_BI:
        # Inline image within a content stream.
        self.start_type(pos, 'inline')
        return
    if token is not self.KEYWORD_ID:
        self.push((pos, token))
        return
    try:
        (_, objs) = self.end_type('inline')
        if len(objs) % 2 != 0:
            raise PSTypeError('Invalid dictionary construct: %r' % objs)
        attrs = dict((literal_name(key), value)
                     for (key, value) in choplist(2, objs))
        (pos, data) = self.get_inline_data(pos + len('ID '))
        stream = PDFStream(attrs, data)
        self.push((pos, stream))
        self.push((pos, self.KEYWORD_EI))
    except PSTypeError:
        if STRICT:
            raise
    return
def get_font(self, objid, spec):
    """Return the font for *spec*, creating it unless *objid* is cached.

    The font class is selected by ``spec['Subtype']``.  With ``STRICT``
    set, a wrong ``Type``, a missing ``Subtype`` or an unknown subtype
    raises ``PDFFontError``; otherwise a Type1 font is assumed.  A Type0
    font is built from its first descendant font with the parent's
    ``Encoding``/``ToUnicode`` copied in.  The result is stored under
    *objid* when *objid* is truthy.
    """
    if objid and objid in self.fonts:
        font = self.fonts[objid]
    else:
        if STRICT:
            if spec['Type'] is not LITERAL_FONT:
                raise PDFFontError('Type is not /Font')
        # Determine the subtype, defaulting to Type1 when unspecified.
        if 'Subtype' not in spec:
            if STRICT:
                raise PDFFontError('Font Subtype is not specified.')
            subtype = 'Type1'
        else:
            subtype = literal_name(spec['Subtype'])

        if subtype == 'Type0':
            # Composite font: delegate to the first descendant font.
            descendants = list_value(spec['DescendantFonts'])
            assert descendants
            subspec = dict_value(descendants[0]).copy()
            for key in ('Encoding', 'ToUnicode'):
                if key in spec:
                    subspec[key] = resolve1(spec[key])
            font = self.get_font(None, subspec)
        elif subtype in ('Type1', 'MMType1'):
            font = PDFType1Font(self, spec)
        elif subtype == 'TrueType':
            font = PDFTrueTypeFont(self, spec)
        elif subtype == 'Type3':
            font = PDFType3Font(self, spec)
        elif subtype in ('CIDFontType0', 'CIDFontType2'):
            font = PDFCIDFont(self, spec)
        else:
            if STRICT:
                raise PDFFontError('Invalid Font spec: %r' % spec)
            # Original author's note on this fallback: "this is so wrong!"
            font = PDFType1Font(self, spec)
        if objid:
            self.fonts[objid] = font
    return font
def load_fields_from_pdf(field, T=''):
    """Return the ``(name, value)`` pair of a single form field.

    Args:
        field: field dictionary (read with ``.get``).
        T: fully qualified name of the parent field, ``''`` at the top.

    Returns:
        A tuple ``(name, value)``.  The name is ``T`` when the field has no
        ``T`` entry, otherwise the field's own name prefixed with ``T`` and
        a dot when ``T`` is non-empty.  The value is the ``AS`` entry if it
        resolves to something, else the ``V`` entry; ``PSLiteral`` values
        are converted to their string name.

    ``Kids`` are deliberately not descended into:
      1. several fields may share the same field name;
      2. for buttons the parent already carries the ``V`` value.
    """
    own_name = field.get('T')
    if own_name is None:
        name = T
    elif T != '':
        # Prefix with the parent's name.
        name = T + '.' + own_name
    else:
        name = own_name

    # Some field types, like signatures, need extra resolving.
    value = resolve1(field.get('AS'))
    if value is None:
        value = resolve1(field.get('V'))
    # A PSLiteral is turned into a plain string via literal_name.
    if isinstance(value, PSLiteral):
        return (name, literal_name(value))
    return (name, resolve1(value))
def __initializePTree(self, doc):
    """Fill the page tree with a node per page, font and image of *doc*.

    The root node is labelled "Document".  Each page becomes a child whose
    data is the page id; the page number -> page id mapping is recorded in
    ``self.__pagenos``.  Under each page, one node is added per entry of
    the page's ``Font`` resources and one per ``XObject`` resource whose
    ``Subtype`` is ``Image``.  Image node data is the tuple
    ``(stream, page number, objid, 0)``.

    NOTE(review): SOURCE arrived with its line structure collapsed; the
    nesting below (mask bookkeeping and the final loop placed inside the
    per-page loop) is a reconstruction - confirm against the original file.
    """
    self.__ptree.label = "Document"
    i = 1  # 1-based page counter
    for p in doc.get_pages():
        child = PTree()
        child.label = "Page " + str(i)
        self.__pagenos.setdefault(i, p.pageid)
        i += 1
        child.data = p.pageid
        self.__ptree.children.append(child)
        child.parent = self.__ptree
        fonts = dict_value(p.resources.get("Font"))
        images = dict_value(p.resources.get("XObject"))
        for (fontid, spec) in fonts.iteritems():
            # TODO: will this always be a reference?
            objid = spec.objid
            spec = dict_value(spec)
            child2 = PTree()
            child2.label = "Font " + str(fontid)
            child2.data = Font.new(spec, None, p.pageid, child2, gui=self.__gui, map=self.__map)
            assert (child2.data.name != None)
            child.children.append(child2)
            child2.parent = child
        # maskMap: objid of a mask stream -> objid of the image that names it
        # in its "Mask" entry.  masks: image nodes that are image masks.
        maskMap = {}
        masks = []
        def __isMask(spec):
            # True when the stream's ImageMask entry equals 1.
            spec = stream_value(spec)
            if spec.get("ImageMask") == None:
                return False
            else:
                return num_value(spec.get("ImageMask")) == 1
        def __hasMask(spec):
            # Records the mask -> image relation in maskMap when the "Mask"
            # entry is accepted by stream_value2.  Returns False in the other
            # two cases and None (implicitly) after recording; the caller
            # below ignores the return value.
            if stream_value(spec).get("Mask") == None:
                return False
            elif stream_value2(stream_value(spec).get("Mask")) != None:
                # TODO: NOTE pdfminer does not handle genno
                maskMap.setdefault(stream_value(spec).get("Mask").objid, spec.objid)
            else:
                return False
        for (objname, spec) in images.iteritems():
            # TODO: will this always be a reference?
            objid = spec.objid
            isMask = False
            if __isMask(spec):
                isMask = True
            spec = stream_value(spec)
            __hasMask(spec)
            if literal_name(spec.get("Subtype")) == "Image":
                child2 = PTree()
                child2.label = "Image " + str(objname)
                # i was already incremented above, so i - 1 is this page.
                child2.data = (spec, i - 1, objid, 0)
                child.children.append(child2)  # TODO: NOTE pdfminer does not support genno
                child2.parent = child
                if isMask:
                    masks.append(child2)
        for mask in masks:
            # Replace the mask node's own objid with the objid of the image
            # that refers to it, when such an image was recorded.
            (a, b, c, d) = mask.data
            objid = maskMap.get(c)
            if objid != None:
                mask.data = (a, b, objid, d)
def get_font(self, objid, spec):
    """Return the font from ``PDFResourceManager`` with a text-extraction fix.

    Corrects broken fonts: a font should rely either on its Encoding or on
    a Unicode map for text extraction.  When the font's ``Encoding`` is
    ``WinAnsiEncoding`` the ``unicode_map`` attribute is cleared.  Fonts
    without an ``Encoding`` entry are returned unchanged.
    """
    font = PDFResourceManager.get_font(self, objid, spec)
    # Encoding may be absent from the font dictionary; look it up without
    # raising so such fonts pass through untouched.
    encoding = spec.get('Encoding')
    if encoding is not None and literal_name(encoding) == 'WinAnsiEncoding':
        font.unicode_map = None
    return font
def do_keyword(self, pos, token):
    """Interpret one CMap keyword and update ``self.cmap`` accordingly.

    ``begincmap``/``endcmap`` switch CMap mode on and off; outside CMap
    mode every other keyword is ignored.  Inside it:

    * ``def`` stores an attribute, ``usecmap`` copies another CMap,
    * ``endcidrange``/``endcidchar`` register code -> CID mappings,
    * ``endbfrange``/``endbfchar`` register CID -> code mappings,
    * the ``begin...`` keywords and the codespace/notdef range keywords
      just discard the operand stack,
    * any other keyword is pushed back as an operand.
    """
    name = token.name
    if name == 'begincmap':
        self.in_cmap = True
        self.popall()
        return
    if name == 'endcmap':
        self.in_cmap = False
        return
    if not self.in_cmap:
        return

    if name in ('begincodespacerange', 'endcodespacerange',
                'begincidrange', 'begincidchar',
                'beginbfrange', 'beginbfchar',
                'beginnotdefrange', 'endnotdefrange'):
        # Nothing to record; just clear whatever operands were collected.
        self.popall()
    elif name == 'def':
        try:
            ((_, key), (_, value)) = self.pop(2)
            self.cmap.attrs[literal_name(key)] = value
        except PSSyntaxError:
            pass
    elif name == 'usecmap':
        try:
            ((_, cmapname), ) = self.pop(1)
            self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
        except PSSyntaxError:
            pass
    elif name == 'endcidrange':
        operands = [obj for (_, obj) in self.popall()]
        for (start, end, cid) in choplist(3, operands):
            if not (isinstance(start, str) and isinstance(end, str)
                    and isinstance(cid, int) and len(start) == len(end)):
                continue
            # Only the last four bytes may vary within a range.
            prefix = start[:-4]
            if prefix != end[:-4]:
                continue
            tail = start[-4:]
            first = nunpack(tail)
            last = nunpack(end[-4:])
            width = len(tail)
            for offset in xrange(last - first + 1):
                code = prefix + pack('>L', first + offset)[-width:]
                self.cmap.register_code2cid(code, cid + offset)
    elif name == 'endcidchar':
        operands = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, operands):
            if isinstance(code, str) and isinstance(cid, str):
                self.cmap.register_code2cid(code, nunpack(cid))
    elif name == 'endbfrange':
        operands = [obj for (_, obj) in self.popall()]
        for (start, end, code) in choplist(3, operands):
            if not (isinstance(start, str) and isinstance(end, str)
                    and len(start) == len(end)):
                continue
            first = nunpack(start)
            last = nunpack(end)
            count = last - first + 1
            if isinstance(code, list):
                # One explicit destination per CID in the range.
                for offset in xrange(count):
                    self.cmap.register_cid2code(first + offset, code[offset])
            else:
                # A single destination whose last four bytes are incremented.
                tail = code[-4:]
                base = nunpack(tail)
                prefix = code[:-4]
                width = len(tail)
                for offset in xrange(count):
                    target = prefix + pack('>L', base + offset)[-width:]
                    self.cmap.register_cid2code(first + offset, target)
    elif name == 'endbfchar':
        operands = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, operands):
            if isinstance(cid, str) and isinstance(code, str):
                self.cmap.register_cid2code(nunpack(cid), code)
    else:
        self.push((pos, token))
    return
def do_ri(self, intent):
    """Handle the ri operator and validate the rendering intent it sets.

    Delegates to ``PDFPageInterpreter.do_ri`` first, then passes the
    intent's name and a descriptive message to the validator.
    """
    PDFPageInterpreter.do_ri(self, intent)
    intent_name = literal_name(intent)
    message = ("Rendering intent specified with ri operation has value "
               + intent_name + ".")
    self.__validator.validateRenderingIntent(intent_name, message)
def do_keyword(self, pos, token):
    """Handle a keyword found while parsing a CMap.

    ``begincmap`` and ``endcmap`` toggle ``self.in_cmap``; while it is
    false all other keywords are dropped.  Within a CMap the operand stack
    is consumed to register attributes (``def``), inherited CMaps
    (``usecmap``), code -> CID entries (``endcidrange``, ``endcidchar``)
    and CID -> code entries (``endbfrange``, ``endbfchar``).  Section
    openers and the codespace/notdef range keywords only clear the stack.
    Unrecognised keywords are pushed back as operands.
    """
    name = token.name
    if name == 'begincmap':
        self.in_cmap = True
        self.popall()
        return
    if name == 'endcmap':
        self.in_cmap = False
        return
    if not self.in_cmap:
        return

    discard_only = (
        'begincodespacerange', 'endcodespacerange',
        'begincidrange', 'begincidchar',
        'beginbfrange', 'beginbfchar',
        'beginnotdefrange', 'endnotdefrange',
    )
    if name in discard_only:
        self.popall()
        return

    if name == 'def':
        try:
            ((_, k), (_, v)) = self.pop(2)
            self.cmap.attrs[literal_name(k)] = v
        except PSSyntaxError:
            pass
        return

    if name == 'usecmap':
        try:
            ((_, cmapname),) = self.pop(1)
            self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
        except PSSyntaxError:
            pass
        return

    if name == 'endcidrange':
        args = [obj for (_, obj) in self.popall()]
        for (lo, hi, cid) in choplist(3, args):
            valid = (isinstance(lo, str) and isinstance(hi, str)
                     and isinstance(cid, int) and len(lo) == len(hi))
            if not valid:
                continue
            # The leading bytes must agree; only the last four may differ.
            head = lo[:-4]
            if head != hi[:-4]:
                continue
            lo_tail = lo[-4:]
            lo_num = nunpack(lo_tail)
            hi_num = nunpack(hi[-4:])
            nbytes = len(lo_tail)
            for step in xrange(hi_num-lo_num+1):
                code = head + pack('>L', lo_num+step)[-nbytes:]
                self.cmap.register_code2cid(code, cid+step)
        return

    if name == 'endcidchar':
        args = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, args):
            if isinstance(code, str) and isinstance(cid, str):
                self.cmap.register_code2cid(code, nunpack(cid))
        return

    if name == 'endbfrange':
        args = [obj for (_, obj) in self.popall()]
        for (lo, hi, code) in choplist(3, args):
            valid = (isinstance(lo, str) and isinstance(hi, str)
                     and len(lo) == len(hi))
            if not valid:
                continue
            lo_num = nunpack(lo)
            hi_num = nunpack(hi)
            if isinstance(code, list):
                # Explicit list: one destination per CID in the range.
                for step in xrange(hi_num-lo_num+1):
                    self.cmap.register_cid2code(lo_num+step, code[step])
            else:
                # Single destination; its last four bytes are incremented.
                code_tail = code[-4:]
                base = nunpack(code_tail)
                head = code[:-4]
                nbytes = len(code_tail)
                for step in xrange(hi_num-lo_num+1):
                    dest = head + pack('>L', base+step)[-nbytes:]
                    self.cmap.register_cid2code(lo_num+step, dest)
        return

    if name == 'endbfchar':
        args = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, args):
            if isinstance(cid, str) and isinstance(code, str):
                self.cmap.register_cid2code(nunpack(cid), code)
        return

    self.push((pos, token))
    return
def literal_name_none(x):
    """Return ``literal_name(x)``, passing ``None`` through unchanged."""
    # Identity test: only the None singleton is passed through.
    if x is None:
        return x
    return literal_name(x)
def do_cs(self, name):
    """Handle the cs operator: select the colour space stored in ``self.ncs``.

    If *name* is not in the colour-space map, ``PDFInterpreterError`` is
    raised when ``STRICT`` is set; otherwise the current colour space is
    left unchanged.
    """
    try:
        self.ncs = self.csmap[literal_name(name)]
    except KeyError:
        if STRICT:
            raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
    return
def do_CS(self, name):
    """Handle the CS operator: select the colour space stored in ``self.scs``.

    If *name* is not in the colour-space map, ``PDFInterpreterError`` is
    raised when ``STRICT`` is set; otherwise the current colour space is
    left unchanged.
    """
    try:
        self.scs = self.csmap[literal_name(name)]
    except KeyError:
        if STRICT:
            raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
    return
def getValue(self, props, key):
    """Return the value stored under *key* in a marked-content property list.

    *props* is either the property dictionary itself (given inline in the
    content stream) or a name that selects a dictionary from the
    ``Properties`` entry of ``self.resources``.

    Returns ``None`` when the key is absent, or when *props* is a name and
    the resources have no ``Properties`` entry.
    """
    try:
        # Dictionary given directly in the content stream.
        return props.get(key)
    except AttributeError:
        # Not a dictionary: look the property list up in the resources.
        properties = self.resources.get("Properties")
        if properties is None:
            return None
        entry = properties.get(literal_name(props))
        return dict_value(entry).get(key)