def init_params(self): self.v = int_value(self.param.get('V', 0)) self.r = int_value(self.param['R']) self.p = int_value(self.param['P']) self.o = str_value(self.param['O']) self.u = str_value(self.param['U']) self.length = int_value(self.param.get('Length', 40))
def read_xref_from(self, start, xrefs): """Reads XRefs from the given location.""" self.seek(start) self.reset() try: (pos, token) = self.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') if 2 <= self.debug: print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token) if isinstance(token, int): # XRefStream: PDF-1.5 self.seek(pos) self.reset() xref = PDFXRefStream() xref.load(self, debug=self.debug) else: if token is self.KEYWORD_XREF: self.nextline() xref = PDFXRef() xref.load(self, debug=self.debug) xrefs.append(xref) trailer = xref.get_trailer() if 1 <= self.debug: print >>sys.stderr, 'trailer: %r' % trailer if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(pos, xrefs) if 'Prev' in trailer: # find previous xref pos = int_value(trailer['Prev']) self.read_xref_from(pos, xrefs) return
def read_xref_from(self, parser, start, xrefs): """Reads XRefs from the given location.""" parser.seek(start) parser.reset() try: (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') logging.info('read_xref_from: start=%d, token=%r' % (start, token)) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) parser.reset() xref = PDFXRefStream() xref.load(parser) else: if token is parser.KEYWORD_XREF: parser.nextline() xref = PDFXRef() xref.load(parser) xrefs.append(xref) trailer = xref.get_trailer() logging.info('trailer: %r' % trailer) if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) if 'Prev' in trailer: # find previous xref pos = int_value(trailer['Prev']) self.read_xref_from(parser, pos, xrefs) return
def read_xref_from(self, parser, start, xrefs): """Reads XRefs from the given location.""" parser.seek(start) parser.reset() try: (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') if 2 <= self.debug: print >> sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) parser.reset() xref = PDFXRefStream() xref.load(parser, debug=self.debug) else: if token is parser.KEYWORD_XREF: parser.nextline() xref = PDFXRef() xref.load(parser, debug=self.debug) xrefs.append(xref) trailer = xref.get_trailer() if 1 <= self.debug: print >> sys.stderr, 'trailer: %r' % trailer if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) if 'Prev' in trailer: # find previous xref pos = int_value(trailer['Prev']) self.read_xref_from(parser, pos, xrefs) return
def initialize(self, password=''): """Perform the initialization with a given password. This step is mandatory even if there's no password associated with the document. """ if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True return (docid, param) = self.encryption if literal_name(param.get('Filter')) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) V = int_value(param.get('V', 0)) if not (V == 1 or V == 2): raise PDFEncryptionError('Unknown algorithm: param=%r' % param) length = int_value(param.get('Length', 40)) # Key length (bits) O = str_value(param['O']) R = int_value(param['R']) # Revision if 5 <= R: raise PDFEncryptionError('Unknown revision: %r' % R) U = str_value(param['U']) P = int_value(param['P']) self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) # Algorithm 3.2 password = (password + self.PASSWORD_PADDING)[:32] # 1 md5hash = md5.md5(password) # 2 md5hash.update(O) # 3 md5hash.update(struct.pack('<l', P)) # 4 md5hash.update(docid[0]) # 5 if 4 <= R: # 6 raise PDFNotImplementedError('Revision 4 encryption is currently unsupported') if 3 <= R: # 8 for _ in xrange(50): md5hash = md5.md5(md5hash.digest()[:length / 8]) key = md5hash.digest()[:length / 8] if R == 2: # Algorithm 3.4 u1 = Arcfour(key).process(self.PASSWORD_PADDING) elif R == 3: # Algorithm 3.5 md5hash = md5.md5(self.PASSWORD_PADDING) # 2 md5hash.update(docid[0]) # 3 x = Arcfour(key).process(md5hash.digest()[:16]) # 4 for i in xrange(1, 19 + 1): k = ''.join(chr(ord(c) ^ i) for c in key) x = Arcfour(k).process(x) u1 = x + x # 32bytes total if R == 2: is_authenticated = (u1 == U) else: is_authenticated = (u1[:16] == U[:16]) if not is_authenticated: raise PDFPasswordIncorrect self.decrypt_key = key self.decipher = self.decrypt_rc4 # XXX may be AES
def initialize(self, password=''): if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True self._initialized = True return (docid, param) = self.encryption if literal_name(param['Filter']) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) V = int_value(param.get('V', 0)) if not (V == 1 or V == 2): raise PDFEncryptionError('Unknown algorithm: param=%r' % param) length = int_value(param.get('Length', 40)) # Key length (bits) O = str_value(param['O']) R = int_value(param['R']) # Revision if 5 <= R: raise PDFEncryptionError('Unknown revision: %r' % R) U = str_value(param['U']) P = int_value(param['P']) self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) # Algorithm 3.2 password = (password + self.PASSWORD_PADDING)[:32] # 1 hash = md5.md5(password) # 2 hash.update(O) # 3 hash.update(struct.pack('<l', P)) # 4 hash.update(docid[0]) # 5 if 4 <= R: # 6 raise PDFNotImplementedError( 'Revision 4 encryption is currently unsupported') if 3 <= R: # 8 for _ in xrange(50): hash = md5.md5(hash.digest()[:length / 8]) key = hash.digest()[:length / 8] if R == 2: # Algorithm 3.4 u1 = Arcfour(key).process(self.PASSWORD_PADDING) elif R == 3: # Algorithm 3.5 hash = md5.md5(self.PASSWORD_PADDING) # 2 hash.update(docid[0]) # 3 x = Arcfour(key).process(hash.digest()[:16]) # 4 for i in xrange(1, 19 + 1): k = ''.join(chr(ord(c) ^ i) for c in key) x = Arcfour(k).process(x) u1 = x + x # 32bytes total if R == 2: is_authenticated = (u1 == U) else: is_authenticated = (u1[:16] == U[:16]) if not is_authenticated: raise PDFPasswordIncorrect self.decrypt_key = key self.decipher = self.decrypt_rc4 # XXX may be AES self._initialized = True return
def __init__(self, rsrcmgr, spec): firstchar = int_value(spec.get("FirstChar", 0)) lastchar = int_value(spec.get("LastChar", 0)) widths = list_value(spec.get("Widths", [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) if "FontDescriptor" in spec: descriptor = dict_value(spec["FontDescriptor"]) else: descriptor = {"Ascent": 0, "Descent": 0, "FontBBox": spec["FontBBox"]} PDFSimpleFont.__init__(self, descriptor, widths, spec) self.matrix = tuple(list_value(spec.get("FontMatrix"))) (_, self.descent, _, self.ascent) = self.bbox (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1)) return
def __init__(self, rsrcmgr, spec): firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 0)) widths = list_value(spec.get('Widths', [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) if 'FontDescriptor' in spec: descriptor = dict_value(spec['FontDescriptor']) else: descriptor = {'Ascent': 0, 'Descent': 0, 'FontBBox': spec['FontBBox']} PDFSimpleFont.__init__(self, descriptor, widths, spec) self.matrix = tuple(list_value(spec.get('FontMatrix'))) (_, self.descent, _, self.ascent) = self.bbox (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1))
def __init__(self, rsrcmgr, spec): firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 0)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths)) if 'FontDescriptor' in spec: descriptor = dict_value(spec['FontDescriptor']) else: descriptor = {'Ascent':0, 'Descent':0, 'FontBBox':spec['FontBBox']} PDFSimpleFont.__init__(self, descriptor, widths, spec) self.matrix = tuple(list_value(spec.get('FontMatrix'))) (_,self.descent,_,self.ascent) = self.bbox (self.hscale,self.vscale) = apply_matrix_norm(self.matrix, (1,1)) return
def __init__(self, doc, pageid, attrs): """Initialize a page object. doc: a PDFDocument object. pageid: any Python object that can uniquely identify the page. attrs: a dictionary of page attributes. """ self.doc = doc self.pageid = pageid self.attrs = dict_value(attrs) self.lastmod = resolve1(self.attrs.get('LastModified')) self.resources = resolve1(self.attrs['Resources']) self.mediabox = resolve1(self.attrs['MediaBox']) if 'CropBox' in self.attrs: self.cropbox = resolve1(self.attrs['CropBox']) else: self.cropbox = self.mediabox self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360 self.annots = self.attrs.get('Annots') self.beads = self.attrs.get('B') if 'Contents' in self.attrs: contents = resolve1(self.attrs['Contents']) else: contents = [] if not isinstance(contents, list): contents = [ contents ] self.contents = contents return
def __init__(self, doc, pageid, attrs): """Initialize a page object. doc: a PDFDocument object. pageid: any Python object that can uniquely identify the page. attrs: a dictionary of page attributes. """ self.doc = doc self.pageid = pageid self.attrs = dict_value(attrs) self.lastmod = resolve1(self.attrs.get('LastModified')) self.resources = resolve1(self.attrs['Resources']) self.mediabox = resolve1(self.attrs['MediaBox']) if 'CropBox' in self.attrs: self.cropbox = resolve1(self.attrs['CropBox']) else: self.cropbox = self.mediabox self.rotate = (int_value(self.attrs.get('Rotate', 0)) + 360) % 360 self.annots = self.attrs.get('Annots') self.beads = self.attrs.get('B') if 'Contents' in self.attrs: contents = resolve1(self.attrs['Contents']) else: contents = [] if not isinstance(contents, list): contents = [contents] self.contents = contents return
def do_keyword(self, pos, token): """Handles PDF-related keywords.""" if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): self.add_results(*self.pop(1)) elif token is self.KEYWORD_ENDOBJ: self.add_results(*self.pop(4)) elif token is self.KEYWORD_NULL: # null object self.push((pos, None)) elif token is self.KEYWORD_R: # reference to indirect object try: ((_, objid), (_, genno)) = self.pop(2) (objid, genno) = (int(objid), int(genno)) obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: pass elif token is self.KEYWORD_STREAM: # stream object ((_, dic),) = self.pop(1) dic = dict_value(dic) objlen = 0 if not self.fallback: try: objlen = int_value(dic['Length']) except KeyError: handle_error(PDFSyntaxError, '/Length is undefined: %r' % dic) self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: handle_error(PDFSyntaxError, 'Unexpected EOF') return pos += len(line) self.fp.seek(pos) data = self.fp.read(objlen) self.seek(pos + objlen) while 1: try: (linepos, line) = self.nextline() except PSEOF: handle_error(PDFSyntaxError, 'Unexpected EOF') break if 'endstream' in line: i = line.index('endstream') objlen += i data += line[:i] break objlen += len(line) data += line self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) else: # others self.push((pos, token))
def __init__(self, rsrc, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) ) PDFSimpleFont.__init__(self, descriptor, widths, spec) return
def __init__(self, rsrc, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) return
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: handle_error(PDFFontError, 'BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0] * 256)) widths = dict((i+firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) if 'Encoding' not in spec and 'FontFile' in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(StringIO(data)) self.cid2unicode = parser.get_encoding()
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0)) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0))) self.hscale = self.vscale = .001
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec["BaseFont"]) except KeyError: if STRICT: raise PDFFontError("BaseFont is missing") self.basefont = "unknown" try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get("FontDescriptor", {})) firstchar = int_value(spec.get("FirstChar", 0)) lastchar = int_value(spec.get("LastChar", 255)) widths = list_value(spec.get("Widths", [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) if "Encoding" not in spec and "FontFile" in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get("FontFile")) length1 = int_value(self.fontfile["Length1"]) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(StringIO(data)) self.cid2unicode = parser.get_encoding() return
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) ) PDFSimpleFont.__init__(self, descriptor, widths, spec) if 'Encoding' not in spec and 'FontFile' in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(StringIO(data)) self.cid2unicode = parser.get_encoding() return
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0)) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0))) self.hscale = self.vscale = .001 return
def _initialize_password(self, password=''): (docid, param) = self.encryption if literal_name(param.get('Filter')) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) v = int_value(param.get('V', 0)) factory = self.security_handler_registry.get(v) if factory is None: raise PDFEncryptionError('Unknown algorithm: param=%r' % param) handler = factory(docid, param, password) self.decipher = handler.decrypt self.is_printable = handler.is_printable() self.is_modifiable = handler.is_modifiable() self.is_extractable = handler.is_extractable() self._parser.fallback = False # need to read streams with exact length return
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get("FontName", "unknown")) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get("Flags", 0)) self.ascent = num_value(descriptor.get("Ascent", 0)) self.descent = num_value(descriptor.get("Descent", 0)) self.italic_angle = num_value(descriptor.get("ItalicAngle", 0)) self.default_width = default_width or num_value(descriptor.get("MissingWidth", 0)) self.leading = num_value(descriptor.get("Leading", 0)) self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0))) self.hscale = self.vscale = 0.001 return
def do_keyword(self, pos, token): """Handles PDF-related keywords.""" if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): self.add_results(*self.pop(1)) elif token is self.KEYWORD_ENDOBJ: self.add_results(*self.pop(4)) elif token is self.KEYWORD_NULL: # null object self.push((pos, None)) elif token is self.KEYWORD_R: # reference to indirect object try: ((_,objid), (_,genno)) = self.pop(2) (objid, genno) = (int(objid), int(genno)) obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: pass elif token is self.KEYWORD_STREAM: # stream object ((_,dic),) = self.pop(1) dic = dict_value(dic) objlen = 0 if not self.fallback: try: objlen = int_value(dic['Length']) except KeyError: if STRICT: raise PDFSyntaxError('/Length is undefined: %r' % dic) self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') return pos += len(line) self.fp.seek(pos) data = self.fp.read(objlen) self.seek(pos+objlen) while 1: try: (linepos, line) = self.nextline() except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') break if 'endstream' in line: i = line.index('endstream') objlen += i data += line[:i] break objlen += len(line) data += line self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary if 2 <= self.debug: print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ (pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) else: # others self.push((pos, token)) return
def do_keyword(self, pos, token): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): self.add_results(*self.pop(1)) return if token is self.KEYWORD_ENDOBJ: self.add_results(*self.pop(4)) return if token is self.KEYWORD_R: # reference to indirect object try: ((_,objid), (_,genno)) = self.pop(2) (objid, genno) = (int(objid), int(genno)) obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: pass return if token is self.KEYWORD_STREAM: # stream object ((_,dic),) = self.pop(1) dic = dict_value(dic) try: objlen = int_value(dic['Length']) except KeyError: if STRICT: raise PDFSyntaxError('/Length is undefined: %r' % dic) objlen = 0 self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') return pos += len(line) self.fp.seek(pos) data = self.fp.read(objlen) self.seek(pos+objlen) while 1: try: (linepos, line) = self.nextline() except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') break if 'endstream' in line: i = line.index('endstream') objlen += i data += line[:i] break objlen += len(line) data += line self.seek(pos+objlen) if 1 <= self.debug: print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ (pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) return # others self.push((pos, token)) return
def __init__(self, doc, pageid, attrs): """Initialize a page object. doc: a PDFDocument object. pageid: any Python object that can uniquely identify the page. attrs: a dictionary of page attributes. """ self.doc = doc self.pageid = pageid self.attrs = dict_value(attrs) self.lastmod = resolve1(self.attrs.get('LastModified')) self.resources = resolve1(self.attrs['Resources']) self.mediabox = resolve1(self.attrs['MediaBox']) if 'CropBox' in self.attrs: self.cropbox = resolve1(self.attrs['CropBox']) else: self.cropbox = self.mediabox self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360 self.annots = list_value(self.attrs.get('Annots')) self.widgets = [] def get_widget_type(obj): if 'FT' in obj: if obj.get('FT') is LITERAL_TX: return 'text' elif obj.get('FT') is LITERAL_CH or obj.get('FT') is LITERAL_BTN: return 'checkbox' elif obj.get('FT') is LITERAL_SIG: return 'signature' else: return None def create_widget(obj): if 'Subtype' in obj and obj.get('Subtype') is LITERAL_WIDGET: wtype = get_widget_type(obj) if wtype: return LTWidget(list_value(obj.get('Rect')), wtype, str_value(obj.get('T'))) elif 'Parent' in obj: p = resolve1(obj.get('Parent')) return LTWidget(list_value(obj.get('Rect')), get_widget_type(p), str_value(p.get('T'))) def find_widgets(obj_list, widgets): for an in obj_list: try: obj = resolve1(an) except PDFObjectNotFound: print 'object not found' print an widgets.append(create_widget(obj)) find_widgets(self.annots, self.widgets) self.beads = self.attrs.get('B') if 'Contents' in self.attrs: contents = resolve1(self.attrs['Contents']) else: contents = [] if not isinstance(contents, list): contents = [contents] self.contents = contents return