def initialize(self, password=''):
    """Derive the decryption key from *password* and set permission flags.

    If ``self.encryption`` is falsy, the three permission flags
    (``is_printable``, ``is_modifiable``, ``is_extractable``) are set to
    True and the method returns without touching anything else.

    Otherwise ``self.encryption`` is unpacked as ``(docid, param)``.  The
    key is computed from the padded password, the ``O`` and ``P`` entries
    and ``docid[0]`` (Algorithm 3.2), then verified against the ``U``
    entry (Algorithm 3.4 for revision 2, Algorithm 3.5 for revision 3).
    On success ``self.decrypt_key`` is set to the key and
    ``self.decipher`` to ``self.decrypt_rc4``.

    Raises:
        PDFEncryptionError: the filter is not 'Standard', ``V`` is not
            1 or 2, or the revision ``R`` is outside 2..4.
        PDFNotImplementedError: the revision is 4.
        PDFPasswordIncorrect: the computed value does not match ``U``.
    """
    if not self.encryption:
        self.is_printable = self.is_modifiable = self.is_extractable = True
        return
    (docid, param) = self.encryption
    if literal_name(param.get('Filter')) != 'Standard':
        raise PDFEncryptionError('Unknown filter: param=%r' % param)
    V = int_value(param.get('V', 0))
    if V not in (1, 2):
        raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
    length = int_value(param.get('Length', 40))  # Key length (bits)
    O = str_value(param['O'])
    R = int_value(param['R'])  # Revision
    # Revisions below 2 have no verification branch further down; reject
    # them here instead of failing later on an unbound local.
    if R < 2 or 5 <= R:
        raise PDFEncryptionError('Unknown revision: %r' % R)
    U = str_value(param['U'])
    P = int_value(param['P'])
    self.is_printable = bool(P & 4)
    self.is_modifiable = bool(P & 8)
    self.is_extractable = bool(P & 16)
    # Floor division keeps the slice bound an integer under any division mode.
    key_size = length // 8
    # Algorithm 3.2
    password = (password + self.PASSWORD_PADDING)[:32]  # 1
    digest = md5.md5(password)  # 2
    digest.update(O)  # 3
    # P is packed as an unsigned 4-byte little-endian integer.  Masking
    # yields the same bytes as '<l' for negative values while also
    # accepting values stored as unsigned (> 2**31 - 1).
    digest.update(struct.pack('<L', P & 0xffffffff))  # 4
    digest.update(docid[0])  # 5
    if 4 <= R:  # 6
        raise PDFNotImplementedError(
            'Revision 4 encryption is currently unsupported')
    if 3 <= R:  # 8
        for _ in range(50):
            digest = md5.md5(digest.digest()[:key_size])
    key = digest.digest()[:key_size]
    if R == 2:
        # Algorithm 3.4
        u1 = Arcfour(key).process(self.PASSWORD_PADDING)
        is_authenticated = (u1 == U)
    else:
        # Algorithm 3.5 (R == 3 is the only remaining revision here)
        digest = md5.md5(self.PASSWORD_PADDING)  # 2
        digest.update(docid[0])  # 3
        x = Arcfour(key).process(digest.digest()[:16])  # 4
        for i in range(1, 19 + 1):
            # Each round uses the key with every byte XORed by the round number.
            k = ''.join(chr(ord(c) ^ i) for c in key)
            x = Arcfour(k).process(x)
        u1 = x + x  # 32bytes total
        # Only the first 16 bytes are compared for this revision.
        is_authenticated = (u1[:16] == U[:16])
    if not is_authenticated:
        raise PDFPasswordIncorrect
    self.decrypt_key = key
    self.decipher = self.decrypt_rc4  # XXX may be AES
    return
def search(entry, level):
    """Yield ``(level, title, dest, action, se)`` tuples for an entry chain.

    Starting at *entry*, each entry is resolved with ``dict_value``.  An
    entry is reported when it has a 'Title' and at least one of 'A' or
    'Dest'; its title is decoded with ``decode_text``.  If the entry has
    both 'First' and 'Last', the chain starting at 'First' is reported
    next at ``level + 1``.  The walk then continues with 'Next' at the
    same *level* until an entry has no 'Next'.

    (Presumably these are PDF outline item dictionaries -- confirm
    against the caller.)

    Siblings are followed with a loop rather than recursion so that a
    long 'Next' chain neither nests one generator per sibling nor runs
    into the interpreter's recursion limit; only child levels recurse.
    """
    while True:
        entry = dict_value(entry)
        if 'Title' in entry and ('A' in entry or 'Dest' in entry):
            title = decode_text(str_value(entry['Title']))
            dest = entry.get('Dest')
            action = entry.get('A')
            se = entry.get('SE')
            yield (level, title, dest, action, se)
        if 'First' in entry and 'Last' in entry:
            for x in search(entry['First'], level + 1):
                yield x
        if 'Next' not in entry:
            return
        # Move on to the next sibling at the same level.
        entry = entry['Next']