def load(self, parser, debug=0):
    """Populate ``self.offsets`` by scanning the parser input line by line.

    Scanning starts at position 0 and ends at the first line beginning with
    ``'trailer'`` (which is passed on to ``self.load_trailer``) or when the
    parser raises ``PSEOF``.

    For every line matched by ``self.PDFOBJ_CUE`` an entry
    ``offsets[objid] = (None, pos, genno)`` is stored, where ``pos`` is the
    position reported by ``parser.nextline()``.  When the object parsed at
    that position is a ``PDFStream`` whose ``Type`` is ``LITERAL_OBJSTM``,
    the objects read from its data are also registered as
    ``offsets[objid1] = (objid, index, 0)``.

    :param parser: object providing ``seek``, ``nextline`` and ``nextobject``.
    :param debug: when ``>= 1`` the loaded trailer is printed to stderr.
    :raises PDFSyntaxError: if ``STRICT`` is set and an object stream has
        no ``N`` entry.
    """
    parser.seek(0)
    while True:
        try:
            (line_pos, text) = parser.nextline()
        except PSEOF:
            # Input exhausted without meeting a trailer line.
            break

        if text.startswith('trailer'):
            # Rewind to the start of this line before handing the parser over.
            parser.seek(line_pos)
            self.load_trailer(parser)
            if debug >= 1:
                print >> sys.stderr, 'trailer: %r' % self.get_trailer()
            break

        cue = self.PDFOBJ_CUE.match(text)
        if not cue:
            continue

        (raw_objid, raw_genno) = cue.groups()
        objid = int(raw_objid)
        genno = int(raw_genno)
        self.offsets[objid] = (None, line_pos, genno)

        # Expand ObjStm: parse the object that begins on this line.  The
        # scan afterwards resumes from wherever nextobject() left the parser.
        parser.seek(line_pos)
        (_, parsed) = parser.nextobject()
        if not isinstance(parsed, PDFStream):
            continue
        if parsed.get('Type') is not LITERAL_OBJSTM:
            continue

        objstm = stream_value(parsed)
        try:
            n = objstm['N']
        except KeyError:
            if STRICT:
                raise PDFSyntaxError('N is not defined: %r' % objstm)
            n = 0

        # Read every object from the stream data until the sub-parser
        # signals end of input.
        inner_parser = PDFStreamParser(objstm.get_data())
        entries = []
        try:
            while True:
                (_, entry) = inner_parser.nextobject()
                entries.append(entry)
        except PSEOF:
            pass

        # Even slots (0, 2, 4, ...) of the parsed list are used as object
        # ids; never read more pairs than were actually parsed.
        n = min(n, len(entries) // 2)
        for index in xrange(n):
            member_objid = entries[index * 2]
            self.offsets[member_objid] = (objid, index, 0)
    return
def _get_objects(self, stream):
    """Parse every object contained in ``stream``'s data.

    :param stream: object providing ``get``, ``__getitem__`` and
        ``get_data``; its ``Type`` is expected to be ``LITERAL_OBJSTM``.
    :returns: a tuple ``(objs, n)`` where ``objs`` is the list of objects
        read from the stream data (in parse order) and ``n`` is the
        stream's ``N`` entry, or ``0`` when that entry is missing and
        ``STRICT`` is off.
    :raises PDFSyntaxError: only when ``STRICT`` is set, if the stream's
        ``Type`` is not ``LITERAL_OBJSTM`` or it has no ``N`` entry.
    """
    # A wrong stream type is only fatal in strict mode; otherwise parsing
    # is attempted anyway.
    if stream.get('Type') is not LITERAL_OBJSTM and STRICT:
        raise PDFSyntaxError('Not a stream object: %r' % stream)

    try:
        count = stream['N']
    except KeyError:
        if STRICT:
            raise PDFSyntaxError('N is not defined: %r' % stream)
        count = 0

    stream_parser = PDFStreamParser(stream.get_data())
    stream_parser.set_document(self)

    # Collect objects until the parser signals end of input; whatever was
    # read before that point is kept.
    parsed = []
    try:
        while True:
            (_, item) = stream_parser.nextobject()
            parsed.append(item)
    except PSEOF:
        pass
    return (parsed, count)