Пример #1
0
 def do_keyword(self, pos, token):
     """Handle a keyword token found at position *pos*.

     For the ``R`` keyword the two topmost stack entries are popped and
     interpreted as an object id and a generation number; a ``PDFObjRef``
     built from them is pushed back at *pos*.  Every other keyword is
     pushed onto the stack unchanged as ``(pos, token)``.

     A malformed reference is dropped instead of aborting the parse: this
     covers a ``PSSyntaxError`` raised while handling it, fewer than two
     operands on the stack, and operands that cannot be converted to
     ``int``.
     """
     if token is self.KEYWORD_R:
         # reference to indirect object
         try:
             operands = self.pop(2)
             try:
                 ((_, objid), (_, genno)) = operands
                 (objid, genno) = (int(objid), int(genno))
             except (TypeError, ValueError):
                 # Not exactly two (pos, value) pairs, or the values are
                 # not integers: treat it like any other broken reference
                 # and ignore it rather than crash.
                 return
             obj = PDFObjRef(self.doc, objid, genno)
             self.push((pos, obj))
         except PSSyntaxError:
             pass
         return
     # others
     self.push((pos, token))
     return
Пример #2
0
 def do_keyword(self, pos, token):
     """Handle a keyword token found at position *pos*.

     * ``R``: the two topmost stack entries are popped and interpreted as
       an object id and a generation number; a ``PDFObjRef`` built from
       them is pushed back at *pos*.  A malformed reference (a
       ``PSSyntaxError``, fewer than two operands, or operands that
       cannot be converted to ``int``) is dropped instead of aborting
       the parse.
     * ``obj`` / ``endobj``: ignored, unless ``STRICT`` is set, in which
       case ``PDFSyntaxError`` is raised naming the offending keyword.
     * anything else: pushed onto the stack unchanged as ``(pos, token)``.
     """
     if token is self.KEYWORD_R:
         # reference to indirect object
         try:
             operands = self.pop(2)
             try:
                 ((_, objid), (_, genno)) = operands
                 (objid, genno) = (int(objid), int(genno))
             except (TypeError, ValueError):
                 # Not exactly two (pos, value) pairs, or the values are
                 # not integers: treat it like any other broken reference
                 # and ignore it rather than crash.
                 return
             obj = PDFObjRef(self.doc, objid, genno)
             self.push((pos, obj))
         except PSSyntaxError:
             pass
         return
     elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ):
         if STRICT:
             # See PDF Spec 3.4.6: Only the object values are stored in the
             # stream; the obj and endobj keywords are not used.
             # Report the keyword that was actually seen; this branch is
             # reached for both "obj" and "endobj".
             name = 'obj' if token == self.KEYWORD_OBJ else 'endobj'
             raise PDFSyntaxError("Keyword %s found in stream" % name)
         return
     # others
     self.push((pos, token))
     return
Пример #3
0
    def do_keyword(self, pos, token):
        """Handles PDF-related keywords.

        * ``xref`` / ``startxref``: pops one stack entry and hands it to
          ``add_results``.
        * ``endobj``: pops four stack entries and hands them to
          ``add_results``.
        * ``null``: pushes ``(pos, None)``.
        * ``R``: pops two entries (object id, generation number) and
          pushes a ``PDFObjRef``.  A malformed reference (a
          ``PSSyntaxError``, fewer than two operands, or operands that
          cannot be converted to ``int``) is dropped instead of aborting
          the parse.
        * ``stream``: pops the stream dictionary, reads the stream data
          from ``self.fp`` and pushes a ``PDFStream``.
        * anything else: pushed unchanged as ``(pos, token)``.

        Raises ``PDFSyntaxError`` only when ``STRICT`` is set (missing
        ``/Length``, or end of file while reading a stream).
        """
        
        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            self.add_results(*self.pop(1))
        
        elif token is self.KEYWORD_ENDOBJ:
            self.add_results(*self.pop(4))

        elif token is self.KEYWORD_NULL:
            # null object
            self.push((pos, None))

        elif token is self.KEYWORD_R:
            # reference to indirect object
            try:
                operands = self.pop(2)
                try:
                    ((_,objid), (_,genno)) = operands
                    (objid, genno) = (int(objid), int(genno))
                except (TypeError, ValueError):
                    # Not exactly two (pos, value) pairs, or the values
                    # are not integers: treat it like any other broken
                    # reference and ignore it rather than crash.
                    return
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
                pass

        elif token is self.KEYWORD_STREAM:
            # stream object
            ((_,dic),) = self.pop(1)
            dic = dict_value(dic)
            # With no usable /Length (or in fallback mode) objlen stays 0
            # and the whole payload is recovered by the endstream scan
            # below.
            objlen = 0
            if not self.fallback:
                try:
                    objlen = int_value(dic['Length'])
                except KeyError:
                    if STRICT:
                        raise PDFSyntaxError('/Length is undefined: %r' % dic)
            # Re-read the line holding the keyword so that pos can be
            # advanced past it to the first byte of the stream data.
            self.seek(pos)
            try:
                (_, line) = self.nextline()  # 'stream'
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                return
            pos += len(line)
            # Read the declared number of bytes directly from the file.
            self.fp.seek(pos)
            data = self.fp.read(objlen)
            self.seek(pos+objlen)
            # Keep appending lines until one contains 'endstream'; this
            # extends data/objlen when the declared length was too short
            # or missing.
            while 1:
                try:
                    (linepos, line) = self.nextline()
                except PSEOF:
                    if STRICT:
                        raise PDFSyntaxError('Unexpected EOF')
                    break
                if 'endstream' in line:
                    i = line.index('endstream')
                    objlen += i
                    data += line[:i]
                    break
                objlen += len(line)
                data += line
            # Reposition the parser at the end of the collected data.
            self.seek(pos+objlen)
            # XXX limit objlen not to exceed object boundary
            if 2 <= self.debug:
                print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
                      (pos, objlen, dic, data[:10])
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))

        else:
            # others
            self.push((pos, token))
        
        return