def get_pdf_page_array():
    """Build a list of four PDFPage fixtures that all carry the same text.

    Every page is created with ``doc=None`` and a page id of 0..3, and is
    given a single content stream whose raw data and ``data`` attribute are
    both the fixed sample string. The same attribute dictionary object is
    passed to every page and every stream.

    Returns:
        list: the four pages, in page-id order.
    """
    shared_attrs = {"MediaBox": [0, 0, 400, 300]}
    sample_text = "Department of Homeland Security"
    pages = []
    for index in range(4):
        current_page = PDFPage(doc=None, pageid=index, attrs=shared_attrs)
        content = PDFStream(shared_attrs, sample_text)
        # The stream payload is set directly in addition to the raw data
        # handed to the constructor.
        content.data = sample_text
        content.set_objid(index, index)
        current_page.contents = [content]
        pages.append(current_page)
    return pages
def do_keyword(self, pos, token):
    """Handle a keyword token, assembling inline images (BI ... ID ... EI).

    * ``BI`` opens an ``'inline'`` collection via ``start_type``.
    * ``ID`` closes that collection, turns the collected key/value operands
      into a dictionary, reads the inline data that follows, and pushes a
      ``PDFStream`` followed by the ``EI`` keyword.
    * Any other keyword is pushed back unchanged as ``(pos, token)``.

    A ``PSTypeError`` raised while handling ``ID`` is re-raised only when
    ``STRICT`` is true; otherwise it is dropped.
    """
    if token is self.KEYWORD_BI:
        # inline image within a content stream
        self.start_type(pos, 'inline')
        return

    if token is not self.KEYWORD_ID:
        self.push((pos, token))
        return

    try:
        (_, operands) = self.end_type('inline')
        if len(operands) % 2 != 0:
            raise PSTypeError('Invalid dictionary construct: %r' % operands)
        image_dict = {}
        for (key, value) in choplist(2, operands):
            image_dict[literal_name(key)] = value
        # Skip past the 'ID' keyword and its separator before reading data.
        (pos, payload) = self.get_inline_data(pos+len('ID '))
        self.push((pos, PDFStream(image_dict, payload)))
        self.push((pos, self.KEYWORD_EI))
    except PSTypeError:
        if STRICT:
            raise
    return
def test_encoding_DLIdentV_as_stream(self):
    """A stream Encoding with the plain-string CMapName 'DLIdent-V'
    must give the font an IdentityCMap."""
    encoding = PDFStream({'CMapName': 'DLIdent-V'}, '')
    cid_font = PDFCIDFont(None, {'Encoding': encoding})
    assert isinstance(cid_font.cmap, IdentityCMap)
def test_encoding_DLIdentH_as_PSLiteral_stream(self):
    """A stream Encoding whose CMapName is PSLiteral('DLIdent-H')
    must give the font an IdentityCMap."""
    cmap_name = PSLiteral('DLIdent-H')
    encoding = PDFStream({'CMapName': cmap_name}, '')
    cid_font = PDFCIDFont(None, {'Encoding': encoding})
    assert isinstance(cid_font.cmap, IdentityCMap)
def test_cmapname_H(self):
    """A stream Encoding whose CMapName is PSLiteral('H') must give the
    font a CMap instance."""
    cmap_name = PSLiteral('H')
    encoding = PDFStream({'CMapName': cmap_name}, '')
    cid_font = PDFCIDFont(None, {'Encoding': encoding})
    assert isinstance(cid_font.cmap, CMap)
def test_cmapname_onebyteidentityH(self):
    """A stream Encoding whose CMapName is PSLiteral('OneByteIdentityH')
    must give the font an IdentityCMapByte."""
    cmap_name = PSLiteral('OneByteIdentityH')
    encoding = PDFStream({'CMapName': cmap_name}, '')
    cid_font = PDFCIDFont(None, {'Encoding': encoding})
    assert isinstance(cid_font.cmap, IdentityCMapByte)
def do_keyword(self, pos, token):
    """Handle a keyword token while parsing PDF objects.

    * ``xref`` / ``startxref``: the top stack entry is moved to the results.
    * ``endobj``: the top four stack entries are moved to the results.
    * ``R``: the top two entries (object id, generation number) are
      replaced by a ``PDFObjRef``; a ``PSSyntaxError`` here is ignored.
    * ``stream``: the preceding dictionary is popped, the stream body is
      read up to the ``endstream`` marker, and a ``PDFStream`` is pushed.
    * Anything else is pushed back unchanged as ``(pos, token)``.

    Raises:
        PDFSyntaxError: only when ``STRICT`` is true, if ``/Length`` is
            missing from the stream dictionary or the input ends before
            the stream is complete.
    """
    if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
        self.add_results(*self.pop(1))

    elif token is self.KEYWORD_ENDOBJ:
        self.add_results(*self.pop(4))

    elif token is self.KEYWORD_R:
        # reference to indirect object
        try:
            ((_, objid), (_, genno)) = self.pop(2)
            objid = int(objid)
            genno = int(genno)
            self.push((pos, PDFObjRef(self.doc, objid, genno)))
        except PSSyntaxError:
            pass

    elif token is self.KEYWORD_STREAM:
        # stream object
        ((_, stream_dict),) = self.pop(1)
        stream_dict = dict_value(stream_dict)
        try:
            length = int_value(stream_dict['Length'])
        except KeyError:
            if STRICT:
                raise PDFSyntaxError('/Length is undefined: %r' % stream_dict)
            length = 0

        # Re-read the line holding the 'stream' keyword so that pos can be
        # advanced to the first byte after it.
        self.seek(pos)
        try:
            (_, header) = self.nextline()  # 'stream'
        except PSEOF:
            if STRICT:
                raise PDFSyntaxError('Unexpected EOF')
            return
        pos += len(header)

        self.fp.seek(pos)
        payload = self.fp.read(length)
        self.seek(pos + length)

        # The declared length is not trusted: keep appending lines until
        # the 'endstream' marker (or end of input) is reached.
        while True:
            try:
                (_, chunk) = self.nextline()
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                break
            marker_at = chunk.find('endstream')
            if marker_at != -1:
                length += marker_at
                payload += chunk[:marker_at]
                break
            length += len(chunk)
            payload += chunk
        self.seek(pos + length)

        if self.debug >= 1:
            print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
                (pos, length, stream_dict, payload[:10])
        self.push((pos, PDFStream(stream_dict, payload, self.doc.decipher)))

    else:
        # others
        self.push((pos, token))
    return