示例#1
0
    def testObjectIds(self):
        """
        Tests the ``ObjectStream.objectIds()`` method.
        """
        expResults = (
            (8, 3, 10, 2, 1, 11, 13, 15, 4, 19, 5, 20, 6, 21, 17),
            (644, 642, 646, 647, 648, 122, 119, 120, 121, 124, 179, 232, 327,
             467, 478, 519, 568, 573, 580, 586, 592, 598, 603, 611, 616, 623,
             629, 634),
        )
        # Files we know to have Object Streams within
        inputData = (
            # (filename, id, generation number)
            ("crazyones.pdf", 9, 0),
            ("GeoBase_NHNC1_Data_Model_UML_EN.pdf", 645, 0),
        )

        for o, d in zip(expResults, inputData):
            filepath = join(TESTS_DATA_ROOT, d[0])
            r = PdfFileReader(filepath)
            ref = IndirectObject(d[1], d[2], r)
            objStm = r.getObject(ref)

            r.close()

            self.assertIsInstance(objStm, ObjectStream)
            self.assertTupleEqual(tuple(o), tuple(objStm.objectIds))
示例#2
0
    def testDecodeStreamData(self):
        """ Stores PDF files infos and the coordinates of stream objects. We
            don't care if we need to open a new file stream for each obj.
            reference -- unit tests don't have to be efficient
        """
        this_dir = join(TEST_DATA_ROOT, self.testDecodeStreamData.__name__)
        filters = (
            # (filter type, filename, id, gen. number)
            (FlateCodec, "FlateDecode.pdf", 4, 0),
            (FlateCodec, "FlateDecode.pdf", 8, 0),
            (FlateCodec, "FlateDecode.pdf", 9, 0),
            # TO-DO No PDF files found with this type of encoding, get them.
            # (ASCIIHexCodec, "ASCIIHexDecode.pdf", ?, ?)
            (LZWCodec, "LZWDecode.pdf", 209, 0),
            (LZWCodec, "LZWDecode.pdf", 210, 0),
            (LZWCodec, "LZWDecode.pdf", 211, 0),
            (ASCII85Codec, "ASCII85Decode.pdf", 5, 0),
            (ASCII85Codec, "ASCII85Decode.pdf", 6, 0),
            (DCTCodec, "DCTDecode.pdf", 4, 0),
            # TO-DO No PDF files found with this type of encoding, get them.
            # (JPXCodec, "JPXDecode.pdf", ?, ?)
            (CCITTFaxCodec, "CCITTFaxDecode.pdf", 46, 0),
        )

        for f__ in filters:
            with open(join(this_dir, f__[1]), "rb") as infile:
                reader = PdfFileReader(infile)
                ref = IndirectObject(f__[2], f__[3], reader)
                stream = reader.getObject(ref)

                # Ensures that the PdfFileReader reads a stream object
                self.assertEqual(EncodedStreamObject, type(stream))

                # print("Running with %s!" % f[0].__name__)
                if f__[0] is CCITTFaxCodec:
                    self.assertEqual(
                        f__[0].decode(
                            stream._data,
                            stream.get("/DecodeParms"),
                            stream.get("/Height"),
                        ),
                        decodeStreamData(stream),
                    )
                else:
                    self.assertEqual(
                        f__[0].decode(stream._data, stream.get("/DecodeParms")),
                        decodeStreamData(stream),
                    )