Пример #1
0
    def testIsObjectFree(self):
        """
        Tests the ``PdfFileReader.isObjectFree()`` method.

        For each input file, the ``(idnum, generation, isObjectFree)`` triples
        produced by the reader for its XRef Table references are compared,
        after sorting, with the entries that ``self._parseXRefTable()``
        returns for the same-named file under ``self.localDataRoot``.
        """
        # TO-DO Find PDF files that feature free-entry lists. We are checking
        # isObjectFree() only against used items.
        inputFiles = (
            "jpeg.pdf",
            "Seige_of_Vicksburg_Sample_OCR.pdf",
            "SF424_page2.pdf",
        )

        for filename in inputFiles:
            filepath = join(self.localDataRoot, filename)
            r = PdfFileReader(join(TEST_DATA_ROOT, filename))

            # try/finally guarantees r.close() is called even when parsing the
            # expected data or walking the reader raises.
            try:
                # NOTE(review): columns (0, 1, 3) presumably select the id,
                # the generation and the free/in-use flag -- confirm against
                # _parseXRefTable().
                expItems = sorted(self._parseXRefTable(filepath, (0, 1, 3)))
                actualItems = sorted(
                    # This is where isObjectFree() gets invoked
                    (ref.idnum, ref.generation, r.isObjectFree(ref))
                    for ref in r.objects(PdfFileReader.R_XTABLE, True)
                )
            finally:
                r.close()

            self.assertListEqual(expItems, actualItems)
Пример #2
0
    def testXRefTableObjects(self):
        """
        Ensures that after ``PdfFileReader._parsePdfFile()`` all the indirect
        references from the XRef-Table *only* have been loaded as expected.
        Objects from the free entries list are included as well in the test.

        This case tests the part of ``PdfFileReader.objects()`` responsible for
        generating the Cross-Reference Table entries too.
        """
        self.maxDiff = None
        inputFiles = ("jpeg.pdf", "Seige_of_Vicksburg_Sample_OCR.pdf",
                      "SF424_page2.pdf")

        for filename in inputFiles:
            filepath = join(TEST_DATA_ROOT, filename)
            xtablepath = join(self.localDataRoot, filename)
            r = PdfFileReader(filepath)

            # try/finally guarantees r.close() is called even if iterating the
            # reader or indexing its private table raises.
            try:
                # (id, gen, byte offset)-valued list, read straight from the
                # reader's private XRef Table mapping.
                actualItems = sorted(
                    (ref.idnum, ref.generation,
                     r._xrefTable[ref.generation][ref.idnum][0])
                    for ref in r.objects(PdfFileReader.R_XTABLE, True)
                )
            finally:
                r.close()

            # We artificially read the XRef Table entries that we know belong
            # to filepath, and store them into expItems (sorted once; the
            # original sorted this list twice).
            expItems = sorted(self._parseXRefTable(xtablepath, (0, 1, 2)))

            self.assertListEqual(expItems, actualItems,
                                 "Differences found in " + filename)
Пример #3
0
    def testXTableAgainstXStream(self):
        """
        In section 7.5.8.4 of ISO 32000, "Compatibility with Applications That
        Do Not Support Compressed Reference Streams", the standard describes a
        means of crafting PDF files designed for versions 1.5+ that can be
        opened nevertheless by readers that support older versions.

        This test case verifies that all the items hidden by the XRef Table in
        non-conforming readers are *all and exactly* loaded into the XRef
        Stream by readers that support PDF 1.5+.
        """
        self.maxDiff = None
        # TO-DO Possibly add a few other files to this test case
        inputFiles = ("GeoBase_NHNC1_Data_Model_UML_EN.pdf", )

        for filename in inputFiles:
            filepath = join(self.localDataRoot, filename)
            # Maps the first selected column to the remaining ones.
            # NOTE(review): columns (0, 2, 3) presumably are the id, the byte
            # offset and the free/in-use flag -- confirm against
            # _parseXRefTable().
            expItems = {
                e[0]: e[1:]
                for e in self._parseXRefTable(filepath, (0, 2, 3))
            }
            r = PdfFileReader(join(TEST_DATA_ROOT, filename))

            # The reader stays open until the last read of r._xrefStm below;
            # the original closed it first and then kept reading its private
            # state. finally also ensures close() runs on assertion failure.
            try:
                actualItems = sorted(
                    r.objects(PdfFileReader.R_XSTREAM, True),
                    key=lambda e: e.idnum,
                )
                expKeys = sorted(expItems)
                actualKeys = [a.idnum for a in actualItems]

                self.assertListEqual(expKeys, actualKeys,
                                     "Lists of item IDs are not identical")

                for e, a in zip(expKeys, actualItems):
                    self.assertEqual(
                        e, a.idnum, "Items ID does not correspond")

                    # If an item is in use in the XRef Stream, ensure then
                    # that it is marked free in the XRef Table.
                    if r._xrefStm[a.idnum][0] == 2:
                        self.assertTrue(
                            expItems[e][-1],
                            "Item %d should be hid by the XRef Table, but it "
                            "was not." % e,
                        )
            finally:
                r.close()
Пример #4
0
    def testXRefStreamObjects(self):
        """
        Like ``PdfReaderTestCases.testXRefTableObjects()``, except that it
        tests objects referenced by the Cross-Reference Stream.
        ``PdfFileReader.objects()`` second part (dealing with XStream objects)
        is invoked and implicitly tested.

        The expected entries are read from the same-named text file under
        ``self.localDataRoot``: three whitespace-separated integers per line,
        with blank lines and lines starting with ``%`` skipped.
        """
        inputFiles = ("crazyones.pdf", )

        for filename in inputFiles:
            filepath = join(self.localDataRoot, filename)
            r = PdfFileReader(join(TEST_DATA_ROOT, filename))
            # Two lists of tuples as explained by Table 18
            actualItems = []
            expItems = []

            # try/finally guarantees r.close() is called even when the
            # expected-data file cannot be parsed or an in-loop assertion
            # fails.
            try:
                with open(filepath, "r") as instream:
                    for line in instream:
                        # File iteration never yields an empty string, so only
                        # whitespace-only and comment lines need skipping.
                        if line.isspace() or line.startswith("%"):
                            continue

                        # Unpacking enforces exactly three fields per line.
                        this_type, field2, field3 = (
                            int(f) for f in line.split())
                        expItems.append((this_type, field2, field3))

                for item in r.objects(PdfFileReader.R_XSTREAM, True):
                    priv8Item = r._xrefStm[item.idnum]

                    # For entry types 0 and 1 the third field must equal the
                    # reference's generation; for type 2 the generation is
                    # expected to be 0.
                    if priv8Item[0] in {0, 1}:
                        self.assertEqual(priv8Item[2], item.generation)
                    elif priv8Item[0] == 2:
                        self.assertEqual(item.generation, 0)

                    actualItems.append(priv8Item)
            finally:
                r.close()

            actualItems = sorted(actualItems)
            expItems = sorted(expItems)

            self.assertListEqual(
                expItems,
                actualItems,
                "Didn't correctly read the Cross-Reference Stream",
            )