def testFindEqclassesCircularReferences(self):
  """Check FindEqclasses on two page trees with /Parent <-> /Kids cycles.

  Objects 4 and 5 are /Pages nodes whose /Kids point to the /Page objects
  9 and 10, which in turn point back through /Parent. The call uses
  do_remove_unused=True and do_renumber=True. The expected result holds one
  /Pages object (1) and one /Page object (2), and the trailer's four
  references become `1 0 R 1 0 R 2 0 R 2 0 R'. References to objects 1, 2
  and 3 of the input, which are never defined, are expected to come back as
  null.
  """
  pdf_data = pdfsizeopt.PdfData()
  # The trailer has to contain these references (the Rs); otherwise the
  # objects would be discarded.
  pdf_data.trailer = pdfsizeopt.PdfObj(
      '0 0 obj<<4 0 R 5 0 R 9 0 R 10 0 R>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (4, '0 0 obj<</Parent 1 0 R/Type/Pages/Kids[9 0 R]/Count 1>>endobj'),
      (5, '0 0 obj<</Parent 1 0 R/Type/Pages/Kids[10 0 R]/Count 1>>endobj'),
      (9, '0 0 obj<</Type/Page/MediaBox[0 0 419 534]/CropBox[0 0 419 534]'
          '/Parent 4 0 R/Resources<</XObject<</S 2 0 R>>/ProcSet[/PDF/ImageB]>>'
          '/Contents 3 0 R>>endobj'),
      (10, '10 0 obj<</Type/Page/MediaBox[0 0 419 534]/CropBox[0 0 419 534]'
           '/Parent 5 0 R/Resources<</XObject<</S 2 0 R>>/ProcSet[/PDF/ImageB]>>'
           '/Contents 3 0 R>>endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)
  pdf_data.objs['trailer'] = pdf_data.trailer

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(
      pdf_data.objs, do_remove_unused=True, do_renumber=True)
  del pdf_data.objs['trailer']

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      1: ('<</Parent null/Type/Pages/Kids[2 0 R]/Count 1>>', None),
      2: ('<</Type/Page/MediaBox[0 0 419 534]/CropBox[0 0 419 534]'
          '/Parent 1 0 R/Resources<</XObject<</S null>>'
          '/ProcSet[/PDF/ImageB]>>/Contents null>>', None),
      'trailer': ('<<1 0 R 1 0 R 2 0 R 2 0 R>>', None),
  }
  self.assertEqual(expected, actual)
def testFindEqclassesAllEquivalentAndUndefined(self):
  """Check merging of equivalent objects that also reference missing ones.

  Objects 1..4 pair up through /P (1 <-> 2, 3 <-> 4) and each has a /U
  reference to an object (6..9) that is not defined. FindEqclasses is called
  with its default flags. The expected result is the single object 1 whose
  /P points to itself and whose /U is null.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (1, '0 0 obj<</S(q)/P 2 0 R /U 6 0 R>>endobj'),
      (2, '0 0 obj<</S(q)/P 1 0 R /U 7 0 R>>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R /U 8 0 R>>endobj'),
      (4, '0 0 obj<</S(q)/P 3 0 R /U 9 0 R>>endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(pdf_data.objs)

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {1: ('<</S(q)/P 1 0 R/U null>>', None)}
  self.assertEqual(expected, actual)
def testFindEqclassesAllEquivalent(self):
  """Check that four pairwise-referencing equivalent objects become one.

  Objects 5 <-> 6 and 3 <-> 4 reference each other through /P and differ
  only in whitespace. FindEqclasses is called with its default flags. The
  expected result is the single object 3 whose /P points to itself.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (5, '0 0 obj<</S(q)/P 6 0 R>>endobj'),
      (6, '0 0 obj<</S(q)/P 5 0 R >>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R >>endobj'),
      (4, '0 0 obj<</S(q)/P 3 0 R >>endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(pdf_data.objs)

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {3: ('<</S(q)/P 3 0 R>>', None)}
  self.assertEqual(expected, actual)
def testFindEqclassesString(self):
  """Check how string values look in the output of FindEqclasses.

  Object 3 holds literal and hex strings and is referenced from the trailer.
  The call uses do_remove_unused=True and do_renumber=True. In the expected
  output the object is numbered 1, `<>' appears as `()' and `<3a3A4>'
  appears as `(::@)'.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj(
      '0 0 obj<</A[3 0 R]>>endobj')
  pdf_data.objs[3] = pdfsizeopt.PdfObj(
      '0 0 obj<</A()/B<>/C(:)/D<3a3A4>>>endobj')
  pdf_data.objs['trailer'] = pdf_data.trailer

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(
      pdf_data.objs, do_remove_unused=True, do_renumber=True)

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      'trailer': ('<</A[1 0 R]>>', None),
      1: ('<</A()/B()/C(:)/D(::@)>>', None),
  }
  self.assertEqual(expected, actual)
def testFindEqclassesTwoGroupsByOrder(self):
  """Check that objects with different key order end up in separate groups.

  Objects 1 and 3 list /S before /P; objects 2 and 4 list /P before /S
  (object 4 spells its /S value as the hex string `<71>'). FindEqclasses is
  called with its default flags. The expected result keeps one object per
  key ordering: 1 and 2, still referencing each other.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (1, '0 0 obj<</S(q)/P 2 0 R>>endobj'),
      (2, '0 0 obj<</P 1 0 R/S(q)>>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R>>endobj'),
      (4, '0 0 obj<</P 3 0 R /S<71>>>endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(pdf_data.objs)

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      1: ('<</S(q)/P 2 0 R>>', None),
      2: ('<</P 1 0 R/S(q)>>', None),
  }
  self.assertEqual(expected, actual)
def testFindEqclassesAllDifferentBecauseOfStream(self):
  """Check that differing stream data keeps otherwise similar objects apart.

  Objects 1 <-> 2 and 3 <-> 4 reference each other through /P. Object 2
  carries the stream 'foo' and object 4 the stream 'fox'. FindEqclasses is
  called with its default flags. All four objects are expected in the
  result, with their original numbers, references and streams.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (1, '0 0 obj<</S(q)/P 2 0 R>>endobj'),
      (2, '0 0 obj<</S(q)/P 1 0 R >>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R >>endobj'),
      (4, '0 0 obj<</S(q)/P 3 0 R >>endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)
  # Only the even-numbered objects get stream data, and it differs.
  pdf_data.objs[2].stream = 'foo'
  pdf_data.objs[4].stream = 'fox'

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(pdf_data.objs)

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      1: ('<</S(q)/P 2 0 R>>', None),
      2: ('<</S(q)/P 1 0 R>>', 'foo'),
      3: ('<</S(q)/P 4 0 R>>', None),
      4: ('<</S(q)/P 3 0 R>>', 'fox'),
  }
  self.assertEqual(expected, actual)
def testFindEqclassesTwoGroupsWithTrailerRenumber(self):
  """Check merging, unused-object removal and renumbering with a trailer.

  The trailer references objects 3, 4, 5, 6 and 4 again. Objects 3 and 5
  use /P, objects 4 and 6 use /Q. Objects 10, 11 and 12 are not referenced
  from the trailer. The call uses do_remove_unused=True and
  do_renumber=True. The expected result holds only objects 1 (the /Q one)
  and 2 (the /P one) plus the trailer, whose array becomes
  `[2 0 R 1 0 R 2 0 R 1 0 R 1 0 R]'.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj(
      '0 0 obj<</A[3 0 R 4 0 R 5 0 R 6 0 R 4 0 R]>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (5, '0 0 obj<</S(q)/P 6 0 R>>endobj'),
      (6, '0 0 obj<</S(q)/Q 5 0 R >>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R >>endobj'),
      (4, '0 0 obj<</S(q)/Q 3 0 R >>endobj'),
      (10, '0 0 obj[11 0 R]endobj'),
      (11, '0 0 obj[10 0 R]endobj'),
      (12, '0 0 obj[11 0 R]endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)
  pdf_data.objs[12].stream = 'blah'
  pdf_data.objs['trailer'] = pdf_data.trailer

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(
      pdf_data.objs, do_remove_unused=True, do_renumber=True)
  del pdf_data.objs['trailer']

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      'trailer': ('<</A[2 0 R 1 0 R 2 0 R 1 0 R 1 0 R]>>', None),
      2: ('<</S(q)/P 1 0 R>>', None),
      1: ('<</S(q)/Q 2 0 R>>', None),
  }
  self.assertEqual(expected, actual)
def testFindEqclassesTwoGroupsWithTrailer(self):
  """Check merging with a trailer when using the default flags.

  The trailer references objects 3, 4, 5, 6 and 3 again. Objects 3 and 5
  use /P, objects 4 and 6 use /Q. Objects 10 and 11 reference each other,
  and object 12 references 11 and carries the stream 'blah'. FindEqclasses
  is called with its default flags. The expected result keeps objects 3, 4,
  10 and 12 under their original numbers. The trailer array becomes
  `[3 0 R 4 0 R 3 0 R 4 0 R 3 0 R]', and both array objects reference 10.
  """
  pdf_data = pdfsizeopt.PdfData()
  pdf_data.trailer = pdfsizeopt.PdfObj(
      '0 0 obj<</A[3 0 R 4 0 R 5 0 R 6 0 R 3 0 R]>>endobj')
  # (object number, object source), in insertion order.
  obj_sources = (
      (5, '0 0 obj<</S(q)/P 6 0 R>>endobj'),
      (6, '0 0 obj<</S(q)/Q 5 0 R >>endobj'),
      (3, '0 0 obj<</S(q)/P 4 0 R >>endobj'),
      (4, '0 0 obj<</S(q)/Q 3 0 R >>endobj'),
      (10, '0 0 obj[11 0 R]endobj'),
      (11, '0 0 obj[10 0 R]endobj'),
      (12, '0 0 obj[11 0 R]endobj'),
  )
  for obj_num, obj_source in obj_sources:
    pdf_data.objs[obj_num] = pdfsizeopt.PdfObj(obj_source)
  pdf_data.objs[12].stream = 'blah'
  pdf_data.objs['trailer'] = pdf_data.trailer

  eq_objs = pdfsizeopt.PdfData.FindEqclasses(pdf_data.objs)
  del pdf_data.objs['trailer']

  # Reduce each returned object to a comparable (head, stream) pair.
  actual = dict((obj_num, (obj.head, obj.stream))
                for obj_num, obj in eq_objs.items())
  expected = {
      'trailer': ('<</A[3 0 R 4 0 R 3 0 R 4 0 R 3 0 R]>>', None),
      10: ('[10 0 R]', None),
      12: ('[10 0 R]', 'blah'),
      3: ('<</S(q)/P 4 0 R>>', None),
      4: ('<</S(q)/Q 3 0 R>>', None),
  }
  self.assertEqual(expected, actual)