Пример #1
0
def _write_outline_entries(entries, depth, page_numbers, bookmark_file):
    """Write one level of a PDF outline, recursing into nested levels.

    Args:
        entries: One outline level as returned by ``getOutlines()``: a list
            whose items are either destination objects or nested lists.
        depth: Nesting level of ``entries``; each line is prefixed with this
            many tab characters.
        page_numbers: Mapping from a page's indirect-object id (``idnum``)
            to its 1-based page number.
        bookmark_file: Writable text file object receiving the lines.

    Raises:
        KeyError: If a bookmark points at a page id that is missing from
            ``page_numbers``.
    """
    for entry in entries:
        if isinstance(entry, list):
            # A nested list sits one level deeper in the outline tree.
            _write_outline_entries(entry, depth + 1, page_numbers,
                                   bookmark_file)
        elif isinstance(entry, PyPDF2.generic.Destination):
            bookmark_file.write('\t' * depth + entry['/Title'] + '\t' +
                                str(page_numbers[entry.page.idnum]) + '\r\n')


def getPdffileBookmark(filename, bookmark_file_savepath):
    """Export the bookmarks (outline) of a PDF to a tab-indented text file.

    Each bookmark becomes one line of the form
    ``<tabs><title>\\t<page number>\\r\\n``, where the number of leading tabs
    equals the bookmark's nesting depth (0 for top-level entries) and the
    page number is 1-based. Outlines of any nesting depth are supported.

    The page count is printed to stdout, as before.

    Args:
        filename: Path of the PDF file to read.
        bookmark_file_savepath: Path of the UTF-8 text file to write; an
            existing file is overwritten.

    Raises:
        KeyError: If a bookmark points at a page id that does not belong to
            any page of the document.
    """
    # The reader works on the open stream, so the input file must stay open
    # until every bookmark has been written.
    with open(filename, "rb") as pdf_stream:
        pdf = PdfFileReader(pdf_stream)

        pagecount = pdf.getNumPages()
        print('pagecount:', pagecount)

        # Bookmarks refer to pages by indirect-object id, e.g.
        # {'/Type': '/Fit', '/Page': IndirectObject(7871, 0), '/Title': ...},
        # so build a lookup from that id to the real (1-based) page number.
        page_numbers = {
            pdf.getPage(i).indirectRef.idnum: i + 1
            for i in range(pagecount)
        }

        outlines = pdf.getOutlines()

        # The context manager closes the output even if writing fails.
        with codecs.open(bookmark_file_savepath, 'w',
                         encoding='utf-8') as bookmark_file:
            _write_outline_entries(outlines, 0, page_numbers, bookmark_file)