Exemplo n.º 1
0
def check(args):
    document = doc.PdfDocument(args.pdffile)
    print '### CHECKING XREF INTEGRITY ###'
    print 'Looking for duplicated references ...'
    print 'Found', document.xref.getNumberOfDuplicatedOffsets(), \
        'entries pointing to the same offset'
    print 'Looking for duplicated streams ...'
    print 'found', document.xref.getNumberOfDuplicatedContents(), \
        'duplicated contents'
Exemplo n.º 2
0
def graph(args):
    document = doc.PdfDocument(args.pdffile)
    component_from = args.fromObject
    component_to = args.toObject

    graph = objectGraph.ObjectGraph(document)
    if component_from:
        print graph.getGraphComponent(int(component_from))
    elif component_to:
        print graph.getReversedGraphComponent(int(component_to))
    else:
        print graph.getFullGraph()
Exemplo n.º 3
0
def replace(args):
    """Replace the content of one object and write the document to a file.

    Reads the replacement data from the file named by ``args.input``, loads
    the document from ``args.pdffile``, and substitutes the content of
    object ``args.objectnumber``.  When ``args.stream`` is truthy the stream
    content is replaced via ``setStreamContent``; otherwise ``setContent``
    is used.  The modified document is then written to ``args.output``.

    Raises:
        ValueError: if ``args.objectnumber`` cannot be converted to ``int``.
        IOError: if the input or output file cannot be opened.
    """
    objectnumber = int(args.objectnumber)

    # Binary mode: PDF data is not text, and text mode would translate line
    # endings on platforms that distinguish the two.  The ``with`` block
    # also guarantees the input handle is closed.
    with open(args.input, 'rb') as input_file:
        content = input_file.read()

    document = doc.PdfDocument(args.pdffile)
    entry = document.xrefEntry(objectnumber)
    if args.stream:
        entry.setStreamContent(document.filestream, content)
    else:
        entry.setContent(content)

    # Open the output only once the document is ready, so a failure while
    # reading or parsing does not leave behind a truncated output file, and
    # close it even if writing raises.
    with open(args.output, 'wb') as output_file:
        document.writeTo(output_file)
Exemplo n.º 4
0
def show(args):
    document = doc.PdfDocument(args.pdffile)

    if not args.objectnumber:
        return
    numbers = splitArg(args.objectnumber)
    showstream = args.stream
    for number in numbers:
        if showstream:
            try:
                content = document.fetchStream(number)
            except Exception:
                print "Object doesn't seem to be a stream"
                return
        else:
            content = document.fetchXref(number).content
        print content,
Exemplo n.º 5
0
def showxref(args):
    document = doc.PdfDocument(args.pdffile)
    print '### XREF ###'
    print str(document.xref)
Exemplo n.º 6
0
 def parse_pdf(self, f):
     """Load *f* into ``self.pdfdoc`` and mirror parsing progress.

     A progress callback is handed to ``doc.PdfDocument``.  Each time it is
     invoked it stores the status in ``self.status_sync_var`` and the
     completion percentage, computed from the ``read`` and ``total`` keyword
     arguments, in ``self.progress_sync_var``.
     """
     def report_progress(status, **kwargs):
         self.status_sync_var.set(status)
         read = kwargs['read']
         total = kwargs['total']
         # Multiply by a float first: if both counters are ints, Python 2's
         # ``read / total`` floors to 0 for every partial read.  A zero
         # total is reported as 0% instead of raising ZeroDivisionError.
         percent = 100.0 * read / total if total else 0.0
         self.progress_sync_var.set(percent)

     self.pdfdoc = doc.PdfDocument(f, progress_cb=report_progress)