Пример #1
0
 def saveAs(self, fname):
     """Write the loaded document to ``fname`` through a new PdfWriter.

     All pages of ``self.pdf`` are added, then its Info dictionary and the
     ``Outlines`` entry of its Root are copied onto the output trailer.
     """
     writer = PdfWriter()
     writer.addpages(self.pdf.pages)
     out_trailer = writer.trailer
     out_trailer.Info = self.pdf.Info
     out_trailer.Root.Outlines = self.pdf.Root.Outlines
     writer.write(fname)
Пример #2
0
def two_up(data):
    """Put the two pages of a PDF side by side on one sheet and emit it.

    ``data`` is the raw content of a PDF that must contain exactly two
    pages.  Both pages are rotated by 270 degrees and scaled by sqrt(0.5);
    the second page is then shifted right.  The merged page is written to
    standard output after a ``Content-Type`` header.

    The Info dictionary of the input is reused, with ``Creator``
    overwritten by this script's name and version.
    """
    pdf = PdfReader(fdata=data)
    pages = PageMerge() + pdf.pages

    assert len(pages) == 2

    left, right = pages

    rotation = 270
    scale = 0.7071067811865476  # sqrt(0.5)

    # Computed before the pages are rotated and scaled, as in the original
    # layout code; keep this statement ahead of the loop below.
    x_increment = scale * pages.xobj_box[2]

    for half in (left, right):
        half.Rotate = rotation
        half.scale(scale)
    right.x = x_increment

    writer = PdfWriter()
    writer.addpage(pages.render())

    # retain and update metadata
    pdf.Info.Creator = 'modulo-nic.py %s' % __version__
    writer.trailer.Info = pdf.Info

    # The PDF body is binary.  On Python 3 the text-mode sys.stdout rejects
    # bytes, so write to its underlying buffer when there is one, and flush
    # the textual header first so it cannot end up after the body.
    sys.stdout.write('Content-Type: application/x-pdf\n\n')
    sys.stdout.flush()
    writer.write(getattr(sys.stdout, 'buffer', sys.stdout))
Пример #3
0
    def splitting(*varargs,filenameOut ="out"):
        """Interleave the pages of several PDF files into one output file.

        Page 1 of every input is written first (in argument order), then
        page 2 of every input, and so on, stopping at the page count of the
        shortest input.

        Args:
            *varargs: Paths of the input PDF files; at least two.
            filenameOut: Output file name; ``.pdf`` is appended when missing.

        Raises:
            IndexError: Fewer than two input files were given.
            ValueError: An input path or ``filenameOut`` is not a ``str``.
        """
        if len(varargs) <= 1:
            raise IndexError("Errore: inserire almeno due file.")

        for path in varargs:
            if not isinstance(path, str):
                raise ValueError("Errore: i file devono essere pdf")

        if not isinstance(filenameOut, str):
            raise ValueError("Errore: il nome del file deve essere di tipo str")

        # Parse every input exactly once instead of once per output page.
        page_lists = [PdfReader(path).pages for path in varargs]
        shortest = min(len(pages) for pages in page_lists)

        merged = PdfWriter()
        for index in range(shortest):
            for pages in page_lists:
                merged.addPage(pages[index])

        if not filenameOut.endswith('.pdf'):
            filenameOut = filenameOut + '.pdf'

        merged.write(filenameOut)
Пример #4
0
def splitting(filenameOut ="out",*varargs):
    """Interleave the pages of the given PDF files into ``filenameOut``.pdf.

    Page 1 of every input is written first (in argument order), then page 2
    of every input, and so on, stopping at the page count of the shortest
    input.

    Args:
        filenameOut: Output name; ``.pdf`` is always appended to it.
        *varargs: Paths of the input PDF files; at least one.

    Raises:
        ValueError: An input path or ``filenameOut`` is not a ``str``.
        IndexError: No input file was given.
    """
    for path in varargs:
        if not isinstance(path, str):
            raise ValueError("Errore: i file devono essere pdf")

    if not isinstance(filenameOut, str):
        raise ValueError("Errore: il nome del file deve essere di tipo str")

    # Without inputs there is no shortest document; fail with a clear
    # error instead of the accidental TypeError from range(float("inf")).
    if not varargs:
        raise IndexError("Errore: inserire almeno un file.")

    # Parse every input exactly once instead of once per output page.
    page_lists = [PdfReader(path).pages for path in varargs]
    shortest = min(len(pages) for pages in page_lists)

    merged = PdfWriter()
    for index in range(shortest):
        for pages in page_lists:
            merged.addPage(pages[index])

    merged.write(filenameOut+".pdf")
Пример #5
0
def go(inpfn, outfn):
    """Adjust the single page of ``inpfn`` and save the result as ``outfn``.

    The input is read with ``decompress=False`` and must contain exactly
    one page (the unpacking below fails otherwise).  The input's Info
    dictionary is wrapped in a new IndirectPdfDict on the output trailer.
    """
    source = PdfReader(inpfn, decompress=False)
    (only_page,) = source.pages
    output = PdfWriter()
    adjusted = adjust(only_page)
    output.addpage(adjusted)
    output.trailer.Info = IndirectPdfDict(source.Info)
    output.write(outfn)
def combine_match_sheets(match_sheets):
    """Concatenate the given PDF files into one combined match-sheet file.

    Pages are appended in the order of ``match_sheets`` and written to
    ``combined_match_sheets.pdf`` inside ``match_sheet_dir``.

    Returns:
        The path of the combined PDF.
    """
    output_fn = os.path.join(match_sheet_dir, "combined_match_sheets.pdf")
    combined = PdfWriter()
    for sheet_path in match_sheets:
        sheet = PdfReader(sheet_path)
        combined.addpages(sheet.pages)

    combined.write(output_fn)
    return output_fn
Пример #7
0
def save_pdf(infile, outpages):
    """Write ``outpages`` to a new PDF whose name is derived from ``infile``.

    The output name comes from ``create_filename(infile)``.  The Info
    dictionary of ``infile`` is carried over with its ``Producer`` entry
    replaced.  An input without an Info dictionary gets a fresh one instead
    of failing on the ``Producer`` assignment.
    """
    # Local import: only needed for the missing-Info fallback.
    from pdfrw import IndirectPdfDict

    trailer = PdfReader(infile)
    outfn = create_filename(infile)
    writer = PdfWriter()
    writer.addpages(outpages)

    info = trailer.Info
    if info is None:
        info = IndirectPdfDict()
    info.Producer = "https://github.com/sgelb/impositioner"
    writer.trailer.Info = info
    writer.write(outfn)
Пример #8
0
def main():
    """Strip ResearchGate additions from the PDF named on the command line.

    Three things are removed: the first page (the cover sheet), every
    page's ``/Annots`` entry, and the coloured underlinings drawn in the
    content streams.  The result is written to the output file name given
    as the second argument.
    """
    parser = argparse.ArgumentParser(description="Strip ResearchGate additions from a PDF")
    parser.add_argument("infile", metavar="input-filename", type=str, nargs=1,
                        help="PDF file to process")
    parser.add_argument("outfile", metavar="output-filename", type=str, nargs=1,
                        help="name for processed output file")
    args = parser.parse_args()

    # This regular expression matches the form of the ResearchGate
    # underlinings in the content streams. We match against a truncated form
    # of the distinctive RGB triplet because it's not always given with
    # the same accuracy.
    # "0.3333333333 0.6941176471 0.9607843137"
    regex = re.compile(r"""(0\.33333[0-9]+ 0\.694117[0-9]+ 0\.960784[0-9]+ RG
\d+\.?\d* w
\d+\.?\d* \d+\.?\d* m
\d+\.?\d* \d+\.?\d* )l
S""")

    dict_pages = PdfReader(args.infile[0]).pages

    def fix_stream(contents):
        """Make the underlinings in one content-stream object invisible."""
        if not hasattr(contents, "stream"):
            return
        s = contents.stream
        # We identify RG underlinings by their (hopefully unique)
        # RGB colour triplet.
        if s is not None and regex.search(s):
            # Minimal change: change the line draw commands to
            # moves, so no line is drawn. It would be more
            # satisfying to remove the stream entirely, but it's
            # simpler and safer to preserve the file structure
            # (in particular, the stream length) wherever possible.
            contents.stream = regex.sub("\\1m\nS", s)

    for page in dict_pages:
        if "/Annots" in page:
            # Remove all annotations. This may of course cause some
            # collateral damage, but PDFs of articles don't usually have
            # annotations so probably this will just strip ResearchGate
            # links. If this becomes a problem, it should be easy to
            # identify RG annotations and remove only them.
            page.pop("/Annots")

        contents = page.Contents
        if contents is None:
            # A page without content streams has nothing to clean; the
            # loop below would otherwise fail iterating over None.
            continue

        # There may be a stream in the Contents object and/or in its
        # children, so we check for both.
        fix_stream(contents)
        # Only an array holds child stream objects; iterating a single
        # stream dictionary would merely walk its keys.
        if isinstance(contents, list):
            for child in contents:
                fix_stream(child)

    writer = PdfWriter()

    # Start at the second page to remove the ResearchGate cover sheet.
    for page in dict_pages[1:]:
        writer.addpage(page)
    writer.write(args.outfile[0])
Пример #9
0
def test_pdf(pdfname):
    """Copy ``pdfname`` through PdfReader/PdfWriter into ``outdir``.

    The copy is named after the MD5 hex digest of ``pdfname``.  The
    original name is stored as ``OriginalFileName`` in the Info
    dictionary, or on the trailer itself when that assignment raises
    AttributeError.
    """
    digest = hashlib.md5(pdfname).hexdigest()
    outfn = os.path.join(outdir, digest + '.pdf')
    # Python 2 print-chevron form, kept as in the rest of this script.
    print >> stderr, '             ->', outfn

    reader = PdfReader(pdfname, decompress=False)
    try:
        reader.Info.OriginalFileName = pdfname
    except AttributeError:
        reader.OriginalFileName = pdfname

    out = PdfWriter()
    out.trailer = reader
    out.write(outfn)
Пример #10
0
def combine(inpfn, outfn, x, y, gap):
    """Build ``outfn`` from sheets that ``getPages`` assembles out of ``inpfn``.

    ``x``, ``y`` and ``gap`` are passed straight through to ``getPages``.
    """
    remaining = PdfReader(inpfn).pages
    sheet_writer = PdfWriter()

    # NOTE(review): the loop ends only when `remaining` becomes empty, so
    # getPages presumably removes the pages it uses - confirm.
    while remaining:
        sheet = getPages(remaining, x, y, gap)
        sheet_writer.addpage(sheet)

    sheet_writer.write(outfn)
Пример #11
0
def writepdf():
    """Save a copy of the global ``pdf`` with its Info fields overwritten.

    Creator, Author, Title and Producer are set to ``"NOT"`` and both date
    fields to ``"6/6/6"``.  The copy is written to the current directory
    as ``pwat.<basename of pdf>``.
    """
    outfn = "pwat." + os.path.basename(pdf)
    trailer = PdfReader(pdf)

    info = trailer.Info
    for field in ("Creator", "Author", "Title", "Producer"):
        setattr(info, field, "NOT")
    for field in ("CreationDate", "ModDate"):
        setattr(info, field, "6/6/6")

    writer = PdfWriter()
    writer.trailer = trailer
    writer.write(outfn)
Пример #12
0
def makeOnePagers(filename='GPO-CONAN-REV-2014.pdf' ,path='pdf/'):
    """Split ``filename`` into one PDF per page inside ``path``.

    Page ``i`` (zero-based) is written to ``<path>/pageindex-<i>.pdf``.
    The page count is printed first, then the index of each page handled.
    A page that cannot be written is reported and skipped so the remaining
    pages are still produced.

    Args:
        filename: PDF to split.
        path: Directory that receives the single-page files.
    """
    import os

    infile = PdfReader(filename)
    pages = len(infile.pages)
    print(pages)
    for i, p in enumerate(infile.pages):
        if not p:
            continue
        outfile = PdfWriter()
        outfile.addPage(p)
        # Honour the `path` argument rather than a hard-coded directory.
        target = os.path.join(path, 'pageindex-%s.pdf' % i)
        try:
            outfile.write(target)
        except Exception as err:
            # Best effort: keep going, but do not hide the failure.
            print('could not write %s: %s' % (target, err))
        print(i)
Пример #13
0
 def get(self,id):
     """Respond with an iframe embedding the requested pages of ``teste.pdf``.

     ``id`` is either a single page number (``"3"``) or an inclusive
     range (``"2-5"``); page numbers are 1-based.  The selected pages are
     written to ``teste.pdffrag`` and then embedded base64-encoded in a
     ``data:`` URI.
     """
     inpfn = 'teste.pdf'
     outfn = '%sfrag' % os.path.basename(inpfn)
     pages = PdfReader(inpfn).pages
     outdata = PdfWriter()

     # A single number is treated as the range N-N.
     bounds = [int(part) for part in id.split('-')]
     first, last = (bounds + bounds[-1:])[:2]
     for pagenum in range(first, last + 1):
         outdata.addpage(pages[pagenum - 1])
     outdata.write(outfn)

     # b64encode returns one unbroken line, which suits a data: URI, and
     # unlike encodestring it still exists on current Python versions.
     with open(outfn, "rb") as fragment:
         pdfout = base64.b64encode(fragment.read()).decode('ascii')

     self.write('<iframe src="data:application/pdf;base64,'+pdfout+'" style="position:fixed; top:0px; left:0px; bottom:0px; right:0px; width:100%; height:100%; border:none; margin:0; padding:0; overflow:hidden; z-index:999999;"/>')
Пример #14
0
def merge(*varargs,merge_file):
    """Concatenate PDF files from the ``pdf_file`` directory into one file.

    The inputs are merged in sorted name order and written to
    ``merge_file``.

    Args:
        *varargs: File names (relative to ``pdf_file``); each must be a
            ``str`` ending in ``.pdf``.
        merge_file: Output file name; ``.pdf`` is appended when missing.

    Raises:
        Exception: An input is not a string or does not end in ``.pdf``.
    """
    # Append the extension only when it is missing.
    if not merge_file.endswith('.pdf'):
        merge_file = merge_file+".pdf"

    for x in varargs:
        if not isinstance(x, str):
            raise Exception("Errore: Tutti i parametri devono essere stringhe.")

    files = []
    for x in varargs:
        if not x.endswith('.pdf'):
            raise Exception("Errore tutti i parametri devono terminare con .pdf")
        files.append(x)

    writer = PdfWriter()
    for fname in sorted(files):
        writer.addpages(PdfReader(os.path.join('pdf_file', fname)).pages)

    # Write to the requested name instead of a fixed "output.pdf".
    writer.write(merge_file)
Пример #15
0
    def consolidateAllSheets(self, subDir=None):
        """
        Merge every PDF in the print directory into ``output.pdf`` and
        remove every other entry of that directory.

        ``subDir``, when given, is appended to ``self.printdirectory`` by
        plain string concatenation, and file names are appended the same
        way, so the directory string must already end with a separator.
        PDFs are merged in sorted name order.
        """
        writer = PdfWriter()
        directory = self.printdirectory
        if subDir is not None:
            directory = directory + subDir

        pdf_names = sorted(name for name in os.listdir(directory)
                           if name.endswith('.pdf'))
        for pdf_name in pdf_names:
            source = PdfReader(os.path.join(directory, pdf_name))
            writer.addpages(source.pages)

        writer.write(directory+"output.pdf")

        # Everything except the merged file is deleted afterwards.
        for entry in os.listdir(directory):
            if entry != 'output.pdf':
                os.remove(directory+entry)
Пример #16
0
    def merge(*varargs, filenameOut='merge_file'):
        """Concatenate the given PDF files, in argument order, into one file.

        Args:
            *varargs: Paths of at least two PDF files; each must be a
                ``str`` ending in ``.pdf``.
            filenameOut: Output file name; ``.pdf`` is appended when missing.

        Raises:
            Exception: Fewer than two inputs, ``filenameOut`` is not a
                string, or an input does not end in ``.pdf``.
            ValueError: An input is not a string.
        """
        if len(varargs) < 2:
            raise Exception('Errore: utilizzare almeno due file.')

        if not isinstance(filenameOut, str):
            raise Exception('Errore: filenameOut deve essere una stringa.')

        if not filenameOut.endswith('.pdf'):
            filenameOut += ".pdf"

        output = PdfWriter()

        # Each input is validated right before it is read, so earlier
        # inputs are already loaded when a later one is rejected.
        for source in varargs:
            if not isinstance(source, str):
                raise ValueError("Errore: Tutti i parametri devono essere stringhe.")
            if not source.endswith('.pdf'):
                raise Exception("Errore: tutti i parametri devono terminare con .pdf")

            output.addpages(PdfReader(source).pages)

        output.write(filenameOut)
    page.AA = PdfDict()
    # You probably should just wrap each JS action with a try/catch,
    # because Chrome does no error reporting or even logging otherwise;
    # you just get a silent failure.
    page.AA.O = make_js_action("""
try {
  %s
} catch (e) {
  app.alert(e.message);
}
    """ % (script))

    page.Annots = PdfArray(annots)
    return page

if len(sys.argv) > 1:
    # The script file doubles as the field description: its leading lines
    # of the form "/// <name> <number> ..." each declare one form field.
    # The context manager makes sure the file is closed again.
    with open(sys.argv[1], 'r') as js_file:
        fields = []
        for line in js_file:
            if not line.startswith('/// '):
                break
            pieces = line.split()
            params = [pieces[1]] + [float(token) for token in pieces[2:]]
            fields.append(make_field(*params))

        # Rewind: the whole file, header lines included, is the script.
        js_file.seek(0)
        script_source = js_file.read()

    out = PdfWriter()
    out.addpage(make_page(fields, script_source))
    out.write('result.pdf')
Пример #18
0
def render(source, *, progress_cb=lambda x: None):
    """Export a document as PDF and return it as a readable binary file.

    Every page listed in the document's ``{ID}.content`` file is drawn
    onto a temporary PDF.  If the document has a base PDF (``{ID}.pdf``)
    the drawn pages are merged onto it; if it has a base PDF but no page
    has stroke data, the base PDF is returned unchanged.

    Args:
        source: Anything accepted by ``sources.get_source``.
        progress_cb: Called with a percentage between 0 and 100.  The
            first 50% covers drawing the pages, the second 50% merging
            with the base PDF.  An exception raised by the callback is
            not caught here, so it aborts the export.

    Returns:
        A file object for binary reading: either the untouched base PDF
        or a spooled temporary file with the exported PDF, rewound to the
        start.
    """
    vector = True  # TODO: Different rendering styles
    source = sources.get_source(source)

    # If this is using a base PDF, the percentage is calculated
    # differently.
    uses_base_pdf = source.exists('{ID}.pdf')

    # Document metadata should already be loaded (from device)
    # ...

    # Generate page information
    # If a PDF file was uploaded, but never opened, there may not be
    # a .content file. So, just use an empty page list, so it doesn't
    # break the rest of the process.
    pages = []
    if source.exists('{ID}.content'):
        with source.open('{ID}.content', 'r') as f:
            pages = json.load(f).get('pages', [])

    # Render each page as a pdf
    tmpfh = tempfile.TemporaryFile()
    pdf_canvas = canvas.Canvas(tmpfh, (PDFWIDTH, PDFHEIGHT))
    # TODO: check pageCompression

    # Don't load all the pages into memory, because large notebooks
    # about 500 pages could use up to 3 GB of RAM. Create them by
    # iteration so they get released by garbage collector.
    changed_pages = []
    annotations = []
    for i, page_id in enumerate(pages):
        page = document.DocumentPage(source, page_id, i)
        if source.exists(page.rmpath):
            changed_pages.append(i)
        page.render_to_painter(pdf_canvas, vector)
        annotations.append(page.get_grouped_annotations())
        progress_cb((i + 1) / len(pages) * 50)
    pdf_canvas.save()
    tmpfh.seek(0)

    # This new PDF represents just the notebook. If there was a
    # parent PDF, merge it now.
    if uses_base_pdf and not changed_pages:
        # Since there is no stroke data, just return the PDF data.  The
        # rendered temporary file is not needed, so release it right away.
        tmpfh.close()
        progress_cb(100)

        log.info('exported pdf')
        return source.open('{ID}.pdf', 'rb')

    # PDF exists, stroke data exists, so mix them together.
    if uses_base_pdf:
        rmpdfr = PdfReader(tmpfh)
        basepdfr = PdfReader(source.open('{ID}.pdf', 'rb'))
    else:
        basepdfr = PdfReader(tmpfh)
        # Alias, which is used for annotations and layers.
        rmpdfr = basepdfr

    # If making a 'layered' PDF (with optional content groups,
    # OCGs), associate the annotations with the layer.

    # This property list is put into the rmpdfr document, which
    # will not have any existing properties.
    ocgprop = IndirectPdfDict(OCGs=PdfArray(), D=PdfDict(Order=PdfArray()))

    # Decided once, before the loop: the flag is read again after the
    # loop and must exist even when the document has no pages.
    apply_ocg = False  #TODO configurable? bool(int(QSettings().value(
    #'pane/notebooks/export_pdf_ocg')))

    for i, basepage in enumerate(basepdfr.pages):
        rmpage = rmpdfr.pages[i]

        # Apply OCGs
        if apply_ocg:
            ocgorderinner = do_apply_ocg(basepage, rmpage, i, uses_base_pdf,
                                         ocgprop, annotations)
        else:
            ocgorderinner = None

        # Apply annotations to the rmpage. This must come after
        # applying OCGs, because the annotation may belong to
        # one of those groups.
        apply_annotations(rmpage, annotations[i], ocgorderinner)

        # If this is a normal notebook with highlighting,
        # just add the annotations and forget about the rest,
        # which are page geometry transformations.
        if uses_base_pdf:
            merge_pages(basepage, rmpage, i in changed_pages)

        progress_cb(((i + 1) / rmpdfr.numPages * 50) + 50)

    # Apply the OCG order. The basepdf may have already had OCGs
    # and so we must not overwrite them. NOTE: there are other
    # properties that ought to be carried over, but this is the
    # minimum required.
    if apply_ocg:
        if '/OCProperties' in basepdfr.Root:
            basepdfr.Root.OCProperties.OCGs += ocgprop.OCGs
            basepdfr.Root.OCProperties.D.Order += ocgprop.D.Order
        else:
            basepdfr.Root.OCProperties = ocgprop

    pdfw = PdfWriter()
    stream = tempfile.SpooledTemporaryFile(SPOOL_MAX)
    pdfw.write(stream, basepdfr)
    stream.seek(0)

    # The output has been written, so the intermediate rendering can go.
    tmpfh.close()

    log.info('exported pdf')
    return stream
Пример #19
0
'''

import sys
import os

# import find_pdfrw
from pdfrw import PdfReader, PdfWriter

# Command line: <input pdf> <rotation in degrees> <output pdf>
inpfn, rotate, outfn = (sys.argv[position] for position in (1, 2, 3))

rotate = int(rotate)
assert rotate % 90 == 0

trailer = PdfReader(inpfn)
pages = trailer.pages

# Every page is rotated: the requested angle is added to the page's
# effective (possibly inherited) rotation and reduced modulo 360.
for page in pages:
    current = int(page.inheritable.Rotate or 0)
    page.Rotate = (current + rotate) % 360

# Reuse the whole input trailer so the rest of the document is kept.
outdata = PdfWriter()
outdata.trailer = trailer
outdata.write(outfn)
Пример #20
0
def debug(event, context):
    """Stamp the copyright banner onto every page of the local ``sample.pdf``.

    A one-page banner PDF as wide as the first page of the document and 40
    units high is built with ReportLab, merged onto each page, and the
    result is written to ``processed_sample.pdf``.

    ``event`` and ``context`` are accepted but not used.
    """
    # Get Source PDF to watermark.  Passing the file name lets PdfReader
    # open and close the file itself, so no handle is left open.
    filename = "sample.pdf"
    existing_pdf = PdfReader(filename)

    # Get Dimensions of document to make corresponding sized watermark
    mbox = existing_pdf.pages[0].MediaBox
    mediabox = tuple(float(x) for x in mbox)

    with io.BytesIO() as packet:
        height = 40
        width = mediabox[2]
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet)
        can.setPageSize((width, height))

        # Get Copyright content
        copyrightContent = getCopyrightContent()

        # Stylesheet additions
        stylesheet = getSampleStyleSheet()
        style_watermark = stylesheet["Normal"]
        style_watermark.alignment = TA_CENTER
        style_watermark.textColor = colors.Color(0, 0, 0, alpha=0.5)
        style_watermark.fontSize = 8
        # ParagraphStyle names this attribute fontName; a plain `font`
        # attribute is not read by the paragraph layout.
        style_watermark.fontName = 'Helvetica'
        # Creating Paragraph
        copyright_paragraph = Paragraph(copyrightContent, style_watermark)
        # Creating Table to wrap Paragraph
        data = [[copyright_paragraph]]
        table = Table(data)
        table.setStyle(
            TableStyle([
                ('BACKGROUND', (0, 0), (-1, -1),
                 colors.Color(255, 255, 255, alpha=0.5)),
            ]))
        # Adding Table to Canvas
        table.wrapOn(can, math.floor(width), 15)
        table.drawOn(can, 0, 0)
        # Saving
        can.save()
        # Rewind so the banner can be read back from memory
        packet.seek(0)

        # Load the banner back as a PDF page
        watermark_input = PdfReader(packet)
        watermark = watermark_input.pages[0]
        # Iterate through pages, updating source file.
        for current_page, page in enumerate(existing_pdf.pages):
            print(f'page {current_page}')
            PageMerge(page).add(watermark).render()

        # write the modified content to disk
        writer_output = PdfWriter()
        with open(f"processed_{filename}", "wb") as pdfOutput:
            writer_output.write(pdfOutput, existing_pdf)

        print('Processed PDF - copyright added')
Пример #21
0
def copyrightParse(sourceKey, bucketName, context):
    """Stamp the copyright banner onto an S3-stored PDF and re-upload it.

    The object ``sourceKey`` in ``bucketName`` is downloaded, a banner as
    wide as its first page and 40 units high is merged onto every page,
    and the result is uploaded back under the same key with a
    ``copyright`` timestamp added to the object metadata.

    Args:
        sourceKey: Key of the PDF object.
        bucketName: Name of the bucket holding it.
        context: Accepted but not used.

    Returns:
        A status string: the abort notice when the metadata already has a
        ``copyright`` entry, otherwise ``Copyright Set: <timestamp>``.
    """
    # BOTO3 objects
    s3 = boto3.resource('s3')
    s3client = boto3.client('s3')
    # Named s3_object so the builtin `object` is not shadowed.
    s3_object = s3.Object(bucketName, sourceKey)

    # Copyright Data
    metadata = s3_object.metadata
    if "copyright" in metadata:
        return 'Copyright already exists - aborting'
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")
    metadata['copyright'] = timestampStr

    # Get prelim data from object
    with io.BytesIO(s3_object.get()['Body'].read()) as pdf_content_sample:
        existing_pdf = PdfReader(pdf_content_sample)
        # Get Dimensions of document to make corresponding sized watermark
        mbox = existing_pdf.pages[0].MediaBox
        mediabox = tuple(float(x) for x in mbox)

        ### ReportLab implementation
        # The banner PDF is built in memory
        with io.BytesIO() as packet:
            print('Loading PDF file - Watermark generation')
            height = 40
            width = mediabox[2]

            # create a new PDF with Reportlab
            can = canvas.Canvas(packet)
            can.setPageSize((width, height))

            # Get Copyright content
            copyrightContent = getCopyrightContent()

            # Stylesheet additions
            stylesheet = getSampleStyleSheet()
            style_watermark = stylesheet["Normal"]
            style_watermark.alignment = TA_CENTER
            style_watermark.textColor = colors.Color(0, 0, 0, alpha=0.5)
            style_watermark.fontSize = 8
            # ParagraphStyle names this attribute fontName; a plain
            # `font` attribute is not read by the paragraph layout.
            style_watermark.fontName = 'Helvetica'
            # Creating Paragraph
            copyright_paragraph = Paragraph(copyrightContent, style_watermark)
            # Creating Table to wrap Paragraph
            data = [[copyright_paragraph]]
            table = Table(data)
            table.setStyle(
                TableStyle([
                    ('BACKGROUND', (0, 0), (-1, -1),
                     colors.Color(255, 255, 255, alpha=0.5)),
                ]))
            # Adding Table to Canvas
            # Make sure the width is an integer!
            print(f'Table width set to {math.floor(width)}')
            table.wrapOn(can, math.floor(width), 15)
            table.drawOn(can, 0, 0)
            # Saving
            can.save()
            # Rewind so the banner can be read back from memory
            packet.seek(0)

            watermark_input = PdfReader(packet)
            watermark = watermark_input.pages[0]
            # Iterate through pages, updating source file.
            for page in existing_pdf.pages:
                PageMerge(page).add(watermark).render()

            # Write the stamped document to memory and upload it over
            # the original key together with the updated metadata.
            writer_output = PdfWriter()
            with io.BytesIO() as pdfOutput:
                writer_output.write(pdfOutput, existing_pdf)
                print('File written to PDFWriter')
                pdfOutput.seek(0)
                s3client.upload_fileobj(pdfOutput,
                                        bucketName,
                                        sourceKey,
                                        ExtraArgs={"Metadata": metadata})
            status = f'Copyright Set: {timestampStr}'
    return status
Пример #22
0
                                letters = a, b, c""")
parser.add_argument("--prefix", "-p", default="",
                    help="prefix to the page labels")
parser.add_argument("--firstpagenum", "-f", type=int, default=1,
                    help="number to attribute to the first page of this index")
parser.add_argument("--outfile", "-o", type=Path, default=None, metavar="out.pdf",
                    help="Where to write the output file")
options = parser.parse_args()

reader = PdfReader(str(options.file.resolve()))

# With --delete an empty label set replaces whatever the file had;
# otherwise the existing labels are kept and one new scheme is appended.
if not options.delete:
    labels = PageLabels.from_pdf(reader)
    labels.append(PageLabelScheme(startpage=options.startpage - 1,
                                  style=options.type,
                                  prefix=options.prefix,
                                  firstpagenum=options.firstpagenum))
else:
    labels = PageLabels()

# Write the new page labels to the PDF
labels.write(reader)
print("New labels to be written:")
print("\n".join(str(label) for label in labels))

# Without --outfile the input file is overwritten.
outfile = options.outfile or options.file
writer = PdfWriter()
writer.trailer = reader
writer.write(str(outfile.resolve()))
print("Resulting pdf file created : {}".format(outfile))
Пример #23
0
    if not isinstance(initial, list):
        initial = [initial]
    files = []
    queue = initial[:]
    while bool(queue):
        current = queue.pop(0)
        if isfile(current) and splitext(current)[1] in ext:
            files.append(current)
        elif isdir(current):
            sub = [join(current,x) for x in listdir(current)]
            queue += sub

    logging.info("Found {} {} files".format(len(files), ext))
    return files

pdfs = get_data_files(args.directory, '.pdf')

# Copy every PDF found to the output directory without its first page.
logging.info("Chopping pdfs")
for pdf in pdfs:
    logging.info("Reading: {}".format(pdf))
    data = PdfReader(pdf)
    edited = PdfWriter()

    for page in data.pages[1:]:
        edited.addpage(page)

    # Keep the original file name, only the directory changes.
    out_name = join(args.out, split(pdf)[1])
    logging.info("Writing to: {}".format(out_name))
    edited.write(out_name)
    logging.info("-----------")
Пример #24
0
from pdfrw import PdfReader, PdfWriter
import os
# Interactive splitter: repeatedly asks how many pages to cut off the
# front of the PDF and writes each cut to its own file.
directory=os.getcwd()
print("Abdullah Faruk ÇİFTLER | farukciftler.com | linkedin.com/in/farukciftler/ \n")
print("İyi işlerde kullanınız :) \n ")
fname=input("Lütfen PDF dosyanızı bu programın olduğu klasöre attıktan sonra XYZ.pdf şeklinde giriniz: ")
# os.path.join picks the right separator on every platform.
path = os.path.join(directory, fname)
# Parse the document once and reuse it.
pdf=PdfReader(path)
pagepdf = pdf.pages
pages = len(pagepdf)
startingpage=1

while(pages>=1):
    print("Kalan sayfa sayısı: " + str(pages) + "\n ")
    try:
        splitpage=int(input("Lütfen baştan ayırmak istediğiniz sayfa sayısını giriniz: "))
    except ValueError:
        # Not a number: ask again.
        continue
    if splitpage < 1:
        # Zero or negative counts would never finish the document.
        continue
    # Never read past the last page.
    splitpage = min(splitpage, pages)
    endpage = startingpage + splitpage
    outdata = PdfWriter(f'{fname}_sayfa_{startingpage}_{endpage-1}.pdf')
    for pagenum in range(startingpage, endpage):
        outdata.addpage(pagepdf[pagenum-1])
    outdata.write()
    startingpage = endpage
    pages=pages-splitpage
    

Пример #25
0
def mergepdfs(titles, name):
    """Concatenate the PDF files in ``titles`` into ``<name>.pdf``.

    Pages are appended in the order of ``titles``.
    """
    merged = PdfWriter()
    for source_path in titles:
        source = PdfReader(source_path)
        merged.addpages(source.pages)
    merged.write(name + '.pdf')
    page.AA = PdfDict()
    # You probably should just wrap each JS action with a try/catch,
    # because Chrome does no error reporting or even logging otherwise;
    # you just get a silent failure.
    page.AA.O = make_js_action("""
try {
  %s
} catch (e) {
  app.alert(e.message);
}
    """ % (script))

    page.Annots = PdfArray(annots)
    return page

if len(sys.argv) > 1:
    # The script file doubles as the field description: its leading lines
    # of the form "/// <name> <number> ..." each declare one form field.
    # The context manager makes sure the file is closed again.
    with open(sys.argv[1], 'r') as js_file:
        fields = []
        for line in js_file:
            if not line.startswith('/// '):
                break
            pieces = line.split()
            params = [pieces[1]] + [float(token) for token in pieces[2:]]
            fields.append(make_field(*params))

        # Rewind: the whole file, header lines included, is the script.
        js_file.seek(0)
        script_source = js_file.read()

    out = PdfWriter()
    out.addpage(make_page(fields, script_source))
    out.write('result.pdf')
Пример #27
0
def makePdfs(input_CSV, input_pdf_template):
    """Fill a PDF form template once per CSV column and merge the results.

    The CSV is read into ``table`` (a list of rows).  Column 0 of every
    row is a label; each further column describes one PDF to create.
    As used below:

    * ``table[0]``: suffix appended to each file name
    * ``table[1]``: folder name for each file
    * ``table[2]``: directory whose listing is checked for that folder
    * ``table[3]``: base file name (its length also fixes the file count)
    * ``table[4:]``: form data, first cell is the field key

    One filled PDF is written per column, then all of them are
    concatenated into ``combined.pdf`` in the last destination folder.

    Args:
        input_CSV: Path of the CSV sheet.
        input_pdf_template: Path of the PDF form template.
    """

    # Import the CSV file with the form information into a list of rows.
    # NOTE(review): the parser below drops empty cells, discards whatever
    # follows the last comma of a line, and never stores a final line that
    # lacks a trailing newline - confirm this matches the expected sheets.
    with open(input_CSV) as CSV_Template:
        lines = CSV_Template.readlines()
        cell = []
        row = []
        table = []
        for line in lines:
            for characters in line:

                if characters == ',':
                    # A comma ends the current cell; empty cells are skipped.
                    cell = ''.join(cell)
                    if cell != '':
                        row.append(cell)
                    cell = []
                elif characters == '\n':
                    # End of line: store the row; the pending cell is reset
                    # without being appended.
                    table.append(row)
                    row = []
                    cell = []
                else:
                    cell.append(characters)

    # Number of files to create: entries of row 3 minus the label column.
    file_ammount = len(table[3]) - 1

    # Pad every row with '' so that all rows have file_ammount + 1 cells.
    for rows in table:
        while len(rows) < file_ammount + 1:
            rows.append('')
        for cells in rows:
            # NOTE(review): rebinding the loop variable does not change the
            # list, so this inner loop has no effect.
            if cells is None:
                cells = ''

    # Repeat file path, folder name and project name along their rows,
    # report blank entries in the file-name row, and replace the text
    # 'Current Directory' with the current working directory.

    for i, rows in enumerate(table):
        for j, cells in enumerate(rows[1:]):

            if rows[j] == 'Current Directory':
                rows[j] = os.getcwd()

            try:
                if rows[j + 1] == '':

                    # Rows 0-2: a blank cell inherits the value to its left.
                    if rows[j + 1] != rows[j] and i < 3:
                        rows[j + 1] = rows[j]

                    # Rows up to 3 must not contain blanks.
                    elif i < 4:
                        print(
                            'error, there is a blank where there shouldnt be')

                    # Form-data rows: stop at the first blank cell.
                    else:
                        break
            except:
                continue

    # Put the pdf names into a list (not used further down).
    PDF_Names = []

    for i in range(len(table[1][1:])):
        PDF_Names.append(table[1][i + 1] + '_' + table[0][i + 1])

    working_directory = os.getcwd() + '/'

    # folder[0] is the folder of the current file, folder[1] the previous one.
    folder = ['', '']

    # Load the pdf template; its first page's annotations are the form fields.
    template_pdf = pdfrw.PdfReader(input_pdf_template)
    annotations = template_pdf.pages[0][ANNOT_KEY]

    # Paths of all PDFs written, in creation order.
    inputName = []
    # Create one PDF per data column.
    for i in range(file_ammount):

        # Make the directory for the PDFs if needed.
        folder[0] = table[1][i + 1]

        if folder[0] != folder[1] and not folder[0] in os.listdir(
                table[2][i + 1]):
            os.mkdir(working_directory + folder[0])
        folder[1] = folder[0]

        # Create the PDF file path.
        destination_folder = working_directory + folder[0] + '/'
        file_name = table[3][i + 1]
        name_ending_each_file = table[0][i + 1]
        PDF_file_path = destination_folder + file_name + name_ending_each_file + '.pdf'
        inputName = inputName + [PDF_file_path]

        # Create a dictionary mapping form keys to this column's values.
        data_table = []
        for rows in table[4:]:
            data_table.append([rows[0], rows[i + 1]])
        data_dict = dict(data_table)

        # Edit the PDF template and write the PDF.  The same template object
        # is updated again on every iteration.
        for annotation in annotations:
            if annotation[SUBTYPE_KEY] == WIDGET_SUBTYPE_KEY:
                if annotation[ANNOT_FIELD_KEY]:
                    # Drop the first and last character of the field name.
                    key = annotation[ANNOT_FIELD_KEY][1:-1]
                    if key in data_dict.keys():
                        annotation.update(
                            pdfrw.PdfDict(V='{}'.format(data_dict[key])))

                        annotation.update(
                            pdfrw.PdfDict(AP='{}'.format({'/N': (144, 0)})))

                        annotation.update(
                            pdfrw.PdfDict(DA='{}'.format('/Helv 0 Tf 0 g')))

        pdfrw.PdfWriter().write(PDF_file_path, template_pdf)

    # Fails when no file was requested (file_ammount == 0).
    assert inputName
    # Merge all written PDFs; destination_folder is the one of the last file.
    outfn = destination_folder + '/' + 'combined.pdf'
    writer = PdfWriter()
    for inpfn in inputName:
        writer.addpages(PdfReader(inpfn).pages)

    writer.write(outfn)
Пример #28
0
# -*- coding: utf-8 -*-
import os, sys, datetime
from pdfrw import PdfReader, PdfWriter

# Merge the numbered PDFs of one week's data directory into a single
# report.  sys.argv[1] is the week identifier used in the directory name.
writer = PdfWriter()
now = datetime.datetime.now()
data_path = os.getcwd() + "/data/"
dir_path = data_path + str(now.year) + '_' + sys.argv[1] + "week"

result_dir = dir_path + "/result"
if not os.path.exists(result_dir):
    os.mkdir(result_dir)


def _numeric_prefix(name):
    """Return the integer before the first dot of a file name."""
    return int(name.split(".")[0])


# Files are merged in numeric order of their names (1.pdf, 2.pdf, ...).
files = [entry for entry in os.listdir(dir_path) if entry.endswith('.pdf')]
for fname in sorted(files, key=_numeric_prefix):
    print ("[" + fname + "] Merged")
    source = PdfReader(os.path.join(dir_path, fname))
    writer.addpages(source.pages)

writer.write(dir_path + "/result/"+ str(now.year) + "_" + sys.argv[1] + "_merge.pdf")
print("\nENDED MERGE REPORT!")
Пример #29
0
from pdfrw import PdfReader, PdfWriter

# Wrap the thesis text in the template: the first two template pages go in
# front, the third template page goes behind the text.
mall = PdfReader('Mall.pdf')
text = PdfReader('kandidat.pdf')

writer = PdfWriter()
for front_index in (0, 1):
    writer.addpage(mall.pages[front_index])
writer.addpages(text.pages)
back_page = mall.pages[2]
writer.addpage(back_page)
writer.write('KarlJohannesKandidat.pdf')

Пример #30
0
    def write_async(self, outfile, process_semaphore, progress_cb=None):
        """Assemble the PDF for all pages of this document and write it out.

        This is a generator-based coroutine (it uses ``yield from``), so it
        has to be driven by asyncio rather than called like a plain function.

        The pages in ``self._pages`` are built concurrently with
        ``asyncio.gather``.  The result is first written with pdfrw to a
        temporary file and then passed through the external ``QPDF_CMD``
        program, which produces ``outfile``.

        Args:
            outfile: Path of the final PDF; handed to ``QPDF_CMD`` as an
                absolute path.
            process_semaphore: Passed through unchanged to the per-page image
                coroutines and to ``run_command_async`` (presumably it limits
                the number of concurrent worker processes; confirm against
                those helpers).
            progress_cb: Optional callable.  After each finished page it is
                called with ``finished_pages / len(self._pages)``.
        """
        pdf_writer = PdfWriter(version="1.5")

        # One transparency group dictionary shared by every page.
        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        # One font resource dictionary shared by every page that has text.
        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        # Register one empty page object per source page up front, so that
        # internal links can reference any page regardless of build order.
        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        # Embed the ICC profile read from SRGB_ICC_FILENAME (flate-compressed)
        # and expose it as an ICCBased color space; it becomes each page's
        # DefaultRGB color space below.
        srgb_colorspace = PdfDict()
        srgb_colorspace.indirect = True
        srgb_colorspace.N = 3  # Number of components (red, green, blue)
        with open(SRGB_ICC_FILENAME, "rb") as f:
            srgb_colorspace_stream = f.read()
        srgb_colorspace.Filter = [PdfName.FlateDecode]
        # The compressed bytes are stored as a latin-1 str, which maps every
        # byte value to exactly one character.
        srgb_colorspace.stream = zlib.compress(srgb_colorspace_stream,
                                               9).decode("latin-1")
        srgb_colorspace.Length1 = len(srgb_colorspace_stream)
        default_rgb_colorspace = PdfArray([PdfName.ICCBased, srgb_colorspace])
        default_rgb_colorspace.indirect = True

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Fills the pre-registered ``pdf_page`` dictionary in place from
            # the source ``page``.
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                # Only foregrounds without a color get a mask object.
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            # The two inner gathers keep the foreground images and the masks
            # in the same order as ``page.foreground``.
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            # Running index used to name image XObjects /Im0, /Im1, ...
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            # Last fill color written to the stream; avoids emitting the same
            # "rg" operator twice in a row.
            current_color = None
            # A non-white page color is painted as a filled unit rectangle.
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                # A mask is registered as an XObject but never drawn with
                # "Do"; only the foreground that follows it is painted.
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            # Text block: font /F1 at size 1 with text rendering mode 3.
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    # Build the text matrix that maps the string onto the
                    # rectangle (t.x, t.y, t.width, t.height).
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    # The string is written as a hex string of UTF-16-BE
                    # code units.
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                # Links produce a borderless Link annotation covering the
                # text rectangle, even when the text itself is empty.
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        # internal_link is (page index, (x, y)); the index
                        # selects one of the pre-registered page objects.
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            # A page without graphics and text gets no /Contents entry.
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            # finished_pages lives in write_async's scope and is shared by
            # all make_page coroutines.
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

        finished_pages = 0
        yield from asyncio.gather(*[
            make_page(page, pdf_page, process_semaphore)
            for page, pdf_page in zip(self._pages, pdf_pages)
        ])

        trailer = pdf_writer.trailer

        # Random 16-byte file identifier, used for both entries of /ID.
        document_id = PdfString().from_bytes(os.urandom(16))
        trailer.ID = [document_id, document_id]

        mark_info = PdfDict()
        mark_info.Marked = PdfBool(True)
        trailer.Root.MarkInfo = mark_info

        struct_tree_root = PdfDict()
        struct_tree_root.Type = PdfName.StructTreeRoot
        trailer.Root.StructTreeRoot = struct_tree_root

        # XMP metadata stream declaring PDF/A part "2", conformance "A".
        metadata = PdfDict()
        metadata.indirect = True
        metadata.Type = PdfName.Metadata
        metadata.Subtype = PdfName.XML
        xmp = XMPMeta()
        xmp.set_property(XMP_NS_PDFA_ID, "part", "2")
        xmp.set_property(XMP_NS_PDFA_ID, "conformance", "A")
        metadata_stream = xmp.serialize_to_str().encode("utf-8")
        metadata.Filter = [PdfName.FlateDecode]
        metadata.stream = zlib.compress(metadata_stream, 9).decode("latin-1")
        metadata.Length1 = len(metadata_stream)
        trailer.Root.Metadata = metadata

        # Write with pdfrw into a scratch directory, then let QPDF_CMD
        # produce the final file (optionally linearized) at ``outfile``.
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [
                QPDF_CMD, "--stream-data=preserve",
                "--object-streams=preserve", "--normalize-content=n",
                "--newline-before-endstream"
            ]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([
                path.abspath(path.join(temp_dir, "temp.pdf")),
                path.abspath(outfile)
            ])
            yield from run_command_async(cmd, process_semaphore)
Пример #31
0
args = parser.parse_args()

# The shuffling magic: interleave the pages of the "odd" scan and the "even"
# scan, either of which may have been scanned back to front.
even = PdfReader(args.evenFile[0])
odd = PdfReader(args.oddFile[0])
isEvenReversed = args.evenrev
isOddReversed = args.oddrev
all = PdfWriter()
blank = PageMerge()
blank.mbox = [0, 0, 612, 792]  # 8.5 x 11
blank = blank.render()

# Work on copies so the readers' own page lists are left untouched.
odd_pages = list(odd.pages)
even_pages = list(even.pages)
if isOddReversed:
    odd_pages.reverse()
if isEvenReversed:
    even_pages.reverse()

# A document with an odd page count has one even page fewer than odd pages.
# Pad the shorter side with the blank page instead of dropping a page or
# indexing past the start of the list (a negative index silently wraps
# around and would duplicate a page).
shortfall = len(odd_pages) - len(even_pages)
if shortfall > 0:
    even_pages.extend([blank] * shortfall)
elif shortfall < 0:
    odd_pages.extend([blank] * -shortfall)

for odd_page, even_page in zip(odd_pages, even_pages):
    all.addpage(odd_page)
    all.addpage(even_page)

all.write(args.resultFile[0])
Пример #32
0
def render(source,
           *,
           progress_cb=lambda x: None,
           expand_pages=True,
           template_alpha=0.3,
           only_annotated=False,
           black='black',
           white='white',
           gray=None,
           highlight=HIGHLIGHT_DEFAULT_COLOR):
    """Render a source document as a PDF file.

    source: The reMarkable document to be rendered.  This may be
              - A filename or pathlib.Path to a zip file containing the
                document, such as is provided by the Cloud API.
              - A filename or pathlib.Path to a root-level file from the
                document, such as might be copied off the device directly.
              - An object implementing the Source API.  See rmrl.sources
                for examples and further documentation.
    progress_cb: A function which will be called with a progress percentage
                 between 0 and 100.  The first 50% indicate rendering the
                 annotations, and the second the merging of these into the
                 base PDF file.  If this callback raises an error, this
                 function will abort gracefully and propagate the error up
                 the stack.
    expand_pages: Boolean value (default True) indicating whether pages
                  should be made larger, to reflect the view provided by
                  the reMarkable device.
    template_alpha: Opacity of the template backgrounds in notebooks.  0
                    makes the templates invisible, 1 makes them fully dark.
    only_annotated: Boolean value (default False) indicating whether only
                    pages with annotations should be output.
    black: A string giving the color to use as "black" in the document.
           Can be a color name or a hex string.  Default: 'black'
    white: A string giving the color to use as "white" in the document.
           See `black` parameter for format.  Default: 'white'
    gray: A string giving the color to use as "gray" in the document.
          See `black` parameter for format.  Default: None, which means to
          pick an average between the "white" and "black" values.
    highlight: A string giving the color to use for the highlighter.
               See `black` parameter for format.

    Returns a binary file object positioned at its start: either the
    untouched base PDF opened from the source (when there is a base PDF and
    no page has stroke data) or a spooled temporary file holding the
    rendered PDF.
    """

    colors = parse_colors(black, white, gray, highlight)

    vector = True  # TODO: Different rendering styles
    source = sources.get_source(source)

    # If this is using a base PDF, the percentage is calculated
    # differently.
    uses_base_pdf = source.exists('{ID}.pdf')

    # Generate page information
    # If a PDF file was uploaded, but never opened, there may not be
    # a .content file. So, just load a barebones one with a 'pages'
    # key of zero length, so it doesn't break the rest of the
    # process.
    pages = []
    if source.exists('{ID}.content'):
        with source.open('{ID}.content', 'r') as f:
            pages = json.load(f).get('pages', [])

    # Render each page as a pdf
    tmpfh = tempfile.TemporaryFile()
    pdf_canvas = canvas.Canvas(tmpfh, (PDFWIDTH, PDFHEIGHT))
    # TODO: check pageCompression

    # Don't load all the pages into memory, because large notebooks
    # about 500 pages could use up to 3 GB of RAM. Create them by
    # iteration so they get released by garbage collector.
    # Indices of pages that have stroke data. Only used for membership
    # tests and truthiness, so a set keeps the lookups constant-time.
    changed_pages = set()
    annotations = []
    for i, page_id in enumerate(pages):
        page = document.DocumentPage(source, page_id, i, colors=colors)
        if source.exists(page.rmpath):
            changed_pages.add(i)
        page.render_to_painter(pdf_canvas, vector, template_alpha)
        annotations.append(page.get_grouped_annotations())
        progress_cb((i + 1) / len(pages) * 50)
    pdf_canvas.save()
    tmpfh.seek(0)

    # This new PDF represents just the notebook. If there was a
    # parent PDF, merge it now.
    if uses_base_pdf and not changed_pages:
        # Since there is no stroke data, just return the PDF data.
        # The rendered notebook is not needed on this path, so release
        # the temporary file instead of leaving it to the garbage collector.
        tmpfh.close()
        progress_cb(100)

        log.info('exported pdf')
        return source.open('{ID}.pdf', 'rb')

    # PDF exists, stroke data exists, so mix them together.
    if uses_base_pdf:
        rmpdfr = PdfReader(tmpfh)
        basepdfr = PdfReader(source.open('{ID}.pdf', 'rb'))
    else:
        basepdfr = PdfReader(tmpfh)
        # Alias, which is used for annotations and layers.
        rmpdfr = basepdfr

    # If making a 'layered' PDF (with optional content groups,
    # OCGs), associate the annotations with the layer.

    # This property list is put into the rmpdfr document, which
    # will not have any existing properties.
    ocgprop = IndirectPdfDict(OCGs=PdfArray(), D=PdfDict(Order=PdfArray()))

    # Defined before the loop because it is read again after it; a
    # document with zero pages would otherwise hit a NameError below.
    apply_ocg = False  #TODO configurable? bool(int(QSettings().value(
    #'pane/notebooks/export_pdf_ocg')))

    for i in range(0, len(basepdfr.pages)):
        basepage = basepdfr.pages[i]
        rmpage = rmpdfr.pages[i]

        # Apply OCGs
        if apply_ocg:
            ocgorderinner = do_apply_ocg(basepage, rmpage, i, uses_base_pdf,
                                         ocgprop, annotations)
        else:
            ocgorderinner = None

        # Apply annotations to the rmpage. This must come after
        # applying OCGs, because the annotation may belong to
        # one of those groups.
        apply_annotations(rmpage, annotations[i], ocgorderinner)

        # If this is a normal notebook with highlighting,
        # just add the annotations and forget about the rest,
        # which are page geometry transformations.
        if uses_base_pdf:
            merge_pages(basepage, rmpage, i in changed_pages, expand_pages)

        progress_cb(((i + 1) / rmpdfr.numPages * 50) + 50)

    # Apply the OCG order. The basepdf may have already had OCGs
    # and so we must not overwrite them. NOTE: there are other
    # properties that ought to be carried over, but this is the
    # minimum required.
    if apply_ocg:
        if '/OCProperties' in basepdfr.Root:
            basepdfr.Root.OCProperties.OCGs += ocgprop.OCGs
            basepdfr.Root.OCProperties.D.Order += ocgprop.D.Order
        else:
            basepdfr.Root.OCProperties = ocgprop

    stream = tempfile.SpooledTemporaryFile(SPOOL_MAX)
    pdfw = PdfWriter(stream)
    if not only_annotated:
        # We are writing out everything, so we can take this shortcut:
        pdfw.write(trailer=basepdfr)
    else:
        for i, page in enumerate(basepdfr.pages):
            if i in changed_pages:
                pdfw.addpage(page)
        pdfw.write()
    stream.seek(0)

    log.info('exported pdf')
    return stream
Пример #33
0
So she did an 8.5x11" output with 0.5" margin all around
(actual size of useful area 7.5x10") and we scaled it
up by 4.8.

We also copy the Info dict to the new PDF.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict


def adjust(page, margin=36, scale=4.8):
    """Return a new page showing *page* minus its margins, enlarged.

    page:   the source page object.
    margin: width, in PDF units, of the border removed on every side.
    scale:  factor applied to the remaining view.
    """
    left, bottom, right, top = PageMerge().add(page).xobj_box
    inner_width = right - left - 2 * margin
    inner_height = top - bottom - 2 * margin
    # viewrect is given as (x, y, width, height).
    cropped = PageMerge().add(
        page, viewrect=(margin, margin, inner_width, inner_height))
    cropped[0].scale(scale)
    return cropped.render()


# Exactly one command-line argument is expected: the input PDF.
# The unpacking fails with ValueError for any other argument count.
[inpfn] = sys.argv[1:]
outfn = 'poster.%s' % os.path.basename(inpfn)
reader = PdfReader(inpfn)
writer = PdfWriter(outfn)
# Only the first page of the input becomes the poster.
first_page = reader.pages[0]
writer.addpage(adjust(first_page))
# Carry the document information dictionary over to the new file.
writer.trailer.Info = IndirectPdfDict(reader.Info or {})
writer.write()
Пример #34
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image ``f`` with img2pdf and verify the resulting PDF.

            The generated PDF is parsed with pdfrw and checked for its
            overall structure, for each page's geometry and content stream,
            and for the embedded image data matching the input image.
            Finally the generated PDF and the reference PDF ``out`` are both
            re-serialized with pdfrw and must be byte-identical.

            f:          path of the input image.
            out:        path of the reference PDF.
            with_pdfrw: forwarded to ``img2pdf.convert``.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            # len() yields an int, so it must be compared with ints; the
            # former comparison with '1' / '2' was always False and the
            # /Size checks below never ran.
            num_pages = len(x.Root.Pages.Kids)
            if num_pages == 1:
                self.assertEqual(x.Size, '7')
            elif num_pages == 2:
                self.assertEqual(x.Size, '10')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                # Page size in PDF units (1/72 inch).
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    # Integral values are written without a fraction,
                    # others with up to four decimals and no trailing zeros.
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                      [PdfName.FlateDecode],
                                      [PdfName.CCITTFaxDecode]])
                # test if the colorspace is valid
                self.assertIn(
                    imgprops.ColorSpace, [PdfName.DeviceGray,
                                          PdfName.DeviceRGB,
                                          PdfName.DeviceCMYK])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [[PdfName.DCTDecode],
                                       [PdfName.JPXDecode]]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    # Prepend a TIFF header to the raw CCITT data so that
                    # PIL can decode it for comparison with the input.
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == [PdfName.FlateDecode]:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    colorspace = imgprops.ColorSpace
                    if colorspace == PdfName.DeviceGray:
                        colorspace = 'L'
                    elif colorspace == PdfName.DeviceRGB:
                        colorspace = 'RGB'
                    elif colorspace == PdfName.DeviceCMYK:
                        colorspace = 'CMYK'
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                      int(imgprops.Height)),
                                         imgdata)
                    if orig_img.mode == '1':
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("L").tobytes())
                    elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("RGB").tobytes())
                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not
                    # have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
Пример #35
0
def go(inpfn, outfn):
    """Read the PDF *inpfn* and write the sheets built by ``get4`` to *outfn*.

    ``get4`` is called with the list of remaining pages until that list is
    empty, so it is expected to remove the pages it uses from the list
    (assumption: verify against ``get4``).
    """
    remaining = PdfReader(inpfn).pages
    output = PdfWriter()
    while remaining:
        sheet = get4(remaining)
        output.addpage(sheet)
    output.write(outfn)
Пример #36
0
    def write_async(self, outfile, process_semaphore, progress_cb=None):
        pdf_writer = PdfWriter(version="1.5")

        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem),
                    get_pdf_background(psem),
                    asyncio.gather(*[fg.pdf_image(psem)
                                     for fg in page.foreground]),
                    asyncio.gather(*[get_pdf_mask(fg, psem)
                                     for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray([0, 0,
                                          PdfNumber(page.width),
                                          PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" +
                               "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " +
                             "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None and
                        current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" +
                           "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(),
                        PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [PdfNumber(t.x),
                                      PdfNumber(t.y),
                                      PdfNumber(t.x + t.width),
                                      PdfNumber(t.y + t.height)]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page,
                            PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y),
                            0]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"),
                        9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))
        finished_pages = 0
        yield from asyncio.gather(
            *[make_page(page, pdf_page, process_semaphore)
              for page, pdf_page in zip(self._pages, pdf_pages)])

        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [QPDF_CMD,
                   "--stream-data=preserve",
                   "--object-streams=preserve",
                   "--normalize-content=n"]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([path.abspath(path.join(temp_dir, "temp.pdf")),
                        path.abspath(outfile)])
            yield from run_command_async(cmd, process_semaphore)
Пример #37
0
summary = PdfWriter()
summary_path = OUTPUT_PATH_PREFIX.format(now=now, here=here) + ".pdf"

failed_to_add_page_errors = []
for path, article, matching_authors in paths:

    try:
        with open(path, "rb") as rp:
            summary.addpage(PdfReader(rp).getPage(0))

    except PdfParseError:
        print(f"Failed to add page from {path}")
        failed_to_add_page_errors.append((path, article, matching_authors))

with open(summary_path, "wb") as fp:
    summary.write(fp)

if failed_to_add_page_errors or new_articles_with_errors:
    failure_summary = "However, there were some errors that occurred.\n\n"

    for count, (article,
                matching_authors) in enumerate(new_articles_with_errors,
                                               start=1):
        kwds = formatted_summary(article)
        kwds.update(count=count)

        summary = EXECUTIVE_SUMMARY_ARTICLE_FORMAT.format(**kwds).rstrip()

        failure = f"""Could not find PDF for this article:

        {summary}
Пример #38
0
except ImportError:
    print("Instale em seu sistema a biblioteca pdfrw!\n\n")
    print("sudo apt install python3-pdfrw\n")
    quit()

# Limpa o \n do final da linha na lista


def remove_quebra_de_linha(linha):
    """Return ``linha`` with every newline character removed."""
    return ''.join(linha.split('\n'))


# Writer that accumulates the pages of the combined PDF
writer = PdfWriter()

# Path of the output file. A bare file name is resolved against the current
# working directory.
pdefao = 'super.pdf'

# "my_pdfs.txt" lists the input PDF files, one full path per line, e.g.:
# /home/meu_usuario/arquivos_pdf/arquivo1.pdf
# /home/meu_usuario/arquivos_pdf/arquivo2.pdf
# The file is opened in a ``with`` block so the handle is always closed.
with open("my_pdfs.txt") as pdf_list:
    # Read the list line by line and append each PDF to the combined file
    for arquivo in pdf_list:
        arquivo = remove_quebra_de_linha(arquivo)
        if not arquivo:
            # Blank line (typically a trailing one): there is no file to read
            continue
        writer.addpages(PdfReader(arquivo).pages)

writer.write(pdefao)
Пример #39
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image ``f`` with img2pdf and verify the resulting PDF.

            The converted document is parsed with pdfrw and its trailer,
            catalog, page tree, every page and every embedded image are
            checked against the input image.  Finally the converted document
            and the reference PDF ``out`` are both re-serialised with pdfrw
            and compared with ``compare_pdf``.

            Args:
                f: Path of the input image file.
                out: Reference PDF passed to ``PdfReader``.
                with_pdfrw: Forwarded to ``img2pdf.convert``.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            # len() returns an int, so the page count has to be compared with
            # ints.  The previous comparison against the strings '1' and '2'
            # was always false, which left the Size assertions unreachable.
            num_pages = len(x.Root.Pages.Kids)
            if num_pages == 1:
                self.assertEqual(x.Size, '7')
            elif num_pages == 2:
                self.assertEqual(x.Size, '10')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                # page size in PDF units (72 per inch)
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    # integral values are written without a fractional part,
                    # everything else with up to four decimals
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                                      PdfName.FlateDecode,
                                      [PdfName.CCITTFaxDecode]])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [PdfName.DCTDecode,
                                       PdfName.JPXDecode]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    # prepend a TIFF header to the raw stream so that PIL can
                    # open it and the pixel data can be compared
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == PdfName.FlateDecode:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    if imgprops.DecodeParms:
                        # compare against the compressed payload taken from
                        # the input (or from a PNG re-encoding of it)
                        if orig_img.format == 'PNG':
                            pngidat, palette = img2pdf.parse_png(orig_imgdata)
                        elif orig_img.format == 'TIFF' \
                                and orig_img.info['compression'] == "group4":
                            offset, length = \
                                    img2pdf.ccitt_payload_location_from_pil(
                                            orig_img)
                            pngidat = orig_imgdata[offset:offset+length]
                        else:
                            pngbuffer = BytesIO()
                            orig_img.save(pngbuffer, format="png")
                            pngidat, palette = img2pdf.parse_png(
                                    pngbuffer.getvalue())
                        self.assertEqual(zlib.decompress(pngidat), imgdata)
                    else:
                        # map the PDF colorspace to the matching PIL mode
                        colorspace = imgprops.ColorSpace
                        if colorspace == PdfName.DeviceGray:
                            colorspace = 'L'
                        elif colorspace == PdfName.DeviceRGB:
                            colorspace = 'RGB'
                        elif colorspace == PdfName.DeviceCMYK:
                            colorspace = 'CMYK'
                        else:
                            raise Exception("invalid colorspace")
                        im = Image.frombytes(colorspace,
                                             (int(imgprops.Width),
                                              int(imgprops.Height)),
                                             imgdata)
                        if orig_img.mode == '1':
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("L").tobytes())
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("RGB").tobytes())
                        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                        # not have the close() method
                        try:
                            im.close()
                        except AttributeError:
                            pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
Пример #40
0
import sys
import os

import find_pdfrw
from pdfrw import PdfReader, PdfWriter

# Positional arguments: input file, rotation in degrees, optional page ranges
inpfn = sys.argv[1]
rotate = int(sys.argv[2])
ranges = sys.argv[3:]

# The rotation has to be a multiple of 90 degrees
assert rotate % 90 == 0

# Each range is written "N" or "N-M", with 1-based page numbers
ranges = [[int(y) for y in x.split('-')] for x in ranges]
outfn = 'rotate.%s' % os.path.basename(inpfn)
trailer = PdfReader(inpfn)
pages = trailer.pages

# Without explicit ranges every page is rotated
if not ranges:
    ranges = [[1, len(pages)]]

for onerange in ranges:
    # A single number N is handled as the range N-N
    first, last = (onerange + onerange[-1:])[:2]
    for pagenum in range(first - 1, last):
        page = pages[pagenum]
        # Add to the rotation the page already has (possibly inherited)
        page.Rotate = (int(page.inheritable.Rotate or 0) + rotate) % 360

# Write the modified document back out under the new name
outdata = PdfWriter()
outdata.trailer = trailer
outdata.write(outfn)
Пример #41
0
usage:   4up.py my.pdf

Creates 4up.my.pdf with a single output page for every
4 input pages.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def get4(srcpages):
    """Merge ``srcpages`` onto one page at half scale, in a 2x2 grid.

    Pages are placed left to right; the first two get the upper y offset
    and the following ones y = 0.  Returns the rendered merged page.
    """
    scale = 0.5
    merged = PageMerge() + srcpages
    x_increment, y_increment = (scale * i for i in merged.xobj_box[2:])
    for position, page in enumerate(merged):
        page.scale(scale)
        # bit 0 of the position selects the column, bit 1 the row
        if position & 1:
            page.x = x_increment
        else:
            page.x = 0
        if position & 2:
            page.y = 0
        else:
            page.y = y_increment
    return merged.render()


# Exactly one command line argument is expected: the input PDF
[inpfn] = sys.argv[1:]
outfn = '4up.%s' % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
writer = PdfWriter(outfn)
# Consume the input four pages at a time; each group becomes one output page
for index in range(0, len(pages), 4):
    group = pages[index:index + 4]
    writer.addpage(get4(group))
writer.write()
Пример #42
0
    def get(self, format: str, path: str):
        """Handle the GET method call.

        Converts every notebook named in ``path`` to PDF with a
        ``PDFExporter`` and responds with one PDF that contains the pages of
        all of them, in the order in which they appear in ``path``.

        Args:
            format: Requested export format; anything but ``'pdf'`` is
                rejected.
            path: One or more notebook paths, each ending in ``.ipynb``,
                concatenated into a single string.

        Returns:
            The result of ``FilesRedirectHandler.redirect_to_files`` when one
            of the paths is not a notebook, otherwise ``None``.

        Raises:
            web.HTTPError: With status 500 when ``format`` is not ``'pdf'``
                or when the conversion of a notebook fails.
        """
        if format != 'pdf':
            # No exception is being handled here, so log a plain error;
            # log.exception() is only meaningful inside an except block.
            self.log.error('format must be pdf')
            raise web.HTTPError(500, 'format must be pdf')

        self.config.PDFExporter.preprocessors = [thermohw.ExtractAttachmentsPreprocessor]
        self.config.PDFExporter.template_file = os.path.join(thermohw_dir, 'homework.tpl')
        self.config.PDFExporter.filters = {'convert_div': thermohw.convert_div,
                                           'convert_raw_html': thermohw.convert_raw_html}
        self.config.PDFExporter.latex_count = 1

        exporter = PDFExporter(config=self.config, log=self.log)
        exporter.writer.build_directory = '.'

        pdfs = []

        # Splitting on the extension yields one piece per notebook; the
        # extension itself is put back inside the loop.
        for nb_path in path.strip('/').strip().split('.ipynb'):
            if not nb_path:
                continue
            nb_path += '.ipynb'
            # If the notebook relates to a real file (default contents manager),
            # give its path to nbconvert.
            ext_resources_dir: Union[str, None]
            if hasattr(self.contents_manager, '_get_os_path'):
                ext_resources_dir = os.path.dirname(
                    self.contents_manager._get_os_path(nb_path))
            else:
                ext_resources_dir = None

            model: Dict[str, str] = self.contents_manager.get(path=nb_path)
            name: str = model['name']
            if model['type'] != 'notebook':
                # not a notebook, redirect to files
                return FilesRedirectHandler.redirect_to_files(self, nb_path)

            nb = model['content']

            self.set_header('Last-Modified', model['last_modified'])

            # create resources dictionary
            mod_date: str = model['last_modified'].strftime(text.date_format)
            nb_title: str = os.path.splitext(name)[0]

            config_dir: str = self.application.settings['config_dir']

            resource_dict: Dict[str, str] = {
                "metadata": {
                    "name": nb_title,
                    "modified_date": mod_date
                },
                "config_dir": config_dir,
            }

            if ext_resources_dir:
                resource_dict['metadata']['path'] = ext_resources_dir

            output: bytes
            try:
                output, _ = exporter.from_notebook_node(
                    nb,
                    resources=resource_dict
                )
            except Exception as e:
                self.log.exception("nbconvert failed: %s", e)
                raise web.HTTPError(500, "nbconvert failed: %s" % e)

            pdfs.append(io.BytesIO(output))

        # Concatenate the pages of the individual PDFs into one document
        writer = PdfWriter()
        for pdf in pdfs:
            writer.addpages(PdfReader(pdf).pages)
        with io.BytesIO() as bio:
            writer.write(bio)
            output = bio.getvalue()

        # Force download if requested
        if self.get_argument('download', 'false').lower() == 'true':
            filename = 'final_output.pdf'
            self.set_header('Content-Disposition',
                            'attachment; filename="{}"'.format(filename))

        # MIME type
        if exporter.output_mimetype:
            self.set_header('Content-Type',
                            '{}; charset=utf-8'.format(exporter.output_mimetype))

        self.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
        self.finish(output)
Пример #43
0
#!/usr/bin/env python

'''
usage:   subset.py my.pdf page[range] [page[range]] ...
         e.g. subset.py my.pdf 1-3 5 7-9

Creates subset.my.pdf

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

# First argument is the input file, everything after it is a page range
inpfn = sys.argv[1]
ranges = sys.argv[2:]
assert ranges, "Expected at least one range"

# Lazily parse each "N" or "N-M" argument into a list of ints
ranges = ([int(bound) for bound in spec.split('-')] for spec in ranges)
outfn = 'subset.%s' % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
outdata = PdfWriter(outfn)

for onerange in ranges:
    # A single number N is handled as the range N-N
    first, last = (onerange + onerange[-1:])[:2]
    # Page numbers on the command line are 1-based and inclusive
    for pagenum in range(first, last + 1):
        outdata.addpage(pages[pagenum - 1])
outdata.write()
Пример #44
0
from pdfrw import PdfReader, PdfWriter
import os

# Merge every PDF found in the current working directory into result.pdf
source_dir = os.getcwd()
output_name = 'result.pdf'

writer = PdfWriter()

# os.listdir() returns names in arbitrary order; sorting them makes the page
# order of the merged document reproducible.
for item in sorted(os.listdir(source_dir)):
    # Require a real ".pdf" extension, and skip the output of an earlier run
    # so that it is not merged into itself.
    if item == output_name or not item.lower().endswith('.pdf'):
        continue
    writer.addpages(PdfReader(os.path.join(source_dir, item)).pages)

writer.write(output_name)
Пример #45
0
def popups_write_pdf(file):
    """Write the module-level ``popup_pdf`` document to ``file``.

    The output is written as PDF version 1.5; compression follows
    ``pdf_popup_config['compress']``.
    """
    from pdfrw import PdfWriter
    compress = pdf_popup_config['compress']
    writer = PdfWriter(version='1.5', compress=compress)
    writer.trailer = popup_pdf
    writer.write(file)
Пример #46
0
                                letters = a, b, c""")
parser.add_argument("--prefix",
                    "-p",
                    default="",
                    help="prefix to the page labels")
parser.add_argument("--firstpagenum",
                    "-f",
                    type=int,
                    default=1,
                    help="number to attribute to the first page of this index")
options = parser.parse_args()

reader = options.file
if options.delete:
    labels = PageLabels()
else:
    labels = PageLabels.from_pdf(reader)
    newlabel = PageLabelScheme(startpage=options.startpage - 1,
                               style=options.type,
                               prefix=options.prefix,
                               firstpagenum=options.firstpagenum)
    labels.append(newlabel)
# Write the new page labels to the PDF
labels.write(reader)
print("New labels to be written:")
print("\n".join(map(str, labels)))

writer = PdfWriter()
writer.trailer = reader
writer.write("/tmp/test.pdf")
Пример #47
0
# Build one "Part1_<section>.pdf" per section out of its PowerPoint files
for section in sections:

    print("++++++++++++++++++++++++++++++++++\n+  Adding section: %s\n+" %
          section)
    big_file = PdfWriter()

    # Sorted so that the slides are added in a reproducible order
    files = sorted(os.listdir(section))

    for f in files:
        fpath = section + '/' + f
        if os.path.isfile(fpath) and fpath.endswith(
                'pptx') and not f == 'Template.pptx':
            print("+   Incorporating: %s" % fpath)
            # Convert the presentation; the PDF is written to the current
            # working directory.
            call([
                "libreoffice", "--headless", "--invisible", "--convert-to",
                "pdf", fpath
            ])
            # Swap only the extension.  str.replace('pptx', 'pdf') would also
            # rewrite a 'pptx' occurring elsewhere in the name and then point
            # at a file that was never created.
            pdf_file_name = os.path.splitext(f)[0] + '.pdf'

            pdf_file = PdfReader(pdf_file_name)
            print("+     Adding pages from %s\n+" % pdf_file_name)
            big_file.addpages(pdf_file.pages)
            # Move the intermediate PDF out of the way
            call(["mv", pdf_file_name, "temp"])

    big_file.write('Part1_%s.pdf' % section)

print("Done.")
Пример #48
0
import os
import sys
from pdfrw import PdfReader, PdfWriter

if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1,
    # so a wrong invocation is reported as a failure instead of exiting 0.
    sys.exit("Usage: InvertOrder.py FILETOINVERT")

filename = sys.argv[1]
output = PdfWriter()

# Add the pages of the input file last to first
for p in reversed(PdfReader(filename).pages):
    output.addpage(p)

# The output goes next to the input, with "_inv" inserted before the extension
fname, fext = os.path.splitext(filename)
outname = fname + "_inv" + fext

print("Writing output to "+outname)

output.write(outname)
Пример #49
0
import sys
import argparse
import itertools
from pdfrw import PdfWriter, PdfReader

# Command line: front-side PDF, back-side PDF, optional output location
parser = argparse.ArgumentParser(
    description='Interlaces two pdf to make one complete pdf.')
parser.add_argument('front_pdf_loc', type=str,
                    help="PDF of fronts of pages")
parser.add_argument('back_pdf_loc', type=str,
                    help="PDF of backs of pages")
parser.add_argument('output_loc', type=str, nargs='?',
                    default="output.pdf",
                    help="Output location for interlaced PDF")

args = parser.parse_args()

output = PdfWriter()
front_pdf = PdfReader(args.front_pdf_loc)
back_pdf = PdfReader(args.back_pdf_loc)

if len(front_pdf.pages) != len(back_pdf.pages):
    print("PDFs must have the same number of pages")
    sys.exit(1)

# Alternate front and back pages; the back pages are taken in reverse order
interleaved = []
for front_page, back_page in zip(front_pdf.pages, back_pdf.pages[::-1]):
    interleaved.append(front_page)
    interleaved.append(back_page)
output.addpages(interleaved)
output.write(args.output_loc)
var BALL_HEIGHT = %(BALL_HEIGHT)s;

var BRICK_ROW_COUNT = %(BRICK_ROW_COUNT)s;
var BRICK_COLUMN_COUNT = %(BRICK_COLUMN_COUNT)s;
var BRICK_WIDTH = %(BRICK_WIDTH)s;
var BRICK_HEIGHT = %(BRICK_HEIGHT)s;
var BRICK_PADDING = %(BRICK_PADDING)s;

var BRICK_OFFSET_BOTTOM = %(BRICK_OFFSET_BOTTOM)s;
var BRICK_OFFSET_LEFT = %(BRICK_OFFSET_LEFT)s;

%(script)s

""" % locals())

page.Contents.stream = """
BT
/F1 24 Tf
150 300 Td (Move your mouse down here!) Tj
40 -100 Td (also, README below...) Tj
ET
"""

readme = PdfReader('README.pdf')

out = PdfWriter()
out.addpage(page)
for readme_page in readme.pages:
    out.addpage(readme_page)
out.write('breakout.pdf')
Пример #51
0
# A path given on the command line overrides the current search path
if args.path:
    path = args.path
    if args.verbose:
        print("Searching {} for PDF files.\n".format(path))

# Collect the names (including the full path) of the PDF files found
fileList = []
for filePath in glob(path + "/*.pdf"):
    fileList.append(filePath)
    if args.verbose:
        print("Found {}".format(filePath))

# Put the names in 'natural' order
sortedFiles = natsorted(fileList)

# Append the pages of every PDF, in that order, to a new document
outFile = PdfWriter()
for pdf in sortedFiles:
    x = PdfReader(pdf)
    if args.verbose:
        message = "Adding {} pages from {} to the combined file."
        print(message.format(x.numPages, pdf))
    outFile.addpages(x.pages)

outFile.write(combinedFile)

# Read the result back to report its page count
if args.verbose:
    m = PdfReader(combinedFile)
    summary = "\nCombined file created at {} with a total of {} pages."
    print(summary.format(combinedFile, m.numPages))
Пример #52
0
# Add a QR code to every page of a multi-page PDF document

from pdfrw import PdfReader, PdfWriter, PageMerge

input_file = "source/Computer-Vision-Resources.pdf"
output_file = "dist/Computer-Vision-Resources-QR-pages.pdf"
watermark_file = "source/waksoft-QR-code.pdf"

# Set up the reader and writer objects
reader_input = PdfReader(input_file)
writer_output = PdfWriter()
watermark_input = PdfReader(watermark_file)
# The first page of the watermark file is what gets stamped onto each page
watermark = watermark_input.pages[0]

# Go through the pages one by one and merge the watermark into each
for page in reader_input.pages:
    PageMerge(page).add(watermark).render()

# Write the modified content to disk
writer_output.write(output_file, reader_input)
Пример #53
0
So she did an 8.5x11" output with 0.5" margin all around
(actual size of useful area 7.5x10") and we scaled it
up by 4.8.

We also copy the Info dict to the new PDF.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict


def adjust(page, margin=36, scale=4.8):
    """Return ``page`` with ``margin`` removed on every side, scaled up.

    Args:
        page: Source page.
        margin: Width trimmed from each edge, in the units of the page box.
        scale: Factor applied to the trimmed page.
    """
    # Merge the page once only to read its bounding box
    x1, y1, x2, y2 = PageMerge().add(page).xobj_box
    inner_width = x2 - x1 - 2 * margin
    inner_height = y2 - y1 - 2 * margin
    viewrect = (margin, margin, inner_width, inner_height)
    # Merge it again, this time restricted to the area inside the margins
    cropped = PageMerge().add(page, viewrect=viewrect)
    cropped[0].scale(scale)
    return cropped.render()


# Exactly one command line argument is expected: the input PDF
[inpfn] = sys.argv[1:]
outfn = 'poster.%s' % os.path.basename(inpfn)
reader = PdfReader(inpfn)
writer = PdfWriter()
# Only the first page of the input is turned into the poster
first_page = reader.pages[0]
writer.addpage(adjust(first_page))
# Carry the Info dict of the input over to the output
writer.trailer.Info = IndirectPdfDict(reader.Info or {})
writer.write(outfn)
Пример #54
0
from pdfrw import PdfReader
"""
x = PdfReader('source/07922XXX2258-2017Apr13-2017May15.pdf')
print x.keys()
print x.Info
print x.Root.keys()
print len(x.pages)
print x.pages[0]
print x.pages[0].Contents
print x.pages[0].Contents.stream

"""

# Writing PDFs: concatenate all input files into one bundle
from pdfrw import PdfWriter
from pdfrw import IndirectPdfDict

writer = PdfWriter()
for pdf_filename in pdf_filenames:
    source = PdfReader(pdf_filename)
    writer.addpages(source.pages)

# Metadata stored in the Info dictionary of the output document
bundle_info = IndirectPdfDict(
    Title='pdf bundle',
    Author='Adobe',
    Subject='pdf',
    Creator='Adobe',
)
writer.trailer.Info = bundle_info
writer.write('out.pdf')
Пример #55
0
for (srcpath, _, filenames) in os.walk('ramdisk/reference'):
    for name in filenames:
        if not name.endswith('.pdf'):
            continue
        src = os.path.join(srcpath, name)
        dst = src.replace('/reference/', '/tmp_results/')
        if not os.path.exists(dst):
            continue
        src_digest = get_digest(src)
        if not src_digest or src_digest not in expected:
            continue
        print src
        count += 1
        trailer = make_canonical(PdfReader(src))
        out = PdfWriter(tmp)
        out.write(trailer=trailer)
        match_digest = get_digest(tmp)
        if not match_digest:
            continue
        trailer = make_canonical(PdfReader(dst))
        out = PdfWriter(tmp)
        out.write(trailer=trailer)
        if get_digest(tmp) != match_digest:
            continue
        goodcount += 1
        print "OK"
        changes.append((src_digest, get_digest(dst)))

print count, goodcount

for stuff in changes:
Пример #56
0
#!/usr/bin/env python3

# USAGE: ./add_new_page.py $in_filepath $out_filepath

import sys

from fpdf import FPDF
from pdfrw import PdfReader, PdfWriter

IN_FILEPATH = sys.argv[1]
OUT_FILEPATH = sys.argv[2]
NEW_PAGE_INDEX = 1  # set to None to append at the end


def new_page():
    """Build a one-page PDF with fpdf reading "Hello!" and return that page."""
    doc = FPDF()
    doc.add_page()
    doc.set_font("helvetica", size=36)
    doc.text(50, 50, "Hello!")
    # Parse the document produced by fpdf with pdfrw to get a page object
    rendered = bytes(doc.output())
    return PdfReader(fdata=rendered).pages[0]


# Start from the existing document, insert the generated page, write it out
source_pdf = PdfReader(IN_FILEPATH)
writer = PdfWriter(trailer=source_pdf)
inserted_page = new_page()
writer.addpage(inserted_page, at_index=NEW_PAGE_INDEX)
writer.write(OUT_FILEPATH)
Пример #57
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image ``f`` with img2pdf and check the one-page result.

            The converted document is parsed with pdfrw; its trailer, catalog,
            page tree, single page and embedded image are compared with the
            input image.  Afterwards the converted document and the reference
            PDF ``out`` are both re-serialised with pdfrw and must be equal.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(StringIO(convert_load(output)))

            # trailer
            self.assertEqual(sorted(x.keys()),
                             [PdfName.Info, PdfName.Root, PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})

            # document catalog
            root = x.Root
            self.assertEqual(sorted(root.keys()),
                             [PdfName.Pages, PdfName.Type])
            self.assertEqual(root.Type, PdfName.Catalog)

            # page tree: exactly one page is expected
            page_tree = root.Pages
            self.assertEqual(sorted(page_tree.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(page_tree.Count, '1')
            self.assertEqual(page_tree.Type, PdfName.Pages)
            self.assertEqual(len(page_tree.Kids), 1)

            # the page itself
            page = page_tree.Kids[0]
            self.assertEqual(sorted(page.keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(page.MediaBox, ['0', '0', '115', '48'])
            self.assertEqual(page.Parent, page_tree)
            self.assertEqual(page.Type, PdfName.Page)
            self.assertEqual(page.Resources.keys(), [PdfName.XObject])
            self.assertEqual(page.Resources.XObject.keys(), [PdfName.Im0])
            self.assertEqual(page.Contents.keys(), [PdfName.Length])
            self.assertEqual(page.Contents.Length,
                             str(len(page.Contents.stream)))
            self.assertEqual(page.Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            image = page.Resources.XObject.Im0

            # the filter has to be one of the supported ones
            copied_verbatim = [[PdfName.DCTDecode], [PdfName.JPXDecode]]
            self.assertIn(image.Filter,
                          copied_verbatim + [[PdfName.FlateDecode]])
            # the colorspace has to be one of the supported ones
            self.assertIn(
                image.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                   PdfName.DeviceCMYK])
            # the embedded image has to have the size of the input image
            orig_img = Image.open(f)
            self.assertEqual(image.Width, str(orig_img.size[0]))
            self.assertEqual(image.Height, str(orig_img.size[1]))
            if image.Filter in copied_verbatim:
                # a jpeg input should have been copied verbatim into the PDF
                self.assertEqual(image.stream, convert_load(orig_imgdata))
            elif image.Filter == [PdfName.FlateDecode]:
                # otherwise the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(convert_store(image.stream))
                colorspace = image.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                size = (int(image.Width), int(image.Height))
                im = Image.frombytes(colorspace, size, imgdata)
                # bring the input into a mode comparable with the decoded data
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            rendered = []
            for trailer in (x, y):
                buf = BytesIO()
                writer = PdfWriter()
                writer.trailer = trailer
                writer.write(buf)
                rendered.append(buf.getvalue())
            self.assertEqual(rendered[0], rendered[1])
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
# Directories whose PowerPoint files are merged, in this order, into Part1.pdf
sections = [
    'Introduction', '1_Experimental_datasets',
    '2_Structured_data_from_literature', '3_Analysis_tools',
    '4_Simulation_environments', '5_Model_sharing',
    '6_Computing_infrastructure', '7_Open_source_initiatives', '8_Web_portals'
]
# Shorter list for a quick test run:
#sections = ['Introduction','1_Experimental_datasets', '2_Structured_data_from_literature']

big_file = PdfWriter()

for section in sections:

    # os.listdir() returns names in arbitrary order; sorting them keeps the
    # slide order of the merged document reproducible.
    for f in sorted(os.listdir(section)):
        fpath = section + '/' + f
        if os.path.isfile(fpath) and fpath.endswith(
                'pptx') and not f == 'Template.pptx':
            print("Incorporating: %s" % fpath)
            # Convert the presentation; the PDF is written to the current
            # working directory.
            call([
                "libreoffice", "--headless", "--invisible", "--convert-to",
                "pdf", fpath
            ])

            # Swap only the extension.  str.replace('pptx', 'pdf') would also
            # rewrite a 'pptx' occurring elsewhere in the name and then point
            # at a file that was never created.
            pdf_file_name = os.path.splitext(f)[0] + '.pdf'
            pdf_file = PdfReader(pdf_file_name)
            print("  Adding pages from %s" % pdf_file_name)
            big_file.addpages(pdf_file.pages)

big_file.write('Part1.pdf')

print("Done.")
Пример #59
0
# Image XObject that carries no data of its own (Length=0): its content is
# declared as an external file (F / FFilter) addressed by an HTTPS URL.
alt_img = PdfDict(Type=PdfName.XObject,
                  # PDF names are case-sensitive: the key is /Subtype.
                  Subtype=PdfName.Image,
                  BitsPerComponent=8,
                  ColorSpace=PdfName.DeviceRGB,
                  Height=800,
                  Width=600,
                  Length=0,
                  F=PdfDict(FS=PdfName.URL,
                            F='https://chezsoi.org/lucas/ThePatch.jpg'),
                  FFilter=PdfName.DCTDecode)
# Python's boolean literal is True; a lowercase `true` raises NameError.
alt_img.indirect = True

# Single-entry alternates array pointing at the external image.
alternates = PdfArray([PdfDict(DefaultForPrinting=True, Image=alt_img)])
alternates.indirect = True

# Attach the alternates to the existing image and re-register that image as
# the only XObject resource of the page.
img_name = PdfName('Image-9960')
img = img_kid.Resources.XObject[img_name]
img.Alternates = alternates
pdf_kid.Resources.XObject = PdfDict()
pdf_kid.Resources.XObject[img_name] = img

out = PdfWriter()
out.addpage(pdf.pages[0])
out.write('out.pdf')

# CONCLUSION: neither Adobe nor Sumatra readers visit the link...
# It may be that readers do not follow this "Alternates" images spec anymore, that HTTPS is not supported, or that I made a mistake in the resulting PDF.
# Anyway, I'm giving up.
# However Canary Tokens use a similar technique that works well (with Adobe not Sumatra): https://github.com/sumatrapdfreader/sumatrapdf/issues/1696
# NOTE(review): the conclusion above was recorded while the image dictionary
# used the key /SubType instead of /Subtype; it may be worth re-testing.
Пример #60
0
 def save_to_file(pdf_obj, file_path):
     """Write ``pdf_obj`` to ``file_path`` using a fresh ``PdfWriter``.

     A debug line containing only the last three '/'-separated components
     of ``file_path`` is logged before writing.
     """
     trailing_parts = file_path.split('/')[-3:]
     logger.debug("Saving to file: " + '/'.join(trailing_parts))
     writer = PdfWriter()
     writer.write(file_path, pdf_obj)