Пример #1
0
def save_with_even_pages(exam_id, exam_pdf_file):
    """Save a finalized exam pdf with an even number of pages.

    The directory `exam_dir(exam_id)` is created if needed and the pdf is
    written to `exam_pdf_path(exam_id)`.

    If the pdf has an odd number of pages, an extra blank page is added at the end;
    this is especially useful for printing and concatenating multiple copies at once.

    Parameters
    ----------
    exam_id : int
        The exam identifier
    exam_pdf_file : str or file-like object
        The exam pdf to be saved in the data directory
    """
    os.makedirs(exam_dir(exam_id), exist_ok=True)
    pdf_path = exam_pdf_path(exam_id)

    exam_pdf = PdfReader(exam_pdf_file)

    if len(exam_pdf.pages) % 2 == 0:
        # Even page count: store the input unchanged.
        if hasattr(exam_pdf_file, 'save'):
            # Upload-style object that knows how to save itself.
            exam_pdf_file.seek(0)
            exam_pdf_file.save(pdf_path)
        elif hasattr(exam_pdf_file, 'read'):
            # Plain file-like object: rewind and copy its bytes.
            exam_pdf_file.seek(0)
            with open(pdf_path, 'wb') as target:
                target.write(exam_pdf_file.read())
        else:
            # A path. Read fully before opening the target so that source and
            # destination may be the same file.
            with open(exam_pdf_file, 'rb') as source:
                data = source.read()
            with open(pdf_path, 'wb') as target:
                target.write(data)
        return

    new = PdfWriter()
    new.addpages(exam_pdf.pages)
    # The blank page takes the size of the first page of the exam.
    blank = PageMerge()
    blank.mbox = exam_pdf.pages[0].MediaBox
    new.addpage(blank.render())

    new.write(pdf_path)
Пример #2
0
def re_arrange(file_path, output_file_name, dic):
    """
    Write a copy of a pdf file with its pages in a new order.

    Pages are added to the output in the iteration order of `dic`; the keys
    are only used in the progress messages.

    Parameters:
        file_path        : Path of the source pdf file
        output_file_name : File name of the new pdf, created in the same
                           directory as the source file
        dic              : A dictionary mapping new position -> 1-based page
                           number in the source pdf
    Returns:
        None
    """
    file_path = Path(file_path)
    pdf_obj = PdfReader(file_path)
    total_pages = len(pdf_obj.pages)
    print("Total Pages in PDF are:", total_pages)
    writer = PdfWriter()

    # new and old here mean the new position of the "old" page location
    for new, old in dic.items():
        # pages list is 0-based, page numbers are 1-based
        writer.addpage(pdf_obj.pages[old - 1])
        print(f"page{new} added from {old}")

    # Build the target with pathlib so the separator is right on every
    # platform and a bare file name stays in the current directory.
    writer.write(file_path.parent / output_file_name)
Пример #3
0
def ocr(tar_gz_filename, empty_page_threshold, language='eng'):
    """Run tesseract over every member of a tar archive and merge the results.

    The archive is extracted into TMP_DIR, each member is converted to
    `partial.pdf` by tesseract, and every resulting page whose content-stream
    length is not below `empty_page_threshold` is appended to the output.
    The process exits with status 1 if a tesseract call fails.

    Parameters:
        tar_gz_filename      : Path of the archive to process
        empty_page_threshold : Pages with a '/Length' below this are skipped
        language             : Value passed to tesseract's `-l` option

    Returns:
        Path of the merged pdf, `TMP_DIR/output.pdf`
    """
    with tarfile.open(tar_gz_filename) as tar:
        # NOTE(review): extractall() trusts the member paths in the archive;
        # confirm the archive always comes from a trusted source.
        tar.extractall(path=TMP_DIR)
        member_names = tar.getnames()
    env = os.environ.copy()
    env.update(dict(LD_LIBRARY_PATH=LIB_DIR, TESSDATA_PREFIX="{}/tessdata".format(SCRIPT_DIR)))

    partial_pdf = "{}/{}".format(TMP_DIR, "partial.pdf")
    output_pdf = '{}/output.pdf'.format(TMP_DIR)

    output = PdfWriter()
    for filename in member_names:
        cmd = ['./tesseract', '-l', language,
            '-c', 'min_orientation_margin=0', # don't leave out characters close to border
            '{}/{}'.format(TMP_DIR, filename),
            '{}/partial'.format(TMP_DIR),
            'pdf']
        try:
            out = subprocess.check_output(cmd, cwd=SCRIPT_DIR, env=env, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print('tesseract call failed, here\'s the output so far:')
            print(e.output)
            sys.exit(1)
        print(out)
        for p in PdfReader(partial_pdf).pages:
            try:
                if int(p.Contents['/Length']) < empty_page_threshold:
                    continue
            except Exception:
                # if in doubt add the page (but let KeyboardInterrupt and
                # SystemExit propagate)
                pass
            output.addpage(p)
    output.write(output_pdf)

    # Clean up the intermediate files; only the merged output stays behind.
    for f in ['partial.pdf', DOWNLOAD_FILE] + member_names:
        os.remove("{}/{}".format(TMP_DIR, f))
    return output_pdf
Пример #4
0
    def post(self, request):
        """Extract a page range from the uploaded pdf and save it as a book.

        The 'page' field is either "start-end" or a single page number
        (1-based, inclusive). The extracted pages are written to
        `media/extracted_page_<start>-<end>.pdf` and that file name replaces
        the 'pdf' value before the serializer is saved.

        Returns a 201 response with the serialized data, or a 400 response
        with the serializer errors when validation fails.
        """
        serializer = BookSerializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        inpfn = serializer.validated_data['pdf']

        page_range = [
            int(y) for y in serializer.validated_data['page'].split('-')
        ]
        # Normalise before indexing so that a single page "n" means "n-n"
        # instead of raising IndexError.
        page_start, page_end = (page_range + page_range[-1:])[:2]

        out_name = 'extracted_page_{}-{}.pdf'.format(page_start, page_end)
        outfn = os.path.join('media', out_name)
        pages = PdfReader(inpfn).pages
        outdata = PdfWriter(outfn)

        for pagenum in range(page_start, page_end + 1):
            outdata.addpage(pages[pagenum - 1])
        outdata.write()
        serializer.validated_data['pdf'] = out_name
        serializer.save()

        return Response(serializer.data, status=status.HTTP_201_CREATED)
Пример #5
0
def subset_pdf(inp_file, ranges):
    """Overwrite a pdf with a subset of its own pages.

    Parameters:
        inp_file : Path of the pdf; it is read and then rewritten in place
        ranges   : Space-separated page ranges, each "n" or "first-last"
                   (1-based, inclusive), e.g. "1-3 7"

    Returns:
        The number of pages written, or -1 if `ranges` is malformed or refers
        to a page that does not exist (the file is left untouched then).
    """
    parsed = []
    for token in ranges.split(' '):
        try:
            bounds = [int(part) for part in token.split('-')]
        except ValueError:  # a word, an empty token, or a leading '-'
            return -1
        # Page numbers are 1-based; 0 would silently wrap around to the last
        # page through negative indexing, so reject it up front.
        if any(bound < 1 for bound in bounds):
            return -1
        # A single number "n" is treated as the range "n-n".
        parsed.append((bounds + bounds[-1:])[:2])

    pages = PdfReader(inp_file).pages
    out_data = PdfWriter(inp_file)
    num_pages = 0
    try:
        for first, last in parsed:
            for page_num in range(first, last + 1):
                out_data.addpage(pages[page_num - 1])
                num_pages += 1
    except IndexError:  # If user gave invalid pages
        return -1
    out_data.write()
    return num_pages
Пример #6
0
def _generate_thumbnail_image_content_file(document):
    """Render the first page of a document's pdf as a JPEG ContentFile.

    The pdf is read from the stored file when `document.file_on_server` is
    set, otherwise downloaded from `document.external_url`. It is staged in
    `MEDIA_ROOT/document_thumbnails/temp.pdf`, which is removed again before
    returning, even when rendering fails.
    """
    if document.file_on_server:
        content = document.unique_file.file_field.read()
    else:
        with requests.request('get', document.external_url,
                              stream=True) as response:
            content = response.content

    temp_pdf_path = os.path.join(settings.MEDIA_ROOT, 'document_thumbnails',
                                 'temp.pdf')
    # The payload is raw pdf bytes, so the file must be opened in binary mode.
    # NOTE(review): the fixed temp name is shared by concurrent calls.
    with open(temp_pdf_path, 'wb') as f:
        f.write(content)

    try:
        reader = PdfReader(temp_pdf_path)

        # Reduce multi-page documents to their first page before rendering.
        if len(reader.pages) > 1:
            writer = PdfWriter()
            writer.addpage(reader.pages[0])
            writer.write(temp_pdf_path)

        images = Image(filename=temp_pdf_path, resolution=38)
        images.background_color = Color('white')
        images.alpha_channel = 'flatten'
    finally:
        os.remove(temp_pdf_path)
    return ContentFile(images.make_blob('jpg'))
Пример #7
0
def delete(path, del_page):
    """
    Remove pages from a pdf file in place.

        Parameters:
        path      : Path of the pdf file.
        del_page  : A collection of 1-based page numbers to be deleted.

           Returns:
               None
    """
    pdf_obj = PdfReader(path)
    total_pages = len(pdf_obj.pages)
    print("Total Pages in PDF are:", total_pages)
    writer = PdfWriter()

    # Keep every page whose 1-based number is not listed in del_page.
    excluded = set(del_page)
    for page_number, page in enumerate(pdf_obj.pages, start=1):
        if page_number not in excluded:
            writer.addpage(page)

    # Write next to the original and swap it in only once the write has
    # succeeded, so a failure never destroys the source file.
    temp_path = "{}.tmp".format(path)
    try:
        writer.write(temp_path)
    except Exception:
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    os.replace(temp_path, path)
Пример #8
0
 def pdfrw(self):
     """Write every page of `self.file_name`, passed through
     `self._pdfrw_adjust`, to `self.output`, carrying over the Info dict."""
     source = PdfReader(self.file_name)
     sink = PdfWriter(self.output)
     for page in source.pages:
         sink.addpage(self._pdfrw_adjust(page))
     # Copy the document metadata; fall back to an empty dict if there is none.
     sink.trailer.Info = IndirectPdfDict(source.Info or {})
     sink.write()
Пример #9
0
def split(fname, usernames, folder, pages=1):
    """
    Split a big pdf into consecutive groups of `pages` pages and save each
    group under the next name taken from `usernames`. This function relies on
    the pdfrw library.

    Args:
        fname (str):            Path to the large PDF to split.
        usernames (list):       Usernames, in order, to be used as file names.
                                Names are popped from the front of this list
                                as files are written.
        folder (str):           Prefix for the new PDFs. It is concatenated
                                directly with the username, so it should end
                                with a path separator.
        pages (int, optional):  Number of pages to include in the smaller PDFs

    Returns:
        This function does not return anything. Trailing pages that do not
        fill a complete group are not written.
    """

    infile = PdfReader(fname)
    # The writer must outlive a single iteration, otherwise only the last
    # page of each group would reach the output file.
    out = PdfWriter()
    page_num = 1
    for page in infile.pages:
        out.addpage(page)
        if page_num < pages:
            page_num += 1
        else:
            out.write("%s%s.pdf" % (folder, usernames.pop(0)))
            out = PdfWriter()
            page_num = 1
Пример #10
0
def upload_book(request):
    """Handle the book upload form.

    On a valid POST, the pages named by the form's 'page' field ("start-end"
    or a single page number, 1-based and inclusive) are extracted from the
    uploaded pdf into `media/extracted_page_<start>-<end>.pdf`, the book is
    saved pointing at that file, and the client is redirected to 'book_list'.
    Otherwise the form is rendered (with errors, if any).
    """
    if request.method == 'POST':
        form = BookForm(request.POST, request.FILES)
        if form.is_valid():
            form2 = form.save(commit=False)
            inpfn = form.cleaned_data['pdf']
            print(inpfn)

            page_range = [int(y) for y in form.cleaned_data['page'].split('-')]
            # Normalise before indexing so that a single page "n" means "n-n"
            # instead of raising IndexError.
            page_start, page_end = (page_range + page_range[-1:])[:2]

            out_name = 'extracted_page_{}-{}.pdf'.format(page_start, page_end)
            outfn = os.path.join('media', out_name)
            pages = PdfReader(inpfn).pages
            outdata = PdfWriter(outfn)

            for pagenum in range(page_start, page_end + 1):
                outdata.addpage(pages[pagenum - 1])
            outdata.write()
            form2.pdf = out_name
            form2.save()
            return redirect('book_list')
    else:
        form = BookForm()
    return render(request, 'upload_book.html', {
        'form': form
    })
Пример #11
0
def upscale(file_name, scale=1.5, margin_x=0, margin_y=0, suffix='scaled', tempdir=None):
    """Write a scaled copy of a PDF and return the path of the new file.

    Each page is cropped by `margin_x`/`margin_y` on every side and then
    scaled by `scale`. The output goes to a new temporary file in `tempdir`
    if given, otherwise next to the input using `add_suffix(file_name, suffix)`,
    otherwise to a temporary file name.
    """
    def adjust(page):
        # Measure the page, crop to the view rectangle, then scale the result.
        left, bottom, right, top = PageMerge().add(page).xobj_box
        viewrect = (margin_x, margin_y,
                    right - left - 2 * margin_x,
                    top - bottom - 2 * margin_y)
        merged = PageMerge().add(page, viewrect=viewrect)
        merged[0].scale(scale)
        return merged.render()

    # Set output file name
    if tempdir:
        output = NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False).name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        output = NamedTemporaryFile(suffix='.pdf').name

    source = PdfReader(file_name)
    sink = PdfWriter(output)
    for page in source.pages:
        sink.addpage(adjust(page))
    # Carry the metadata over; use an empty dict if the source has none.
    sink.trailer.Info = IndirectPdfDict(source.Info or {})
    sink.write()
    return output
Пример #12
0
def extract_crossword(file_path: Path,
                      output_path: Path,
                      overwrite: bool = True) -> Path:
    """Save page with crossword.

    Open a PDF document, search for a string identifying Le Monde
    crossword, and save the corresponding page to a file. The last
    15 pages are searched first, then the remaining ones.

    Args:
      file_path: Path of the PDF document to process

      output_path: Path of the output directory

      overwrite: Whether to overwrite existing files (default to True)

    Returns:
      Path of the saved file

    Raises:
      CrosswordNotFoundError: no page matched
      FileAlreadyExistError: the target exists and `overwrite` is False

    """
    LOGGER.debug(f'Processing {file_path}')
    max_extracted_pages = 15
    rsrcmgr = PDFResourceManager(caching=True)
    crossword_page = None
    m = None
    with open(file_path, 'rb') as f:
        pages = list(PDFPage.get_pages(f))
        LOGGER.debug(f'Found {len(pages)} pages')
        first_checked_pageno = max(0, len(pages) - max_extracted_pages)
        LOGGER.debug(f'Searching last {max_extracted_pages} pages first')
        for i, page in enumerate(pages[first_checked_pageno:]):
            m = _search_in_page(page, rsrcmgr)
            if m:
                crossword_page = first_checked_pageno + i
                break

        # Compare against None: page index 0 is a valid hit but is falsy.
        if crossword_page is None:
            LOGGER.debug(f'Extending search to all pages')
            for i, page in enumerate(pages[:first_checked_pageno]):
                m = _search_in_page(page, rsrcmgr)
                if m:
                    crossword_page = i
                    break

    if crossword_page is None or not m:
        raise CrosswordNotFoundError

    LOGGER.debug(f'Crossword found on page {crossword_page}')

    # The output file is named after the first group of the match.
    path = output_path / '{}.pdf'.format(m.group(1))
    if path.exists() and not overwrite:
        LOGGER.debug(f'File already exist ${path}')
        raise FileAlreadyExistError

    x = PdfReader(file_path)
    page = x.pages[crossword_page]
    y = PdfWriter()
    y.addpage(page)
    y.write(path)

    return path
Пример #13
0
def go(inpfn, outfn):
    """Apply `adjust` to the only page of `inpfn` and write it to `outfn`.

    The input must contain exactly one page; otherwise the unpacking below
    raises ValueError. The Info dictionary is copied to the output.
    """
    reader = PdfReader(inpfn, decompress=False)
    page, = reader.pages
    writer = PdfWriter()
    writer.addpage(adjust(page))
    # A pdf without an Info dictionary yields None here; fall back to an
    # empty dict as the other writers in this file do.
    writer.trailer.Info = IndirectPdfDict(reader.Info or {})
    writer.write(outfn)
Пример #14
0
def strip_pages_pdf(indir,
                    infile,
                    outdir=None,
                    outfile=None,
                    numpages=1,
                    keep=False):
    '''
    Drop the first `numpages` pages of a PDF, or, with `keep`, retain only
    those first pages and drop the rest.

    `outfile` defaults to `infile` and `outdir` to `indir`, so omitting both
    replaces the input file. `numpages` defaults to one page.
    '''
    outfile = infile if outfile is None else outfile
    outdir = indir if outdir is None else outdir

    output = PdfWriter()
    inpath = os.path.join(indir, infile)
    outpath = os.path.join(outdir, outfile)

    for index, page in enumerate(PdfReader(inpath).pages):
        in_leading_pages = index <= (numpages - 1)
        # keep -> emit the leading pages; otherwise emit everything after them
        if in_leading_pages == bool(keep):
            output.addpage(page)

    output.write(outpath)
Пример #15
0
def two_up(data):
    """Merge a two-page pdf (given as raw data) into one page and print it.

    Both pages are rotated by 270 and scaled by sqrt(0.5); the second one is
    shifted horizontally. The result is written to stdout after a
    Content-Type header, with the Info dictionary's Creator updated.
    """
    pdf = PdfReader(fdata=data)
    spread = PageMerge() + pdf.pages

    assert len(spread) == 2

    half_scale = 0.7071067811865476  # sqrt(0.5)
    # Measured before the pages are rotated or scaled.
    shift = half_scale * spread.xobj_box[2]

    for side in spread:
        side.Rotate = 270
        side.scale(half_scale)
    spread[1].x = shift

    writer = PdfWriter()
    writer.addpage(spread.render())

    # retain and update metadata
    pdf.Info.Creator = 'modulo-nic.py %s' % __version__
    writer.trailer.Info = pdf.Info

    sys.stdout.write('Content-Type: application/x-pdf\n\n')
    writer.write(sys.stdout)
Пример #16
0
def addAction():
    """Attach a JavaScript action to the submit button of 'report.pdf'.

    The annotation named 'buttonSubmit' on the second page gets a mouse-down
    action that shows the form's field values in an alert. The modified
    document is written to 'reportChanged.pdf'.
    """
    fileName = 'report.pdf'

    pdf_writer = PdfWriter()
    pdf_reader = PdfReader(fileName)

    # JavaScript to be appended to PDF document.
    # To learn more please check:
    # "Developing Acrobat Applications Using JavaScript"
    # and "JavaScript for Acrobat API Reference"
    js = """

    // genIDForm
    var name = this.getField("nameText").value;
    var age = this.getField("ageText").value;
    var gender = this.getField("genderChoice").value;

    // genQuestion1
    var radioGroupSelectedVal = this.getField("q1").value;

    // genQuestion2
    var q2op1 = this.getField("q2op1").value;
    var q2op2 = this.getField("q2op2").value;
    var q2op3 = this.getField("q2op3").value;
    var q2op4 = this.getField("q2op4").value;
    
    var fieldsData = '';
    fieldsData += "Name: " + name + " Age: " + age + " Gender: " + gender + "\\n";
    fieldsData += "Feeling about JC: " + radioGroupSelectedVal + "\\n";
    fieldsData += "1) " + q2op1 + ", 2) " + q2op2 + ", 3) " + q2op3 + ", 4) " + q2op4; 
    app.alert(fieldsData);
    """

    # PDF document - Page 2
    last = pdf_reader.pages[1]

    # Note: We have just one form in the entire pdf!
    # Annots are form fields. A page without annotations yields None, so fall
    # back to an empty list instead of failing on iteration.
    for field in last.Annots or []:
        # buttonSubmit is the name we gave to the last text box - see survey.py line 64
        if field.get('/T') == '(buttonSubmit)':
            # AA - (Additional-Actions dictionary)
            #       Acrobat js api reference suggests this is where we should insert hidden actions - like javascript.
            #       For more please check: JavaScript for Acrobat API Reference
            #       Site: https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf
            #       Page: 303 or search for AA
            # D - An action that shall be performed when the mouse button is pressed inside the annotation's (field) active area.
            #       For more please check: Document management - Portable document format - part 1: PDF 1.7
            #       Site: https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
            #       Page: 423 or search for "Entries in an annotation's additional-actions dictionary"
            field.update(PdfDict(AA=PdfDict(D=make_js_action(js))))
            break

    # Make a copy of the original file
    for page in pdf_reader.pages:
        pdf_writer.addpage(page)

    pdf_writer.write('reportChanged.pdf')
Пример #17
0
def duplicate_pages(original_filepath, output_filename, num_of_duplicates):
    """Write `num_of_duplicates + 1` consecutive copies of a pdf to one file.

    The output is created at `app.config['DOWNLOAD_FOLDER'] + output_filename`
    (plain string concatenation, so the folder should end with a separator).
    """
    original = PdfReader(original_filepath)
    output = PdfWriter()
    for _ in range(num_of_duplicates + 1):
        for page in original.pages:
            output.addpage(page)
    # The writer does not close a file object handed to it, so manage the
    # handle here to avoid leaking it.
    with open(app.config['DOWNLOAD_FOLDER'] + output_filename, 'wb') as output_stream:
        output.write(output_stream)
Пример #18
0
def split(number_of_pages, output):
    """Write the first `number_of_pages` pages of the sample pdf to `output`.

    If the pdf has fewer pages, all of them are written.
    """
    pdf_obj = PdfReader(r'C:\Users\DELL\Python stuffs\pdf pypy\reportlab-sample.pdf')
    total_pages = len(pdf_obj.pages)
    writer = PdfWriter()
    # Valid indices are 0..total_pages-1; cap the count so that asking for
    # more pages than exist does not raise IndexError.
    for page in range(min(number_of_pages, total_pages)):
        writer.addpage(pdf_obj.pages[page])
    writer.write(output)
Пример #19
0
    def post(self, request):
        """Render a one-page pdf and store it as media/pdf/sample2.pdf.

        For every entry of request.data['test_list'], `self.test` is called
        with the canvas, the entry's 'contents' and a fixed sample dict; the
        first page of media/pdf/sample.pdf is then drawn onto the canvas as
        a form. Always answers 201.
        """
        cid_font = "HeiseiKakuGo-W5"
        pdfmetrics.registerFont(UnicodeCIDFont(cid_font))
        stream = io.BytesIO()
        pdf_canvas = canvas.Canvas(stream)
        pdf_canvas.setFont(cid_font, 24)
        template_pages = PdfReader('media/pdf/sample.pdf', decompress=False).pages
        template_form = pagexobj(template_pages[0])
        sample_values = {
            "test1": "test1",
            "test2": "S2",
            "test3": "テスト",
            "test4": [
                {"key_label": "テスト1", "flag": True},
                {"key_label": "テスト2", "flag": True},
                {"key_label": "テスト3", "flag": True},
            ],
        }
        for entry in request.data['test_list']:
            self.test(pdf_canvas, entry.get('contents'), sample_values)
        pdf_canvas.doForm(makerl(pdf_canvas, template_form))
        pdf_canvas.showPage()
        pdf_canvas.save()
        stream.seek(0)

        # Re-read the canvas output and write its first page to disk.
        rendered = PdfReader(stream)
        out_writer = PdfWriter()
        out_writer.addpage(rendered.pages[0])
        with open('media/pdf/sample2.pdf', 'wb') as f:
            out_writer.write(f)
        return Response({'detail': _('Successfully confirmed email.')},
                        status=status.HTTP_201_CREATED)
Пример #20
0
def main():
    """Strip ResearchGate additions from a PDF given on the command line.

    Removes all page annotations, turns the ResearchGate underlining strokes
    into no-ops, drops the first page (the cover sheet) and writes the result
    to the output file.
    """

    parser = argparse.ArgumentParser(description="Strip ResearchGate additions from a PDF")
    parser.add_argument("infile", metavar="input-filename", type=str, nargs=1,
                        help="PDF file to process")
    parser.add_argument("outfile", metavar="output-filename", type=str, nargs=1,
                        help="name for processed output file")
    args = parser.parse_args()

    # This regular expression matches the form of the ResearchGate
    # underlinings in the content streams. We match against a truncated form
    # of the distinctive RGB triplet because it's not always given with
    # the same accuracy.
    # "0.3333333333 0.6941176471 0.9607843137"
    regex = re.compile(r"""(0\.33333[0-9]+ 0\.694117[0-9]+ 0\.960784[0-9]+ RG
\d+\.?\d* w
\d+\.?\d* \d+\.?\d* m
\d+\.?\d* \d+\.?\d* )l
S""")

    dict_pages = PdfReader(args.infile[0]).pages

    def fix_stream(contents):
        # Look for underlinings and make them invisible.
        if not hasattr(contents, "stream"):
            return
        s = contents.stream
        # We identify RG underlinings by their (hopefully unique)
        # RGB colour triplet.
        if s is not None and regex.search(s):
            # Minimal change: change the line draw commands to
            # moves, so no line is drawn. It would be more
            # satisfying to remove the stream entirely, but it's
            # simpler and safer to preserve the file structure
            # (in particular, the stream length) wherever possible.
            contents.stream = regex.sub("\\1m\nS", s)

    for page in dict_pages:
        if "/Annots" in page:
            # Remove all annotations. This may of course cause some
            # collateral damage, but PDFs of articles don't usually have
            # annotations so probably this will just strip ResearchGate
            # links. If this becomes a problem, it should be easy to
            # identify RG annotations and remove only them.
            page.pop("/Annots")
        page_contents = page.Contents
        if page_contents is None:
            # A page without a content stream has nothing to fix, and
            # iterating None below would raise TypeError.
            continue
        # There may be a stream in the Contents object and/or in its
        # children, so we check for both.
        fix_stream(page_contents)
        for contents in page_contents:
            fix_stream(contents)

    writer = PdfWriter()

    # Start at the second page to remove the ResearchGate cover sheet.
    for page in dict_pages[1:]:
        writer.addpage(page)
    writer.write(args.outfile[0])
Пример #21
0
def run_stage(src, out):
    """Write selected pages of `src` to `out`.

    The page numbers come from the document's /PageLabels /Nums array: for
    every even index from 2 up to the last complete pair, the page numbered
    `value - 1` (0-based) is copied. The last page of the document is
    appended at the end.
    """
    reader = PdfReader(src)
    writer = PdfWriter()
    page_count = len(reader.pages)
    label_nums = reader.Root.PageLabels.Nums
    pair_count = len(label_nums) // 2
    # The pair at index 0 is skipped.
    for pair_index in range(1, pair_count):
        page_number = int(label_nums[pair_index * 2])
        writer.addpage(reader.pages[page_number - 1])
    writer.addpage(reader.pages[page_count - 1])
    writer.write(out)
Пример #22
0
    def merge_attachment(self):
        """Merge the packing-list attachments of the active pickings.

        Every page of every attachment named 'Packing List<picking name>' is
        scaled to 88% and collected into one pdf, which is stored base64
        encoded in a new 'view.report' record. Pickings with
        `packing_list_bool` set get `packing_list_print_bool` set.

        Returns the window action that opens the created record.
        """
        filename = 'Print Packing List.pdf'
        picking_obj = self.env['stock.picking']
        picking = picking_obj.browse(self._context.get('active_ids'))
        lst = []
        writer = PdfWriter()
        for pick in picking:
            if pick.packing_list_bool:
                pick.packing_list_print_bool = True
            ship_name = 'Packing List%s' % pick.name
            attachments = self.env['ir.attachment'].search([
                ('res_id', '=', pick.id), ('name', '=', ship_name)
            ])
            for att in attachments:
                lst.append(att)

        def get4_fedex(srcpages):
            # Scale every source page and merge them into a single page.
            scale = 0.88
            srcpages = PageMerge() + srcpages
            for page in srcpages:
                page.scale(scale)
            return srcpages.render()

        for pdf in lst:
            # base64.decodestring was removed in Python 3.9; b64decode is the
            # supported call.
            pages = PdfReader(BytesIO(base64.b64decode(pdf.datas))).pages
            for page in pages:
                writer.addpage(get4_fedex([page]))

        s = BytesIO()
        writer.write(s)
        # Round-trip the merged document through the second pdf library.
        reader = PdfFileReader(s)
        writer = PdfFileWriter()
        for page in range(0, reader.getNumPages()):
            writer.addPage(reader.getPage(page))
        s = BytesIO()
        writer.write(s)
        out = base64.b64encode(s.getvalue())
        view_report_status_id = self.env['view.report'].create({
            'file_name':
            out,
            'datas_fname':
            filename
        })
        return {
            'res_id': view_report_status_id.id,
            'name': 'Print Packing List',
            'view_type': 'form',
            'view_mode': 'form',
            'res_model': 'view.report',
            'view_id': False,
            'type': 'ir.actions.act_window',
        }
Пример #23
0
def resize_2_a4(infn):
    """Write an adjusted copy of `infn`, named by replacing its last four
    characters with '-A4.pdf'.

    The adjustment parameters come from `get_scale_margin`, using the size
    that `get_size` reports for the reference file 'A4.pdf'.
    """
    target_name = infn[:-4] + '-A4.pdf'
    source = PdfReader(infn)
    sink = PdfWriter(target_name)
    reference_size = get_size('A4.pdf', 0)
    adjust_params = get_scale_margin(infn, reference_size, 0)
    for source_page in source.pages:
        resized = adjust(source_page, adjust_params)
        sink.addpage(resized)
    # Keep the metadata; use an empty dict when the source has none.
    sink.trailer.Info = IndirectPdfDict(source.Info or {})
    sink.write()
Пример #24
0
def test_cut(start, end):
    """Copy pages `start`..`end - 1` (0-based) of 'book.pdf' into
    'pdfs/result.pdf' and send that file to the client."""
    source = PdfReader('book.pdf')
    excerpt = PdfWriter()
    for index in range(start, end):
        excerpt.addpage(source.pages[index])
    excerpt.write('pdfs/result.pdf')

    # NOTE(review): the file is written to 'pdfs/result.pdf' but re-read and
    # sent from '../pdfs/result.pdf'; confirm both resolve to the same file.
    # The parsed result of this read is not used.
    PdfReader('../pdfs/result.pdf')

    return send_file('../pdfs/result.pdf')
Пример #25
0
def rotate(path, bad_page):
    """Write page `bad_page` (0-based) of `path`, rotated to 90, to a new file.

    Only that one page is written. The output name is `path` without its
    last four characters.
    """
    output = path[:-4]
    reader = PdfReader(path)
    writer = PdfWriter()
    pages = reader.pages
    for index, _ in enumerate(pages):
        if index != bad_page:
            continue
        target = pages[bad_page]
        target.Rotate = 90
        writer.addpage(target)
    writer.write(output)
Пример #26
0
 def appendPdfs(self, src, dst):
     """Write the first page of `src` followed by the first page of `dst`
     to 'result.pdf', printing a progress marker after each page."""
     from pdfrw import PdfReader, PdfWriter
     merged = PdfWriter()
     first_doc = PdfReader(src)
     second_doc = PdfReader(dst)
     merged.addpage(first_doc.pages[0])
     print("Janraj")
     merged.addpage(second_doc.pages[0])
     print("CJ")
     merged.write("result.pdf")
Пример #27
0
def rotate_odd(path, output):
    """Write the pages of `path` at odd 0-based indices, rotated to 90, to
    `output`. Pages at even indices are not written."""
    source = PdfReader(path)
    sink = PdfWriter()
    all_pages = source.pages

    # Indices 1, 3, 5, ... are exactly the ones where `index % 2` is truthy.
    for index in range(1, len(all_pages), 2):
        selected = all_pages[index]
        selected.Rotate = 90
        sink.addpage(selected)

    sink.write(output)
Пример #28
0
def combine(inpfn, outfn, x, y, gap):
    """Build output pages from `inpfn` with `getPages` and write them to `outfn`.

    `getPages(pages, x, y, gap)` is called until the page list is empty, so
    it is presumably expected to remove the pages it consumes (TODO confirm).
    """
    remaining = PdfReader(inpfn).pages
    sheet_writer = PdfWriter()

    while remaining:
        sheet = getPages(remaining, x, y, gap)
        sheet_writer.addpage(sheet)

    sheet_writer.write(outfn)
Пример #29
0
def rotate_all_page(path):
    """Write a copy of `path` with every page's rotation set to 90.

    Returns the output path: `path` with its last four characters replaced
    by '_converted.pdf'.
    """
    output = path[:-4] + '_converted.pdf'
    reader = PdfReader(path)
    writer = PdfWriter()
    for page in reader.pages:
        page.Rotate = 90
        writer.addpage(page)
    writer.write(output)
    # The print that used to follow the return could never run and was dropped.
    return output
Пример #30
0
def split(path, number_of_pages, output):
    """Write the first `number_of_pages` pages of `path` to `output`.

    If the pdf has fewer pages, all of them are written.
    """
    pdf_obj = PdfReader(path)
    total_pages = len(pdf_obj.pages)

    writer = PdfWriter()

    # Valid indices are 0..total_pages-1; cap the count so that asking for
    # more pages than exist does not raise IndexError.
    for page in range(min(number_of_pages, total_pages)):
        writer.addpage(pdf_obj.pages[page])

    writer.write(output)
Пример #31
0
def pdf(rm_files_path, path_original_pdf, path_annotated_pdf, path_oap_pdf):
    """ Render pdf with annotations. The path_oap_pdf defines the pdf
        which includes only annotated pages.

        For every page number N of the original pdf, the file
        `<rm_files_path>/N.rm` (if present) is rendered and merged onto
        that page. The full document goes to path_annotated_pdf.
    """

    # Close the input handle once the reader has consumed the file.
    with open(path_original_pdf, "rb") as original_file:
        base_pdf = PdfReader(original_file)

    # Parse remarkable files and write into pdf
    annotations_pdf = []

    for page_nr in range(base_pdf.numPages):
        rm_file_name = "%s/%d" % (rm_files_path, page_nr)
        rm_file = "%s.rm" % rm_file_name
        if not os.path.exists(rm_file):
            annotations_pdf.append(None)
            continue

        page_layout = base_pdf.pages[page_nr].MediaBox
        crop_box = base_pdf.pages[page_nr].CropBox
        if page_layout is None:
            # Fall back to the ArtBox when the page has no MediaBox.
            page_layout = base_pdf.pages[page_nr].ArtBox

            if page_layout is None:
                annotations_pdf.append(None)
                continue

        image_width, image_height = float(page_layout[2]), float(
            page_layout[3])
        annotated_page = _render_rm_file(rm_file_name,
                                         image_width=image_width,
                                         image_height=image_height,
                                         crop_box=crop_box)
        if len(annotated_page.pages) <= 0:
            annotations_pdf.append(None)
        else:
            page = annotated_page.pages[0]
            annotations_pdf.append(page)

    # Merge annotations pdf and original pdf
    writer_full = PdfWriter()
    writer_oap = PdfWriter()
    for i in range(base_pdf.numPages):
        annotations_page = annotations_pdf[i]

        if annotations_page is not None:
            merger = PageMerge(base_pdf.pages[i])
            merger.add(annotations_page).render()
            writer_oap.addpage(base_pdf.pages[i])

        writer_full.addpage(base_pdf.pages[i])

    writer_full.write(path_annotated_pdf)
    writer_oap.write(path_oap_pdf)
Пример #32
0
def Add_Title_Page(input_file, title_file, output_file, page):
    """Write `output_file` as page `page` (0-based) of `title_file`
    followed by all pages of `input_file`."""
    body_doc = PdfReader(input_file)
    combined = PdfWriter()
    title_doc = PdfReader(title_file)

    # Title page first, then the whole input document.
    combined.addpage(title_doc.pages[page])
    combined.addpages(body_doc.pages)

    combined.write(output_file)
Пример #33
0
 def get(self,id):
     """Respond with an iframe embedding a page range of 'teste.pdf'.

     `id` is a page number "n" or a range "first-last" (1-based, inclusive).
     The selected pages are written to 'teste.pdffrag' and embedded in the
     response as a base64 data URI.
     """
     inpfn = 'teste.pdf'
     ranges = ([int(y) for y in x.split('-')] for x in [id])
     outfn = '%sfrag' % os.path.basename(inpfn)
     pages = PdfReader(inpfn).pages
     outdata = PdfWriter()
     #
     for onerange in ranges:
         # A single number "n" is treated as the range "n-n".
         onerange = (onerange + onerange[-1:])[:2]
         for pagenum in range(onerange[0], onerange[1]+1):
             outdata.addpage(pages[pagenum-1])
     outdata.write(outfn)
     #
     # base64.encodestring was removed in Python 3.9 and returned bytes;
     # b64encode + decode gives text that can be concatenated below, and the
     # file handle is closed after reading.
     with open(outfn, "rb") as fragment:
         pdfout = base64.b64encode(fragment.read()).decode('ascii')
     #
     self.write('<iframe src="data:application/pdf;base64,'+pdfout+'" style="position:fixed; top:0px; left:0px; bottom:0px; right:0px; width:100%; height:100%; border:none; margin:0; padding:0; overflow:hidden; z-index:999999;"/>')
Пример #34
0
So she did an 8.5x11" output with 0.5" margin all around
(actual size of useful area 7.5x10") and we scaled it
up by 4.8.

We also copy the Info dict to the new PDF.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict


def adjust(page, margin=36, scale=4.8):
    """Return a copy of `page` cropped by `margin` on every side and then
    scaled by `scale`."""
    left, bottom, right, top = PageMerge().add(page).xobj_box
    inner_width = right - left - 2 * margin
    inner_height = top - bottom - 2 * margin
    cropped = PageMerge().add(
        page, viewrect=(margin, margin, inner_width, inner_height))
    cropped[0].scale(scale)
    return cropped.render()


# Exactly one command-line argument is expected: the input PDF path
# (the single-element unpacking raises ValueError otherwise).
inpfn, = sys.argv[1:]
# The output is named after the input's base name with a 'poster.' prefix,
# so it is created relative to the current working directory.
outfn = 'poster.' + os.path.basename(inpfn)
reader = PdfReader(inpfn)
writer = PdfWriter(outfn)
# Only the first page of the input is adjusted and written.
writer.addpage(adjust(reader.pages[0]))
# Copy the Info dict to the new PDF (an empty one if the input has none).
writer.trailer.Info = IndirectPdfDict(reader.Info or {})
writer.write()
import re
import sys
import os

from pdfrw import PdfReader, PdfWriter

loc_pages = "pages"
loc_books = "books"

# Each file in loc_pages holds human-coded, 1-based page numbers separated
# by commas; the matching book in loc_books has the same base name with a
# .pdf extension.
for f in os.listdir(loc_pages):
    # Read and parse the page list, releasing the handle before the PDF work
    with open(os.path.join(loc_pages, f)) as handle:
        data = handle.read().strip('\n')
    data = [int(p) for p in data.split(',')]
    # Function-call form prints the same text on Python 2 and Python 3
    print(data)

    # Corresponding book has same filename, diff extension
    path_book = os.path.splitext(f)[0] + ".pdf"
    path_book = os.path.join(loc_books, path_book)

    # Copy the selected pages, in the listed order, into a new document
    pages = PdfReader(path_book).pages
    out_data = PdfWriter()

    for p in data:
        out_data.addpage(pages[p-1])

    out_data.write('subset.%s' % os.path.basename(path_book))
Пример #36
0
args = parser.parse_args()

# Interleave the odd-page and even-page documents into one output document
even = PdfReader(args.evenFile[0])
odd = PdfReader(args.oddFile[0])
isEvenReversed = args.evenrev
isOddReversed = args.oddrev
all = PdfWriter()

# Blank 8.5 x 11 page; it is rendered here but not added anywhere below
blank = PageMerge()
blank.mbox = [0, 0, 612, 792]
blank = blank.render()

if not (isEvenReversed or isOddReversed):
    # Both inputs in natural order: pair them up, stopping at the shorter one
    for odd_page, even_page in zip(odd.pages, even.pages):
        all.addpage(odd_page)
        all.addpage(even_page)
else:
    # At least one input is back to front; the odd document's length drives
    # the loop and a reversed input is indexed from its end.
    # NOTE(review): if the even document is shorter than the odd one, the
    # even index runs out of range (or goes negative when reversed) - confirm
    # callers always pass equally long inputs.
    odd_count = len(odd.pages)
    even_count = len(even.pages)
    for i in range(odd_count):
        all.addpage(odd.pages[odd_count - 1 - i if isOddReversed else i])
        all.addpage(even.pages[even_count - 1 - i if isEvenReversed else i])

all.write(args.resultFile[0])
# /usr/bin/python
# coding=utf-8

from pdfrw import PdfReader, PdfWriter

# Path to the synopsis (dissertation abstract)
synopsis_path = '../synopsis.pdf'
# Path to the title pages of the GIA scientific report
# (must contain two pages: the title page and a blank one)
gia_title_path = './gia_title.pdf'


synopsis = PdfReader(synopsis_path)
gia_title = PdfReader(gia_title_path)

sci_rep = PdfWriter()


# The first two pages of the synopsis are swapped for the two title pages;
# every later page is copied unchanged.
for index, page in enumerate(synopsis.pages):
    sci_rep.addpage(gia_title.pages[index] if index < 2 else page)


# Save the result
sci_rep.write('./sci_rep.pdf')

Пример #38
0
def go(inpfn, outfn):
    """Write `outfn` from the output pages that `get4` builds out of `inpfn`.

    The input is read without decompressing its streams. `get4` is called
    repeatedly with the same page list until that list is empty, so it is
    expected to remove the pages it consumes.
    """
    remaining = PdfReader(inpfn, decompress=False).pages
    out = PdfWriter()
    while remaining:
        sheet = get4(remaining)
        out.addpage(sheet)
    out.write(outfn)
    page.AA = PdfDict()
    # You probably should just wrap each JS action with a try/catch,
    # because Chrome does no error reporting or even logging otherwise;
    # you just get a silent failure.
    page.AA.O = make_js_action("""
try {
  %s
} catch (e) {
  app.alert(e.message);
}
    """ % (script))

    page.Annots = PdfArray(annots)
    return page

if len(sys.argv) > 1:
    # The script file is read twice: its leading '/// ' lines describe the
    # fields, and the whole text becomes the page's JavaScript. The `with`
    # block guarantees the handle is closed even if parsing fails.
    with open(sys.argv[1], 'r') as js_file:
        fields = []
        for line in js_file:
            # Field declarations are only recognised at the top of the file
            if not line.startswith('/// '): break
            pieces = line.split()
            # pieces[0] is the '///' marker, pieces[1] is passed through as
            # text and the remaining tokens are converted to floats
            params = [pieces[1]] + [float(token) for token in pieces[2:]]
            fields.append(make_field(*params))

        # Rewind so the complete script, header lines included, is read
        js_file.seek(0)
        script_text = js_file.read()

    out = PdfWriter()
    out.addpage(make_page(fields, script_text))
    out.write('result.pdf')
Пример #40
0
import os
import sys
from pdfrw import PdfReader, PdfWriter

# Exactly one argument (the PDF to invert) is required
if len(sys.argv) != 2:
    print("Usage: InvertOrder.py FILETOINVERT")
    sys.exit()

filename = sys.argv[1]
output = PdfWriter()

# Copy the pages last-to-first
source_pages = PdfReader(filename).pages
for p in reversed(source_pages):
    output.addpage(p)

# "<name>_inv<ext>" next to the input file
fname, fext = os.path.splitext(filename)
outname = fname + "_inv" + fext

print("Writing output to "+outname)

output.write(outname)
var BALL_HEIGHT = %(BALL_HEIGHT)s;

var BRICK_ROW_COUNT = %(BRICK_ROW_COUNT)s;
var BRICK_COLUMN_COUNT = %(BRICK_COLUMN_COUNT)s;
var BRICK_WIDTH = %(BRICK_WIDTH)s;
var BRICK_HEIGHT = %(BRICK_HEIGHT)s;
var BRICK_PADDING = %(BRICK_PADDING)s;

var BRICK_OFFSET_BOTTOM = %(BRICK_OFFSET_BOTTOM)s;
var BRICK_OFFSET_LEFT = %(BRICK_OFFSET_LEFT)s;

%(script)s

""" % locals())

# Static text drawn on the game page with font /F1 at size 24
page.Contents.stream = """
BT
/F1 24 Tf
150 300 Td (Move your mouse down here!) Tj
40 -100 Td (also, README below...) Tj
ET
"""

readme = PdfReader('README.pdf')

# The game page comes first, followed by every README page
out = PdfWriter()
out.addpage(page)
out.addpages(readme.pages)
out.write('breakout.pdf')
Пример #42
0
    def write_async(self, outfile, process_semaphore, progress_cb=None):
        """Assemble the document with pdfrw and post-process it with QPDF.

        Generator-based coroutine. One page dictionary is created for every
        entry in ``self._pages``, all pages are filled in concurrently by the
        nested ``make_page`` coroutine, the result is written to a temporary
        file and ``QPDF_CMD`` is run on it to produce ``outfile``.

        Parameters:
            outfile: Path of the final PDF; made absolute and passed to the
                QPDF command as its last argument.
            process_semaphore: Forwarded to every thumbnail/image/mask helper
                and to ``run_command_async`` (presumably bounds the number of
                concurrent worker processes - TODO confirm).
            progress_cb: Optional callable, invoked after each finished page
                with ``finished_pages / len(self._pages)``.
        """
        pdf_writer = PdfWriter(version="1.5")

        # Transparency group dictionary (DeviceRGB); every page's Group
        # entry is set to this single indirect object.
        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        # Font dictionary exposing the built font as F1; it is attached to a
        # page's resources only when that page has text.
        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        # Register one bare page dictionary per input page; make_page below
        # fills them in afterwards.
        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            """Fill `pdf_page` with the graphics, text and links of `page`."""
            # Prepare everything in parallel
            # Each helper below returns None when the page has nothing to
            # contribute for that part.
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            # Only foregrounds without a color of their own get a mask object
            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))
            # Wait for thumbnail, background, all foreground images and all
            # masks together; the two inner gathers yield one result per
            # entry of page.foreground, in that order.
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem),
                    get_pdf_background(psem),
                    asyncio.gather(*[fg.pdf_image(psem)
                                     for fg in page.foreground]),
                    asyncio.gather(*[get_pdf_mask(fg, psem)
                                     for fg in page.foreground])))
            # The page box spans from the origin to (page.width, page.height)
            pdf_page.MediaBox = PdfArray([0, 0,
                                          PdfNumber(page.width),
                                          PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            # Running index used to name image XObjects Im0, Im1, ...
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" +
                               "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            # Last fill color emitted, so the color operator is not repeated
            current_color = None
            # Non-white pages get the unit rectangle filled with page.color
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " +
                             "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            # A mask, when present, takes the image slot just before its
            # foreground; only the foreground itself is drawn with Do.
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None and
                        current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            # The q ... Q wrapper is only emitted when something was drawn
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            # Text object using font F1 at size 1 with text rendering mode 3
            # (invisible text in the PDF specification)
            before_text = ("BT\n" +
                           "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    # so this horizontal scale gives the whole string unit
                    # width
                    matrix.scale(2 / len(t.text), 1)
                    # Direction/rotation are applied between a shift by
                    # (-0.5, -0.5) and the shift back (presumably to rotate
                    # about the center of the unit square - TODO confirm
                    # against TransformationMatrix)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    # Stretch to the text box size and move it into place
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    # The string is written as a hex string of UTF-16-BE bytes
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(),
                        PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                # Link annotation over the text box; created even when
                # t.text itself is empty
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [PdfNumber(t.x),
                                      PdfNumber(t.y),
                                      PdfNumber(t.x + t.width),
                                      PdfNumber(t.y + t.height)]
                    if t.external_link is not None:
                        # URI action; external_link is decoded here, so it is
                        # expected to be a bytes object
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        # internal_link is (page index, (x, y)); the index
                        # selects one of the page dictionaries created above
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page,
                            PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y),
                            0]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            # Pages without any content get no Contents entry at all
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    # Deflate at level 9; the compressed bytes are turned
                    # back into a str via latin-1 (one character per byte)
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"),
                        9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            # Optional entries are only set when they are non-empty
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            # finished_pages is the counter of the enclosing function,
            # assigned right after this definition
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))
        finished_pages = 0
        # Run make_page for every (input page, page dictionary) pair at once
        yield from asyncio.gather(
            *[make_page(page, pdf_page, process_semaphore)
              for page, pdf_page in zip(self._pages, pdf_pages)])

        # Write the pdfrw result to a temporary file, then let QPDF produce
        # the final output (optionally linearized) from it
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [QPDF_CMD,
                   "--stream-data=preserve",
                   "--object-streams=preserve",
                   "--normalize-content=n"]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([path.abspath(path.join(temp_dir, "temp.pdf")),
                        path.abspath(outfile)])
            yield from run_command_async(cmd, process_semaphore)
Пример #43
0
usage:   4up.py my.pdf

Creates 4up.my.pdf with a single output page for every
4 input pages.
"""

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def get4(srcpages):
    """Tile the given pages (up to four) onto one output page at half size.

    Pages alternate between the left and right column; the first two go in
    the upper row and the next two in the lower row (PDF coordinates have
    their origin at the bottom-left).
    """
    scale = 0.5
    merged = PageMerge() + srcpages
    # Half of the merged box's extent is the offset of the right column and
    # of the upper row
    x_increment, y_increment = [scale * extent
                                for extent in merged.xobj_box[2:]]
    for position, tile in enumerate(merged):
        tile.scale(scale)
        # Bit 0 of the position selects the column
        if position & 1:
            tile.x = x_increment
        else:
            tile.x = 0
        # Bit 1 of the position selects the row
        if position & 2:
            tile.y = 0
        else:
            tile.y = y_increment
    return merged.render()


# Exactly one command-line argument is expected: the input PDF
(inpfn,) = sys.argv[1:]
outfn = "4up." + os.path.basename(inpfn)

pages = PdfReader(inpfn).pages
writer = PdfWriter()

# Walk the document four pages at a time; the last group may be shorter
for index in range(0, len(pages), 4):
    group = pages[index:index + 4]
    writer.addpage(get4(group))

writer.write(outfn)
Пример #44
0
#!/usr/bin/env python

'''
usage:   subset.py my.pdf page[range] [page[range]] ...
         eg. subset.py my.pdf 1-3 5 7-9

Creates subset.my.pdf

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

inpfn = sys.argv[1]
ranges = sys.argv[2:]
assert ranges, "Expected at least one range"

outfn = 'subset.%s' % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
outdata = PdfWriter(outfn)

# Every argument is either "N" or "first-last" (1-based, inclusive)
for spec in ranges:
    bounds = [int(y) for y in spec.split('-')]
    # A lone page number N is treated as the range N-N
    first, last = (bounds + bounds[-1:])[:2]
    for pagenum in range(first, last + 1):
        outdata.addpage(pages[pagenum - 1])

outdata.write()