示例#1
0
def concatenate_files(path_list, pdfa=False, password=None):
    """Concatenate several files into one PDF and return the PDF's path.

    Each entry of ``path_list`` is classified by the MIME type guessed from
    its file name.  Images are converted to PDF with ``convert``; RTF, DOCX,
    DOC and ODT documents are converted with
    ``docassemble.base.pandoc.word_to_pdf``; PDFs are used as they are.
    Entries of any other type, or whose type cannot be guessed, are skipped.
    The resulting PDFs are joined with pdftk's ``cat`` operation.

    Args:
        path_list: Iterable of file paths to concatenate, in output order.
        pdfa: If true, ``pdf_to_pdfa`` is run on the concatenated file.
        password: If truthy, ``pdf_encrypt`` is run on the result with it.

    Returns:
        Path of a newly created temporary PDF file (created with
        ``delete=False``, so the caller owns it).

    Raises:
        DAError: If no entry produced a PDF, or if pdftk exits non-zero.
    """
    word_extensions = {
        'application/rtf': 'rtf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    new_path_list = []
    for path in path_list:
        mimetype, _ = mimetypes.guess_type(path)
        if mimetype is None:
            # guess_type() returns None when the type cannot be determined
            # from the name; skip such files like any other unsupported type.
            logmessage("concatenate_files: skipping file of unknown type: " + str(path))
            continue
        if mimetype.startswith('image'):
            # Only the reserved name is needed; close the handle right away
            # so the descriptor is not leaked (the file itself is kept).
            with tempfile.NamedTemporaryFile(prefix="datemp",
                                             mode="wb",
                                             suffix=".pdf",
                                             delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            args = ["convert", path, new_pdf_path]
            result = call(args)
            if result != 0:
                logmessage("failed to convert image to PDF: " + " ".join(args))
                continue
            new_path_list.append(new_pdf_path)
        elif mimetype in word_extensions:
            with tempfile.NamedTemporaryFile(prefix="datemp",
                                             mode="wb",
                                             suffix=".pdf",
                                             delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            docassemble.base.pandoc.word_to_pdf(path,
                                                word_extensions[mimetype],
                                                new_pdf_path,
                                                pdfa=False)
            new_path_list.append(new_pdf_path)
        elif mimetype == 'application/pdf':
            new_path_list.append(path)
    if not new_path_list:
        raise DAError("concatenate_files: no valid files to concatenate")
    # Reserve the output file only once there is something to write, so a
    # failed validation does not leave an orphaned temporary file behind.
    with tempfile.NamedTemporaryFile(prefix="datemp",
                                     mode="wb",
                                     suffix=".pdf",
                                     delete=False) as pdf_file:
        output_path = pdf_file.name
    subprocess_arguments = [PDFTK_PATH] + new_path_list + ['cat', 'output', output_path]
    #logmessage("Arguments are " + str(subprocess_arguments))
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to concatenate PDF files")
        raise DAError(
            "Call to pdftk failed for concatenation where arguments were " +
            " ".join(subprocess_arguments))
    if pdfa:
        pdf_to_pdfa(output_path)
    if password:
        pdf_encrypt(output_path, password)
    return output_path
示例#2
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None):
    """Convert a word-processing file to PDF with headless LibreOffice.

    The input is copied into a fresh temporary directory as
    ``file.<in_format>`` and LibreOffice is run there with
    ``--convert-to pdf``.  The conversion is attempted up to five times,
    with a randomized pause between attempts, until ``file.pdf`` appears.

    Args:
        in_file: Path of the document to convert.
        in_format: File extension (without dot) used for the working copy.
        out_file: Path the resulting PDF is copied to on success.
        pdfa: If true, ``pdf_to_pdfa`` is applied before copying.
        password: If truthy, ``pdf_encrypt`` is applied before copying.

    Returns:
        True if a PDF was produced and LibreOffice exited with status 0,
        otherwise False (``out_file`` is not written in that case).
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        for tries in range(1, max_tries + 1):
            result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
            if os.path.isfile(to_file):
                break
            # No output file: count the attempt as failed whatever the
            # exit status was.
            result = 1
            if tries < max_tries:
                # Pause only when another attempt will actually follow.
                time.sleep(2 + tries*random.random())
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        # Remove the working directory even if a step above raised.
        shutil.rmtree(tempdir)
示例#3
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_references=False):
    """Convert a word-processing file to PDF with headless LibreOffice.

    The input is copied into a fresh temporary directory as
    ``file.<in_format>``.  LibreOffice is then run there, either with
    ``--convert-to pdf`` or, when ``update_references`` is true, through the
    ``Standard.Module1.PysIndexerPdf`` macro.  Up to five attempts are made,
    with a randomized pause between them, until ``file.pdf`` appears.

    Args:
        in_file: Path of the document to convert.
        in_format: File extension (without dot) used for the working copy.
        out_file: Path the resulting PDF is copied to on success.
        pdfa: If true, ``pdf_to_pdfa`` is applied before copying.
        password: If truthy, ``pdf_encrypt`` is applied before copying.
        update_references: If true, convert via the LibreOffice macro
            instead of ``--convert-to``.

    Returns:
        True if a PDF was produced and LibreOffice exited with status 0,
        otherwise False (``out_file`` is not written in that case).
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        # The command line does not change between attempts, so build it once.
        if update_references:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.PysIndexerPdf(' + from_file + ',' + to_file + ')']
        else:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        for tries in range(1, max_tries + 1):
            result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
            if os.path.isfile(to_file):
                break
            # No output file: count the attempt as failed whatever the
            # exit status was.
            result = 1
            if tries < max_tries:
                # Pause and announce a retry only when one will follow.
                time.sleep(2 + tries*random.random())
                logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        # Remove the working directory even if a step above raised.
        shutil.rmtree(tempdir)
示例#4
0
def concatenate_files(path_list, pdfa=False, password=None):
    """Concatenate several files into one PDF and return the PDF's path.

    Each entry of ``path_list`` is classified by the MIME type guessed from
    its file name.  Images are converted to PDF with the configured
    ImageMagick command (60 second limit); RTF, DOCX, DOC and ODT documents
    are converted with ``word_to_pdf``; PDFs are used as they are.  Entries
    of any other type, or whose type cannot be guessed, are skipped.  The
    resulting PDFs are joined with pdftk's ``cat`` operation (60 second
    limit), after which ``replicate_js_and_calculations`` is called with
    the first input PDF, the output file and ``password``.

    Args:
        path_list: Iterable of file paths to concatenate, in output order.
        pdfa: If true, ``pdf_to_pdfa`` is run on the concatenated file.
        password: Passed on to ``replicate_js_and_calculations``.

    Returns:
        Path of a newly created temporary PDF file (created with
        ``delete=False``, so the caller owns it).

    Raises:
        DAError: If no entry produced a PDF, or if pdftk fails or times out.
    """
    word_extensions = {
        'application/rtf': 'rtf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    new_path_list = []
    for path in path_list:
        mimetype, _ = mimetypes.guess_type(path)
        if mimetype is None:
            # guess_type() returns None when the type cannot be determined
            # from the name; skip such files like any other unsupported type.
            logmessage("concatenate_files: skipping file of unknown type: " + str(path))
            continue
        if mimetype.startswith('image'):
            # Only the reserved name is needed; close the handle right away
            # so the descriptor is not leaked (the file itself is kept).
            with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            args = [daconfig.get('imagemagick', 'convert'), path, new_pdf_path]
            try:
                result = subprocess.run(args, timeout=60).returncode
            except subprocess.TimeoutExpired:
                logmessage("concatenate_files: convert took too long")
                result = 1
            if result != 0:
                logmessage("failed to convert image to PDF: " + " ".join(args))
                continue
            new_path_list.append(new_pdf_path)
        elif mimetype in word_extensions:
            with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            word_to_pdf(path, word_extensions[mimetype], new_pdf_path, pdfa=False)
            new_path_list.append(new_pdf_path)
        elif mimetype == 'application/pdf':
            new_path_list.append(path)
    if not new_path_list:
        raise DAError("concatenate_files: no valid files to concatenate")
    # Reserve the output file only once there is something to write, so a
    # failed validation does not leave an orphaned temporary file behind.
    with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as pdf_file:
        output_path = pdf_file.name
    subprocess_arguments = [PDFTK_PATH] + new_path_list + ['cat', 'output', output_path]
    #logmessage("Arguments are " + str(subprocess_arguments))
    try:
        result = subprocess.run(subprocess_arguments, timeout=60).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("concatenate_files: call to cat took too long")
    if result != 0:
        logmessage("Failed to concatenate PDF files")
        raise DAError("Call to pdftk failed for concatenation where arguments were " + " ".join(subprocess_arguments))
    if pdfa:
        pdf_to_pdfa(output_path)
    replicate_js_and_calculations(new_path_list[0], output_path, password)
    return output_path
示例#5
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False):
    """Convert a word-processing file to PDF with headless LibreOffice.

    The input is copied into a fresh temporary directory as
    ``file.<in_format>`` and LibreOffice is run once there with
    ``--convert-to pdf``; the expected output is ``file.pdf`` in the same
    directory.

    Args:
        in_file: Path of the document to convert.
        in_format: File extension (without dot) used for the working copy.
        out_file: Path the resulting PDF is copied to on success.
        pdfa: If true, ``pdf_to_pdfa`` is applied before copying.

    Returns:
        True if LibreOffice exited with status 0 and produced the PDF,
        otherwise False (``out_file`` is not written in that case).
    """
    tempdir = tempfile.mkdtemp()
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = [
            LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file
        ]
        result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
        # A zero exit status without an output file is still a failure;
        # report it through the return value instead of letting the copy
        # below raise.
        if result != 0 or not os.path.isfile(to_file):
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        # Remove the working directory even if a step above raised.
        shutil.rmtree(tempdir)
示例#6
0
def concatenate_files(path_list, pdfa=False, password=None):
    """Concatenate several files into one PDF and return the PDF's path.

    Each entry of ``path_list`` is classified by the MIME type guessed from
    its file name.  Images are converted to PDF with ``convert``; RTF, DOCX,
    DOC and ODT documents are converted with ``word_to_pdf``; PDFs are used
    as they are.  Entries of any other type, or whose type cannot be
    guessed, are skipped.  The resulting PDFs are joined with pdftk's
    ``cat`` operation, after which ``replicate_js_and_calculations`` is
    called with the first input PDF, the output file and ``password``.

    Args:
        path_list: Iterable of file paths to concatenate, in output order.
        pdfa: If true, ``pdf_to_pdfa`` is run on the concatenated file.
        password: Passed on to ``replicate_js_and_calculations``.

    Returns:
        Path of a newly created temporary PDF file (created with
        ``delete=False``, so the caller owns it).

    Raises:
        DAError: If no entry produced a PDF, or if pdftk exits non-zero.
    """
    word_extensions = {
        'application/rtf': 'rtf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    new_path_list = []
    for path in path_list:
        mimetype, _ = mimetypes.guess_type(path)
        if mimetype is None:
            # guess_type() returns None when the type cannot be determined
            # from the name; skip such files like any other unsupported type.
            logmessage("concatenate_files: skipping file of unknown type: " + str(path))
            continue
        if mimetype.startswith('image'):
            # Only the reserved name is needed; close the handle right away
            # so the descriptor is not leaked (the file itself is kept).
            with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            args = ["convert", path, new_pdf_path]
            result = call(args)
            if result != 0:
                logmessage("failed to convert image to PDF: " + " ".join(args))
                continue
            new_path_list.append(new_pdf_path)
        elif mimetype in word_extensions:
            with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as new_pdf_file:
                new_pdf_path = new_pdf_file.name
            word_to_pdf(path, word_extensions[mimetype], new_pdf_path, pdfa=False)
            new_path_list.append(new_pdf_path)
        elif mimetype == 'application/pdf':
            new_path_list.append(path)
    if not new_path_list:
        raise DAError("concatenate_files: no valid files to concatenate")
    # Reserve the output file only once there is something to write, so a
    # failed validation does not leave an orphaned temporary file behind.
    with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) as pdf_file:
        output_path = pdf_file.name
    subprocess_arguments = [PDFTK_PATH] + new_path_list + ['cat', 'output', output_path]
    #logmessage("Arguments are " + str(subprocess_arguments))
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to concatenate PDF files")
        raise DAError("Call to pdftk failed for concatenation where arguments were " + " ".join(subprocess_arguments))
    if pdfa:
        pdf_to_pdfa(output_path)
    replicate_js_and_calculations(new_path_list[0], output_path, password)
    return output_path
示例#7
0
def fill_template(template,
                  data_strings=None,
                  data_names=None,
                  hidden=None,
                  readonly=None,
                  images=None,
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None,
                  default_export_value=None):
    # Fill the form fields of the PDF `template` and return the path of the
    # resulting temporary PDF file.
    #
    # Steps visible in this copy of the code:
    #   1. Read the template's fields and normalize checkbox/button values.
    #   2. Write the (key, value) pairs of `data_strings` to an XFDF file.
    #   3. Run pdftk with 'fill_form' (either 'need_appearances' or 'flatten').
    #   4. For each (field, file_info) pair in `images`, render the image as
    #      a one-page overlay PDF and merge it onto the page of the matching
    #      '/Sig' field.
    #   5. Optionally flatten, convert to PDF/A, replicate JS/calculations
    #      from the template, or encrypt.
    #
    # Raises DAError when the template has no fields or when pdftk fails or
    # times out.
    #
    # NOTE(review): data_names, hidden and readonly are normalized below but
    # are not otherwise used in the code visible here (only the commented-out
    # fdfgen call referenced them).
    if data_strings is None:
        data_strings = []
    if data_names is None:
        data_names = []
    if hidden is None:
        hidden = []
    if readonly is None:
        readonly = []
    if images is None:
        images = []
    if pdf_url is None:
        pdf_url = 'file.pdf'
    if not pdf_url.endswith('.pdf'):
        pdf_url += '.pdf'
    the_fields = read_fields(template)
    if len(the_fields) == 0:
        raise DAError("PDF template has no fields in it.")
    # Map each button-type field name to the value that means "checked":
    # the field's own export value, else default_export_value, else 'Yes'.
    export_values = {}
    for field, default, pageno, rect, field_type, export_value in the_fields:
        # Strip everything but letters and '/' so that variants such as
        # "/'Btn'" compare equal to '/Btn'.
        field_type = re.sub(r'[^/A-Za-z]', '', str(field_type))
        if field_type in ('/Btn', "/'Btn'"):
            export_values[
                field] = export_value or default_export_value or 'Yes'
    if len(export_values) > 0:
        # Rewrite the values supplied for button fields: truthy-looking
        # strings become the field's export value, anything else becomes an
        # "off" value whose spelling mirrors the export value.
        new_data_strings = []
        for key, val in data_strings:
            if key in export_values:
                if str(val) in ('Yes', 'yes', 'True', 'true', 'On', 'on',
                                export_values[key]):
                    val = export_values[key]
                else:
                    if export_values[key] == 'On':
                        val = 'Off'
                    elif export_values[key] == 'on':
                        val = 'off'
                    elif export_values[key] == 'yes':
                        val = 'no'
                    else:
                        val = 'No'
            new_data_strings.append((key, val))
        data_strings = new_data_strings
    # Collapse the pairs into a dict; a later duplicate key overrides an
    # earlier one.
    data_dict = {}
    for key, val in data_strings:
        data_dict[key] = val
    fdf = Xfdf(pdf_url, data_dict)
    #fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # The temporary file is only used to reserve a name; it is closed and
    # then written by write_xfdf() via its path.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".xfdf",
                                           delete=False)
    #fdf_file.write(fdf)
    fdf_file.close()
    fdf.write_xfdf(fdf_file.name)
    #     if False:
    #         fdf_dict = {}
    #         for key, val in data_strings:
    #             fdf_dict[key] = val
    #         xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
    #         xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
    # ".xfdf", delete=False)
    #         shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; created with delete=False, its path is what gets returned.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        # NOTE(review): the statement below is redacted/truncated in this
        # copy (the '******' run).  The remainder of the qpdf decryption call
        # and the start of the pdftk argument list (subprocess_arguments)
        # are not visible here, so this block does not parse as written.
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # When the result must stay editable, or images still have to be
    # overlaid, ask pdftk for 'need_appearances'; otherwise flatten now.
    if editable or len(images) > 0:
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    # A timeout is reported the same way as a non-zero exit status.
    try:
        result = subprocess.run(subprocess_arguments, timeout=600,
                                check=False).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("fill_template: call to pdftk fill_form took too long")
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if len(images) > 0:
        # Index the signature fields by name with their page number and
        # rectangle.
        fields = {}
        for field, default, pageno, rect, field_type, export_value in the_fields:
            if str(field_type) in ('/Sig', "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        image_todo = []
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # First pass: trim the image and write it out as a PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                daconfig.get('imagemagick',
                             'convert'), file_info['fullpath'], "-trim",
                "+repage", "+profile", '*', '-density', '0', temp_png.name
            ]
            try:
                result = subprocess.run(args, timeout=60,
                                        check=False).returncode
            except subprocess.TimeoutExpired:
                logmessage("fill_template: convert took too long")
                result = 1
            # NOTE(review): only an exit status of exactly 1 is treated as a
            # failure here; other non-zero codes fall through.
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle in each
            # direction; the larger ratio is used, presumably so that the
            # scaled image fits inside the rectangle both ways.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if dppx > dppy:
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size, in pixels, of the overlay page.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second pass: place the trimmed PNG on a transparent canvas of
            # that size and write it out as a PDF.
            args = [
                daconfig.get('imagemagick', 'convert'), temp_png.name,
                "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            try:
                result = subprocess.run(args, timeout=60,
                                        check=False).returncode
            except subprocess.TimeoutExpired:
                result = 1
                logmessage("fill_template: call to convert took too long")
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            image_todo.append({
                'overlay_file': overlay_pdf_file.name,
                'pageno': fields[field]['pageno']
            })
        if len(image_todo) > 0:
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            original = safe_pypdf_reader(pdf_file.name)
            original.idnum_to_page = get_page_hash(original.trailer)
            catalog = original.trailer["/Root"]
            writer = DAPdfFileWriter()
            # Remember the catalog entries listed in pdf_parts so they can
            # be copied onto the new document's root object below.
            tree = {}
            for part in pdf_parts:
                if part in catalog:
                    tree[part] = catalog[part]
            # Merge every overlay onto its page (pageno is 1-based).
            for i in range(original.getNumPages()):
                for item in image_todo:
                    if (item['pageno'] - 1) == i:
                        page = original.getPage(i)
                        foreground_file = safe_pypdf_reader(
                            item['overlay_file'])
                        foreground_page = foreground_file.getPage(0)
                        page.mergePage(foreground_page)
            for i in range(original.getNumPages()):
                newpage = original.getPage(i)
                writer.addPage(newpage)
            for key, val in tree.items():
                writer._root_object.update(
                    {pypdf.generic.NameObject(key): val})
            writer.page_list = []
            recursive_get_pages(writer._root_object['/Pages'],
                                writer.page_list)
            # Copying bookmarks is best-effort: any failure is ignored.
            try:
                recursive_add_bookmark(original, writer,
                                       original.getOutlines())
            except:
                pass
            with open(new_pdf_file.name, "wb") as outFile:
                writer.write(outFile)
            # Replace the filled PDF with the version that has the overlays.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    # Flattening was postponed above when images had to be merged first.
    if (not editable) and len(images) > 0:
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
示例#8
0
def fill_template(template,
                  data_strings=[],
                  data_names=[],
                  hidden=[],
                  readonly=[],
                  images=[],
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None):
    # Fill the form fields of the PDF `template` and return the path of the
    # resulting temporary PDF file.
    #
    # Steps visible in this copy of the code:
    #   1. Build an FDF document with fdfgen.forge_fdf and write it to a
    #      temporary file.
    #   2. Run pdftk with 'fill_form' (either 'need_appearances' or 'flatten').
    #   3. For each (field, file_info) pair in `images`, render the image as
    #      a one-page overlay PDF and merge it onto the page of the matching
    #      '/Sig' field.
    #   4. Optionally flatten, convert to PDF/A, replicate JS/calculations
    #      from the template, or encrypt.
    #
    # Raises DAError when pdftk exits with a non-zero status.
    #
    # NOTE(review): the list defaults in the signature are shared between
    # calls; nothing visible here mutates them.
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".fdf",
                                           delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    # Dead code: the condition is the literal False, so this XFDF variant
    # never runs.
    if False:
        fdf_dict = dict()
        for key, val in data_strings:
            fdf_dict[key] = val
        xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
        xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
".xfdf", delete=False)
        shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; created with delete=False, its path is what gets returned.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        # NOTE(review): the statement below is redacted/truncated in this
        # copy (the '******' run).  The remainder of the qpdf decryption call
        # and the start of the pdftk argument list (subprocess_arguments)
        # are not visible here, so this block does not parse as written.
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # When the result must stay editable, or images still have to be
    # overlaid, ask pdftk for 'need_appearances'; otherwise flatten now.
    if editable or len(images):
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if len(images):
        # Index the signature fields by name with their page number and
        # rectangle; several spellings of the type are accepted.
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        image_todo = list()
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # First pass: trim the image and write it out as a PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                "convert", file_info['fullpath'], "-trim", "+repage",
                temp_png.name
            ]
            result = call(args)
            # NOTE(review): only an exit status of exactly 1 is treated as a
            # failure here; other non-zero codes fall through.
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle in each
            # direction; the larger ratio is used, presumably so that the
            # scaled image fits inside the rectangle both ways.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size, in pixels, of the overlay page.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second pass: place the trimmed PNG on a transparent canvas of
            # that size and write it out as a PDF.
            args = [
                "convert", temp_png.name, "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            result = call(args)
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            # The overlay stream stays open until all merging is done; it is
            # closed at the end of the image_todo handling below.
            image_todo.append({
                'overlay_stream':
                open(overlay_pdf_file.name, "rb"),
                'pageno':
                fields[field]['pageno']
            })
        if len(image_todo):
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            with open(pdf_file.name, "rb") as inFile:
                original = pypdf.PdfFileReader(inFile)
                original.idnum_to_page = get_page_hash(original.trailer)
                catalog = original.trailer["/Root"]
                writer = DAPdfFileWriter()
                # Remember the catalog entries listed in pdf_parts so they
                # can be copied onto the new document's root object below.
                tree = dict()
                for part in pdf_parts:
                    if part in catalog:
                        tree[part] = catalog[part]
                # Merge every overlay onto its page (pageno is 1-based).
                for i in range(original.getNumPages()):
                    for item in image_todo:
                        if (item['pageno'] - 1) == i:
                            page = original.getPage(i)
                            foreground_file = pypdf.PdfFileReader(
                                item['overlay_stream'])
                            foreground_page = foreground_file.getPage(0)
                            page.mergePage(foreground_page)
                for i in range(original.getNumPages()):
                    newpage = original.getPage(i)
                    writer.addPage(newpage)
                for key, val in tree.items():
                    writer._root_object.update(
                        {pypdf.generic.NameObject(key): val})
                writer.page_list = list()
                recursive_get_pages(writer._root_object['/Pages'],
                                    writer.page_list)
                recursive_add_bookmark(original, writer,
                                       original.getOutlines())
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the filled PDF with the version that has the overlays.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
            for item in image_todo:
                item['overlay_stream'].close()
    # Flattening was postponed above when images had to be merged first.
    if (not editable) and len(images):
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
示例#9
0
def fill_template(template,
                  data_strings=None,
                  data_names=None,
                  hidden=None,
                  readonly=None,
                  images=None,
                  pdf_url=None,
                  editable=True,
                  pdfa=False):
    """Fill a PDF form with pdftk and stamp images onto signature fields.

    An FDF document is generated with ``fdfgen.forge_fdf`` and applied to
    ``template`` with pdftk's ``fill_form`` operation.  For every
    ``(field, file_info)`` pair in ``images`` whose field is a ``/Sig``
    field of the template, the image at ``file_info['fullpath']`` is
    trimmed, rendered as a one-page overlay PDF and merged onto the field's
    page.

    Args:
        template: Path of the PDF form to fill.
        data_strings: Passed to ``fdfgen.forge_fdf``; defaults to ``[]``.
        data_names: Passed to ``fdfgen.forge_fdf``; defaults to ``[]``.
        hidden: Passed to ``fdfgen.forge_fdf``; defaults to ``[]``.
        readonly: Passed to ``fdfgen.forge_fdf``; defaults to ``[]``.
        images: Iterable of ``(field_name, file_info)`` pairs; defaults to
            no images.
        pdf_url: Passed to ``fdfgen.forge_fdf``; ``None`` becomes ``''``.
        editable: If false, pdftk is asked to ``flatten`` the form.
        pdfa: If true, ``pdf_to_pdfa`` is run on the result.

    Returns:
        Path of a newly created temporary PDF file (created with
        ``delete=False``, so the caller owns it).

    Raises:
        DAError: If pdftk exits with a non-zero status.
    """
    # None stands in for "empty list" so that no mutable default object is
    # shared between calls.
    if data_strings is None:
        data_strings = []
    if data_names is None:
        data_names = []
    if hidden is None:
        hidden = []
    if readonly is None:
        readonly = []
    if images is None:
        images = []
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    with tempfile.NamedTemporaryFile(prefix="datemp",
                                     mode="wb",
                                     suffix=".fdf",
                                     delete=False) as fdf_file:
        fdf_file.write(fdf)
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # Only the reserved path is needed from here on (pdftk writes to it);
    # closing releases the descriptor while the file itself is kept.
    pdf_file.close()
    subprocess_arguments = [
        PDFTK_PATH, template, 'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    logmessage("Arguments are " + str(subprocess_arguments))
    if not editable:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if images:
        # Index the signature fields by name with their page and rectangle.
        fields = {}
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) == '/Sig':
                fields[field] = {'pageno': pageno, 'rect': rect}
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            logmessage("Need to put image on page " +
                       str(fields[field]['pageno']))
            # First pass: trim the image and write it out as a PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                "convert", file_info['fullpath'], "-trim", "+repage",
                temp_png.name
            ]
            result = call(args)
            # Any non-zero status (not just 1) means convert failed.
            if result != 0:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle in each
            # direction; the larger of the two ratios is used.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            dpp = dppx if dppx > dppy else dppy
            # Canvas size, in pixels, of the overlay page.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                           suffix=".pdf")
            # Second pass: place the trimmed PNG on a transparent canvas of
            # that size and write it out as a PDF.
            args = [
                "convert", temp_png.name, "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            result = call(args)
            if result != 0:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            with open(pdf_file.name,
                      "rb") as inFile, open(overlay_pdf_file.name,
                                            "rb") as overlay:
                original = pypdf.PdfFileReader(inFile)
                # pageno is 1-based; getPage() is 0-based.
                background = original.getPage(fields[field]['pageno'] - 1)
                foreground = pypdf.PdfFileReader(overlay).getPage(0)
                background.mergePage(foreground)
                writer = pypdf.PdfFileWriter()
                for i in range(original.getNumPages()):
                    page = original.getPage(i)
                    writer.addPage(page)
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the filled PDF with the version that has the overlay.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    return pdf_file.name
示例#10
0
def _signature_overlay_pdf(image_path, rect):
    """Render an image as a one-page PDF positioned for a form-field rectangle.

    The image at ``image_path`` is trimmed with ImageMagick ``convert``,
    then converted to a PDF whose canvas is padded according to ``rect``
    (``xone, yone, xtwo, ytwo``).

    Returns the path of the overlay PDF, or ``None`` if either ``convert``
    call fails (the failure is logged).
    """
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".png") as temp_png:
        args = ["convert", image_path, "-trim", "+repage", temp_png.name]
        # Any non-zero exit status (not only 1) means convert failed.
        if call(args) != 0:
            logmessage("failed to trim file: " + " ".join(args))
            return None
        with Image.open(temp_png.name) as im:
            width, height = im.size
        xone, yone, xtwo, ytwo = rect
        # Image pixels per unit of the field rectangle; the larger ratio is
        # used so the scaled image fits within the rectangle on both axes.
        dpp = max(width/(xtwo-xone), height/(ytwo-yone))
        # Pad the canvas on the left and bottom (gravity NorthEast keeps the
        # image in the top-right corner) -- presumably so that the image's
        # lower-left corner lands at (xone, yone) once merged onto the page.
        extent_x, extent_y = xone*dpp+width, yone*dpp+height
        overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
        args = ["convert", temp_png.name, "-background", "none", "-density", str(int(dpp*72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), overlay_pdf_file.name]
        if call(args) != 0:
            logmessage("failed to make overlay: " + " ".join(args))
            return None
    return overlay_pdf_file.name


def _stamp_overlays(pdf_path, image_todo):
    """Merge each overlay's first page onto its target page of ``pdf_path``, in place.

    ``image_todo`` is a list of dicts with an open binary ``'overlay_stream'``
    and a 1-based ``'pageno'``.  Catalog entries named in ``pdf_parts`` and
    the document outline are carried over to the rewritten file.
    """
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") as new_pdf_file:
        with open(pdf_path, "rb") as inFile:
            original = pypdf.PdfFileReader(inFile)
            original.idnum_to_page = get_page_hash(original.trailer)
            catalog = original.trailer["/Root"]
            writer = DAPdfFileWriter()
            tree = {part: catalog[part] for part in pdf_parts if part in catalog}
            for i in range(original.getNumPages()):
                for item in image_todo:
                    if (item['pageno'] - 1) == i:
                        page = original.getPage(i)
                        foreground_file = pypdf.PdfFileReader(item['overlay_stream'])
                        foreground_page = foreground_file.getPage(0)
                        page.mergePage(foreground_page)
            for i in range(original.getNumPages()):
                writer.addPage(original.getPage(i))
            for key, val in tree.items():
                writer._root_object.update({pypdf.generic.NameObject(key): val})
            writer.page_list = list()
            recursive_get_pages(writer._root_object['/Pages'], writer.page_list)
            recursive_add_bookmark(original, writer, original.getOutlines())
            with open(new_pdf_file.name, "wb") as outFile:
                writer.write(outFile)
        shutil.copyfile(new_pdf_file.name, pdf_path)


def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None, template_password=None):
    """Fill the form fields of a PDF template with pdftk and return the output path.

    Parameters:
        template: path of the PDF template.
        data_strings, data_names, hidden, readonly: passed unchanged to
            ``fdfgen.forge_fdf`` to build the FDF data file.
        images: iterable of ``(field, file_info)`` pairs; ``file_info['fullpath']``
            is the image to place on the signature (``/Sig``) field named
            ``field``.  Fields not found in the template are logged and skipped.
        pdf_url: passed to ``fdfgen.forge_fdf``; ``None`` is treated as ``''``.
        editable: when true the output keeps its form (``need_appearances``)
            and ``replicate_js_and_calculations`` is applied; otherwise the
            form is flattened.
        pdfa: when true the result is passed through ``pdf_to_pdfa``.
        password: given to ``replicate_js_and_calculations`` when editable,
            otherwise used with ``pdf_encrypt`` if truthy.
        template_password: if not ``None``, the template is first decrypted
            with qpdf using this password.

    Returns:
        The file name of the temporary PDF that holds the result.

    Raises:
        DAError: if the qpdf or pdftk command exits with a non-zero status.
    """
    # The list defaults are shared between calls, but they are only read
    # here (never mutated), so the signature is left as callers know it.
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".fdf", delete=False) as fdf_file:
        fdf_file.write(fdf)
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
    if template_password is not None:
        # NOTE(review): this branch and the pdftk argument list below were
        # reconstructed -- the original line was corrupted by a
        # credential-redaction artifact.  Confirm against the upstream source.
        template_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
        qpdf_subprocess_arguments = [QPDF_PATH, '--decrypt', '--password=' + template_password, template, template_file.name]
        result = call(qpdf_subprocess_arguments)
        if result != 0:
            logmessage("Failed to decrypt PDF template " + str(template))
            # The argument list is deliberately left out of the message:
            # it contains the template password.
            raise DAError("Call to qpdf failed for template " + str(template))
        template = template_file.name
    subprocess_arguments = [PDFTK_PATH, template, 'fill_form', fdf_file.name, 'output', pdf_file.name]
    if editable or images:
        # With images pending, flattening is deferred until after the
        # overlays have been merged (see flatten_pdf below).
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments))
    if images:
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        image_todo = list()
        try:
            for field, file_info in images:
                if field not in fields:
                    logmessage("field name " + str(field) + " not found in PDF file")
                    continue
                overlay_path = _signature_overlay_pdf(file_info['fullpath'], fields[field]['rect'])
                if overlay_path is None:
                    continue
                image_todo.append({'overlay_stream': open(overlay_path, "rb"), 'pageno': fields[field]['pageno']})
            if image_todo:
                _stamp_overlays(pdf_file.name, image_todo)
        finally:
            # Close the overlay streams even if merging raised.
            for item in image_todo:
                item['overlay_stream'].close()
    if images and not editable:
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name