Пример #1
0
 def test_gen_xfdf(self):
     """Check that gen_xfdf(SAMPLE_DATA) matches the reference XFDF file.

     The two documents are compared structurally, so the order of sibling
     elements and pretty-printing whitespace between elements do not
     affect the outcome.
     """

     def canonical(element):
         # Reduce an element to plain comparable data with its children
         # sorted.  Serializing every node from ``iter()`` (the previous
         # approach) also serialized the root, whose string embeds the
         # children in document order, so sibling order still mattered.
         children = sorted(canonical(child) for child in element)
         text = element.text or ''
         # Whitespace-only text inside a container is just indentation.
         if children and not text.strip():
             text = ''
         return (element.tag, sorted(element.attrib.items()), text, children)

     xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA)
     xfdf = read(xfdf_path)
     expected = read(TEST_XFDF_PATH)
     self.assertEqual(canonical(ET.fromstring(xfdf)),
                      canonical(ET.fromstring(expected)))
Пример #2
0
 def test_gen_xfdf(self):
     """Check that gen_xfdf(SAMPLE_DATA) matches the reference XFDF file.

     The two documents are compared structurally, so the order of sibling
     elements and pretty-printing whitespace between elements do not
     affect the outcome.
     """

     def canonical(element):
         # Reduce an element to plain comparable data with its children
         # sorted.  Serializing every node from ``iter()`` (the previous
         # approach) also serialized the root, whose string embeds the
         # children in document order, so sibling order still mattered.
         children = sorted(canonical(child) for child in element)
         text = element.text or ''
         # Whitespace-only text inside a container is just indentation.
         if children and not text.strip():
             text = ''
         return (element.tag, sorted(element.attrib.items()), text, children)

     xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA)
     xfdf = read(xfdf_path)
     expected = read(TEST_XFDF_PATH)
     self.assertEqual(
         canonical(ET.fromstring(xfdf)),
         canonical(ET.fromstring(expected)),
     )
Пример #3
0
# Fill the form fields of the PDF `template` and return the path of the
# resulting temporary PDF file.
#
# Parameters, as used by the visible code:
#   template: PDF template; passed to read_fields() and named in log and
#       error messages.
#   data_strings: iterable of (field name, value) pairs to fill in.
#   data_names, hidden, readonly: only referenced by a commented-out call.
#   images: iterable of (field name, file_info) pairs; file_info['fullpath']
#       is the image to place on the signature field of that name.
#   pdf_url: name passed to Xfdf(); defaults to 'file.pdf'.
#   editable: when true the form stays fillable and
#       replicate_js_and_calculations() is run; otherwise it is flattened.
#   pdfa: when true, pdf_to_pdfa() is run on the output.
#   password: passed to replicate_js_and_calculations() or pdf_encrypt().
#   template_password: when not None, qpdf arguments are prepared (the code
#       that follows is garbled, see the NOTE below).
#   default_export_value: fallback "on" value for button fields.
#
# Raises DAError when the pdftk call returns non-zero or times out.
def fill_template(template,
                  data_strings=[],
                  data_names=[],
                  hidden=[],
                  readonly=[],
                  images=[],
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None,
                  default_export_value=None):
    # Default the name handed to Xfdf() below and make sure it ends in '.pdf'.
    if pdf_url is None:
        pdf_url = 'file.pdf'
    if not pdf_url.endswith('.pdf'):
        pdf_url += '.pdf'
    the_fields = read_fields(template)
    # Maps each button field name to the value used when it is switched on.
    export_values = dict()
    for field, default, pageno, rect, field_type, export_value in the_fields:
        # Strip everything except '/' and ASCII letters before comparing.
        # NOTE(review): after this substitution the quoted form "/'Btn'"
        # cannot occur, so only '/Btn' can actually match.
        field_type = re.sub(r'[^/A-Za-z]', '', str(field_type))
        if field_type in ('/Btn', "/'Btn'"):
            export_values[
                field] = export_value or default_export_value or 'Yes'
    # Rewrite the values supplied for button fields: truthy-looking values
    # become the field's own export value, anything else a matching "off" word.
    if len(export_values):
        new_data_strings = list()
        for key, val in data_strings:
            if key in export_values:
                if str(val) in ('Yes', 'yes', 'True', 'true', 'On', 'on',
                                export_values[key]):
                    val = export_values[key]
                else:
                    # Pick the "off" word whose spelling mirrors the export value.
                    if export_values[key] == 'On':
                        val = 'Off'
                    elif export_values[key] == 'on':
                        val = 'off'
                    elif export_values[key] == 'yes':
                        val = 'no'
                    else:
                        val = 'No'
            new_data_strings.append((key, val))
        data_strings = new_data_strings
    # Collapse the pairs into a dict; a later pair with the same name wins.
    data_dict = {}
    for key, val in data_strings:
        data_dict[key] = val
    fdf = Xfdf(pdf_url, data_dict)
    #fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # The temp file only reserves a path: the handle is closed untouched and
    # write_xfdf() then writes to that path by name.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".xfdf",
                                           delete=False)
    #fdf_file.write(fdf)
    fdf_file.close()
    fdf.write_xfdf(fdf_file.name)
    # Dead code: this branch can never execute.
    if False:
        fdf_dict = dict()
        for key, val in data_strings:
            fdf_dict[key] = val
        xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
        xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
".xfdf", delete=False)
        shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; delete=False because its name is what this function returns.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # NOTE(review): the list literal below is garbled (text between
    # '--password=' and 'fill_form' appears to have been redacted), so this
    # block is not valid Python as shown. The rest of the qpdf handling and the
    # definition of `subprocess_arguments` used further down are missing from
    # view and must be restored from the original source.
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # Keep the form fillable when it is editable or when images will be
    # overlaid (flatten_pdf() is applied later in that case); otherwise
    # flatten it right away.
    if editable or len(images):
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    # A timeout is treated the same as a non-zero exit status.
    try:
        result = subprocess.run(subprocess_arguments, timeout=600).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("fill_template: call to pdftk fill_form took too long")
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    # Overlay each supplied image on the signature field of the same name.
    if len(images):
        # Page number and rectangle of every signature field, keyed by name.
        fields = dict()
        for field, default, pageno, rect, field_type, export_value in the_fields:
            if str(field_type) in ('/Sig', "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        # Overlays that were built successfully, merged in one pass below.
        image_todo = list()
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # First pass: trim the image into a temporary PNG. The command
            # comes from daconfig key 'imagemagick' (default 'convert').
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                daconfig.get('imagemagick',
                             'convert'), file_info['fullpath'], "-trim",
                "+repage", "+profile", '*', '-density', '0', temp_png.name
            ]
            try:
                result = subprocess.run(args, timeout=60).returncode
            except subprocess.TimeoutExpired:
                logmessage("fill_template: convert took too long")
                result = 1
            # NOTE(review): only exit status 1 counts as failure here, unlike
            # the pdftk check above, which uses != 0.
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            # rect is unpacked as two corner points (x1, y1, x2, y2).
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per rect unit on each axis; the larger ratio is used.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size: the image size plus the rect's origin scaled by dpp.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second pass: render the PNG onto a transparent canvas of that
            # extent and write it out as a one-page PDF overlay.
            args = [
                daconfig.get('imagemagick', 'convert'), temp_png.name,
                "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            try:
                result = subprocess.run(args, timeout=60).returncode
            except subprocess.TimeoutExpired:
                result = 1
                logmessage("fill_template: call to convert took too long")
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            image_todo.append({
                'overlay_file': overlay_pdf_file.name,
                'pageno': fields[field]['pageno']
            })
        if len(image_todo):
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            original = safe_pypdf_reader(pdf_file.name)
            original.idnum_to_page = get_page_hash(original.trailer)
            catalog = original.trailer["/Root"]
            writer = DAPdfFileWriter()
            # Catalog entries named in pdf_parts are carried over to the writer.
            tree = dict()
            for part in pdf_parts:
                if part in catalog:
                    tree[part] = catalog[part]
            # Merge each overlay onto its page (pageno is 1-based).
            for i in range(original.getNumPages()):
                for item in image_todo:
                    if (item['pageno'] - 1) == i:
                        page = original.getPage(i)
                        foreground_file = safe_pypdf_reader(
                            item['overlay_file'])
                        foreground_page = foreground_file.getPage(0)
                        page.mergePage(foreground_page)
            # Copy all pages, merged or not, into the new document.
            for i in range(original.getNumPages()):
                newpage = original.getPage(i)
                writer.addPage(newpage)
            for key, val in tree.items():
                writer._root_object.update(
                    {pypdf.generic.NameObject(key): val})
            writer.page_list = list()
            recursive_get_pages(writer._root_object['/Pages'],
                                writer.page_list)
            recursive_add_bookmark(original, writer, original.getOutlines())
            # Write to a scratch file, then replace the output file's content.
            with open(new_pdf_file.name, "wb") as outFile:
                writer.write(outFile)
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    # Flattening was postponed above when images had to be overlaid first.
    if (not editable) and len(images):
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    # The password goes to replicate_js_and_calculations() for editable output;
    # otherwise the file is encrypted only when a password was given.
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
Пример #4
0
# Fill the form fields of the PDF `template` from FDF data and return the path
# of the resulting temporary PDF file.
#
# Parameters, as used by the visible code:
#   template: PDF template; passed to read_fields() and named in log and
#       error messages.
#   data_strings, data_names, hidden, readonly: passed to fdfgen.forge_fdf().
#   images: iterable of (field name, file_info) pairs; file_info['fullpath']
#       is the image to place on the signature field of that name.
#   pdf_url: passed to fdfgen.forge_fdf(); None becomes ''.
#   editable: when true the form stays fillable and
#       replicate_js_and_calculations() is run; otherwise it is flattened.
#   pdfa: when true, pdf_to_pdfa() is run on the output.
#   password: passed to replicate_js_and_calculations() or pdf_encrypt().
#   template_password: when not None, qpdf arguments are prepared (the code
#       that follows is garbled, see the NOTE below).
#
# Raises DAError when the pdftk call returns a non-zero status.
def fill_template(template,
                  data_strings=[],
                  data_names=[],
                  hidden=[],
                  readonly=[],
                  images=[],
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None):
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # Store the FDF data in a temp file that is later referenced by name.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".fdf",
                                           delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    # Dead code: this branch can never execute.
    if False:
        fdf_dict = dict()
        for key, val in data_strings:
            fdf_dict[key] = val
        xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
        xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
".xfdf", delete=False)
        shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; delete=False because its name is what this function returns.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # NOTE(review): the list literal below is garbled (text between
    # '--password=' and 'fill_form' appears to have been redacted), so this
    # block is not valid Python as shown. The rest of the qpdf handling and the
    # definition of `subprocess_arguments` used further down are missing from
    # view and must be restored from the original source.
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # Keep the form fillable when it is editable or when images will be
    # overlaid (flatten_pdf() is applied later in that case); otherwise
    # flatten it right away.
    if editable or len(images):
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    # Overlay each supplied image on the signature field of the same name.
    if len(images):
        # Page number and rectangle of every signature field, keyed by name.
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        # Overlays that were built successfully, merged in one pass below.
        image_todo = list()
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # First pass: trim the image into a temporary PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                "convert", file_info['fullpath'], "-trim", "+repage",
                temp_png.name
            ]
            result = call(args)
            # NOTE(review): only exit status 1 counts as failure here, unlike
            # the pdftk check above, which uses != 0.
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            # rect is unpacked as two corner points (x1, y1, x2, y2).
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per rect unit on each axis; the larger ratio is used.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size: the image size plus the rect's origin scaled by dpp.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second pass: render the PNG onto a transparent canvas of that
            # extent and write it out as a PDF overlay.
            args = [
                "convert", temp_png.name, "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            result = call(args)
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            # The overlay is kept as an open stream; it is closed after the
            # merged document has been written (end of this section).
            image_todo.append({
                'overlay_stream':
                open(overlay_pdf_file.name, "rb"),
                'pageno':
                fields[field]['pageno']
            })
        if len(image_todo):
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            with open(pdf_file.name, "rb") as inFile:
                original = pypdf.PdfFileReader(inFile)
                original.idnum_to_page = get_page_hash(original.trailer)
                catalog = original.trailer["/Root"]
                writer = DAPdfFileWriter()
                # Catalog entries named in pdf_parts are carried over.
                tree = dict()
                for part in pdf_parts:
                    if part in catalog:
                        tree[part] = catalog[part]
                # Merge each overlay onto its page (pageno is 1-based).
                for i in range(original.getNumPages()):
                    for item in image_todo:
                        if (item['pageno'] - 1) == i:
                            page = original.getPage(i)
                            foreground_file = pypdf.PdfFileReader(
                                item['overlay_stream'])
                            foreground_page = foreground_file.getPage(0)
                            page.mergePage(foreground_page)
                # Copy all pages, merged or not, into the new document.
                for i in range(original.getNumPages()):
                    newpage = original.getPage(i)
                    writer.addPage(newpage)
                for key, val in tree.items():
                    writer._root_object.update(
                        {pypdf.generic.NameObject(key): val})
                writer.page_list = list()
                recursive_get_pages(writer._root_object['/Pages'],
                                    writer.page_list)
                recursive_add_bookmark(original, writer,
                                       original.getOutlines())
                # Written while the input file is still open.
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the output file's content with the merged document.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
            for item in image_todo:
                item['overlay_stream'].close()
    # Flattening was postponed above when images had to be overlaid first.
    if (not editable) and len(images):
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    # The password goes to replicate_js_and_calculations() for editable output;
    # otherwise the file is encrypted only when a password was given.
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
Пример #5
0
 def test_gen_xfdf(self):
     """gen_xfdf(SAMPLE_DATA) must produce exactly the reference XFDF text."""
     generated = read(pypdftk.gen_xfdf(SAMPLE_DATA))
     reference = read(TEST_XFDF_PATH)
     self.assertEqual(generated, reference)
Пример #6
0
# Fill the form fields of the PDF `template` from FDF data and return the path
# of the resulting temporary PDF file.
#
# Parameters, as used by the visible code:
#   template: PDF template; passed to read_fields() and named in log and
#       error messages.
#   data_strings, data_names, hidden, readonly: passed to fdfgen.forge_fdf().
#   images: iterable of (field name, file_info) pairs; file_info['fullpath']
#       is the image to place on the signature field of that name.
#   pdf_url: passed to fdfgen.forge_fdf(); None becomes ''.
#   editable: when true the form stays fillable and
#       replicate_js_and_calculations() is run; otherwise it is flattened.
#   pdfa: when true, pdf_to_pdfa() is run on the output.
#   password: passed to replicate_js_and_calculations() or pdf_encrypt().
#   template_password: when not None, qpdf arguments are prepared (the code
#       that follows is garbled, see the NOTE below).
#
# Raises DAError when the pdftk call returns a non-zero status.
def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None, template_password=None):
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # Store the FDF data in a temp file that is later referenced by name.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".fdf", delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    # Dead code: this branch can never execute.
    if False:
        fdf_dict = dict()
        for key, val in data_strings:
            fdf_dict[key] = val
        xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
        xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
".xfdf", delete=False)
        shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; delete=False because its name is what this function returns.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
    # NOTE(review): the list literal below is garbled (text between
    # '--password=' and 'fill_form' appears to have been redacted), so this
    # block is not valid Python as shown. The rest of the qpdf handling and the
    # definition of `subprocess_arguments` used further down are missing from
    # view and must be restored from the original source.
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
        qpdf_subprocess_arguments = [QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output', pdf_file.name]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # Keep the form fillable when it is editable or when images will be
    # overlaid (flatten_pdf() is applied later in that case); otherwise
    # flatten it right away.
    if editable or len(images):
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments))
    # Overlay each supplied image on the signature field of the same name.
    if len(images):
        # Page number and rectangle of every signature field, keyed by name.
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        # Overlays that were built successfully, merged in one pass below.
        image_todo = list()
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) + " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # First pass: trim the image into a temporary PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = ["convert", file_info['fullpath'], "-trim", "+repage", temp_png.name]
            result = call(args)
            # NOTE(review): only exit status 1 counts as failure here, unlike
            # the pdftk check above, which uses != 0.
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            # rect is unpacked as two corner points (x1, y1, x2, y2).
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per rect unit on each axis; the larger ratio is used.
            dppx = width/(xtwo-xone)
            dppy = height/(ytwo-yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size: the image size plus the rect's origin scaled by dpp.
            extent_x, extent_y = xone*dpp+width, yone*dpp+height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
            # Second pass: render the PNG onto a transparent canvas of that
            # extent and write it out as a PDF overlay.
            args = ["convert", temp_png.name, "-background", "none", "-density", str(int(dpp*72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), overlay_pdf_file.name]
            result = call(args)
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            # The overlay is kept as an open stream; it is closed after the
            # merged document has been written (end of this section).
            image_todo.append({'overlay_stream': open(overlay_pdf_file.name, "rb"), 'pageno': fields[field]['pageno']})
        if len(image_todo):
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf")
            with open(pdf_file.name, "rb") as inFile:
                original = pypdf.PdfFileReader(inFile)
                original.idnum_to_page = get_page_hash(original.trailer)
                catalog = original.trailer["/Root"]
                writer = DAPdfFileWriter()
                # Catalog entries named in pdf_parts are carried over.
                tree = dict()
                for part in pdf_parts:
                    if part in catalog:
                        tree[part] = catalog[part]
                # Merge each overlay onto its page (pageno is 1-based).
                for i in range(original.getNumPages()):
                    for item in image_todo:
                        if (item['pageno'] - 1) == i:
                            page = original.getPage(i)
                            foreground_file = pypdf.PdfFileReader(item['overlay_stream'])
                            foreground_page = foreground_file.getPage(0)
                            page.mergePage(foreground_page)
                # Copy all pages, merged or not, into the new document.
                for i in range(original.getNumPages()):
                    newpage = original.getPage(i)
                    writer.addPage(newpage)
                for key, val in tree.items():
                    writer._root_object.update({pypdf.generic.NameObject(key): val})
                writer.page_list = list()
                recursive_get_pages(writer._root_object['/Pages'], writer.page_list)
                recursive_add_bookmark(original, writer, original.getOutlines())
                # Written while the input file is still open.
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the output file's content with the merged document.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
            for item in image_todo:
                item['overlay_stream'].close()
    # Flattening was postponed above when images had to be overlaid first.
    if (not editable) and len(images):
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    # The password goes to replicate_js_and_calculations() for editable output;
    # otherwise the file is encrypted only when a password was given.
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
Пример #7
0
def fill_template(template,
                  data_strings=[],
                  data_names=[],
                  hidden=[],
                  readonly=[],
                  images=[],
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None):
    """Fill the form fields of a PDF template with pdftk.

    The list defaults are kept for interface compatibility; this function
    never mutates them.

    Args:
        template: Path of the PDF template to fill.
        data_strings: Passed to ``fdfgen.forge_fdf`` as the string fields.
        data_names: Passed to ``fdfgen.forge_fdf`` as the name fields.
        hidden: Passed to ``fdfgen.forge_fdf``.
        readonly: Passed to ``fdfgen.forge_fdf``.
        images: Iterable of ``(field name, file_info)`` pairs, where
            ``file_info['fullpath']`` is an image to place on the signature
            field of that name.
        pdf_url: Passed to ``fdfgen.forge_fdf``; ``None`` becomes ``''``.
        editable: If true, the form stays fillable and
            ``replicate_js_and_calculations`` is run; otherwise pdftk
            flattens the form.
        pdfa: If true, ``pdf_to_pdfa`` is run on the result.
        password: Passed to ``replicate_js_and_calculations`` when editable;
            otherwise, if set, used to encrypt the result.

    Returns:
        The path of the temporary PDF file that holds the result.

    Raises:
        DAError: If pdftk exits with a non-zero status.
    """
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    with tempfile.NamedTemporaryFile(prefix="datemp",
                                     mode="wb",
                                     suffix=".fdf",
                                     delete=False) as fdf_file:
        fdf_file.write(fdf)
    # Only the path is needed: pdftk writes to it by name, so release the
    # handle immediately instead of leaving it open until garbage collection.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    pdf_file.close()
    subprocess_arguments = [
        PDFTK_PATH, template, 'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    if editable:
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if images:
        # Page number and rectangle of every signature field, keyed by name.
        # The type may be rendered with or without quotes depending on how
        # the PDF library stringifies it, so accept the known spellings.
        fields = {}
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            # First pass: trim the image into a temporary PNG.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = [
                "convert", file_info['fullpath'], "-trim", "+repage",
                temp_png.name
            ]
            # Any non-zero status is a failure, not just 1.
            if call(args) != 0:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # A degenerate rectangle would otherwise divide by zero below.
            if xtwo == xone or ytwo == yone:
                logmessage("field name " + str(field) +
                           " has an empty rectangle; image skipped")
                continue
            # Image pixels per rect unit on each axis; use the larger ratio.
            dpp = max(width / (xtwo - xone), height / (ytwo - yone))
            # Canvas size: the image size plus the rect's origin scaled by dpp.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                           suffix=".pdf")
            # Second pass: render the PNG onto a transparent canvas of that
            # extent and write it out as a PDF overlay.
            args = [
                "convert", temp_png.name, "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            if call(args) != 0:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            with open(pdf_file.name,
                      "rb") as inFile, open(overlay_pdf_file.name,
                                            "rb") as overlay:
                original = pypdf.PdfFileReader(inFile)
                # pageno is 1-based; merge the overlay onto that page.
                background = original.getPage(fields[field]['pageno'] - 1)
                foreground = pypdf.PdfFileReader(overlay).getPage(0)
                background.mergePage(foreground)
                writer = pypdf.PdfFileWriter()
                for i in range(original.getNumPages()):
                    writer.addPage(original.getPage(i))
                # Written while the input streams are still open.
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the output file's content with the merged document.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name