def test_gen_xfdf(self):
    """Check that gen_xfdf(SAMPLE_DATA) matches the reference XFDF file.

    The two documents are compared structurally, so that sibling elements
    may appear in a different order without failing the test.
    """

    def fingerprint(element):
        # Sortable summary of an element and its whole subtree.  Children are
        # sorted recursively, which is what makes the comparison independent
        # of sibling order at every depth (serialising the root or any other
        # container element with ET.tostring would bake the order back in).
        children = sorted(fingerprint(child) for child in element)
        text = element.text or ''
        if children:
            # Text directly inside a container is treated as indentation.
            text = text.strip()
        # The tail is stripped because its whitespace depends on where the
        # element sits among its siblings.
        tail = (element.tail or '').strip()
        return (element.tag, sorted(element.attrib.items()), text, tail,
                children)

    xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA)
    xfdf = read(xfdf_path)
    expected = read(TEST_XFDF_PATH)
    self.assertEqual(fingerprint(ET.fromstring(xfdf)),
                     fingerprint(ET.fromstring(expected)))
def test_gen_xfdf(self):
    """Check that gen_xfdf(SAMPLE_DATA) matches the reference XFDF file.

    The two documents are compared structurally, so that sibling elements
    may appear in a different order without failing the test.
    """

    def fingerprint(element):
        # Sortable summary of an element and its whole subtree.  Children are
        # sorted recursively, which is what makes the comparison independent
        # of sibling order at every depth (serialising the root or any other
        # container element with ET.tostring would bake the order back in).
        children = sorted(fingerprint(child) for child in element)
        text = element.text or ''
        if children:
            # Text directly inside a container is treated as indentation.
            text = text.strip()
        # The tail is stripped because its whitespace depends on where the
        # element sits among its siblings.
        tail = (element.tail or '').strip()
        return (element.tag, sorted(element.attrib.items()), text, tail,
                children)

    xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA)
    xfdf = read(xfdf_path)
    expected = read(TEST_XFDF_PATH)
    self.assertEqual(fingerprint(ET.fromstring(xfdf)),
                     fingerprint(ET.fromstring(expected)))
# fill_template (XFDF revision): fill the form fields of the PDF `template`
# and return the path of the generated PDF (pdf_file.name).
#
# Steps visible in the body, in order:
#   * pdf_url defaults to 'file.pdf' and gets a '.pdf' suffix if it lacks one.
#   * read_fields(template) is scanned for button fields.  field_type is
#     reduced to '/' and ASCII letters first, so "/'Btn'" also matches '/Btn'.
#     The "checked" value of each is export_value, else default_export_value,
#     else 'Yes'.
#   * data_strings entries for those fields are normalised: 'Yes', 'yes',
#     'True', 'true', 'On', 'on' or the export value itself become the export
#     value; anything else becomes 'Off', 'off', 'no' or 'No' depending on the
#     spelling of the export value.
#   * The (key, value) pairs are collected into a dict and written to a
#     temporary .xfdf file through Xfdf(...).write_xfdf.
#   * The fill_form command is run with a 600 second timeout, using
#     'need_appearances' when `editable` is true or `images` is non-empty and
#     'flatten' otherwise.  A timeout or non-zero exit status is logged and
#     raised as DAError.
#   * For each (field, file_info) in `images` that names a '/Sig' field, the
#     configured ImageMagick command trims file_info['fullpath'] into a PNG and
#     then renders it into a one-page overlay PDF sized from the field's rect.
#     Unknown fields, timeouts and exit status 1 are logged and skipped.
#   * If any overlay was produced, each is merged onto its 1-based page, the
#     pages are copied into a DAPdfFileWriter together with the catalog entries
#     named in pdf_parts and the outlines, and the result replaces
#     pdf_file.name.
#   * Finally: flatten_pdf when not editable and images were given,
#     pdf_to_pdfa when `pdfa` is set, then replicate_js_and_calculations when
#     editable, otherwise pdf_encrypt when a password was given.
#
# NOTE(review): the text between '--password=' and 'fill_form' is elided
# ('******') in this copy, so the qpdf decryption call and the start of
# subprocess_arguments cannot be confirmed from here.
# NOTE(review): the `if False:` branch never runs, and data_names, hidden and
# readonly are only mentioned in a commented-out fdfgen call.
def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None, template_password=None, default_export_value=None): if pdf_url is None: pdf_url = 'file.pdf' if not pdf_url.endswith('.pdf'): pdf_url += '.pdf' the_fields = read_fields(template) export_values = dict() for field, default, pageno, rect, field_type, export_value in the_fields: field_type = re.sub(r'[^/A-Za-z]', '', str(field_type)) if field_type in ('/Btn', "/'Btn'"): export_values[ field] = export_value or default_export_value or 'Yes' if len(export_values): new_data_strings = list() for key, val in data_strings: if key in export_values: if str(val) in ('Yes', 'yes', 'True', 'true', 'On', 'on', export_values[key]): val = export_values[key] else: if export_values[key] == 'On': val = 'Off' elif export_values[key] == 'on': val = 'off' elif export_values[key] == 'yes': val = 'no' else: val = 'No' new_data_strings.append((key, val)) data_strings = new_data_strings data_dict = {} for key, val in data_strings: data_dict[key] = val fdf = Xfdf(pdf_url, data_dict) #fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly) fdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".xfdf", delete=False) #fdf_file.write(fdf) fdf_file.close() fdf.write_xfdf(fdf_file.name) if False: fdf_dict = dict() for key, val in data_strings: fdf_dict[key] = val xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict) xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\ ".xfdf", delete=False) shutil.copyfile(xfdf_temp_filename, xfdf_file.name) pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) if template_password is not None: template_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) qpdf_subprocess_arguments = [ QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output', pdf_file.name ] 
#logmessage("Arguments are " + str(subprocess_arguments)) if editable or len(images): subprocess_arguments.append('need_appearances') else: subprocess_arguments.append('flatten') try: result = subprocess.run(subprocess_arguments, timeout=600).returncode except subprocess.TimeoutExpired: result = 1 logmessage("fill_template: call to pdftk fill_form took too long") if result != 0: logmessage("Failed to fill PDF form " + str(template)) raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments)) if len(images): fields = dict() for field, default, pageno, rect, field_type, export_value in the_fields: if str(field_type) in ('/Sig', "/'Sig'"): fields[field] = {'pageno': pageno, 'rect': rect} image_todo = list() for field, file_info in images: if field not in fields: logmessage("field name " + str(field) + " not found in PDF file") continue #logmessage("Need to put image on page " + str(fields[field]['pageno'])) temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png") args = [ daconfig.get('imagemagick', 'convert'), file_info['fullpath'], "-trim", "+repage", "+profile", '*', '-density', '0', temp_png.name ] try: result = subprocess.run(args, timeout=60).returncode except subprocess.TimeoutExpired: logmessage("fill_template: convert took too long") result = 1 if result == 1: logmessage("failed to trim file: " + " ".join(args)) continue im = Image.open(temp_png.name) width, height = im.size xone, yone, xtwo, ytwo = fields[field]['rect'] dppx = width / (xtwo - xone) dppy = height / (ytwo - yone) if (dppx > dppy): dpp = dppx else: dpp = dppy extent_x, extent_y = xone * dpp + width, yone * dpp + height overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) args = [ daconfig.get('imagemagick', 'convert'), temp_png.name, "-background", "none", "-density", str(int(dpp * 72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), 
overlay_pdf_file.name ] try: result = subprocess.run(args, timeout=60).returncode except subprocess.TimeoutExpired: result = 1 logmessage("fill_template: call to convert took too long") if result == 1: logmessage("failed to make overlay: " + " ".join(args)) continue image_todo.append({ 'overlay_file': overlay_pdf_file.name, 'pageno': fields[field]['pageno'] }) if len(image_todo): new_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") original = safe_pypdf_reader(pdf_file.name) original.idnum_to_page = get_page_hash(original.trailer) catalog = original.trailer["/Root"] writer = DAPdfFileWriter() tree = dict() for part in pdf_parts: if part in catalog: tree[part] = catalog[part] for i in range(original.getNumPages()): for item in image_todo: if (item['pageno'] - 1) == i: page = original.getPage(i) foreground_file = safe_pypdf_reader( item['overlay_file']) foreground_page = foreground_file.getPage(0) page.mergePage(foreground_page) for i in range(original.getNumPages()): newpage = original.getPage(i) writer.addPage(newpage) for key, val in tree.items(): writer._root_object.update( {pypdf.generic.NameObject(key): val}) writer.page_list = list() recursive_get_pages(writer._root_object['/Pages'], writer.page_list) recursive_add_bookmark(original, writer, original.getOutlines()) with open(new_pdf_file.name, "wb") as outFile: writer.write(outFile) shutil.copyfile(new_pdf_file.name, pdf_file.name) if (not editable) and len(images): flatten_pdf(pdf_file.name) if pdfa: pdf_to_pdfa(pdf_file.name) if editable: replicate_js_and_calculations(template, pdf_file.name, password) elif password: pdf_encrypt(pdf_file.name, password) return pdf_file.name
# fill_template (FDF revision): fill the form fields of the PDF `template`
# and return the path of the generated PDF (pdf_file.name).
#
# Steps visible in the body, in order:
#   * pdf_url defaults to '' and the field data is serialised with
#     fdfgen.forge_fdf into a temporary .fdf file.
#   * The fill_form command is run through call(), using 'need_appearances'
#     when `editable` is true or `images` is non-empty and 'flatten'
#     otherwise.  A non-zero exit status is logged and raised as DAError.
#   * For each (field, file_info) in `images` that names a signature field
#     ('/Sig', "/u'Sig'" or "/'Sig'"), `convert` trims file_info['fullpath']
#     into a PNG and then renders it into a one-page overlay PDF sized from
#     the field's rect.  Unknown fields and exit status 1 are logged and
#     skipped.  Each overlay is kept open as a binary stream.
#   * If any overlay was produced, each is merged onto its 1-based page, the
#     pages are copied into a DAPdfFileWriter together with the catalog entries
#     named in pdf_parts and the outlines, the result replaces pdf_file.name,
#     and the overlay streams are closed.
#   * Finally: flatten_pdf when not editable and images were given,
#     pdf_to_pdfa when `pdfa` is set, then replicate_js_and_calculations when
#     editable, otherwise pdf_encrypt when a password was given.
#
# NOTE(review): the text between '--password=' and 'fill_form' is elided
# ('******') in this copy, so the qpdf decryption call and the start of
# subprocess_arguments cannot be confirmed from here.
# NOTE(review): the `if False:` branch never runs.
def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None, template_password=None): if pdf_url is None: pdf_url = '' fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly) fdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".fdf", delete=False) fdf_file.write(fdf) fdf_file.close() if False: fdf_dict = dict() for key, val in data_strings: fdf_dict[key] = val xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict) xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\ ".xfdf", delete=False) shutil.copyfile(xfdf_temp_filename, xfdf_file.name) pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) if template_password is not None: template_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) qpdf_subprocess_arguments = [ QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output', pdf_file.name ] #logmessage("Arguments are " + str(subprocess_arguments)) if editable or len(images): subprocess_arguments.append('need_appearances') else: subprocess_arguments.append('flatten') result = call(subprocess_arguments) if result != 0: logmessage("Failed to fill PDF form " + str(template)) raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments)) if len(images): fields = dict() for field, default, pageno, rect, field_type in read_fields(template): if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"): fields[field] = {'pageno': pageno, 'rect': rect} image_todo = list() for field, file_info in images: if field not in fields: logmessage("field name " + str(field) + " not found in PDF file") continue #logmessage("Need to put image on page " + str(fields[field]['pageno'])) temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png") args = [ "convert", 
file_info['fullpath'], "-trim", "+repage", temp_png.name ] result = call(args) if result == 1: logmessage("failed to trim file: " + " ".join(args)) continue im = Image.open(temp_png.name) width, height = im.size xone, yone, xtwo, ytwo = fields[field]['rect'] dppx = width / (xtwo - xone) dppy = height / (ytwo - yone) if (dppx > dppy): dpp = dppx else: dpp = dppy extent_x, extent_y = xone * dpp + width, yone * dpp + height overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) args = [ "convert", temp_png.name, "-background", "none", "-density", str(int(dpp * 72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), overlay_pdf_file.name ] result = call(args) if result == 1: logmessage("failed to make overlay: " + " ".join(args)) continue image_todo.append({ 'overlay_stream': open(overlay_pdf_file.name, "rb"), 'pageno': fields[field]['pageno'] }) if len(image_todo): new_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") with open(pdf_file.name, "rb") as inFile: original = pypdf.PdfFileReader(inFile) original.idnum_to_page = get_page_hash(original.trailer) catalog = original.trailer["/Root"] writer = DAPdfFileWriter() tree = dict() for part in pdf_parts: if part in catalog: tree[part] = catalog[part] for i in range(original.getNumPages()): for item in image_todo: if (item['pageno'] - 1) == i: page = original.getPage(i) foreground_file = pypdf.PdfFileReader( item['overlay_stream']) foreground_page = foreground_file.getPage(0) page.mergePage(foreground_page) for i in range(original.getNumPages()): newpage = original.getPage(i) writer.addPage(newpage) for key, val in tree.items(): writer._root_object.update( {pypdf.generic.NameObject(key): val}) writer.page_list = list() recursive_get_pages(writer._root_object['/Pages'], writer.page_list) recursive_add_bookmark(original, writer, original.getOutlines()) with open(new_pdf_file.name, "wb") as outFile: writer.write(outFile) 
shutil.copyfile(new_pdf_file.name, pdf_file.name) for item in image_todo: item['overlay_stream'].close() if (not editable) and len(images): flatten_pdf(pdf_file.name) if pdfa: pdf_to_pdfa(pdf_file.name) if editable: replicate_js_and_calculations(template, pdf_file.name, password) elif password: pdf_encrypt(pdf_file.name, password) return pdf_file.name
def test_gen_xfdf(self):
    """The XFDF generated from SAMPLE_DATA must equal the reference file's
    contents exactly."""
    generated_path = pypdftk.gen_xfdf(SAMPLE_DATA)
    actual_contents = read(generated_path)
    reference_contents = read(TEST_XFDF_PATH)
    self.assertEqual(actual_contents, reference_contents)
# fill_template (FDF revision): fill the form fields of the PDF `template`
# and return the path of the generated PDF (pdf_file.name).
#
# Steps visible in the body, in order:
#   * pdf_url defaults to '' and the field data is serialised with
#     fdfgen.forge_fdf into a temporary .fdf file.
#   * The fill_form command is run through call(), using 'need_appearances'
#     when `editable` is true or `images` is non-empty and 'flatten'
#     otherwise.  A non-zero exit status is logged and raised as DAError.
#   * For each (field, file_info) in `images` that names a signature field
#     ('/Sig', "/u'Sig'" or "/'Sig'"), `convert` trims file_info['fullpath']
#     into a PNG and then renders it into a one-page overlay PDF sized from
#     the field's rect.  Unknown fields and exit status 1 are logged and
#     skipped.  Each overlay is kept open as a binary stream.
#   * If any overlay was produced, each is merged onto its 1-based page, the
#     pages are copied into a DAPdfFileWriter together with the catalog entries
#     named in pdf_parts and the outlines, the result replaces pdf_file.name,
#     and the overlay streams are closed.
#   * Finally: flatten_pdf when not editable and images were given,
#     pdf_to_pdfa when `pdfa` is set, then replicate_js_and_calculations when
#     editable, otherwise pdf_encrypt when a password was given.
#
# NOTE(review): the text between '--password=' and 'fill_form' is elided
# ('******') in this copy, so the qpdf decryption call and the start of
# subprocess_arguments cannot be confirmed from here.
# NOTE(review): the `if False:` branch never runs.
def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None, template_password=None): if pdf_url is None: pdf_url = '' fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly) fdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".fdf", delete=False) fdf_file.write(fdf) fdf_file.close() if False: fdf_dict = dict() for key, val in data_strings: fdf_dict[key] = val xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict) xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\ ".xfdf", delete=False) shutil.copyfile(xfdf_temp_filename, xfdf_file.name) pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) if template_password is not None: template_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) qpdf_subprocess_arguments = [QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output', pdf_file.name] #logmessage("Arguments are " + str(subprocess_arguments)) if editable or len(images): subprocess_arguments.append('need_appearances') else: subprocess_arguments.append('flatten') result = call(subprocess_arguments) if result != 0: logmessage("Failed to fill PDF form " + str(template)) raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments)) if len(images): fields = dict() for field, default, pageno, rect, field_type in read_fields(template): if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"): fields[field] = {'pageno': pageno, 'rect': rect} image_todo = list() for field, file_info in images: if field not in fields: logmessage("field name " + str(field) + " not found in PDF file") continue #logmessage("Need to put image on page " + str(fields[field]['pageno'])) temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png") args = ["convert", 
file_info['fullpath'], "-trim", "+repage", temp_png.name] result = call(args) if result == 1: logmessage("failed to trim file: " + " ".join(args)) continue im = Image.open(temp_png.name) width, height = im.size xone, yone, xtwo, ytwo = fields[field]['rect'] dppx = width/(xtwo-xone) dppy = height/(ytwo-yone) if (dppx > dppy): dpp = dppx else: dpp = dppy extent_x, extent_y = xone*dpp+width, yone*dpp+height overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False) args = ["convert", temp_png.name, "-background", "none", "-density", str(int(dpp*72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), overlay_pdf_file.name] result = call(args) if result == 1: logmessage("failed to make overlay: " + " ".join(args)) continue image_todo.append({'overlay_stream': open(overlay_pdf_file.name, "rb"), 'pageno': fields[field]['pageno']}) if len(image_todo): new_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") with open(pdf_file.name, "rb") as inFile: original = pypdf.PdfFileReader(inFile) original.idnum_to_page = get_page_hash(original.trailer) catalog = original.trailer["/Root"] writer = DAPdfFileWriter() tree = dict() for part in pdf_parts: if part in catalog: tree[part] = catalog[part] for i in range(original.getNumPages()): for item in image_todo: if (item['pageno'] - 1) == i: page = original.getPage(i) foreground_file = pypdf.PdfFileReader(item['overlay_stream']) foreground_page = foreground_file.getPage(0) page.mergePage(foreground_page) for i in range(original.getNumPages()): newpage = original.getPage(i) writer.addPage(newpage) for key, val in tree.items(): writer._root_object.update({pypdf.generic.NameObject(key): val}) writer.page_list = list() recursive_get_pages(writer._root_object['/Pages'], writer.page_list) recursive_add_bookmark(original, writer, original.getOutlines()) with open(new_pdf_file.name, "wb") as outFile: writer.write(outFile) 
shutil.copyfile(new_pdf_file.name, pdf_file.name) for item in image_todo: item['overlay_stream'].close() if (not editable) and len(images): flatten_pdf(pdf_file.name) if pdfa: pdf_to_pdfa(pdf_file.name) if editable: replicate_js_and_calculations(template, pdf_file.name, password) elif password: pdf_encrypt(pdf_file.name, password) return pdf_file.name
def _overlay_image_on_page(pdf_path, image_path, pageno, rect):
    """Stamp one image over a rectangle of one page of a PDF, in place.

    The image is trimmed with ``convert``, rendered into a one-page overlay
    PDF whose density and extent are derived from ``rect``, merged onto page
    ``pageno`` (1-based) of ``pdf_path``, and the merged document then
    replaces ``pdf_path``.

    Returns True when the PDF was rewritten, and False (after logging) when
    the rectangle is degenerate or a ``convert`` call failed.
    """
    xone, yone, xtwo, ytwo = rect
    if xtwo == xone or ytwo == yone:
        # A zero-width or zero-height field would otherwise raise a
        # division error below and abort the whole fill.
        logmessage("cannot place image in empty rectangle " + str(rect))
        return False
    # The temporary files are context-managed so that their handles are
    # closed, and the files removed, as soon as this image is done with.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".png") as temp_png:
        args = ["convert", image_path, "-trim", "+repage", temp_png.name]
        # Any non-zero status is a failure, not only exit code 1.
        if call(args) != 0:
            logmessage("failed to trim file: " + " ".join(args))
            return False
        im = Image.open(temp_png.name)
        try:
            width, height = im.size
        finally:
            im.close()
        # Image pixels per PDF unit; use the larger of the two ratios.
        dpp = max(width / (xtwo - xone), height / (ytwo - yone))
        extent_x, extent_y = xone * dpp + width, yone * dpp + height
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") as overlay_pdf_file:
            args = ["convert", temp_png.name, "-background", "none",
                    "-density", str(int(dpp * 72)), "-gravity", "NorthEast",
                    "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)),
                    overlay_pdf_file.name]
            if call(args) != 0:
                logmessage("failed to make overlay: " + " ".join(args))
                return False
            with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf") as new_pdf_file:
                # Both inputs stay open until the writer has produced its
                # output, because the pages are read from them on demand.
                with open(pdf_path, "rb") as inFile, open(overlay_pdf_file.name, "rb") as overlay:
                    original = pypdf.PdfFileReader(inFile)
                    background = original.getPage(pageno - 1)
                    foreground = pypdf.PdfFileReader(overlay).getPage(0)
                    background.mergePage(foreground)
                    writer = pypdf.PdfFileWriter()
                    for i in range(original.getNumPages()):
                        writer.addPage(original.getPage(i))
                    with open(new_pdf_file.name, "wb") as outFile:
                        writer.write(outFile)
                shutil.copyfile(new_pdf_file.name, pdf_path)
    return True


def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=None, editable=True, pdfa=False, password=None):
    """Fill the PDF form ``template`` and return the path of the new PDF.

    Args:
        template: path of the PDF form passed to the fill_form command.
        data_strings, data_names, hidden, readonly: field data handed
            unchanged to ``fdfgen.forge_fdf``.
        images: iterable of ``(field, file_info)`` pairs; the file at
            ``file_info['fullpath']`` is stamped over the '/Sig' field
            called ``field``.
        pdf_url: passed to ``fdfgen.forge_fdf``; None is treated as ''.
        editable: when true the form is filled with 'need_appearances' and
            ``replicate_js_and_calculations`` is run on the output;
            otherwise the form is flattened.
        pdfa: when true the output is passed through ``pdf_to_pdfa``.
        password: forwarded to ``replicate_js_and_calculations`` when
            editable, otherwise used with ``pdf_encrypt`` if set.

    Returns:
        The path of the generated temporary PDF file.

    Raises:
        DAError: if the fill_form command exits with a non-zero status.

    The list defaults are never mutated here, so sharing them between calls
    is harmless.
    """
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".fdf", delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
    # Only the reserved path is needed (delete=False keeps the file), so the
    # handle is released straight away.
    pdf_file.close()
    subprocess_arguments = [PDFTK_PATH, template, 'fill_form', fdf_file.name, 'output', pdf_file.name]
    #logmessage("Arguments are " + str(subprocess_arguments))
    if editable:
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments))
    if len(images):
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) == '/Sig':
                fields[field] = {'pageno': pageno, 'rect': rect}
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) + " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # Failures are logged inside the helper; the remaining images are
            # still attempted.
            _overlay_image_on_page(pdf_file.name, file_info['fullpath'], fields[field]['pageno'], fields[field]['rect'])
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name