def generate_facturx_from_file(
        pdf_invoice, facturx_xml, facturx_level='autodetect',
        check_xsd=True, pdf_metadata=None, output_pdf_file=None,
        additional_attachments=None):
    """
    Generate a Factur-X invoice from a regular PDF invoice and a
    Factur-X XML file.

    The method uses a file as input (regular PDF invoice) and re-writes
    the file (Factur-X PDF invoice), unless output_pdf_file is provided.

    NOTE(review): this file contains a later definition with the same name;
    at import time that later definition replaces this one.

    :param pdf_invoice: the regular PDF invoice as file path (type string)
        or as file object
    :type pdf_invoice: string or file
    :param facturx_xml: the Factur-X XML, already serialized
    :type facturx_xml: bytes
    :param facturx_level: the level of the Factur-X XML file.
        Default value is 'autodetect'. The value is lower-cased and handed
        over to _facturx_update_metadata_add_attachment().
        Possible values: minimum, basicwl, basic, en16931.
    :type facturx_level: string
    :param check_xsd: accepted for interface compatibility; this
        implementation does not read it.
    :type check_xsd: boolean
    :param pdf_metadata: the metadata of the generated Factur-X PDF. It is
        handed over unchanged to _facturx_update_metadata_add_attachment().
        Here is an example for the pdf_metadata argument:
        pdf_metadata = {
            'author': 'Akretion',
            'keywords': 'Factur-X, Invoice',
            'title': 'Akretion: Invoice I1242',
            'subject':
              'Factur-X invoice I1242 dated 2017-08-17 issued by Akretion',
            }
    :type pdf_metadata: dict
    :param output_pdf_file: File Path to the output Factur-X PDF file
    :type output_pdf_file: string or unicode
    :param additional_attachments: Specify the other files that you want to
        embed in the PDF file. It is a dict where keys are filepath and
        value is the description of the file (as unicode or string).
    :type additional_attachments: dict
    :return: Returns True. This method re-writes the input PDF invoice
        file, unless if the output_pdf_file is provided.
    :rtype: bool
    :raises TypeError: if facturx_xml is not a bytes object
    """
    # Explicit check rather than ``assert``: assert statements are removed
    # when Python runs with -O, which would let a wrong type slip through.
    if not isinstance(facturx_xml, bytes):
        raise TypeError(
            'The facturx_xml argument must be bytes (it is a %s).'
            % type(facturx_xml))
    xml_string = facturx_xml
    facturx_level = facturx_level.lower()

    # Read every additional attachment up-front. The resulting dict is
    # keyed by the file *content*; two attachments with identical content
    # therefore end up as a single entry.
    additional_attachments_read = {}
    if additional_attachments:
        for attach_filepath, attach_desc in additional_attachments.items():
            filename = os.path.basename(attach_filepath)
            mod_timestamp = os.path.getmtime(attach_filepath)
            mod_dt = datetime.fromtimestamp(mod_timestamp)
            # The with-block closes the file; no explicit close() needed
            with open(attach_filepath, 'rb') as fa:
                attach_bin = fa.read()
            additional_attachments_read[attach_bin] = {
                'filename': filename,
                'desc': attach_desc,
                'mod_date': mod_dt,
                }

    original_pdf = PdfFileReader(pdf_invoice)
    # Extract /OutputIntents obj from original invoice
    output_intents = _get_original_output_intents(original_pdf)
    new_pdf_filestream = PdfFileWriter()
    new_pdf_filestream._header = b_("%PDF-1.6")
    new_pdf_filestream.appendPagesFromReader(original_pdf)

    # Carry over the /ID of the original document when it has one
    original_pdf_id = original_pdf.trailer.get('/ID')
    if original_pdf_id:
        new_pdf_filestream._ID = original_pdf_id
        # else : generate some ?

    _facturx_update_metadata_add_attachment(
        new_pdf_filestream, xml_string, pdf_metadata, facturx_level,
        output_intents=output_intents,
        additional_attachments=additional_attachments_read)

    if output_pdf_file:
        with open(output_pdf_file, 'wb') as output_f:
            new_pdf_filestream.write(output_f)
    elif hasattr(pdf_invoice, 'write'):
        # pdf_invoice is a file object: it cannot be passed to open(),
        # so the result is written straight to the stream.
        # NOTE(review): no seek()/truncate() is done before writing -
        # confirm the stream position callers are expected to provide.
        new_pdf_filestream.write(pdf_invoice)
    else:
        with open(pdf_invoice, 'wb') as f:
            new_pdf_filestream.write(f)
    return True
def generate_facturx_from_file(
        pdf_invoice, facturx_xml, facturx_level='autodetect',
        check_xsd=True, pdf_metadata=None, output_pdf_file=None,
        additional_attachments=None, attachments=None):
    """
    Generate a Factur-X invoice from a regular PDF invoice and a
    Factur-X XML file.

    The method uses a file as input (regular PDF invoice) and re-writes
    the file (Factur-X PDF invoice), unless output_pdf_file is provided.

    :param pdf_invoice: the regular PDF invoice as file path (type string)
        or as file object
    :type pdf_invoice: string or file
    :param facturx_xml: the Factur-X XML
    :type facturx_xml: bytes, string, file or etree object
    :param facturx_level: the level of the Factur-X XML file.
        Default value is 'autodetect'. The only advantage to specify a
        particular value instead of using the autodetection is for a very
        small perf improvement.
        Possible values: minimum, basicwl, basic, en16931.
    :type facturx_level: string
    :param check_xsd: if enabled, checks the Factur-X XML file against the
        XSD (XML Schema Definition). If this step has already been performed
        beforehand, you should disable this feature to avoid a double check
        and get a small performance improvement.
    :type check_xsd: boolean
    :param pdf_metadata: Specify the metadata of the generated Factur-X PDF.
        If pdf_metadata is None (default value), this lib will generate some
        metadata in English by extracting relevant info from the
        Factur-X XML.
        Here is an example for the pdf_metadata argument:
        pdf_metadata = {
            'author': 'Akretion',
            'keywords': 'Factur-X, Invoice',
            'title': 'Akretion: Invoice I1242',
            'subject':
              'Factur-X invoice I1242 dated 2017-08-17 issued by Akretion',
            }
        If you pass the pdf_metadata argument, you will not use the
        automatic generation based on the extraction of the Factur-X XML
        file, which will bring a very small perf improvement.
        Values that are not strings are replaced by an empty string.
        The dict passed by the caller is not modified.
    :type pdf_metadata: dict
    :param output_pdf_file: File Path to the output Factur-X PDF file
    :type output_pdf_file: string or unicode
    :param attachments: Specify the other files that you want to
        embed in the PDF file. It is a dict where key is the filename and
        value is a dict. In this dict, keys are 'filepath' (value is the
        full file path) or 'filedata' (value is the encoded file),
        'description' (text description, optional),
        'modification_datetime' (modification date and time as datetime
        object, optional) and 'creation_datetime' (creation date and time
        as datetime object, optional).
        Entries named like the Factur-X/ZUGFeRD XML file are ignored with
        a warning. The dict passed by the caller is not modified.
    :type attachments: dict
    :param additional_attachments: DEPRECATED. Use attachments instead.
        Dict where keys are file paths and values are descriptions. It is
        only used when attachments is empty.
    :type additional_attachments: dict
    :return: Returns True. This method re-writes the input PDF invoice
        file, unless if the output_pdf_file is provided.
    :rtype: bool
    :raises ValueError: if an argument is missing or has the wrong type
    :raises TypeError: if facturx_xml is of an unsupported type
    """
    start_chrono = datetime.now()
    logger.debug(
        'generate_facturx_from_file with factur-x lib %s', __version__)
    logger.debug('1st arg pdf_invoice type=%s', type(pdf_invoice))
    logger.debug('2nd arg facturx_xml type=%s', type(facturx_xml))
    logger.debug('optional arg facturx_level=%s', facturx_level)
    logger.debug('optional arg check_xsd=%s', check_xsd)
    logger.debug('optional arg pdf_metadata=%s', pdf_metadata)
    logger.debug(
        'optional arg additional_attachments=%s', additional_attachments)

    # --- Argument validation ---
    if not pdf_invoice:
        raise ValueError('Missing pdf_invoice argument')
    if not facturx_xml:
        raise ValueError('Missing facturx_xml argument')
    if not isinstance(facturx_level, (str, unicode)):
        raise ValueError('Wrong facturx_level argument')
    if not isinstance(check_xsd, bool):
        raise ValueError('check_xsd argument must be a boolean')
    if not isinstance(pdf_metadata, (type(None), dict)):
        raise ValueError('pdf_metadata argument must be a dict or None')
    if not isinstance(additional_attachments, (dict, type(None))):
        raise ValueError(
            'additional_attachments argument must be a dict or None')
    if not isinstance(attachments, (dict, type(None))):
        raise ValueError('attachments argument must be a dict or None')
    if not isinstance(output_pdf_file, (type(None), str, unicode)):
        raise ValueError('output_pdf_file argument must be a string or None')

    # Remember whether the input is a path or a file object: this decides
    # how the result is written back when output_pdf_file is not set
    if isinstance(pdf_invoice, (str, unicode)):
        file_type = 'path'
    else:
        file_type = 'file'

    # --- Normalize facturx_xml to a serialized XML string ---
    xml_root = None
    # in Python3, xml_string is a byte
    if isinstance(facturx_xml, (str, bytes)):
        xml_string = facturx_xml
    elif isinstance(facturx_xml, unicode):
        xml_string = facturx_xml.encode('utf8')
    elif isinstance(facturx_xml, type(etree.Element('pouet'))):
        xml_root = facturx_xml
        xml_string = etree.tostring(
            xml_root, pretty_print=True, encoding='UTF-8',
            xml_declaration=True)
    elif isinstance(facturx_xml, file):
        facturx_xml.seek(0)
        xml_string = facturx_xml.read()
        facturx_xml.close()
    else:
        raise TypeError(
            "The second argument of the method generate_facturx must be "
            "either a string, an etree.Element() object or a file "
            "(it is a %s)." % type(facturx_xml))

    # --- Attachments ---
    # Work on a private copy (outer dict and per-file dicts) so that the
    # dict provided by the caller is never modified by the steps below
    if attachments is None:
        attachments = {}
    else:
        attachments = {
            filename: dict(fadict)
            for filename, fadict in attachments.items()}

    # The additional_attachments arg is deprecated
    if additional_attachments and not attachments:
        logger.warning(
            'The argument additional_attachments is deprecated. '
            'It will be removed in future versions. Use the argument '
            'attachments instead.')
        for attach_filepath, attach_desc in additional_attachments.items():
            filename = os.path.basename(attach_filepath)
            mod_timestamp = os.path.getmtime(attach_filepath)
            mod_dt = datetime.fromtimestamp(mod_timestamp)
            # The with-block closes the file; no explicit close() needed
            with open(attach_filepath, 'rb') as fa:
                filedata = fa.read()
            attachments[filename] = {
                'filedata': filedata,
                'description': attach_desc,
                'modification_datetime': mod_dt,
                }

    if attachments:
        reserved_filenames = [FACTURX_FILENAME] + ZUGFERD_FILENAMES
        # Iterate over a snapshot: entries may be removed inside the loop,
        # and changing the size of a dict while iterating over its items()
        # view raises RuntimeError on Python 3
        for filename, fadict in list(attachments.items()):
            if filename in reserved_filenames:
                logger.warning(
                    'You cannot provide as attachment a file named %s. '
                    'This file will NOT be attached.', filename)
                attachments.pop(filename)
                continue
            if fadict.get('filepath') and not fadict.get('filedata'):
                with open(fadict['filepath'], 'rb') as fa:
                    fadict['filedata'] = fa.read()
                # As explained here
                # https://stackoverflow.com/questions/237079/how-to-get-file-creation-modification-date-times-in-python
                # creation date is not easy to get.
                # So we only implement getting the modification date
                if not fadict.get('modification_datetime'):
                    mod_timestamp = os.path.getmtime(fadict['filepath'])
                    fadict['modification_datetime'] = \
                        datetime.fromtimestamp(mod_timestamp)

    # --- PDF metadata ---
    if pdf_metadata is None:
        if xml_root is None:
            xml_root = etree.fromstring(xml_string)
        base_info = _extract_base_info(xml_root)
        pdf_metadata = _base_info2pdf_metadata(base_info)
    else:
        # clean-up pdf_metadata: non-string values become empty strings.
        # A new dict is built so the caller's dict stays untouched.
        pdf_metadata = {
            key: value if isinstance(value, (str, unicode)) else ''
            for key, value in pdf_metadata.items()}

    # --- Factur-X level and optional XSD check ---
    facturx_level = facturx_level.lower()
    if facturx_level not in FACTURX_LEVEL2xsd:
        if xml_root is None:
            xml_root = etree.fromstring(xml_string)
        logger.debug('Factur-X level will be autodetected')
        facturx_level = get_facturx_level(xml_root)
    if check_xsd:
        check_facturx_xsd(
            xml_string, flavor='factur-x', facturx_level=facturx_level)

    # --- Build the new PDF ---
    original_pdf = PdfFileReader(pdf_invoice)
    # Extract /OutputIntents obj from original invoice
    output_intents = _get_original_output_intents(original_pdf)
    new_pdf_filestream = PdfFileWriter()
    new_pdf_filestream._header = b_("%PDF-1.6")
    new_pdf_filestream.appendPagesFromReader(original_pdf)

    # Carry over the /ID of the original document when it has one
    original_pdf_id = original_pdf.trailer.get('/ID')
    logger.debug('original_pdf_id=%s', original_pdf_id)
    if original_pdf_id:
        new_pdf_filestream._ID = original_pdf_id
        # else : generate some ?

    _facturx_update_metadata_add_attachment(
        new_pdf_filestream, xml_string, pdf_metadata, facturx_level,
        output_intents=output_intents, additional_attachments=attachments)

    # --- Write the result ---
    if output_pdf_file:
        with open(output_pdf_file, 'wb') as output_f:
            new_pdf_filestream.write(output_f)
    elif file_type == 'path':
        with open(pdf_invoice, 'wb') as f:
            new_pdf_filestream.write(f)
    elif file_type == 'file':
        new_pdf_filestream.write(pdf_invoice)

    logger.info('%s file added to PDF invoice', FACTURX_FILENAME)
    end_chrono = datetime.now()
    logger.info(
        'Factur-X invoice generated in %s seconds',
        (end_chrono - start_chrono).total_seconds())
    return True