Пример #1
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None):
    """Convert a word-processing file to PDF with headless LibreOffice.

    ``in_file`` is copied into a fresh temporary directory as
    ``file.<in_format>`` and LibreOffice is run there with
    ``--convert-to pdf``.  The conversion is attempted up to five times,
    until ``file.pdf`` shows up in the temporary directory.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the resulting PDF is copied to on success.
        pdfa: if true, ``pdf_to_pdfa`` is applied to the PDF before copying.
        password: if truthy, ``pdf_encrypt`` is applied with this password.

    Returns:
        True if a PDF was produced and the last LibreOffice run exited with
        status 0; False otherwise.
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    # try/finally guarantees the working directory is removed even when the
    # copy, the conversion or the post-processing raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
            if os.path.isfile(to_file):
                break
            # No output file: treat as a failure regardless of exit status.
            result = 1
            tries += 1
            if tries < max_tries:
                # Randomised, growing back-off; pointless after the last try.
                time.sleep(2 + tries*random.random())
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
Пример #2
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_references=False):
    """Convert a word-processing file to PDF with headless LibreOffice.

    ``in_file`` is copied into a fresh temporary directory as
    ``file.<in_format>``.  LibreOffice is then run there, either with
    ``--convert-to pdf`` or, when ``update_references`` is true, through the
    ``Standard.Module1.PysIndexerPdf`` macro.  The conversion is attempted up
    to five times, until ``file.pdf`` shows up in the temporary directory.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the resulting PDF is copied to on success.
        pdfa: if true, ``pdf_to_pdfa`` is applied to the PDF before copying.
        password: if truthy, ``pdf_encrypt`` is applied with this password.
        update_references: if true, convert through the LibreOffice macro
            instead of ``--convert-to``.

    Returns:
        True if a PDF was produced and the last LibreOffice run exited with
        status 0; False otherwise.
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    # try/finally guarantees the working directory is removed even when the
    # copy, the conversion or the post-processing raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        # The command line does not change between attempts, so build it once.
        if update_references:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.PysIndexerPdf(' + from_file + ',' + to_file + ')']
        else:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
            if os.path.isfile(to_file):
                break
            # No output file: treat as a failure regardless of exit status.
            result = 1
            tries += 1
            if tries < max_tries:
                # Only wait and announce a retry when one will actually happen.
                time.sleep(2 + tries*random.random())
                logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
Пример #3
0
 def convert_to_file(self, question):
     """Run Pandoc on ``self.input_content`` and store the result in a file.

     The content is first passed through the docassemble filter matching
     ``self.output_format`` ('rtf', 'rtf to docx', 'docx', 'pdf' or 'tex'),
     written to a temporary markdown file and converted by Pandoc.  RTF
     output is post-processed with ``rtf_filter`` and, for 'rtf to docx',
     converted with ``rtf_to_docx``.  The result is copied to
     ``self.output_filename`` when that is set; otherwise
     ``self.output_filename`` is pointed at the temporary output file.

     Args:
         question: passed through to the docassemble filters.

     Raises:
         Exception: if the latex conversion directory cannot be created,
             Pandoc exits with an error, or the RTF to DOCX step fails.
         IOError: if Pandoc's output file does not exist afterwards.
     """
     # Flatten the metadata (a dict, or a list of dicts) into a single dict.
     metadata_as_dict = dict()
     if isinstance(self.metadata, dict):
         metadata_as_dict = self.metadata
     elif isinstance(self.metadata, list):
         for data in self.metadata:
             if isinstance(data, dict):
                 metadata_as_dict.update(data)
     # 'rtf to docx' is produced as RTF first and converted afterwards.
     if self.output_format == 'rtf to docx':
         self.output_extension = 'rtf'
     else:
         self.output_extension = self.output_format
     if self.output_format in ('rtf', 'rtf to docx') and self.template_file is None:
         self.template_file = docassemble.base.functions.standard_template_filename(
             'Legal-Template.rtf')
     if self.output_format == 'docx' and self.reference_file is None:
         self.reference_file = docassemble.base.functions.standard_template_filename(
             'Legal-Template.docx')
     if self.output_format in ('pdf', 'tex') and self.template_file is None:
         self.template_file = docassemble.base.functions.standard_template_filename(
             'Legal-Template.tex')
     yaml_to_use = list()
     if self.output_format in ('rtf', 'rtf to docx'):
         self.input_content = docassemble.base.filter.rtf_prefilter(
             self.input_content, metadata=metadata_as_dict)
     if self.output_format == 'docx':
         self.input_content = docassemble.base.filter.docx_filter(
             self.input_content,
             metadata=metadata_as_dict,
             question=question)
     if self.output_format in ('pdf', 'tex'):
         if len(self.initial_yaml) == 0:
             standard_file = docassemble.base.functions.standard_template_filename(
                 'Legal-Template.yml')
             if standard_file is not None:
                 self.initial_yaml.append(standard_file)
         yaml_to_use.extend(
             yaml_file for yaml_file in self.initial_yaml
             if yaml_file is not None)
         yaml_to_use.extend(
             yaml_file for yaml_file in self.additional_yaml
             if yaml_file is not None)
         self.input_content = docassemble.base.filter.pdf_filter(
             self.input_content,
             metadata=metadata_as_dict,
             question=question)
     # Pandoc reads its input from a file; delete=False keeps the file
     # around after close() so Pandoc can open it by name.
     temp_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                             mode="wb",
                                             suffix=".md",
                                             delete=False)
     temp_file.write(self.input_content.encode('utf8'))
     temp_file.close()
     temp_outfile = tempfile.NamedTemporaryFile(prefix="datemp",
                                                mode="wb",
                                                suffix="." +
                                                str(self.output_extension),
                                                delete=False)
     temp_outfile.close()
     the_temp_dir = tempfile.gettempdir()
     latex_conversion_directory = os.path.join(the_temp_dir, 'latex_convert')
     if not os.path.isdir(latex_conversion_directory):
         os.makedirs(latex_conversion_directory)
     if not os.path.isdir(latex_conversion_directory):
         raise Exception("Could not create latex conversion directory")
     icc_profile_in_temp = os.path.join(
         the_temp_dir, 'sRGB_IEC61966-2-1_black_scaled.icc')
     if not os.path.isfile(icc_profile_in_temp):
         shutil.copyfile(
             docassemble.base.functions.standard_template_filename(
                 'sRGB_IEC61966-2-1_black_scaled.icc'), icc_profile_in_temp)
     subprocess_arguments = [
         PANDOC_PATH, '--smart', '-M',
         'latextmpdir=' + os.path.join('latex_convert', ''), '-M',
         'pdfa=' + ('true' if self.pdfa else 'false')
     ]
     if len(yaml_to_use) > 0:
         subprocess_arguments.extend(yaml_to_use)
     if self.template_file is not None:
         subprocess_arguments.extend(['--template=%s' % self.template_file])
     if self.reference_file is not None:
         subprocess_arguments.extend(
             ['--reference-docx=%s' % self.reference_file])
     subprocess_arguments.extend(['-s', '-o', temp_outfile.name])
     subprocess_arguments.extend([temp_file.name])
     subprocess_arguments.extend(self.arguments)
     try:
         msg = subprocess.check_output(subprocess_arguments,
                                       cwd=the_temp_dir,
                                       stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as err:
         raise Exception("Failed to assemble file: " + unicode(err.output))
     finally:
         # The markdown input is no longer needed whether or not Pandoc
         # succeeded; removing it here avoids leaking it on failure.
         os.remove(temp_file.name)
     if msg:
         self.pandoc_message = msg
     if not os.path.exists(temp_outfile.name):
         raise IOError("Failed creating file: %s" % temp_outfile.name)
     if self.output_format in ('rtf', 'rtf to docx'):
         with open(temp_outfile.name) as the_file:
             file_contents = the_file.read()
         file_contents = docassemble.base.filter.rtf_filter(
             file_contents,
             metadata=metadata_as_dict,
             styles=get_rtf_styles(self.template_file),
             question=question)
         with open(temp_outfile.name, "wb") as the_file:
             the_file.write(file_contents)
         if self.output_format == 'rtf to docx':
             docx_outfile = tempfile.NamedTemporaryFile(prefix="datemp",
                                                        mode="wb",
                                                        suffix=".docx",
                                                        delete=False)
             # Only the name is needed; close our handle before the
             # converter writes to the path.
             docx_outfile.close()
             success = rtf_to_docx(temp_outfile.name, docx_outfile.name)
             if not success:
                 raise Exception("Could not convert RTF to DOCX.")
             temp_outfile = docx_outfile
     if self.output_filename is not None:
         shutil.copyfile(temp_outfile.name, self.output_filename)
     else:
         self.output_filename = temp_outfile.name
     self.output_content = None
     if self.output_format == 'pdf' and self.password:
         pdf_encrypt(self.output_filename, self.password)
Пример #4
0
def word_to_pdf(in_file,
                in_format,
                out_file,
                pdfa=False,
                password=None,
                update_refs=False,
                tagged=False):
    """Convert a word-processing file to PDF via ConvertAPI or LibreOffice.

    ``in_file`` is copied into a fresh temporary directory as
    ``file.<in_format>``.  When the configuration contains a
    'convertapi secret', ``convertapi_to_pdf`` performs the conversion
    (preceded by ``update_references`` when ``update_refs`` is true).
    Otherwise headless LibreOffice is used: plain ``--convert-to pdf`` for
    the default method without reference updating, and the
    ``Standard.Module1.ConvertToPdf`` macro in all other cases.  Up to five
    attempts are made, until ``file.pdf`` appears in the temporary directory.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the resulting PDF is copied to on success.
        pdfa: selects the 'pdfa' method for the LibreOffice macro.
        password: if truthy, ``pdf_encrypt`` is applied with this password.
        update_refs: update references before/while converting.
        tagged: selects the 'tagged' method (unless ``pdfa`` is set).

    Returns:
        True if a PDF was produced and the last attempt reported success;
        False otherwise.
    """
    max_tries = 5
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    tempdir = tempfile.mkdtemp()
    # try/finally guarantees the working directory is removed even when the
    # copy, the conversion or the encryption raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        use_libreoffice = daconfig.get('convertapi secret', None) is None
        subprocess_arguments = None
        if use_libreoffice:
            # The command line does not change between attempts.
            if method == 'default' and not update_refs:
                subprocess_arguments = [
                    LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf',
                    from_file
                ]
            else:
                subprocess_arguments = [
                    LIBREOFFICE_PATH, '--headless', '--invisible',
                    'macro:///Standard.Module1.ConvertToPdf(' + from_file +
                    ',' + to_file + ',' +
                    ('True' if update_refs else 'False') + ',' + method + ')'
                ]
        result = 1
        tries = 0
        while tries < max_tries:
            if use_libreoffice:
                initialize_libreoffice()
                result = subprocess.Popen(subprocess_arguments,
                                          cwd=tempdir).wait()
            else:
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    # Exception (not a bare except) so that Ctrl-C and
                    # SystemExit are not swallowed by the retry loop.
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            if os.path.isfile(to_file):
                break
            # No output file: treat as a failure regardless of exit status.
            result = 1
            tries += 1
            if tries < max_tries:
                # Only wait and announce a retry when one will actually happen.
                time.sleep(2 + tries * random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " +
                               repr(subprocess_arguments))
                else:
                    logmessage("Retrying convertapi")
        if result != 0:
            return False
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
Пример #5
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_refs=False, tagged=False, filename=None, retry=True):
    """Convert a word-processing file to PDF via ConvertAPI, CloudConvert or LibreOffice.

    ``in_file`` is copied into a fresh temporary directory as
    ``<filename>.<in_format>`` (``filename`` is sanitised with
    ``secure_filename`` and defaults to 'file').  The backend is chosen from
    the configuration: 'convertapi secret' first, then 'cloudconvert secret',
    otherwise the LibreOffice ``Standard.Module1.ConvertToPdf`` macro, which
    is run under the 'libreoffice' application lock with a 120 second timeout.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the resulting PDF is copied to on success.
        pdfa: selects the 'pdfa' method; also passed to ``cloudconvert_to_pdf``.
        password: password for the PDF.  It is handed to
            ``cloudconvert_to_pdf`` when that backend is used; otherwise
            ``pdf_encrypt`` is applied locally.
        update_refs: update references before/while converting.
        tagged: selects the 'tagged' method (unless ``pdfa`` is set).
        filename: base name for the working files.
        retry: if true, up to five attempts are made; otherwise one.

    Returns:
        True if a PDF was produced and the last attempt reported success;
        False otherwise.
    """
    if filename is None:
        filename = 'file'
    filename = docassemble.base.functions.secure_filename(filename)
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    num_tries = 5 if retry else 1
    tempdir = tempfile.mkdtemp()
    # try/finally guarantees the working directory is removed even when the
    # copy, the conversion or the encryption raises.
    try:
        from_file = os.path.join(tempdir, filename + "." + in_format)
        to_file = os.path.join(tempdir, filename + ".pdf")
        shutil.copyfile(in_file, from_file)
        if daconfig.get('convertapi secret', None) is not None:
            converter = 'convertapi'
        elif daconfig.get('cloudconvert secret', None) is not None:
            converter = 'cloudconvert'
        else:
            converter = 'libreoffice'
        use_libreoffice = converter == 'libreoffice'
        # The macro call is the same for every method and every attempt; only
        # the "update references" flag differs.
        subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.ConvertToPdf(' + from_file + ',' + to_file + ',' + ('True' if update_refs else 'False') + ',' + method + ')']
        # Tracked separately from ``password`` so that a cloudconvert retry
        # still receives the real password instead of a cleared value.
        encrypt_locally = bool(password)
        result = 1
        tries = 0
        while tries < num_tries:
            if converter == 'convertapi':
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            elif converter == 'cloudconvert':
                try:
                    cloudconvert_to_pdf(in_format, from_file, to_file, pdfa, password)
                    result = 0
                except Exception as err:
                    logmessage("Call to cloudconvert failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
                # cloudconvert was given the password, so no local encryption.
                encrypt_locally = False
            else:
                initialize_libreoffice()
                start_time = time.time()
                docassemble.base.functions.server.applock('obtain', 'libreoffice')
                # Release the lock even if launching LibreOffice raises
                # something other than a timeout (e.g. a missing binary).
                try:
                    logmessage("Obtained libreoffice lock after {:.4f} seconds.".format(time.time() - start_time))
                    try:
                        result = subprocess.run(subprocess_arguments, cwd=tempdir, timeout=120).returncode
                    except subprocess.TimeoutExpired:
                        logmessage("word_to_pdf: libreoffice took too long")
                        result = 1
                        # A timeout is not worth retrying: exhaust the budget.
                        tries = num_tries
                    logmessage("Finished libreoffice after {:.4f} seconds.".format(time.time() - start_time))
                finally:
                    docassemble.base.functions.server.applock('release', 'libreoffice')
            # Give the output file a short grace period (4 x 0.1 s) to appear.
            found = os.path.isfile(to_file)
            for _ in range(4):
                if found:
                    break
                time.sleep(0.1)
                found = os.path.isfile(to_file)
            if found:
                break
            result = 1
            tries += 1
            if tries < num_tries:
                time.sleep(tries*random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
                elif converter == 'convertapi':
                    logmessage("Retrying convertapi")
                else:
                    logmessage("Retrying cloudconvert")
        if result != 0:
            return False
        if encrypt_locally:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
Пример #6
0
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_refs=False, tagged=False):
    """Convert a word-processing file to PDF via ConvertAPI or LibreOffice.

    ``in_file`` is copied into a fresh temporary directory as
    ``file.<in_format>``.  When the configuration contains a
    'convertapi secret', ``convertapi_to_pdf`` performs the conversion
    (preceded by ``update_references`` when ``update_refs`` is true).
    Otherwise headless LibreOffice is used: plain ``--convert-to pdf`` for
    the default method without reference updating, and the
    ``Standard.Module1.ConvertToPdf`` macro in all other cases.  Up to five
    attempts are made, until ``file.pdf`` appears in the temporary directory.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the resulting PDF is copied to on success.
        pdfa: selects the 'pdfa' method for the LibreOffice macro.
        password: if truthy, ``pdf_encrypt`` is applied with this password.
        update_refs: update references before/while converting.
        tagged: selects the 'tagged' method (unless ``pdfa`` is set).

    Returns:
        True if a PDF was produced and the last attempt reported success;
        False otherwise.
    """
    max_tries = 5
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    tempdir = tempfile.mkdtemp()
    # try/finally guarantees the working directory is removed even when the
    # copy, the conversion or the encryption raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        use_libreoffice = daconfig.get('convertapi secret', None) is None
        subprocess_arguments = None
        if use_libreoffice:
            # The command line does not change between attempts.
            if method == 'default' and not update_refs:
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
            else:
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.ConvertToPdf(' + from_file + ',' + to_file + ',' + ('True' if update_refs else 'False') + ',' + method + ')']
        result = 1
        tries = 0
        while tries < max_tries:
            if use_libreoffice:
                initialize_libreoffice()
                result = subprocess.Popen(subprocess_arguments, cwd=tempdir).wait()
            else:
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    # Exception (not a bare except) so that Ctrl-C and
                    # SystemExit are not swallowed by the retry loop.
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            if os.path.isfile(to_file):
                break
            # No output file: treat as a failure regardless of exit status.
            result = 1
            tries += 1
            if tries < max_tries:
                # Only wait and announce a retry when one will actually happen.
                time.sleep(2 + tries*random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
                else:
                    logmessage("Retrying convertapi")
        if result != 0:
            return False
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
Пример #7
0
 def convert_to_file(self, question):
     """Run Pandoc on ``self.input_content`` and store the result in a file.

     The content is first passed through the docassemble filter matching
     ``self.output_format`` ('rtf', 'rtf to docx', 'docx', 'pdf' or 'tex'),
     written to a temporary markdown file and converted by Pandoc.  RTF
     output is post-processed with ``rtf_filter`` and, for 'rtf to docx',
     converted with ``rtf_to_docx``.  The result is copied to
     ``self.output_filename`` when that is set; otherwise
     ``self.output_filename`` is pointed at the temporary output file.

     Args:
         question: passed through to the docassemble filters.

     Raises:
         Exception: if the latex conversion directory cannot be created,
             Pandoc exits with an error, or the RTF to DOCX step fails.
         IOError: if Pandoc's output file does not exist afterwards.
     """
     # Flatten the metadata (a dict, or a list of dicts) into a single dict.
     metadata_as_dict = dict()
     if isinstance(self.metadata, dict):
         metadata_as_dict = self.metadata
     elif isinstance(self.metadata, list):
         for data in self.metadata:
             if isinstance(data, dict):
                 metadata_as_dict.update(data)
     # 'rtf to docx' is produced as RTF first and converted afterwards.
     if self.output_format == 'rtf to docx':
         self.output_extension = 'rtf'
     else:
         self.output_extension = self.output_format
     if self.output_format in ('rtf', 'rtf to docx') and self.template_file is None:
         self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.rtf')
     if self.output_format == 'docx' and self.reference_file is None:
         self.reference_file = docassemble.base.functions.standard_template_filename('Legal-Template.docx')
     if self.output_format in ('pdf', 'tex') and self.template_file is None:
         self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.tex')
     yaml_to_use = list()
     if self.output_format in ('rtf', 'rtf to docx'):
         self.input_content = docassemble.base.filter.rtf_prefilter(self.input_content, metadata=metadata_as_dict)
     if self.output_format == 'docx':
         self.input_content = docassemble.base.filter.docx_filter(self.input_content, metadata=metadata_as_dict, question=question)
     if self.output_format in ('pdf', 'tex'):
         if len(self.initial_yaml) == 0:
             standard_file = docassemble.base.functions.standard_template_filename('Legal-Template.yml')
             if standard_file is not None:
                 self.initial_yaml.append(standard_file)
         yaml_to_use.extend(yaml_file for yaml_file in self.initial_yaml if yaml_file is not None)
         yaml_to_use.extend(yaml_file for yaml_file in self.additional_yaml if yaml_file is not None)
         self.input_content = docassemble.base.filter.pdf_filter(self.input_content, metadata=metadata_as_dict, question=question)
     # Content with no non-whitespace character is replaced by an empty
     # bold command before being handed to Pandoc.
     if not re.search(r'[^\s]', self.input_content):
         self.input_content = u"\\textbf{}\n"
     # Pandoc reads its input from a file; delete=False keeps the file
     # around after close() so Pandoc can open it by name.
     if PY3:
         temp_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".md", delete=False, encoding='utf-8')
         temp_file.write(self.input_content)
     else:
         temp_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".md", delete=False)
         with open(temp_file.name, 'w', encoding='utf-8') as fp:
             fp.write(self.input_content)
     temp_file.close()
     temp_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix="." + str(self.output_extension), delete=False)
     temp_outfile.close()
     the_temp_dir = tempfile.gettempdir()
     latex_conversion_directory = os.path.join(the_temp_dir, 'conv')
     if not os.path.isdir(latex_conversion_directory):
         os.makedirs(latex_conversion_directory)
     if not os.path.isdir(latex_conversion_directory):
         raise Exception("Could not create latex conversion directory")
     icc_profile_in_temp = os.path.join(the_temp_dir, 'sRGB_IEC61966-2-1_black_scaled.icc')
     if not os.path.isfile(icc_profile_in_temp):
         shutil.copyfile(docassemble.base.functions.standard_template_filename('sRGB_IEC61966-2-1_black_scaled.icc'), icc_profile_in_temp)
     subprocess_arguments = [PANDOC_PATH, PANDOC_ENGINE]
     if PANDOC_OLD:
         subprocess_arguments.append("--smart")
     subprocess_arguments.extend(['-M', 'latextmpdir=' + os.path.join('.', 'conv'), '-M', 'pdfa=' + ('true' if self.pdfa else 'false')])
     if len(yaml_to_use) > 0:
         subprocess_arguments.extend(yaml_to_use)
     if self.template_file is not None:
         subprocess_arguments.extend(['--template=%s' % self.template_file])
     if self.reference_file is not None:
         # The option name depends on the PANDOC_OLD flag.
         if PANDOC_OLD:
             subprocess_arguments.extend(['--reference-docx=%s' % self.reference_file])
         else:
             subprocess_arguments.extend(['--reference-doc=%s' % self.reference_file])
     subprocess_arguments.extend(['-s', '-o', temp_outfile.name])
     subprocess_arguments.extend([temp_file.name])
     subprocess_arguments.extend(self.arguments)
     try:
         msg = subprocess.check_output(subprocess_arguments, cwd=the_temp_dir, stderr=subprocess.STDOUT).decode('utf-8', 'ignore')
     except subprocess.CalledProcessError as err:
         raise Exception("Failed to assemble file: " + err.output.decode())
     finally:
         # The markdown input is no longer needed whether or not Pandoc
         # succeeded; removing it here avoids leaking it on failure.
         os.remove(temp_file.name)
     if msg:
         self.pandoc_message = msg
     if not os.path.exists(temp_outfile.name):
         raise IOError("Failed creating file: %s" % temp_outfile.name)
     if self.output_format in ('rtf', 'rtf to docx'):
         with open(temp_outfile.name, encoding='utf-8') as the_file:
             file_contents = the_file.read()
         file_contents = docassemble.base.filter.rtf_filter(file_contents, metadata=metadata_as_dict, styles=get_rtf_styles(self.template_file), question=question)
         with open(temp_outfile.name, "wb") as the_file:
             the_file.write(bytearray(file_contents, encoding='utf-8'))
         if self.output_format == 'rtf to docx':
             docx_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False)
             # Only the name is needed; close our handle before the
             # converter writes to the path.
             docx_outfile.close()
             success = rtf_to_docx(temp_outfile.name, docx_outfile.name)
             if not success:
                 raise Exception("Could not convert RTF to DOCX.")
             temp_outfile = docx_outfile
     if self.output_filename is not None:
         shutil.copyfile(temp_outfile.name, self.output_filename)
     else:
         self.output_filename = temp_outfile.name
     self.output_content = None
     if self.output_format == 'pdf' and self.password:
         pdf_encrypt(self.output_filename, self.password)