示例#1
0
def extract_files_to_process(options, company_file):
    """Unpack a company file archive and index the files found inside it.

    The archive is copied into the local cache when it is not there yet and
    is then extracted into the company file's working folder. A working
    folder that already has content is reused as-is, unless the
    ``force_download`` option is set, in which case the folder is wiped and
    the archive is extracted again.

    Returns a dict of the files available for processing. It is only
    populated for ``ITR`` and ``DFP`` documents:

    * entries matching ``RE_FILE_BY_XML`` are keyed by their base name;
    * archives matching ``RE_FILE_BY_ITR`` / ``RE_FILE_BY_DFP`` are extracted
      into ``itr_content/`` / ``dfp_content/`` under the working folder and
      their contents are keyed as ``itr/<name>`` / ``dfp/<name>``.

    For any other document type the dict is empty.
    """
    force_download = options.get("force_download", False)

    # Make sure the archive itself is available in the local cache.
    # NOTE(review): ``force_download`` does not trigger a new copy of the
    # archive here, it only forces a fresh extraction - confirm intended.
    cache_dir = _doc_local_base_path(options, company_file)
    archive_path = "{0}/{1}".format(cache_dir, company_file.file_name)
    if not exists(options, archive_path):
        copy_file(options, company_file.file_url, archive_path)

    work_dir = _doc_local_working_base_path(options, company_file)

    # Decide whether the working folder can be reused or must be (re)filled
    extracted = None
    if not exists(options, work_dir):
        mkdirs(options, work_dir)
        needs_extraction = True
    elif force_download:
        # Clean the working folder of the company file before extracting
        delete_all(options, work_dir)
        needs_extraction = True
    else:
        extracted = listdir(options, work_dir)
        # An empty working folder is treated like a missing one
        needs_extraction = not extracted
        if needs_extraction:
            mkdirs(options, work_dir)

    if needs_extraction:
        extracted = extract_zip(options, archive_path, work_dir)

    collected = {}

    if company_file.doc_type not in ["ITR", "DFP"]:
        return collected

    def _unpack_nested(nested_archive, dest_template, key_template):
        # Extract a nested archive and register every file it contains.
        # NOTE(review): the original code carried an unfinished comment about
        # deleting the nested archive once unzipped; no deletion is done.
        dest_folder = dest_template.format(work_dir)
        for inner_file in extract_zip(options, nested_archive, dest_folder):
            inner_name = ntpath.basename(inner_file)
            collected[key_template.format(inner_name)] = inner_file

    for entry in extracted:
        if re.match(RE_FILE_BY_XML, entry, re.IGNORECASE):
            collected[ntpath.basename(entry)] = entry
        elif re.match(RE_FILE_BY_ITR, entry, re.IGNORECASE):
            _unpack_nested(entry, "{0}/itr_content/", "itr/{}")
        elif re.match(RE_FILE_BY_DFP, entry, re.IGNORECASE):
            _unpack_nested(entry, "{0}/dfp_content/", "dfp/{}")

    return collected
示例#2
0
def _doc_local_working_base_path(options, company_file):
    """Build, create and return the local working folder of a company file.

    The folder is laid out as
    ``<base dir>/working/ccvm_<ccvm>/<doc type>/date_<YYYYMMDD>_<version>``.

    Dots in the company-file-specific part (for instance in the version) are
    replaced with underscores. The base directory returned by
    ``get_local_base_dir`` is used verbatim, so a base directory that contains
    dots (``./data``, ``/home/user/.cache`` ...) still points to the
    configured location.

    The returned path has no trailing slash.
    """
    base_dir = get_local_base_dir(options)

    # Only sanitize the part derived from the company file; applying the
    # replacement to the whole path would also rewrite the base directory.
    relative_path = "working/ccvm_{0}/{1}/date_{2:%Y%m%d}_{3}".\
        format(company_file.ccvm,
               company_file.doc_type,
               company_file.fiscal_date.date(),
               company_file.version).replace(".", "_")

    path = "{0}/{1}".format(base_dir, relative_path)

    _logger.debug("Doc base path: {}".format(path))

    # Make sure the path exists
    mkdirs(options, "{}/".format(path))

    return path