def extract_files_to_process(options, company_file):
    """Unpack a company file archive and index the files found inside it.

    The archive named by ``company_file.file_name`` is copied from
    ``company_file.file_url`` into the local cache when it is not there yet,
    and then extracted into the working folder of the company file. An
    existing, non-empty working folder is reused as-is unless
    ``options["force_download"]`` is truthy, in which case the folder is
    cleaned and the archive is extracted again.

    For "ITR" and "DFP" documents every entry of the working folder is
    matched (case-insensitively, first match wins) against RE_FILE_BY_XML,
    RE_FILE_BY_ITR and RE_FILE_BY_DFP, in that order:

    * XML entries are indexed under their base name;
    * nested ITR / DFP archives are extracted into ``itr_content/`` /
      ``dfp_content/`` below the working folder, and each extracted entry is
      indexed under ``itr/<base name>`` / ``dfp/<base name>``.

    :param options: mapping of runtime options; also handed to every
        storage helper.
    :param company_file: object providing ``file_name``, ``file_url`` and
        ``doc_type`` (plus whatever the path helpers read from it).
    :return: dict mapping the keys described above to the corresponding
        entry; empty for any other document type.
    """
    must_refresh = options.get("force_download", False)

    cache_dir = _doc_local_base_path(options, company_file)
    archive_path = "{0}/{1}".format(cache_dir, company_file.file_name)

    # Fetch the archive only when the local cache does not hold it yet.
    if not exists(options, archive_path):
        copy_file(options, company_file.file_url, archive_path)

    work_dir = _doc_local_working_base_path(options, company_file)

    if not exists(options, work_dir):
        # NOTE(review): _doc_local_working_base_path already calls mkdirs on
        # this path, so this branch is presumably a safety net - confirm.
        mkdirs(options, work_dir)
        entries = extract_zip(options, archive_path, work_dir)
    elif must_refresh:
        # Forced refresh: wipe the working folder before extracting again.
        delete_all(options, work_dir)
        entries = extract_zip(options, archive_path, work_dir)
    else:
        entries = listdir(options, work_dir)
        if not entries:
            # Nothing was extracted previously, so do it now.
            mkdirs(options, work_dir)
            entries = extract_zip(options, archive_path, work_dir)

    found = {}
    if company_file.doc_type not in ["ITR", "DFP"]:
        return found

    # (pattern, destination folder template, result key template) for the
    # archives that may be nested inside the main one.
    nested_archives = (
        (RE_FILE_BY_ITR, "{0}/itr_content/", "itr/{}"),
        (RE_FILE_BY_DFP, "{0}/dfp_content/", "dfp/{}"),
    )

    for entry in entries:
        if re.match(RE_FILE_BY_XML, entry, re.IGNORECASE):
            found[ntpath.basename(entry)] = entry
            continue

        for pattern, folder_template, key_template in nested_archives:
            if re.match(pattern, entry, re.IGNORECASE):
                destination = folder_template.format(work_dir)
                # The nested archive itself is left in the working folder
                # after it has been extracted.
                for inner in extract_zip(options, entry, destination):
                    key = key_template.format(ntpath.basename(inner))
                    found[key] = inner
                break

    return found
def _doc_local_working_base_path(options, company_file):
    """Return the working folder of a company file, creating it if needed.

    The path is built as
    ``<local base dir>/working/ccvm_<ccvm>/<doc_type>/date_<YYYYMMDD>_<version>``
    with every "." replaced by "_". The returned path carries no trailing
    slash.

    :param options: runtime options, handed to ``get_local_base_dir`` and
        ``mkdirs``.
    :param company_file: object providing ``ccvm``, ``doc_type``,
        ``fiscal_date`` (must offer ``.date()``) and ``version``.
    :return: the working folder path as a string.
    """
    template = "{0}/working/ccvm_{1}/{2}/date_{3:%Y%m%d}_{4}"
    raw_path = template.format(
        get_local_base_dir(options),
        company_file.ccvm,
        company_file.doc_type,
        company_file.fiscal_date.date(),
        company_file.version
    )

    # NOTE(review): the replacement runs over the whole string, so dots in
    # the local base dir are rewritten too - presumably only the version
    # was meant to be sanitised; confirm before changing.
    working_path = raw_path.replace(".", "_")
    _logger.debug("Doc base path: {}".format(working_path))

    # Make sure the folder is there before handing the path to callers.
    mkdirs(options, "{}/".format(working_path))
    return working_path