def stage_file(name, composite_file_path, is_binary=False):
    dp = composite_file_path['path']
    path, is_url = to_path(dp)
    if is_url:
        dataset.path = path
        dp = path
    auto_decompress = composite_file_path.get('auto_decompress', True)
    if auto_decompress and not datatype.composite_type and CompressedFile.can_decompress(dp):
        # It isn't an explicitly composite datatype, so these are just extra files to attach
        # as composite data. It'd be better if Galaxy was communicating this to the tool
        # a little more explicitly so we didn't need to dispatch on the datatype and so we
        # could attach arbitrary extra composite data to an existing composite datatype if
        # need be? Perhaps that would be a mistake though.
        CompressedFile(dp).extract(files_path)
    else:
        tmpdir = output_adjacent_tmpdir(output_path)
        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
        sniff.handle_composite_file(
            datatype,
            dp,
            files_path,
            name,
            is_binary,
            tmpdir,
            tmp_prefix,
            composite_file_path,
        )
def stage_file(name, composite_file_path, is_binary=False):
    dp = composite_file_path['path']
    path, is_url = to_path(dp)
    if is_url:
        dataset.path = path
        dp = path
    auto_decompress = composite_file_path.get('auto_decompress', True)
    if auto_decompress and not datatype.composite_type and CompressedFile.can_decompress(dp):
        # It isn't an explicitly composite datatype, so these are just extra files to attach
        # as composite data. It'd be better if Galaxy was communicating this to the tool
        # a little more explicitly so we didn't need to dispatch on the datatype and so we
        # could attach arbitrary extra composite data to an existing composite datatype if
        # need be? Perhaps that would be a mistake though.
        CompressedFile(dp).extract(files_path)
    else:
        if not is_binary:
            tmpdir = output_adjacent_tmpdir(output_path)
            tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
            if composite_file_path.get('space_to_tab'):
                sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
            else:
                sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
        file_output_path = os.path.join(files_path, name)
        shutil.move(dp, file_output_path)
        # groom the dataset file content if required by the corresponding datatype definition
        if datatype.dataset_content_needs_grooming(file_output_path):
            datatype.groom_dataset_content(file_output_path)
def assert_safety(self, path, expected_to_be_safe):
    temp_dir = tempfile.mkdtemp()
    try:
        if expected_to_be_safe:
            CompressedFile(path).extract(temp_dir)
        else:
            with self.assertRaises(Exception):
                CompressedFile(path).extract(temp_dir)
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
def _decompress_target(upload_config, target):
    elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
    # By default Galaxy will check for a directory with a single file and interpret that
    # as the new root for expansion; this is a good user experience for uploading single
    # files in an archive but not great from an API perspective. Allow disabling by setting
    # fuzzy_root to False to literally interpret the target.
    fuzzy_root = target.get("fuzzy_root", True)
    temp_directory = os.path.abspath(tempfile.mkdtemp(prefix=elements_from_name, dir="."))
    cf = CompressedFile(elements_from_path)
    result = cf.extract(temp_directory)
    return result if fuzzy_root else temp_directory
def groom_dataset_content(self, file_name):
    """This method is called by Galaxy to extract files contained in a composite data type."""
    # XXX Is this the right place to extract files? Should this step not be a cleaning step instead?
    # Could extracting be done earlier and composite files declared as files contained inside the archive
    # instead of the archive itself?

    # extract basename and folder of the current file whose content has to be groomed
    basename = os.path.basename(file_name)
    output_path = os.path.dirname(file_name)
    # extract archive if the file corresponds to the ISA archive
    if basename == ISA_ARCHIVE_NAME:
        # Perform extraction.
        # For some ZIP files, CompressedFile.extract() extracts the file inside <output_folder>/<file_name>
        # instead of outputting it inside <output_folder>. So we first create a temporary folder, extract
        # inside it, and move the content to its final destination.
        temp_folder = tempfile.mkdtemp()
        CompressedFile(file_name).extract(temp_folder)
        shutil.rmtree(output_path)
        extracted_files = os.listdir(temp_folder)
        logger.debug(' '.join(extracted_files))
        if len(extracted_files) == 0:
            os.makedirs(output_path)
            shutil.rmtree(temp_folder)
        elif len(extracted_files) == 1 and os.path.isdir(os.path.join(temp_folder, extracted_files[0])):
            shutil.move(os.path.join(temp_folder, extracted_files[0]), output_path)
            shutil.rmtree(temp_folder)
        else:
            shutil.move(temp_folder, output_path)
def _decompress_target(upload_config, target):
    elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
    temp_directory = tempfile.mkdtemp(prefix=elements_from_name, dir=".")
    decompressed_directory = CompressedFile(elements_from_path).extract(temp_directory)
    return decompressed_directory
def assert_safety(path, expected_to_be_safe=False):
    d = tempfile.mkdtemp()
    is_safe = True
    try:
        CompressedFile(path).extract(d)
    except Exception:
        is_safe = False
    assert is_safe is expected_to_be_safe
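A minimal usage sketch of the helper above; the archive paths are hypothetical and only illustrate the intent: a traversal-safe archive should extract cleanly, while one whose members escape the destination directory should raise inside CompressedFile.extract() and be reported as unsafe.

# Hypothetical test-data paths, shown only for illustration.
# An archive whose members stay under the extraction directory is expected to pass;
# one containing entries like "../../etc/passwd" is expected to raise during extract().
assert_safety("test-data/safe.tar.gz", expected_to_be_safe=True)
assert_safety("test-data/unsafe_path_traversal.tar.gz", expected_to_be_safe=False)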
def import_archive(archive_path, app, user):
    dest_parent = mkdtemp()
    dest_dir = CompressedFile(archive_path).extract(dest_parent)
    new_history = None
    model_store = store.get_import_model_store_for_directory(dest_dir, app=app, user=user)
    with model_store.target_history(default_history=None) as new_history:
        model_store.perform_import(new_history)
    shutil.rmtree(dest_parent)
    return new_history
def _decompress_target(target):
    elements_from_name, elements_from_path = _has_src_to_path(target)
    temp_directory = tempfile.mkdtemp(prefix=elements_from_name, dir=".")
    decompressed_directory = CompressedFile(elements_from_path).extract(temp_directory)
    return decompressed_directory