class PulsarComputeEnvironment(ComputeEnvironment):
    """Compute environment that rewrites local job paths through a ``PathMapper``.

    Wraps the job wrapper's default compute environment and exposes the
    directories read from ``remote_job_config`` together with input, output,
    version and unstructured paths rewritten by the path mapper.
    """

    def __init__(self, pulsar_client, job_wrapper, remote_job_config):
        """Capture local paths and the remote directory configuration.

        ``remote_job_config`` is read for the keys ``configs_directory``,
        ``working_directory``, ``tools_directory`` and
        ``system_properties["separator"]``.
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()
        # Cache of unstructured path -> rewritten path, filled by rewriter().
        self.unstructured_path_rewrites = {}
        # job_wrapper.prepare is going to expunge the job backing the following
        # computations, so precalculate these paths.
        self._wrapper_input_paths = self.local_path_config.input_paths()
        self._wrapper_output_paths = self.local_path_config.output_paths()
        self.path_mapper = PathMapper(
            pulsar_client,
            remote_job_config,
            self.local_path_config.working_directory(),
        )
        self._config_directory = remote_job_config["configs_directory"]
        self._working_directory = remote_job_config["working_directory"]
        self._sep = remote_job_config["system_properties"]["separator"]
        self._tool_dir = remote_job_config["tools_directory"]
        # Prefer the mapper's rewrite of the version path; fall back to the
        # local one when the mapper returns nothing truthy.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite(version_path)
        if new_version_path:
            version_path = new_version_path
        self._version_path = version_path

    def output_paths(self):
        """Return the precalculated output paths paired with their rewrites."""
        rewrite = self.path_mapper.remote_output_path_rewrite
        return [
            self._dataset_path(local_output_path, rewrite(str(local_output_path)))
            for local_output_path in self._wrapper_output_paths
        ]

    def input_paths(self):
        """Return the precalculated input paths paired with their rewrites."""
        # This will over-copy in some cases. For instance in the case of task
        # splitting, this input will be copied even though only the work dir
        # input will actually be used.
        rewrite = self.path_mapper.remote_input_path_rewrite
        return [
            self._dataset_path(local_input_path, rewrite(str(local_input_path)))
            for local_input_path in self._wrapper_input_paths
        ]

    def _dataset_path(self, local_dataset_path, remote_path):
        """Attach ``remote_path`` and its derived ``_files`` path to a dataset path.

        When ``remote_path`` is falsy the extra files path passed on is None.
        """
        remote_extra_files_path = None
        if remote_path:
            # NOTE(review): this drops the last len(".dat") characters without
            # checking the suffix - presumably remote paths always end in
            # ".dat"; confirm before relying on it for other names.
            remote_extra_files_path = "%s_files" % remote_path[0:-len(".dat")]
        return local_dataset_path.with_path_for_job(remote_path, remote_extra_files_path)

    def working_directory(self):
        """Return ``remote_job_config["working_directory"]``."""
        return self._working_directory

    def config_directory(self):
        """Return ``remote_job_config["configs_directory"]``."""
        return self._config_directory

    def new_file_path(self):
        """Return the working directory as the location for new files."""
        return self.working_directory()  # Problems with doing this?

    def sep(self):
        """Return the separator from the remote system properties."""
        return self._sep

    def version_path(self):
        """Return the version path (rewritten by the path mapper when possible)."""
        return self._version_path

    def rewriter(self, parameter_value):
        """Return the rewritten form of ``parameter_value``, or the value itself.

        Previously computed rewrites are served from
        ``self.unstructured_path_rewrites``; new ones are requested from the
        path mapper and cached.
        """
        unstructured_path_rewrites = self.unstructured_path_rewrites
        if parameter_value in unstructured_path_rewrites:
            # Path previously mapped, use previous mapping.
            return unstructured_path_rewrites[parameter_value]
        # dict.values() is available on both Python 2 and Python 3, whereas
        # itervalues() only exists on Python 2.
        if parameter_value in unstructured_path_rewrites.values():
            # Path is a rewritten remote path (this might never occur,
            # consider dropping check...)
            return parameter_value

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite(parameter_value)
        if rewrite:
            unstructured_path_rewrites.update(new_unstructured_path_rewrites)
            return rewrite
        # Did not need to rewrite, use original path or value.
        return parameter_value

    def unstructured_path_rewriter(self):
        """Return the callable used to rewrite unstructured paths."""
        return self.rewriter

    def tool_directory(self):
        """Return ``remote_job_config["tools_directory"]``."""
        return self._tool_dir
class PulsarComputeEnvironment(ComputeEnvironment):
    """Compute environment whose paths are translated by a ``PathMapper``.

    Built from a job wrapper's default compute environment plus the
    ``remote_job_config`` mapping; input, output, version and unstructured
    paths are passed through the path mapper before being handed out.
    """

    def __init__(self, pulsar_client, job_wrapper, remote_job_config):
        """Record local paths and read the remote directory settings.

        Keys read from ``remote_job_config``: ``configs_directory``,
        ``working_directory``, ``tools_directory`` and
        ``system_properties["separator"]``.
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()
        # Maps an unstructured path to its rewrite; populated by rewriter().
        self.unstructured_path_rewrites = {}
        # job_wrapper.prepare is going to expunge the job backing the following
        # computations, so precalculate these paths.
        self._wrapper_input_paths = self.local_path_config.input_paths()
        self._wrapper_output_paths = self.local_path_config.output_paths()
        self.path_mapper = PathMapper(
            pulsar_client,
            remote_job_config,
            self.local_path_config.working_directory(),
        )
        self._config_directory = remote_job_config["configs_directory"]
        self._working_directory = remote_job_config["working_directory"]
        self._sep = remote_job_config["system_properties"]["separator"]
        self._tool_dir = remote_job_config["tools_directory"]
        # Use the mapper's version path when it supplies one, otherwise keep
        # the local version path.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite(version_path)
        if new_version_path:
            version_path = new_version_path
        self._version_path = version_path

    def output_paths(self):
        """Return each precalculated output path combined with its rewrite."""
        results = []
        for local_output_path in self._wrapper_output_paths:
            remote_path = self.path_mapper.remote_output_path_rewrite(str(local_output_path))
            results.append(self._dataset_path(local_output_path, remote_path))
        return results

    def input_paths(self):
        """Return each precalculated input path combined with its rewrite."""
        results = []
        for local_input_path in self._wrapper_input_paths:
            # This will over-copy in some cases. For instance in the case of task
            # splitting, this input will be copied even though only the work dir
            # input will actually be used.
            remote_path = self.path_mapper.remote_input_path_rewrite(str(local_input_path))
            results.append(self._dataset_path(local_input_path, remote_path))
        return results

    def _dataset_path(self, local_dataset_path, remote_path):
        """Return ``local_dataset_path`` re-targeted at ``remote_path``.

        The extra files path handed to ``with_path_for_job`` is derived from
        ``remote_path``; it is None when ``remote_path`` is falsy.
        """
        remote_extra_files_path = None
        if remote_path:
            # NOTE(review): the slice removes the final len(".dat") characters
            # whatever they are - presumably every remote dataset path ends in
            # ".dat"; verify against the path mapper.
            remote_extra_files_path = "%s_files" % remote_path[0:-len(".dat")]
        return local_dataset_path.with_path_for_job(remote_path, remote_extra_files_path)

    def working_directory(self):
        """Return the working directory taken from ``remote_job_config``."""
        return self._working_directory

    def config_directory(self):
        """Return the configs directory taken from ``remote_job_config``."""
        return self._config_directory

    def new_file_path(self):
        """Return the working directory, which doubles as the new file path."""
        return self.working_directory()  # Problems with doing this?

    def sep(self):
        """Return the path separator taken from the remote system properties."""
        return self._sep

    def version_path(self):
        """Return the version path chosen in ``__init__``."""
        return self._version_path

    def rewriter(self, parameter_value):
        """Rewrite an unstructured path, returning the input when no rewrite applies.

        Results from the path mapper are cached in
        ``self.unstructured_path_rewrites`` and reused on later calls.
        """
        unstructured_path_rewrites = self.unstructured_path_rewrites
        if parameter_value in unstructured_path_rewrites:
            # Path previously mapped, use previous mapping.
            return unstructured_path_rewrites[parameter_value]
        # values() works on Python 2 and Python 3; itervalues() was removed
        # from dict in Python 3.
        if parameter_value in unstructured_path_rewrites.values():
            # Path is a rewritten remote path (this might never occur,
            # consider dropping check...)
            return parameter_value

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite(parameter_value)
        if not rewrite:
            # Did not need to rewrite, use original path or value.
            return parameter_value
        unstructured_path_rewrites.update(new_unstructured_path_rewrites)
        return rewrite

    def unstructured_path_rewriter(self):
        """Return the bound ``rewriter`` method."""
        return self.rewriter

    def tool_directory(self):
        """Return the tools directory taken from ``remote_job_config``."""
        return self._tool_dir
class PulsarComputeEnvironment(ComputeEnvironment):
    """Compute environment that rewrites dataset paths through a ``PathMapper``.

    Each ``*_rewrite`` method first consults the job wrapper's default compute
    environment (where applicable) and then asks the path mapper for the
    path to use instead. Rewrites for unstructured paths, input extra files
    and input metadata are recorded in the ``path_rewrites_*`` dictionaries.
    """

    def __init__(self, pulsar_client, job_wrapper, remote_job_config):
        """Set up the path mapper and read the remote directory settings.

        Keys read from ``remote_job_config``: ``configs_directory``,
        ``working_directory``, ``tools_directory`` and
        ``system_properties["separator"]``.
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()

        # Local path -> rewritten path, recorded as rewrites are computed.
        self.path_rewrites_unstructured = {}
        self.path_rewrites_input_extra = {}
        self.path_rewrites_input_metadata = {}

        # job_wrapper.prepare is going to expunge the job backing the following
        # computations, so precalculate these paths.
        self.path_mapper = PathMapper(
            pulsar_client,
            remote_job_config,
            self.local_path_config.working_directory(),
        )
        self._config_directory = remote_job_config["configs_directory"]
        self._working_directory = remote_job_config["working_directory"]
        self._sep = remote_job_config["system_properties"]["separator"]
        self._tool_dir = remote_job_config["tools_directory"]
        # Keep the local version path unless the mapper supplies a rewrite.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite(version_path)
        if new_version_path:
            version_path = new_version_path
        self._version_path = version_path

    def output_names(self):
        """Return the job wrapper's output basenames."""
        # Maybe this should use the path mapper, but the path mapper just uses basenames
        return self.job_wrapper.get_output_basenames()

    def input_path_rewrite(self, dataset):
        """Return the path mapper's rewrite of ``dataset``'s input path.

        The local compute environment's rewrite is used as the starting path
        when it is not None, otherwise ``dataset.file_name``.
        """
        local_input_path = self.local_path_config.input_path_rewrite(dataset)
        if local_input_path is None:
            local_input_path = dataset.file_name
        return self.path_mapper.remote_input_path_rewrite(local_input_path)

    def output_path_rewrite(self, dataset):
        """Return the path mapper's rewrite of ``dataset``'s output path.

        The local compute environment's rewrite is used as the starting path
        when it is not None, otherwise ``dataset.file_name``.
        """
        local_output_path = self.local_path_config.output_path_rewrite(dataset)
        if local_output_path is None:
            local_output_path = dataset.file_name
        return self.path_mapper.remote_output_path_rewrite(local_output_path)

    @staticmethod
    def _remote_extra_files_path(remote_path):
        """Derive the ``_files`` path that accompanies ``remote_path``."""
        # NOTE(review): the slice drops the last len(".dat") characters
        # without checking the suffix - presumably dataset paths always end
        # in ".dat"; confirm against the path mapper.
        return "%s_files" % remote_path[0:-len(".dat")]

    def input_extra_files_rewrite(self, dataset):
        """Return the rewritten extra files path for an input dataset.

        The result is recorded in ``self.path_rewrites_input_extra`` keyed by
        ``dataset.extra_files_path``. Returns None, recording nothing, when
        the dataset path itself has no rewrite.
        """
        input_path_rewrite = self.input_path_rewrite(dataset)
        if not input_path_rewrite:
            # The mapper result can be falsy (input_metadata_rewrite treats
            # it that way); slicing it would raise TypeError.
            return None
        remote_extra_files_path_rewrite = self._remote_extra_files_path(input_path_rewrite)
        self.path_rewrites_input_extra[dataset.extra_files_path] = remote_extra_files_path_rewrite
        return remote_extra_files_path_rewrite

    def output_extra_files_rewrite(self, dataset):
        """Return the rewritten extra files path for an output dataset.

        Returns None when the dataset path itself has no rewrite.
        """
        output_path_rewrite = self.output_path_rewrite(dataset)
        if not output_path_rewrite:
            # Same guard as for inputs: nothing to derive without a rewrite.
            return None
        return self._remote_extra_files_path(output_path_rewrite)

    def input_metadata_rewrite(self, dataset, metadata_val):
        """Return the rewrite of a metadata file path, or None if there is none.

        Successful rewrites are logged and recorded in
        ``self.path_rewrites_input_metadata``. ``dataset`` is not used.
        """
        # May technically be incorrect to not pass through local_path_config.input_metadata_rewrite
        # first but that adds untested logic that wouldn't ever be used.
        remote_input_path = self.path_mapper.remote_input_path_rewrite(
            metadata_val,
            client_input_path_type=CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH,
        )
        if not remote_input_path:
            # No rewrite...
            return None
        log.info("input_metadata_rewrite is %s from %s", remote_input_path, metadata_val)
        self.path_rewrites_input_metadata[metadata_val] = remote_input_path
        return remote_input_path

    def unstructured_path_rewrite(self, parameter_value):
        """Return the rewrite of an unstructured path, or None if not needed.

        Rewrites are cached in ``self.path_rewrites_unstructured`` and reused
        on later calls with the same value.
        """
        path_rewrites_unstructured = self.path_rewrites_unstructured
        if parameter_value in path_rewrites_unstructured:
            # Path previously mapped, use previous mapping.
            return path_rewrites_unstructured[parameter_value]

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite(parameter_value)
        if not rewrite:
            # Did not need to rewrite, use original path or value.
            return None
        path_rewrites_unstructured.update(new_unstructured_path_rewrites)
        return rewrite

    def working_directory(self):
        """Return ``remote_job_config["working_directory"]``."""
        return self._working_directory

    def config_directory(self):
        """Return ``remote_job_config["configs_directory"]``."""
        return self._config_directory

    def new_file_path(self):
        """Return the working directory as the location for new files."""
        return self.working_directory()  # Problems with doing this?

    def sep(self):
        """Return the separator from the remote system properties."""
        return self._sep

    def version_path(self):
        """Return the version path chosen in ``__init__``."""
        return self._version_path

    def tool_directory(self):
        """Return ``remote_job_config["tools_directory"]``."""
        return self._tool_dir

    def home_directory(self):
        """Return None; not implemented yet."""
        # TODO: revisit and implement this, won't break anything working in the
        # meantime.
        return None

    def tmp_directory(self):
        """Return None; not implemented yet."""
        # TODO: revisit and implement this, won't break anything working in the
        # meantime.
        return None

    def galaxy_url(self):
        """Return the ``galaxy_infrastructure_url`` destination setting."""
        return self.job_wrapper.get_destination_configuration("galaxy_infrastructure_url")