def stderr(self, value):
    """
    Validate `value` and store it as the task's `stderr` attribute
    (kept in `self._stderr`).

    :arguments:
        :value: string to store

    :raises:
        :ree.TypeError: if `value` is not a `basestring` instance
    """
    if not isinstance(value, basestring):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='stderr',
                            expected_type=basestring,
                            actual_type=type(value))

    self._stderr = value
def tag(self, value):
    """
    Store `value` in `self._tag` once it has been confirmed to be a string.

    :arguments:
        :value: the tag to assign; must be a `basestring` instance

    :raises:
        :ree.TypeError: (entity 'tag') when `value` is not a string
    """
    if isinstance(value, basestring):
        self._tag = value
        return

    raise ree.TypeError(entity='tag',
                        expected_type=basestring,
                        actual_type=type(value))
def path(self, value):
    """
    Type-check `value` and remember it as the task's path (`self._path`).

    :arguments:
        :value: the path to assign; must be a `basestring` instance

    :raises:
        :ree.TypeError: (entity 'path') when `value` is not a string
    """
    is_string = isinstance(value, basestring)

    if not is_string:
        found = type(value)
        raise ree.TypeError(entity='path', expected_type=basestring,
                            actual_type=found)

    self._path = value
def exit_code(self, value):
    """
    Record the task's exit code in `self._exit_code`.

    :arguments:
        :value: the exit code; must be an `int` instance

    :raises:
        :ree.TypeError: (entity 'exit_code') when `value` is not an int
    """
    if isinstance(value, int):
        self._exit_code = value
    else:
        raise ree.TypeError(entity='exit_code',
                            expected_type=int,
                            actual_type=type(value))
def executable(self, value):
    """
    Validate `value` and store it as the task's executable
    (`self._executable`).

    :arguments:
        :value: a string, or a list whose first element is a string; when
                a list is given only its first element is used

    :raises:
        :ree.TypeError: if the (first) value is not a `basestring` instance
        :IndexError: if `value` is an empty list
    """
    if isinstance(value, list):
        # only the first entry of a list is considered
        value = value[0]

    if not isinstance(value, basestring):
        # report the type object itself (not the string 'basestring'), and
        # name the attribute, like the other string setters do
        raise ree.TypeError(entity='executable',
                            expected_type=basestring,
                            actual_type=type(value))

    self._executable = value
def gpu_reqs(self, value):
    """
    Validate a GPU requirement description and copy it into
    `self._gpu_reqs`.

    :arguments:
        :value: dictionary describing the GPU requirements. Recognised keys
                and the values accepted for them:

                    processes           : None or int
                    process_type        : None, '' or 'MPI'
                    threads_per_process : None or int
                    thread_type         : None, '', 'OpenMP' or 'CUDA'

    'processes' and 'threads_per_process' are stored as 1 when the key is
    absent from `value`; 'process_type' and 'thread_type' are stored as None
    in that case.

    :raises:
        :ree.TypeError: if `value` is not a dict, or if 'processes' /
                        'threads_per_process' is neither None nor an int
        :ree.MissingError: if the keys of `value` are a proper subset of the
                           recognised keys
        :ree.ValueError: if 'process_type' or 'thread_type' holds a value
                         that is not accepted
    """
    if not isinstance(value, dict):
        raise ree.TypeError(entity='gpu_reqs',
                            expected_type=dict,
                            actual_type=type(value))

    expected_keys = {'processes', 'threads_per_process',
                     'process_type', 'thread_type'}
    given_keys = set(value)

    # NOTE(review): this is a strict-subset test, so a dict that combines
    # unrecognised keys with missing ones is not rejected here and falls
    # through to the defaults below - confirm that this is intended.
    if given_keys < expected_keys:
        raise ree.MissingError(obj='gpu_reqs',
                               missing_attribute=expected_keys - given_keys)

    processes = value.get('processes')
    if not isinstance(processes, (type(None), int)):
        # the check is for int, so int is what the error has to report
        raise ree.TypeError(expected_type=int,
                            entity='processes',
                            actual_type=type(processes))

    process_type = value.get('process_type')
    if process_type not in [None, 'MPI', '']:
        raise ree.ValueError(expected_value='None or MPI',
                             obj='gpu_reqs',
                             actual_value=process_type,
                             attribute='process_type')

    threads = value.get('threads_per_process')
    if not isinstance(threads, (type(None), int)):
        raise ree.TypeError(expected_type=int,
                            entity='threads_per_process',
                            actual_type=type(threads))

    thread_type = value.get('thread_type')
    if thread_type not in [None, 'OpenMP', 'CUDA', '']:
        raise ree.ValueError(expected_value='None or OpenMP or CUDA',
                             actual_value=thread_type,
                             obj='gpu_reqs',
                             attribute='thread_type')

    # the default of 1 applies only when the key is absent; an explicit None
    # is stored as None
    self._gpu_reqs['processes'] = value.get('processes', 1)
    self._gpu_reqs['process_type'] = process_type
    self._gpu_reqs['threads_per_process'] = value.get('threads_per_process', 1)
    self._gpu_reqs['thread_type'] = thread_type
def name(self, value):
    """
    Validate `value` and store it as the object's name (`self._name`).

    :arguments:
        :value: the name to assign; a string that does not contain ','

    :raises:
        :ree.TypeError: if `value` is not a `basestring` instance
        :ree.ValueError: if `value` contains a comma
    """
    if not isinstance(value, basestring):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='name',
                            expected_type=basestring,
                            actual_type=type(value))

    if ',' in value:
        raise ree.ValueError(
            obj=self._uid,
            attribute='name',
            actual_value=value,
            expected_value="Using ',' in an object's name will "
                           "corrupt the profiling and internal mapping tables")

    self._name = value
def state(self, value):
    """
    Set the current state and append it to the state history.

    :arguments:
        :value: the new state; a string that is a member of
                `states._task_state_values`

    :raises:
        :ree.TypeError: if `value` is not a `basestring` instance
        :ree.ValueError: if `value` is not in `states._task_state_values`
    """
    if not isinstance(value, basestring):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='state',
                            expected_type=basestring,
                            actual_type=type(value))

    if value not in states._task_state_values:
        # list() keeps the reported value a plain list on Python 3 as well,
        # where keys() returns a view object
        raise ree.ValueError(
            obj=self._uid,
            attribute='state',
            expected_value=list(states._task_state_values.keys()),
            actual_value=value)

    self._state = value
    self._state_history.append(value)
def from_dict(self, d):
    """
    Populate this object, in place, from a dictionary.

    Entries that are missing from `d` or whose value is None are skipped,
    except for 'state', which falls back to `states.INITIAL` when the key
    is absent.

    :argument: python dictionary
    :return: None
    """
    uid = d.get('uid')
    if uid:
        self._uid = uid

    name = d.get('name')
    if name:
        self._name = name

    if 'state' in d:
        # assigned directly (with a local type check) so that the value is
        # not appended to the state history
        state = d['state']
        if not isinstance(state, basestring):
            raise ree.TypeError(entity='state',
                                expected_type=basestring,
                                actual_type=type(state))
        self._state = state
    else:
        self._state = states.INITIAL

    if 'state_history' in d:
        # assigned directly, hence the type check is done here
        history = d['state_history']
        if not isinstance(history, list):
            raise ree.TypeError(entity='state_history',
                                expected_type=list,
                                actual_type=type(history))
        self._state_history = history

    # assigned through the public attribute name, so that the type and
    # value checks of the class setters apply
    checked = ('pre_exec', 'executable', 'arguments', 'post_exec',
               'cpu_reqs', 'gpu_reqs', 'lfs_per_process',
               'upload_input_data', 'copy_input_data')

    for key in checked:
        if d.get(key) is not None:
            setattr(self, key, d[key])

    # written straight to the private attribute: (dictionary key, attribute)
    unchecked = (('link_input_data', '_link_input_data'),
                 ('move_input_data', '_move_input_data'),
                 ('copy_output_data', '_copy_output_data'),
                 ('move_output_data', '_move_output_data'),
                 ('download_output_data', '_download_output_data'),
                 ('stdout', '_stdout'),
                 ('stderr', '_stderr'),
                 ('exit_code', '_exit_code'),
                 ('path', '_path'),
                 ('tag', '_tag'),
                 ('parent_stage', '_p_stage'),
                 ('parent_pipeline', '_p_pipeline'))

    for key, attribute in unchecked:
        if d.get(key) is not None:
            setattr(self, attribute, d[key])
def parent_pipeline(self, value):
    """
    Validate `value` and store it in `self._p_pipeline`.

    :arguments:
        :value: dictionary to store

    :raises:
        :ree.TypeError: if `value` is not a dict
    """
    if not isinstance(value, dict):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='parent_pipeline',
                            expected_type=dict,
                            actual_type=type(value))

    self._p_pipeline = value
def link_input_data(self, value):
    """
    Validate `value` and store it in `self._link_input_data`.

    :arguments:
        :value: list to store; the entries themselves are not inspected

    :raises:
        :ree.TypeError: if `value` is not a list
    """
    if not isinstance(value, list):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='link_input_data',
                            expected_type=list,
                            actual_type=type(value))

    self._link_input_data = value
def lfs_per_process(self, value):
    """
    Validate `value` and store it in `self._lfs_per_process`.

    :arguments:
        :value: integer to store

    :raises:
        :ree.TypeError: if `value` is not an int
    """
    if not isinstance(value, int):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='lfs_per_process',
                            expected_type=int,
                            actual_type=type(value))

    self._lfs_per_process = value
def post_exec(self, value):
    """
    Validate `value` and store it in `self._post_exec`.

    :arguments:
        :value: list to store; the entries themselves are not inspected

    :raises:
        :ree.TypeError: if `value` is not a list
    """
    if not isinstance(value, list):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='post_exec',
                            expected_type=list,
                            actual_type=type(value))

    self._post_exec = value
def arguments(self, value):
    """
    Validate `value` and store it in `self._arguments`.

    :arguments:
        :value: list to store; the entries themselves are not inspected

    :raises:
        :ree.TypeError: if `value` is not a list
    """
    if not isinstance(value, list):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='arguments',
                            expected_type=list,
                            actual_type=type(value))

    self._arguments = value
def resolve_placeholders(path, placeholders):
    """
    **Purpose**: Substitute the placeholder in a staging path of a Task with
    the actual path of the task that the placeholder refers to.

    :arguments:
        :path: string describing the staging path, either 'source' or
               'source > target', possibly containing a placeholder.
               Recognised placeholders are '$SHARED' and
               '$Pipeline_(pipeline_name)_Stage_(stage_name)_Task_(task_name)'
        :placeholders: dictionary holding the values for placeholders,
                       looked up as
                       placeholders[pipeline_name][stage_name][task_name]['path']

    :return: `path` unchanged if it contains no '$'; otherwise `path` with
             the placeholder replaced ('$SHARED' becomes 'pilot://')

    :raises:
        :ree.TypeError: if `path` is not a string
        :ree.ValueError: if the placeholder does not have the expected
                         format or cannot be found in `placeholders`

    Any exception is logged and re-raised.
    """
    expected = ('$Pipeline_(pipeline_name)_'
                'Stage_(stage_name)_'
                'Task_(task_name) or $SHARED')

    try:
        if not isinstance(path, str):
            raise ree.TypeError(entity='path',
                                expected_type=str,
                                actual_type=type(path))

        # kept from the original: coerce to a plain `str` instance
        path = str(path)

        if '$' not in path:
            return path

        # Extract the placeholder: it is the leading component (up to the
        # first '/') of whichever side of '>' carries it
        parts = path.split('>')
        if len(parts) == 1:
            candidate = path
        elif parts[0].strip().startswith('$'):
            candidate = parts[0].strip()
        else:
            candidate = parts[1].strip()

        placeholder = candidate.split('/')[0]

        # SHARED
        if placeholder == "$SHARED":
            return path.replace(placeholder, 'pilot://')

        # Expected placeholder format:
        # $Pipeline_{pipeline.name}_Stage_{stage.name}_Task_{task.name}
        elems = placeholder.split('_')

        if len(elems) != 6:
            raise ree.ValueError(obj='placeholder',
                                 attribute='task',
                                 expected_value=expected,
                                 actual_value=elems)

        pname = elems[1]
        sname = elems[3]
        tname = elems[5]

        resolved = None

        if pname not in placeholders:
            logger.warning('%s not assigned to any Pipeline' % (pname))

        elif sname not in placeholders[pname]:
            logger.warning('%s not assigned to any Stage in Pipeline %s' % (
                sname, pname))

        elif tname not in placeholders[pname][sname]:
            logger.warning('%s not assigned to any task in Stage %s Pipeline %s'
                           % (tname, sname, pname))

        else:
            resolved = path.replace(placeholder,
                                    placeholders[pname][sname][tname]['path'])

        if not resolved:
            # the three names must be interpolated explicitly; without
            # arguments the '%s' markers would be logged verbatim
            logger.warning('No placeholder could be found for task name %s, '
                           'stage name %s and pipeline name %s. Please be '
                           'sure to use object names and not uids in your '
                           'references, i.e., $Pipeline_(pipeline_name)_'
                           'Stage_(stage_name)_Task_(task_name)'
                           % (tname, sname, pname))
            raise ree.ValueError(obj='placeholder',
                                 attribute='task',
                                 expected_value=expected,
                                 actual_value=elems)

        return resolved

    except Exception as ex:
        logger.exception('Failed to resolve placeholder %s, error: %s'
                         % (path, ex))
        raise
def _output_directive(path, action=None):
    """
    Build one staging directive from a resolved 'source [> target]' string.

    The target defaults to the basename of the source when no '>' is
    present. The 'action' key is only added when `action` is given.
    """
    parts = path.split('>')
    source = parts[0].strip()

    if len(parts) > 1:
        target = parts[1].strip()
    else:
        target = os.path.basename(source)

    directive = {'source': source, 'target': target}

    if action is not None:
        directive['action'] = action

    return directive


def get_output_list_from_task(task, placeholders):
    """
    Purpose: Parse a Task object to extract the files to be staged as the
    output.

    Details: Every entry of the task's link/download/copy/move output lists
    has its placeholder resolved and is converted into a directive
    dictionary with 'source' and 'target' keys. Link, copy and move entries
    also carry an 'action' key (rp.LINK, rp.COPY, rp.MOVE); download entries
    carry none. Directives are returned in the order link, download, copy,
    move.

    :arguments:
        :task: EnTK Task object
        :placeholders: dictionary holding the values for placeholders

    :return: list of RP directives for the files that need to be staged out

    :raises:
        :ree.TypeError: if `task` is not a Task instance

    Any exception is logged and re-raised.
    """
    try:
        if not isinstance(task, Task):
            raise ree.TypeError(entity='task',
                                expected_type=Task,
                                actual_type=type(task))

        # (task attribute, name of the rp action constant or None);
        # the constant is only looked up when there is data of that kind
        staging_kinds = (('link_output_data', 'LINK'),
                         ('download_output_data', None),
                         ('copy_output_data', 'COPY'),
                         ('move_output_data', 'MOVE'))

        output_data = list()

        for attribute, action_name in staging_kinds:

            paths = getattr(task, attribute)
            if not paths:
                continue

            action = getattr(rp, action_name) if action_name else None

            for path in paths:
                resolved = resolve_placeholders(path, placeholders)
                output_data.append(_output_directive(resolved, action))

        return output_data

    except Exception:
        logger.exception('Failed to get output list of files from task')
        raise
def download_output_data(self, value):
    """
    Validate `value` and store it in `self._download_output_data`.

    :arguments:
        :value: list to store; the entries themselves are not inspected

    :raises:
        :ree.TypeError: if `value` is not a list
    """
    if not isinstance(value, list):
        # name the rejected attribute so the error message says what failed
        raise ree.TypeError(entity='download_output_data',
                            expected_type=list,
                            actual_type=type(value))

    self._download_output_data = value