Пример #1
0
    def fetch_equipment_name(self):
        """
        Fetch equipment names and natural-sort every equipment list.

        If a name regex was supplied (``self.regex_name``), each non-empty
        list is additionally filtered through ``self.filter_by_regex``.

        :return: ``False`` when no element could be fetched, ``True``
                 otherwise.  (Bug fix: the original fell through after
                 filtering and implicitly returned ``None``.)
        """
        if self.ms.get_equipment_name_list() == 0:
            print("No element fetched : fetch_equipment_name()==0")
            return False

        # Table of (getter, setter) pairs so sorting and filtering are not
        # copy-pasted once per equipment category.
        list_accessors = (
            (self.ms.get_list_node, self.ms.set_list_node),
            (self.ms.get_list_hwmanager, self.ms.set_list_hwmanager),
            (self.ms.get_list_switch, self.ms.set_list_switch),
            (self.ms.get_list_diskarray, self.ms.set_list_diskarray),
            (self.ms.get_list_metaservice, self.ms.set_list_metaservice),
        )

        # Natural-sort every list.
        for getter, setter in list_accessors:
            setter(misc.natsorted(getter()))

        if self.regex_name is None:
            return True

        # Filter each non-empty list by the user-supplied regex.
        for getter, setter in list_accessors:
            if len(getter()) > 0:
                setter(self.filter_by_regex(getter()))
        return True
Пример #2
0
    def __init__(self, **kwargs):
        """
        Initialize the pipeline: verify that git is available, record the
        repository state, read the YAML configuration and collect all tasks
        in topological order.

        :param kwargs: may contain 'arguments', an argparse.Namespace with
            the parsed command line (attributes used below include
            even_if_dirty, cluster and config).
        """
        self.caught_signal = None

        self.git_dirty_diff = None

        self.cluster_type = None
        '''
        The cluster type to be used (must be one of the keys specified in
        cluster_config).
        '''

        # Check the availability of git
        command = ['git', '--version']
        try:
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(command, stdout = devnull)

        except subprocess.CalledProcessError as e:
            # NOTE(review): StandardError exists only in Python 2; under
            # Python 3 this line itself raises NameError.
            raise StandardError("Execution of %s failed. Git seems to be "
                                "unavailable." % " ".join(command))


        # now determine the Git hash of the repository
        command = ['git', 'describe', '--all', '--dirty', '--long']
        try:
            self.git_hash_tag = subprocess.check_output(command).strip()
        # NOTE(review): bare except also swallows KeyboardInterrupt and
        # SystemExit; subprocess.CalledProcessError would be narrower.
        except:
            raise StandardError("Execution of %s failed." % " ".join(command))

        # check if we got passed an 'arguments' parameter
        # this parameter should contain a argparse.Namespace object
        args = None
        if 'arguments' in kwargs:
            args = kwargs['arguments']

        # NOTE(review): if no 'arguments' was passed, args is None and the
        # attribute accesses below raise AttributeError.
        if self.git_hash_tag.endswith('-dirty'):
            if not args.even_if_dirty:
                print("The repository has uncommitted changes, which is why " +
                      "we will exit right now.")
                print("If this is not a production environment, you can skip " +
                      "this test by specifying --even-if-dirty on the command " +
                      "line.")
                print(self.git_hash_tag)
                exit(1)
                # NOTE(review): unreachable — exit(1) above terminates the
                # process before the diff is ever captured.
                command = ['git', 'diff']
                try:
                    self.git_dirty_diff = subprocess.check_output(command)
                except:
                    raise StandardError("Execution of %s failed." % 
                                        " ".join(command))
        try:
            # set cluster type
            if args.cluster == 'auto':
                self.set_cluster_type(self.autodetect_cluster_type())
            else:
                self.set_cluster_type(args.cluster)
        except AttributeError:
            # cluster type is not an applicable parameter here, and that's fine
            # (we're probably in run-locally.py)
            pass

        self._config_filepath = args.config.name
        '''
        Name of the YAML configuration file
        '''

        self.config = dict()
        '''
        Dictionary representation of configuration YAML file.
        '''

        self.steps = dict()
        '''
        This dict stores step objects by their name. Each step knows his
        dependencies.
        '''

        self.topological_step_order = list()
        '''
        List with topologically ordered steps.
        '''

        self.file_dependencies = dict()
        '''
        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all output
        files generated by the pipeline, a set of input files that output
        file depends on.
        '''

        self.file_dependencies_reverse = dict()
        '''
        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all input
        files required by the pipeline, a set of output files which are
        generated using this input file.
        '''

        self.task_id_for_output_file = dict()
        '''
        This dict stores a task ID for every output file created by the pipeline.
        '''

        self.task_ids_for_input_file = dict()
        '''
        This dict stores a set of task IDs for every input file used in the
        pipeline.
        '''

        self.input_files_for_task_id = dict()
        '''
        This dict stores a set of input files for every task id in the pipeline.
        '''

        self.output_files_for_task_id = dict()
        '''
        This dict stores a set of output files for every task id in the pipeline.
        '''

        self.task_for_task_id = dict()
        '''
        This dict stores task objects by task IDs.
        '''

        self.all_tasks_topologically_sorted = list()
        '''
        List of all tasks in topological order.
        '''

        self.config_file_name = args.config.name
        '''
        This stores the name of the configuration file of the current analysis
        '''

        self.read_config(args.config)

        # collect all tasks
        for step_name in self.topological_step_order:
            step = self.get_step(step_name)
            logger.debug("Collect now all tasks for step: %s" % step)
            for run_index, run_id in enumerate(misc.natsorted(step.get_run_ids())):
                task = task_module.Task(self, step, run_id, run_index)
                # if any run of a step contains an exec_groups,
                # the task (step/run) is added to the task list
                run = step.get_run(run_id)
                logger.debug("Step: %s, Run: %s" % (step, run_id))
                run_has_exec_groups = False
                if len(run.get_exec_groups()) > 0:
                    run_has_exec_groups = True
                if run_has_exec_groups:
                    logger.debug("Task: %s" % task)
                    self.all_tasks_topologically_sorted.append(task)
                # Fail if multiple tasks with the same name exist
                if str(task) in self.task_for_task_id:
                    raise ConfigurationException("Duplicate task ID %s." % str(task))
                self.task_for_task_id[str(task)] = task

        self.tool_versions = {}
        self.check_tools()
Пример #3
0
 def print_source_runs(self):
     """Print every run of each source step as '<step>/<run_id>'."""
     source_steps = (
         self.steps[name]
         for name in self.topological_step_order
         if isinstance(self.steps[name], abstract_step.AbstractSourceStep)
     )
     for src in source_steps:
         for rid in misc.natsorted(src.get_run_ids()):
             print("%s/%s" % (src, rid))
Пример #4
0
    def build_steps(self):
        """
        Instantiate all steps from the configuration, wire up their
        dependencies and store them in topological order.

        :raises ConfigurationException: on a missing 'steps' key, a step
            named 'temp', an inconsistent '_depends' declaration, an
            undefined dependency, or a dependency cycle.
        """
        self.steps = {}
        if 'steps' not in self.config:
            raise ConfigurationException("Missing key: steps")

        # Raw strings keep '\s' and '\(' regex escapes instead of
        # (deprecated) string escapes.
        re_simple_key = re.compile(r'^[a-zA-Z0-9_]+$')
        re_complex_key = re.compile(r'^([a-zA-Z0-9_]+)\s+\(([a-zA-Z0-9_]+)\)$')

        # step one: instantiate all steps
        for step_key, step_description in self.config['steps'].items():

            # the step keys in the configuration may be either:
            # - MODULE_NAME
            # - DIFFERENT_STEP_NAME\s+\(MODULE_NAME\)
            step_name = None
            module_name = None
            if re_simple_key.match(step_key):
                step_name = step_key
                module_name = step_key
            else:
                match = re_complex_key.match(step_key)
                if match:
                    step_name = match.group(1)
                    module_name = match.group(2)

            if step_name == 'temp':
                # A step cannot be named 'temp' because we need the out/temp
                # directory to store temporary files.
                raise ConfigurationException("A step name cannot be 'temp'.")

            step_class = abstract_step.AbstractStep.get_step_class_for_key(module_name)
            step = step_class(self)

            step.set_step_name(step_name)
            step.set_options(step_description)

            self.steps[step_name] = step

        # step two: set dependencies
        for step_name, step in self.steps.items():
            if not step.needs_parents:
                if '_depends' in step._options:
                    raise ConfigurationException("%s must not have dependencies "
                        "because it declares no in/* connections (remove the "
                        "_depends key)." % step_name)
            else:
                if '_depends' not in step._options:
                    raise ConfigurationException("Missing key in step '%s': "
                        "_depends (set to null if the step has no dependencies)."
                        % step_name)
                depends = step._options['_depends']
                if depends is not None:
                    # A single string is treated as a one-element list.
                    temp_list = [depends] if isinstance(depends, str) else depends
                    for d in temp_list:
                        if d not in self.steps:
                            raise ConfigurationException("Step %s specifies "
                                "an undefined dependency: %s." % (step_name, d))
                        step.add_dependency(self.steps[d])

        # step three: perform topological sort, raise a ConfigurationException
        # if there's a cycle (yeah, the algorithm is O(n^2), tsk, tsk...)
        unassigned_steps = set(self.steps.keys())
        assigned_steps = set()
        self.topological_step_order = []
        while len(unassigned_steps) > 0:
            # choose all tasks which have all dependencies resolved, either
            # because they have no dependencies or are already assigned
            next_steps = []
            for step_name in unassigned_steps:
                is_ready = all(
                    dep.get_step_name() in assigned_steps
                    for dep in self.steps[step_name].dependencies)
                if is_ready:
                    next_steps.append(step_name)
            if len(next_steps) == 0:
                raise ConfigurationException(
                    "There is a cycle in the step dependencies.")
            for step_name in misc.natsorted(next_steps):
                self.topological_step_order.append(step_name)
                assigned_steps.add(step_name)
                unassigned_steps.remove(step_name)

        # step four: finalize each step
        for step in self.steps.values():
            step.finalize()
Пример #5
0
    def __init__(self, **kwargs):
        """
        Initialize the pipeline: verify that git is available, record the
        repository state, load the cluster configuration, read the YAML
        configuration and collect all tasks in topological order.

        :param kwargs: may contain 'arguments', an argparse.Namespace with
            the parsed command line (attributes used below include
            uap_path, even_if_dirty, cluster and config).
        """
        self.caught_signal = None

        self.git_dirty_diff = None

        self.cluster_type = None
        '''
        The cluster type to be used (must be one of the keys specified in
        cluster_config).
        '''

        # Check the availability of git
        command = ['git', '--version']
        try:
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(command, stdout=devnull)

        except subprocess.CalledProcessError as e:
            logger.error("Execution of '%s' failed. Git seems to be "
                         "unavailable." % " ".join(command))
            sys.exit(1)

        # now determine the Git hash of the repository
        command = ['git', 'describe', '--all', '--dirty', '--long']
        try:
            self.git_hash_tag = subprocess.check_output(command).strip()
        # NOTE(review): bare except also swallows KeyboardInterrupt and
        # SystemExit; subprocess.CalledProcessError would be narrower.
        except:
            logger.error("Execution of %s failed." % " ".join(command))
            raise
            # NOTE(review): unreachable — the re-raise above leaves this
            # method before sys.exit is ever called.
            sys.exit(1)

        # check if we got passed an 'arguments' parameter
        # this parameter should contain a argparse.Namespace object
        args = None
        if 'arguments' in kwargs:
            args = kwargs['arguments']

        # NOTE(review): if no 'arguments' was passed, args is None and the
        # attribute access below raises AttributeError.
        self._uap_path = args.uap_path
        '''
        Absolute path to the directory of the uap executable.
        It is used to circumvent path issues.
        '''

        self._cluster_config_path = os.path.join(
            self._uap_path, 'cluster/cluster-specific-commands.yaml')
        with open(self._cluster_config_path, 'r') as cluster_config_file:
            # NOTE(review): yaml.load without an explicit Loader is
            # deprecated in PyYAML >= 5.1 and unsafe on untrusted input.
            self._cluster_config = yaml.load(cluster_config_file)
        '''
        Cluster-related configuration for every cluster system supported.
        '''

        if self.git_hash_tag.endswith('-dirty'):
            if not args.even_if_dirty:
                print("The repository has uncommitted changes, which is why " +
                      "we will exit right now.")
                print(
                    "If this is not a production environment, you can skip " +
                    "this test by specifying --even-if-dirty on the command " +
                    "line.")
                print(self.git_hash_tag)
                exit(1)
                # NOTE(review): unreachable — exit(1) above terminates the
                # process before the diff is ever captured.
                command = ['git', 'diff']
                try:
                    self.git_dirty_diff = subprocess.check_output(command)
                except:
                    logger.error("Execution of %s failed." % " ".join(command))
                    sys.exit(1)
        try:
            # set cluster type
            if args.cluster == 'auto':
                self.set_cluster_type(self.autodetect_cluster_type())
            else:
                self.set_cluster_type(args.cluster)
        except AttributeError:
            # cluster type is not an applicable parameter here, and that's fine
            # (we're probably in run-locally.py)
            pass

        self._config_filepath = args.config.name
        '''
        Name of the YAML configuration file
        '''

        self.config = dict()
        '''
        Dictionary representation of configuration YAML file.
        '''

        self.steps = dict()
        '''
        This dict stores step objects by their name. Each step knows his
        dependencies.
        '''

        self.topological_step_order = list()
        '''
        List with topologically ordered steps.
        '''

        self.file_dependencies = dict()
        '''
        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all output
        files generated by the pipeline, a set of input files that output
        file depends on.
        '''

        self.file_dependencies_reverse = dict()
        '''
        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all input
        files required by the pipeline, a set of output files which are generated
        using this input file.
        '''

        self.task_id_for_output_file = dict()
        '''
        This dict stores a task ID for every output file created by the pipeline.
        '''

        self.task_ids_for_input_file = dict()
        '''
        This dict stores a set of task IDs for every input file used in the
        pipeline.
        '''

        self.input_files_for_task_id = dict()
        '''
        This dict stores a set of input files for every task id in the pipeline.
        '''

        self.output_files_for_task_id = dict()
        '''
        This dict stores a set of output files for every task id in the pipeline.
        '''

        self.task_for_task_id = dict()
        '''
        This dict stores task objects by task IDs.
        '''

        self.all_tasks_topologically_sorted = list()
        '''
        List of all tasks in topological order.
        '''

        self.read_config(args.config)

        # collect all tasks
        for step_name in self.topological_step_order:
            step = self.get_step(step_name)
            logger.debug("Collect now all tasks for step: %s" % step)
            for run_index, run_id in enumerate(
                    misc.natsorted(step.get_run_ids())):
                task = task_module.Task(self, step, run_id, run_index)
                # if any run of a step contains an exec_groups,
                # the task (step/run) is added to the task list
                run = step.get_run(run_id)
                logger.debug("Step: %s, Run: %s" % (step, run_id))
                run_has_exec_groups = False
                if len(run.get_exec_groups()) > 0:
                    run_has_exec_groups = True
                if run_has_exec_groups:
                    logger.debug("Task: %s" % task)
                    self.all_tasks_topologically_sorted.append(task)
                # Fail if multiple tasks with the same name exist
                if str(task) in self.task_for_task_id:
                    logger.error("%s: Duplicate task ID %s." %
                                 (self.get_config_filepath(), str(task)))
                    sys.exit(1)
                self.task_for_task_id[str(task)] = task

        self.tool_versions = {}
        self.check_tools()
Пример #6
0
 def print_source_runs(self):
     """List '<step>/<run_id>' for every run of each source step."""
     for name in self.topological_step_order:
         candidate = self.steps[name]
         if not isinstance(candidate, abstract_step.AbstractSourceStep):
             continue
         for run_id in misc.natsorted(candidate.get_run_ids()):
             print("%s/%s" % (candidate, run_id))
Пример #7
0
    def build_steps(self):
        """
        Instantiate all configured steps, wire up their dependencies and
        compute a topological ordering of the step names.

        On any configuration problem (missing 'steps' key, a step named
        'temp', an inconsistent '_depends' declaration, an undefined
        dependency, or a dependency cycle) an error is logged and the
        process exits via sys.exit(1).
        """
        self.steps = {}
        if 'steps' not in self.config:
            logger.error("%s: Missing key: steps" % self.get_config_filepath())
            sys.exit(1)
        # Raw strings keep '\s' and '\(' regex escapes instead of
        # (deprecated) string escapes.
        re_simple_key = re.compile(r'^[a-zA-Z0-9_]+$')
        re_complex_key = re.compile(r'^([a-zA-Z0-9_]+)\s+\(([a-zA-Z0-9_]+)\)$')

        # step one: instantiate all steps
        for step_key, step_description in self.config['steps'].items():

            # the step keys in the configuration may be either:
            # - MODULE_NAME
            # - DIFFERENT_STEP_NAME\s+\(MODULE_NAME\)
            step_name = None
            module_name = None
            if re_simple_key.match(step_key):
                step_name = step_key
                module_name = step_key
            else:
                match = re_complex_key.match(step_key)
                if match:
                    step_name = match.group(1)
                    module_name = match.group(2)

            if step_name == 'temp':
                # A step cannot be named 'temp' because we need the out/temp
                # directory to store temporary files.
                logger.error("%s: A step name cannot be 'temp'." %
                             self.get_config_filepath())
                sys.exit(1)
            step_class = abstract_step.AbstractStep.get_step_class_for_key(
                module_name)
            step = step_class(self)

            step.set_step_name(step_name)
            step.set_options(step_description)

            self.steps[step_name] = step

        # step two: set dependencies
        for step_name, step in self.steps.items():
            if not step.needs_parents:
                if '_depends' in step._options:
                    logger.error("%s: %s must not have dependencies because "
                                 "it declares no in/* connections (remove the "
                                 "_depends key)." %
                                 (self.get_config_filepath(), step_name))
                    sys.exit(1)
            else:
                if '_depends' not in step._options:
                    logger.error("%s: Missing key in step '%s': _depends (set "
                                 "to null if the step has no dependencies)." %
                                 (self.get_config_filepath(), step_name))
                    sys.exit(1)
                depends = step._options['_depends']
                if depends is not None:
                    # A single string is treated as a one-element list.
                    temp_list = [depends] if isinstance(depends, str) else depends
                    for d in temp_list:
                        if d not in self.steps:
                            logger.error(
                                "%s: Step %s specifies an undefined "
                                "dependency: %s." %
                                (self.get_config_filepath(), step_name, d))
                            sys.exit(1)
                        step.add_dependency(self.steps[d])

        # step three: perform topological sort
        # if there's a cycle (yeah, the algorithm is O(n^2), tsk, tsk...)
        unassigned_steps = set(self.steps.keys())
        assigned_steps = set()
        self.topological_step_order = []
        while len(unassigned_steps) > 0:
            # choose all steps which have all dependencies resolved, either
            # because they have no dependencies or are already assigned
            next_steps = []
            for step_name in unassigned_steps:
                is_ready = all(
                    dep.get_step_name() in assigned_steps
                    for dep in self.steps[step_name].dependencies)
                if is_ready:
                    next_steps.append(step_name)
            if len(next_steps) == 0:
                logger.error("%s: There is a cycle in the step dependencies." %
                             self.get_config_filepath())
                sys.exit(1)
            for step_name in misc.natsorted(next_steps):
                self.topological_step_order.append(step_name)
                assigned_steps.add(step_name)
                unassigned_steps.remove(step_name)

        # step four: finalize each step
        for step in self.steps.values():
            step.finalize()
Пример #8
0
    def build_steps(self):
        """
        Build all step objects from the configuration, connect them to their
        declared parent steps and store a topological ordering of the step
        names in ``self.topological_step_order``.

        :raises UAPError: on a missing 'steps' key, a step named 'temp', an
            undefined dependency or a dependency cycle.
        """
        self.steps = {}
        if 'steps' not in self.config:
            raise UAPError("Missing key: steps")
        re_simple_key = re.compile('^[a-zA-Z0-9_]+$')
        re_complex_key = re.compile(r'^([a-zA-Z0-9_]+)\s+\(([a-zA-Z0-9_]+)\)$')

        # step one: instantiate all steps
        for step_key, step_description in self.config['steps'].items():
            # a configuration key is either 'MODULE_NAME' or
            # 'STEP_NAME (MODULE_NAME)'
            step_name = None
            module_name = None
            if re_simple_key.match(step_key):
                step_name = module_name = step_key
            else:
                complex_match = re_complex_key.match(step_key)
                if complex_match:
                    step_name, module_name = complex_match.group(1, 2)

            if step_name == 'temp':
                # 'temp' is reserved: the out/temp directory stores
                # temporary files.
                raise UAPError("A step name cannot be 'temp'.")
            step_class = abstract_step.AbstractStep.get_step_class_for_key(
                module_name)
            step = step_class(self)
            step.set_step_name(step_name)
            step.set_options(step_description)
            self.steps[step_name] = step
            self.used_tools.update(step.used_tools)

        # step two: set dependencies
        for step_name, step in self.steps.items():
            for parent_step in step._options['_depends']:
                if parent_step not in self.steps.keys():
                    raise UAPError("Step %s specifies an undefined "
                                   "dependency: %s." %
                                   (step_name, parent_step))
                step.add_dependency(self.steps[parent_step])

        # step three: topological sort (O(n^2), but the step count is small)
        unassigned_steps = set(self.steps.keys())
        assigned_steps = set()
        self.topological_step_order = []
        while unassigned_steps:
            # gather every step whose dependencies are all assigned already
            next_steps = []
            for candidate in unassigned_steps:
                step = self.steps[candidate]
                is_ready = all(dep.get_step_name() in assigned_steps
                               for dep in step.dependencies)
                if not is_ready:
                    continue
                if step.get_step_type() == 'source_controller':
                    # make sure source_controller attempts to run first
                    next_steps = [candidate]
                    break
                next_steps.append(candidate)
            if not next_steps:
                raise UAPError("There is a cycle in the step dependencies.")
            for candidate in misc.natsorted(next_steps):
                self.topological_step_order.append(candidate)
                assigned_steps.add(candidate)
                unassigned_steps.remove(candidate)

        # step four: finalize every step
        for step in self.steps.values():
            step.finalize()
Пример #9
0
    def __init__(self, **kwargs):
        """
        Initialize the pipeline: capture the git repository state (version,
        status, diff, untracked files, tag), load the cluster configuration,
        read the YAML project configuration, build all steps and collect all
        tasks in topological order.

        :param kwargs: may contain 'arguments', an argparse.Namespace with
            the parsed command line (attributes used below include uap_path,
            cluster, config and no_tool_checks).
        """
        self.caught_signal = None
        self._cluster_type = None
        self.git_version = None
        self.git_status = None
        self.git_diff = None
        self.git_untracked = None
        self.git_tag = None
        '''use git diff to determine any changes in git
        directory if git is available
        '''
        command = ['git', '--version']
        try:

            self.git_version = subprocess.check_output(command).strip()

        except subprocess.CalledProcessError:
            logger.warning("""Execution of %s failed. Git seems to be
                         unavailable. Continue anyways""" % " ".join(command))

        # Only query the repository state when git itself is usable.
        if self.git_version:
            command = ['git', 'status', '--porcelain']
            try:
                self.git_status = subprocess.check_output(command)
            except subprocess.CalledProcessError:
                logger.error("Execution of %s failed." % " ".join(command))

            command = ['git', 'diff', 'HEAD']
            try:
                self.git_diff = subprocess.check_output(command)
            except subprocess.CalledProcessError:
                logger.error("Execution of %s failed." % " ".join(command))

            command = ['git', 'ls-files', '--others', '--exclude-standard']
            try:
                self.git_untracked = subprocess.check_output(command)
            except subprocess.CalledProcessError:
                logger.error("Execution of %s failed." % " ".join(command))

            command = ['git', 'describe', '--all', '--long']
            try:
                self.git_tag = subprocess.check_output(command).strip()
            except subprocess.CalledProcessError:
                logger.error("Execution of %s failed." % " ".join(command))

            # A dirty working tree is only warned about; execution continues.
            if self.git_diff:
                logger.warning('THE GIT REPOSITORY HAS UNCOMMITED CHANGES:\n'
                               '%s' % self.git_diff.decode('utf-8'))
            if self.git_untracked:
                logger.warning('THE GIT REPOSITORY HAS UNTRACKED FILES:\n'
                               '%s' % self.git_untracked.decode('utf-8'))
        """
        check if we got passed an 'arguments' parameter
        this parameter should contain a argparse.Namespace object
        """
        self.args = None
        if 'arguments' in kwargs:
            self.args = kwargs['arguments']
        # NOTE(review): if no 'arguments' was passed, self.args is None and
        # the attribute accesses below raise AttributeError.
        '''
        Absolute path to the directory of the uap executable.
        It is used to circumvent path issues.
        '''
        self._uap_path = self.args.uap_path
        '''
        The cluster type to be used (must be one of the keys specified in
        cluster_config).
        '''
        self._cluster_config_path = os.path.join(
            self._uap_path, 'cluster/cluster-specific-commands.yaml')
        with open(self._cluster_config_path, 'r') as cluster_config_file:
            self._cluster_config = yaml.load(cluster_config_file,
                                             Loader=yaml.FullLoader)

        try:
            # set cluster type
            if self.args.cluster == 'auto':
                self.set_cluster_type(self.autodetect_cluster_type())
            else:
                self.set_cluster_type(self.args.cluster)
        except AttributeError:
            # cluster type is not an applicable parameter here, and that's fine
            # (we're probably in run-locally.py)
            pass

        self._start_working_dir = os.getcwd()
        '''
        User working directory.
        '''

        if not self.args.config:
            raise UAPError('No <project-config>.yaml specified.')
        self._config_path, self.config_name = os.path.split(
            self.args.config.name)
        '''
        Name of the YAML configuration file
        '''

        self._config_path = os.path.abspath(self._config_path)
        '''
        Path of the YAML configuration file
        '''

        self.config = dict()
        '''
        Dictionary representation of configuration YAML file.
        '''

        self.steps = dict()
        '''
        This dict stores step objects by their name. Each step knows his
        dependencies.
        '''

        self.topological_step_order = list()
        '''
        List with topologically ordered steps.
        '''

        self.file_dependencies = dict()
        '''
        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all output
        files generated by the pipeline, a set of input files that output
        file depends on.
        '''

        self.file_dependencies_reverse = dict()
        '''

        This dict stores file dependencies within this pipeline, but regardless
        of step, output file tag or run ID. This dict has, for all input
        files required by the pipeline, a set of output files which are generated
        using this input file.
        '''

        self.task_id_for_output_file = dict()
        '''
        This dict stores a task ID for every output file created by the pipeline.
        '''

        self.task_for_output_file = dict()
        '''
        This dict stores a task ID for every output file created by the pipeline.
        '''

        self.task_ids_for_input_file = dict()
        '''
        This dict stores a set of task IDs for every input file used in the
        pipeline.
        '''

        self.input_files_for_task_id = dict()
        '''
        This dict stores a set of input files for every task id in the pipeline.
        '''

        self.output_files_for_task_id = dict()
        '''
        This dict stores a set of output files for every task id in the pipeline.
        '''

        self.task_for_task_id = dict()
        '''
        This dict stores task objects by task IDs.
        '''

        self.all_tasks_topologically_sorted = list()
        '''
        List of all tasks in topological order.
        '''

        self.tasks_in_step = dict()
        '''
        This dict stores tasks per step name.
        '''

        self.used_tools = set()
        '''
        A set that stores all tools used by some step.
        '''

        self.known_config_keys = {
            'destination_path', 'constants', 'cluster', 'steps', 'lmod',
            'tools', 'base_working_directory', 'id'
        }
        '''
        A set of accepted keys in the config.
        '''

        self.read_config(self.args.config)
        self.setup_lmod()
        self.build_steps()

        # NOTE(review): 'atomatically_configured' looks like a typo for
        # 'automatically_configured' — it must match the key written by the
        # config loader; verify against read_config before changing it.
        configured_tools = set(tool
                               for tool, conf in self.config['tools'].items()
                               if not conf.get('atomatically_configured'))
        unused_tools = configured_tools - self.used_tools
        if unused_tools:
            logger.warning('Unused tool(s): %s' % list(unused_tools))

        # collect all tasks
        for step_name in self.topological_step_order:
            step = self.get_step(step_name)
            self.tasks_in_step[step_name] = list()
            logger.debug("Collect now all tasks for step: %s" % step)
            for run_index, run_id in enumerate(
                    misc.natsorted(step.get_run_ids())):
                task = task_module.Task(self, step, run_id, run_index)
                # if any run of a step contains an exec_groups,
                # the task (step/run) is added to the task list
                run = step.get_run(run_id)
                logger.debug("Step: %s, Run: %s" % (step, run_id))
                run_has_exec_groups = False
                if len(run.get_exec_groups()) > 0:
                    run_has_exec_groups = True
                if run_has_exec_groups:
                    logger.debug("Task: %s" % task)
                    self.all_tasks_topologically_sorted.append(task)
                    self.tasks_in_step[step_name].append(task)
                # Fail if multiple tasks with the same name exist
                if str(task) in self.task_for_task_id:
                    raise UAPError("Duplicate task ID %s." % task)
                self.task_for_task_id[str(task)] = task

        self.tool_versions = {}
        if not self.args.no_tool_checks:
            self.check_tools()