示例#1
0
    def add_step(self, step):
        """
        Register a step with the study and connect it to its dependencies.

        The step is added as a node keyed by its name, the study environment
        is applied to the step's attributes, and edges are then created from
        each entry of the step's ``run["depends"]``. A step without
        dependencies is connected to the source node instead.

        Steps are best added in the order in which they will be encountered,
        since edges are derived from the 'depends' entry. When adding steps
        out of order, use the base class DAG functionality and make the
        connections manually.

        :param step: A StudyStep instance to be added to the Study instance.
        """
        # Register the node before touching the step's attributes.
        self.add_node(step.name, step)
        logger.info("Adding step '%s' to study '%s'...", step.name, self.name)

        # Substitute environment values into every attribute of the step.
        step.__dict__ = apply_function(
            step.__dict__, self.environment.apply_environment)

        # A step with no declared dependencies hangs directly off the source.
        if "depends" not in step.run or not step.run["depends"]:
            self.add_edge(SOURCE, step.name)
            return

        edge_msg = "{0} is dependent on {1}. Creating edge ({1}, {0})..."
        for dependency in step.run["depends"]:
            logger.info(edge_msg.format(step.name, dependency))
            if "*" in dependency:
                # Drop the text matched by ALL_COMBOS to get the parent name.
                parent = re.sub(ALL_COMBOS, "", dependency)
            else:
                parent = dependency
            self.add_edge(parent, step.name)
示例#2
0
文件: study.py 项目: trws/maestrowf
    def setup(self, submission_attempts=1, restart_limit=1):
        """
        Perform the initial setup of a Study.

        The method stores the submission settings, derives a timestamped
        output path from the study name, acquires the environment if it has
        not been set up yet, creates the output directory, and applies the
        environment to every step held by the study.

        :param submission_attempts: Number of attempted submissions before
            marking a step as failed.
        :param restart_limit: Upper limit on the number of times a step with
            a restart command can be resubmitted before it is considered
            failed.
        :returns: True if the Study is successfully set up (or already was),
            False if the output directory could not be created.
        """
        # If the study has been set up, just return.
        if self._issetup:
            logger.info("%s is already set up, returning.", self.name)
            return True

        self._submission_attempts = submission_attempts
        self._restart_limit = restart_limit

        # Set up the directory structure.
        # TODO: fdinatal - As I implement the high level program (manager and
        # launcher in bin), I'm starting to have questions about whether or
        # not the study set up is the place to handle the output path... it
        # feels like the determination of the output path should be at the
        # higher level.
        out_name = "{}_{}".format(self.name.replace(" ", "_"),
                                  time.strftime("%Y%m%d-%H%M%S"))
        self.output.value = os.path.join(self.output.value, out_name)

        # Set up the environment if it hasn't been already.
        if not self.environment.is_set_up:
            logger.info("Environment is setting up.")
            self.environment.acquire_environment()

        try:
            create_parentdir(self.output.value)
        except Exception as e:
            # Exceptions have no ``message`` attribute on Python 3; str(e)
            # works on both Python 2 and 3 and keeps the handler from raising.
            logger.error(str(e))
            return False

        # Apply all environment artifacts and acquire everything.
        for key, node in self.values.items():
            logger.info("Applying to step '%s' of the study '%s'...", key,
                        self.name)
            # Skip empty placeholder entries that carry no step object.
            if node:
                node.__dict__ = apply_function(
                    node.__dict__, self.environment.apply_environment)

        # Flag the study as set up.
        self._issetup = True
        return True
示例#3
0
文件: study.py 项目: trws/maestrowf
    def apply_parameters(self, combo):
        """
        Build a new StudyStep with a parameter combination applied.

        :param combo: A Combination instance; its ``apply`` method is passed
            to ``apply_function`` together with this step's attributes.
        :returns: A 2-tuple ``(modified, step)`` where ``modified`` is the
            result of ``self.__ne__(step)`` and ``step`` is the new StudyStep
            carrying the substituted attributes.
        """
        # Start from a blank step and replace its attributes wholesale with
        # the substituted copy of this step's attributes.
        expanded = StudyStep()
        expanded.__dict__ = apply_function(self.__dict__, combo.apply)

        # Report whether the substitution produced a step that differs.
        is_modified = self.__ne__(expanded)
        return is_modified, expanded
示例#4
0
文件: study.py 项目: trws/maestrowf
    def _setup_parameterized(self):
        """
        Set up the ExecutionGraph of a parameterized study.

        The method works in two passes over ``self.walk_study()``:

        1. Collect, per step, the parameters it uses unioned with those
           recorded for the parent it is visited from.
        2. For every combination in ``self.parameters``, expand each step
           (applying the combination when the step uses parameters), resolve
           ``$(<step>.workspace)`` references in its command, add it to the
           ExecutionGraph with an edge from its parent, and call
           ``create_parentdir`` on its workspace path.

        ``self.output.value`` is temporarily pointed at each step's workspace
        while the step is processed and is reset to the global workspace at
        the end of every iteration.

        :returns: The path to the study's global workspace and an expanded
            ExecutionGraph based on the parameters and parameterized workflow
            steps.
        :raises ValueError: If neither a step's parent nor the parameterized
            name of that parent is present in the ExecutionGraph.
        """
        # Construct ExecutionGraph
        dag = ExecutionGraph()
        dag.add_description(**self.description)
        # Items to store that should be reset.
        global_workspace = self.output.value  # Highest output dir

        # Rework begins here:
        # First step, we need to map each workflow step to the parameters that
        # they actually use -- and only the parameters used. This setup will
        # make it so that workflows can be constructed with implicit stages.
        # That's to say that if a step only requires a subset of parameters,
        # we only need to run the set of combinations dictated by that subset.
        # NOTE: We're going to need to make a way for users to access the
        # workspaces of other steps. With this rework we won't be able to
        # assume that every directory has all parameters on it.
        used_params = {}  # step name -> parameters used by it and its parent
        workspaces = {}   # expanded step name -> workspace path
        for parent, step, node in self.walk_study():
            # Source doesn't matter -- ignore it.
            if step == SOURCE:
                continue

            # Otherwise, we have a valid key.
            # We need to collect used parameters for two things:
            # 1. Collect the used parameters for the current step.
            # 2. Get the used parameters for the parent step.
            # The logic here is that the used parameters are going to be the
            # union of the used parameters for this step and ALL parent steps.
            # If we keep including the step's parent parameters, we will simply
            # carry parent parameters recursively.
            # NOTE(review): the entry for `step` is overwritten on each visit.
            # If walk_study yields a step once per parent, only the parameters
            # of the last-visited parent are kept -- confirm against
            # walk_study's behavior.
            step_params = self.parameters.get_used_parameters(node)
            if parent != SOURCE:
                step_params |= used_params[parent]
            used_params[step] = step_params

        logger.debug("Used Parameters - \n%s", used_params)

        # Secondly, we need to now iterate over all combinations for each step
        # and simply apply the combination. We can then add the name to the
        # expanded map using only the parameters that we discovered above.
        for combo in self.parameters:
            # For each Combination in the parameters...
            logger.info("==================================================")
            logger.info("Expanding study '%s' for combination '%s'", self.name,
                        str(combo))
            logger.info("==================================================")

            # For each step in the Study
            # Walk the study and construct subtree based on the combination.
            for parent, step, node in self.walk_study():
                # If we find the source node, we can just add it and continue.
                if step == SOURCE:
                    logger.debug("Source node found.")
                    dag.add_node(SOURCE, None)
                    continue

                logger.debug("Processing step '%s'.", step)
                # Due to the rework, we now can get the parameters used. We no
                # longer have to blindly apply the parameters. In fact, better
                # if we don't know. We have to see if the name exists in the
                # DAG first. If it does we can skip the step. Otherwise, apply
                # and add.
                # NOTE(review): no "already in the DAG" check is visible in
                # this method; the step is added for every combination.
                # Presumably dag.add_step tolerates repeats -- confirm.
                if used_params[step]:
                    logger.debug("Used parameters %s", used_params[step])
                    # Apply the used parameters to the step.
                    modified, step_exp = node.apply_parameters(combo)
                    # Name the step based on the parameters used.
                    combo_str = combo.get_param_string(used_params[step])
                    step_name = "{}_{}".format(step_exp.name, combo_str)
                    logger.debug(
                        "Step has been modified. Step '%s' renamed"
                        " to '%s'", step_exp.name, step_name)
                    step_exp.name = step_name
                    logger.debug("Resulting step name: %s", step_name)

                    # Set the workspace to the parameterized workspace
                    self.output.value = os.path.join(global_workspace,
                                                     combo_str)

                    # We now should account for varying workspace locations.
                    # Search for the use of workspaces in the command line so
                    # that we can go ahead and fill in the appropriate space
                    # for this combination.
                    cmd = step_exp.run["cmd"]
                    used_spaces = re.findall(WSREGEX, cmd)
                    for match in used_spaces:
                        logger.debug("Workspace found -- %s", match)
                        # Append the parameters that the step uses matching the
                        # current combo.
                        # combo_str is rebound here to the referenced step's
                        # parameter string; the current step's workspace path
                        # was already computed above.
                        combo_str = combo.get_param_string(used_params[match])
                        logger.debug("Combo str -- %s", combo_str)
                        # `_` holds the key into `workspaces` for the
                        # referenced step (parameterized name when it has one).
                        if combo_str:
                            _ = "{}_{}".format(match, combo_str)
                        else:
                            _ = match
                        # Replace the workspace tag in the command.
                        # The lookup requires that the referenced step's
                        # workspace was recorded earlier in this walk.
                        workspace_var = "$({}.workspace)".format(match)
                        cmd = cmd.replace(workspace_var, workspaces[_])
                        logger.debug("New cmd -- %s", cmd)
                    step_exp.run["cmd"] = cmd
                else:
                    # Otherwise, we know that this step is a joining node.
                    # Work on a deep copy so the study's own node is untouched.
                    step_exp = copy.deepcopy(node)
                    modified = False
                    logger.debug("No parameters found. Resulting name %s",
                                 step_exp.name)
                    self.output.value = os.path.join(global_workspace)

                # Add the workspace name to the map of workspaces.
                workspaces[step_exp.name] = self.output.value

                # Now we need to make sure we handle the dependencies.
                # We know the parent and the step name (whether it's modified
                # or not and is not _source). So now there's two cases:
                #   1. If the ExecutionGraph contains the parent name as it
                #      exists without parameterization, then we know we have
                #      a hub/joining node.
                #   2. If the ExecutionGraph does not have the parent node,
                #      then our next assumption is that it has a parameterized
                #      version of the parent. We need to check and make sure.
                #   3. Fall back third case... Abort. Something is not right.
                # Only steps that define a restart command get the restart
                # limit; all others are given a limit of 0.
                if step_exp.run["restart"]:
                    rlimit = self._restart_limit
                else:
                    rlimit = 0

                if parent != SOURCE:
                    # With the rework, we now need to check the parent's used
                    # parameters.
                    combo_str = combo.get_param_string(used_params[parent])
                    param_name = "{}_{}".format(parent, combo_str)
                    # If the parent node is not '_source', check.
                    if parent in dag.values:
                        # If the parent is in the dag, add the current step...
                        dag.add_step(step_exp.name, step_exp,
                                     self.output.value, rlimit)
                        # And its associated edge.
                        dag.add_edge(parent, step_exp.name)
                    elif param_name in dag.values:
                        # Find the index in the step for the dependency...
                        # NOTE(review): list.index raises ValueError when the
                        # 'depends' list does not contain `parent` verbatim --
                        # confirm that entries always match the parent name.
                        i = step_exp.run['depends'].index(parent)
                        # Sub it with parameterized dependency...
                        step_exp.run['depends'][i] = param_name
                        # Add the node and edge.
                        dag.add_step(step_exp.name, step_exp,
                                     self.output.value, rlimit)
                        dag.add_edge(param_name, step_exp.name)
                    else:
                        msg = "'{}' nor '{}' found in the ExecutionGraph. " \
                              "Unexpected error occurred." \
                              .format(parent, param_name)
                        logger.error(msg)
                        raise ValueError(msg)
                else:
                    # If the parent is source, then we can just execute it from
                    # '_source'.
                    dag.add_step(step_exp.name, step_exp, self.output.value,
                                 rlimit)
                    dag.add_edge(SOURCE, step_exp.name)

                # Go ahead and substitute in the output path and create the
                # workspace in the ExecutionGraph.
                # This runs after dag.add_step; it rebinds the attribute
                # dictionary of the same step_exp object passed to the graph.
                create_parentdir(self.output.value)
                step_exp.__dict__ = apply_function(step_exp.__dict__,
                                                   self.output.substitute)

                # logging
                logger.debug("---------------- Modified --------------")
                logger.debug("Modified = %s", modified)
                logger.debug("step_exp = %s", step_exp.__dict__)
                logger.debug("----------------------------------------")

                # Reset the output path to the global_workspace.
                self.output.value = global_workspace
                logger.info(
                    "==================================================")

        return global_workspace, dag
示例#5
0
    def setup(self, submission_attempts=1, restart_limit=1, throttle=0,
              use_tmp=False):
        """
        Perform initial setup of a study.

        The method stores the submission settings, acquires the environment
        if it has not been set up yet, creates the study's output directory,
        and applies the environment to every step held by the study.

        :param submission_attempts: Number of attempted submissions before
            marking a step as failed.
        :param restart_limit: Upper limit on the number of times a step with
            a restart command can be resubmitted before it is considered
            failed.
        :param throttle: The maximum number of in-progress jobs allowed. [0
            denotes no cap].
        :param use_tmp: Boolean value specifying if the generated
            ExecutionGraph dumps its information into a temporary directory.
        :returns: True if the Study is successfully set up (or already was),
            False if the output directory could not be created.
        """
        # If the study has been set up, just return.
        if self._issetup:
            logger.info("%s is already set up, returning.", self.name)
            return True

        self._submission_attempts = submission_attempts
        self._restart_limit = restart_limit
        self._submission_throttle = throttle
        self._use_tmp = use_tmp

        logger.info(
            "\n------------------------------------------\n"
            "Output path =               %s\n"
            "Submission attempts =       %d\n"
            "Submission restart limit =  %d\n"
            "Submission throttle limit = %d\n"
            "Use temporary directory =   %s\n"
            "------------------------------------------",
            self._out_path, submission_attempts, restart_limit, throttle,
            use_tmp
        )

        # Set up the environment if it hasn't been already.
        if not self.environment.is_set_up:
            logger.info("Environment is setting up.")
            self.environment.acquire_environment()

        # The original logged "Environment is setting up." here a second
        # time; describe what this stage actually does instead.
        logger.info("Creating output directory '%s'.", self._out_path)
        try:
            create_parentdir(self._out_path)
        except Exception as e:
            # Exceptions have no ``message`` attribute on Python 3; str(e)
            # works on both Python 2 and 3 and keeps the handler from raising.
            logger.error(str(e))
            return False

        # Apply all environment artifacts and acquire everything.
        for key, node in self.values.items():
            logger.info("Applying to step '%s' of the study '%s'...",
                        key, self.name)
            # Skip empty placeholder entries that carry no step object.
            if node:
                node.__dict__ = apply_function(
                    node.__dict__, self.environment.apply_environment)

        # Flag the study as set up.
        self._issetup = True
        return True