示例#1
0
 def _get_conf(self, r_node, t_node, key, compulsory=False, default=None):
     """Look up a configuration value and expand environment variables in it.

     The value is read from ``t_node`` under ``[key]``; when it is not found
     there, the value of ``[self.SECTION, key]`` in ``r_node`` is used, and
     failing that, ``default``.

     Args:
         r_node: node queried with ``get_value([self.SECTION, key])`` for the
             fallback value.
         t_node: node queried first, with ``get_value([key])``.
         key: name of the setting.
         compulsory: if true, a missing or empty value is an error.
         default: value to use when neither node provides the setting.

     Returns:
         The value after ``env_var_process`` substitution, or the falsy
         value as found when the setting is unset and not compulsory.

     Raises:
         CompulsoryConfigValueError: ``compulsory`` is set and no non-empty
             value was found.
         ConfigValueError: the value refers to an unbound environment
             variable.
     """
     fallback = r_node.get_value([self.SECTION, key], default=default)
     setting = t_node.get_value([key], fallback)

     if not setting:
         if compulsory:
             raise CompulsoryConfigValueError([key], None, KeyError(key))
         # Nothing to substitute in an empty/absent value.
         return setting

     try:
         return env_var_process(setting)
     except UnboundEnvironmentVariableError as exc:
         raise ConfigValueError([key], setting, exc)
示例#2
0
    def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
        """Run multiple instances of a command using sets of specified args.

        Reads the bunch settings from ``conf_tree.node`` (names, fail-mode,
        incremental, command-format, command-instances, pool-size and the
        per-invocation argument lists), validates them, then launches the
        invocations as background shell commands, keeping at most
        ``self.MAX_PROCS`` running at once and polling until all are done.

        Args:
            app_runner: provides ``handle_event`` for reporting and
                ``popen.run_bg`` for launching commands.
            conf_tree: configuration tree; settings are read from
                ``conf_tree.node``.
            opts: not used by this method.
            args: not used by this method.
            uuid: not used by this method.
            work_files: not used by this method.

        Returns:
            1 if any invocation exited with a non-zero return code, else 0.

        Raises:
            ConfigValueError: duplicate names, invalid fail-mode, non-integer
                command-instances, argument lists of inconsistent length, or
                use of the reserved "command-instances" key as an argument.
            CompulsoryConfigValueError: command-format is not set.
        """

        # Counts for reporting purposes
        run_ok = 0
        run_fail = 0
        run_skip = 0
        notrun = 0

        node = conf_tree.node

        # Allow naming of individual calls
        self.invocation_names = node.get_value([self.BUNCH_SECTION, "names"])
        if self.invocation_names:
            self.invocation_names = shlex.split(
                rose.env.env_var_process(self.invocation_names))
            if len(set(self.invocation_names)) != len(self.invocation_names):
                raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                       self.invocation_names,
                                       "names must be unique")

        self.fail_mode = rose.env.env_var_process(
            node.get_value([self.BUNCH_SECTION, "fail-mode"],
                           self.TYPE_CONTINUE_ON_FAIL))

        if self.fail_mode not in self.FAIL_MODE_TYPES:
            # Report the offending value held on the instance; there is no
            # local variable called "fail_mode".
            raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                                   self.fail_mode, "not a valid setting")

        self.incremental = node.get_value(
            [self.BUNCH_SECTION, "incremental"], "true")
        if self.incremental:
            self.incremental = rose.env.env_var_process(self.incremental)

        # Substitute environment variables in every argument list, in place.
        multi_args = node.get_value([self.ARGS_SECTION], {})
        for arg_node in multi_args.values():
            arg_node.value = rose.env.env_var_process(arg_node.value)

        # Only substitute when a value is present, so that a missing setting
        # is reported as a compulsory-setting error rather than being handed
        # to env_var_process.
        self.command_format = node.get_value(
            [self.BUNCH_SECTION, "command-format"])
        if self.command_format:
            self.command_format = rose.env.env_var_process(
                self.command_format)

        if not self.command_format:
            raise CompulsoryConfigValueError(
                [self.BUNCH_SECTION, "command-format"], None,
                KeyError("command-format"))

        # Set up command-instances if needed
        instances = node.get_value([self.BUNCH_SECTION, "command-instances"])

        if instances:
            try:
                instances = range(int(rose.env.env_var_process(instances)))
            except ValueError:
                raise ConfigValueError(
                    [self.BUNCH_SECTION, "command-instances"], instances,
                    "not an integer value")

        # Validate runlists
        if not self.invocation_names:
            if instances:
                arglength = len(instances)
            else:
                # NOTE(review): raises IndexError when no names, no
                # command-instances and no argument lists are configured.
                item, val = sorted(multi_args.items())[0]
                arglength = len(shlex.split(val.value))
            # Must be a real list: names are consumed with .pop(0) below,
            # which a Python 3 range object does not support.
            self.invocation_names = list(range(0, arglength))
        else:
            arglength = len(self.invocation_names)

        # Keep the split argument lists (in sorted key order) so they do not
        # have to be re-split for every invocation further down.
        arg_lists = []
        for item, val in sorted(multi_args.items()):
            tokens = shlex.split(val.value)
            if len(tokens) != arglength:
                raise ConfigValueError([self.ARGS_SECTION, item],
                                       node.get_value(
                                           [self.ARGS_SECTION, item]),
                                       "inconsistent arg lengths")
            arg_lists.append((item, tokens))

        if node.get_value([self.ARGS_SECTION, "command-instances"]):
            raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                                   node.get_value([
                                       self.ARGS_SECTION, "command-instances"
                                   ]), "reserved keyword")

        if instances and arglength != len(instances):
            raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                                   instances, "inconsistent arg lengths")

        # Set max number of processes to run at once
        max_procs = node.get_value([self.BUNCH_SECTION, "pool-size"])

        if max_procs:
            self.MAX_PROCS = int(rose.env.env_var_process(max_procs))
        else:
            self.MAX_PROCS = arglength

        if self.incremental == "true":
            self.dao = RoseBunchDAO(conf_tree)
        else:
            self.dao = None

        # Build one command object per invocation, keyed by invocation name.
        commands = {}
        for index, name in enumerate(self.invocation_names):
            invocation = RoseBunchCmd(name, self.command_format, index)
            for key, tokens in arg_lists:
                invocation.argsdict[key] = tokens[index]
            if instances:
                invocation.argsdict["command-instances"] = instances[index]
            commands[name] = invocation

        if 'ROSE_TASK_LOG_DIR' in os.environ:
            log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
        else:
            log_format = os.path.join(os.getcwd(), "%s")

        procs = {}
        # Open stdout/stderr log files of running commands, keyed like procs,
        # so they can be closed once the command has finished.
        log_handles = {}
        failed = {}
        abort = False

        while procs or (commands and not abort):
            # Iterate over a snapshot: finished entries are removed from
            # procs inside the loop, which is not allowed while iterating
            # the live dict view on Python 3.
            for key, proc in list(procs.items()):
                if proc.poll() is None:
                    continue
                procs.pop(key)
                for handle in log_handles.pop(key, ()):
                    handle.close()
                if proc.returncode:
                    failed[key] = proc.returncode
                    run_fail += 1
                    app_runner.handle_event(
                        RosePopenError(str(key), proc.returncode, None,
                                       None))
                    if self.dao:
                        self.dao.update_command_state(key, self.dao.S_FAIL)
                    if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                        abort = True
                        app_runner.handle_event(AbortEvent())
                else:
                    run_ok += 1
                    app_runner.handle_event(SucceededEvent(key),
                                            prefix=self.PREFIX_OK)
                    if self.dao:
                        self.dao.update_command_state(key, self.dao.S_PASS)

            # Top up the pool with pending commands, in invocation order.
            while len(procs) < self.MAX_PROCS and commands and not abort:
                key = self.invocation_names[0]
                command = commands.pop(key)
                self.invocation_names.pop(0)
                cmd = command.get_command()
                cmd_stdout = log_format % command.get_out_file()
                cmd_stderr = log_format % command.get_err_file()
                prefix = command.get_log_prefix()
                # NOTE(review): this is os.environ itself, not a copy, so
                # ROSE_BUNCH_LOG_PREFIX is also set in this process's own
                # environment.
                bunch_environ = os.environ
                bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

                if self.dao:
                    if self.dao.check_has_succeeded(key):
                        run_skip += 1
                        app_runner.handle_event(PreviousSuccessEvent(key),
                                                prefix=self.PREFIX_PASS)
                        continue
                    else:
                        self.dao.add_command(key)

                app_runner.handle_event(LaunchEvent(key, cmd))
                handles = (open(cmd_stdout, 'w'), open(cmd_stderr, 'w'))
                try:
                    procs[key] = app_runner.popen.run_bg(
                        cmd,
                        shell=True,
                        stdout=handles[0],
                        stderr=handles[1],
                        env=bunch_environ)
                except Exception:
                    # Launch failed: do not leave the log files open.
                    for handle in handles:
                        handle.close()
                    raise
                log_handles[key] = handles
            sleep(self.SLEEP_DURATION)

        # After an abort, report every command that was never launched.
        if abort and commands:
            for key in self.invocation_names:
                notrun += 1
                cmd = commands.pop(key).get_command()
                app_runner.handle_event(NotRunEvent(key, cmd),
                                        prefix=self.PREFIX_NOTRUN)

        if self.dao:
            self.dao.close()

        # Report summary data in job.out file
        app_runner.handle_event(
            SummaryEvent(run_ok, run_fail, run_skip, notrun))

        return 1 if failed else 0