Example #1
    def run_impl(self, opts, args, uuid, work_files):
        # Log file, temporary
        if hasattr(self.event_handler, "contexts"):
            t_file = TemporaryFile()
            log_context = ReporterContext(None, self.event_handler.VV, t_file)
            self.event_handler.contexts[uuid] = log_context

        # Check suite engine specific compatibility
        self.suite_engine_proc.check_global_conf_compat()

        # Suite name from the current working directory
        if opts.conf_dir:
            self.fs_util.chdir(opts.conf_dir)
        opts.conf_dir = os.getcwd()

        # --remote=KEY=VALUE,...
        if opts.remote:
            # opts.name always set for remote.
            return self._run_remote(opts, opts.name)

        conf_tree = self.config_load(opts)
        self.fs_util.chdir(conf_tree.conf_dirs[0])

        suite_name = opts.name
        if not opts.name:
            suite_name = os.path.basename(os.getcwd())

        # Check suite.rc #! line for template scheme
        templ_scheme = "jinja2"
        if self.suite_engine_proc.SUITE_CONF in conf_tree.files:
            suiterc_path = os.path.join(
                conf_tree.files[self.suite_engine_proc.SUITE_CONF],
                self.suite_engine_proc.SUITE_CONF)
            with open(suiterc_path) as fh:
                line = fh.readline()
                if line.startswith("#!"):
                    templ_scheme = line[2:].strip().lower()
        suite_section = (templ_scheme + ':' +
                         self.suite_engine_proc.SUITE_CONF)

        extra_defines = []
        if opts.defines_suite:
            for define in opts.defines_suite:
                extra_defines.append("[" + suite_section + "]" + define)

        # Automatic Rose constants
        # ROSE_ORIG_HOST: originating host
        # ROSE_VERSION: Rose version (not retained in run_mode=="reload")
        # Suite engine version
        my_rose_version = ResourceLocator.default().get_version()
        suite_engine_key = self.suite_engine_proc.get_version_env_name()
        if opts.run_mode in ["reload", "restart"]:
            prev_config_path = self.suite_engine_proc.get_suite_dir(
                suite_name, "log", "rose-suite-run.conf")
            prev_config = ConfigLoader()(prev_config_path)
            suite_engine_version = prev_config.get_value(
                ["env", suite_engine_key])
        else:
            suite_engine_version =\
                self.suite_engine_proc.get_version().decode()
        resloc = ResourceLocator.default()
        auto_items = [(suite_engine_key, suite_engine_version),
                      ("ROSE_ORIG_HOST", self.host_selector.get_local_host()),
                      ("ROSE_SITE", resloc.get_conf().get_value(['site'], '')),
                      ("ROSE_VERSION", resloc.get_version())]
        for key, val in auto_items:
            requested_value = conf_tree.node.get_value(["env", key])
            if requested_value:
                if key == "ROSE_VERSION" and val != requested_value:
                    exc = VersionMismatchError(requested_value, val)
                    raise ConfigValueError(["env", key], requested_value, exc)
                val = requested_value
            else:
                conf_tree.node.set(["env", key],
                                   val,
                                   state=conf_tree.node.STATE_NORMAL)
            extra_defines.append('[%s]%s="%s"' % (suite_section, key, val))

        # Pass automatic Rose constants as suite defines
        self.conf_tree_loader.node_loader.load(extra_defines, conf_tree.node)

        # See if suite is running or not
        if opts.run_mode == "reload":
            # Check suite is running
            self.suite_engine_proc.get_suite_contact(suite_name)
        else:
            self.suite_engine_proc.check_suite_not_running(suite_name)

        # Install the suite to its run location
        suite_dir_rel = self._suite_dir_rel(suite_name)

        # Unfortunately we need a large try/finally block here to ensure that
        # the temporary folder created in validate-only mode is cleaned up.
        # Exceptions are not caught here.
        try:
            # Process Environment Variables
            environ = self.config_pm(conf_tree, "env")

            if opts.validate_suite_only_mode:
                temp_dir = mkdtemp()
                suite_dir = os.path.join(temp_dir, suite_dir_rel)
                os.makedirs(suite_dir, 0o0700)
            else:
                suite_dir = os.path.join(os.path.expanduser("~"),
                                         suite_dir_rel)

            suite_conf_dir = os.getcwd()
            locs_conf = ConfigNode()
            if opts.new_mode:
                if os.getcwd() == suite_dir:
                    raise NewModeError("PWD", os.getcwd())
                elif opts.run_mode in ["reload", "restart"]:
                    raise NewModeError("--run", opts.run_mode)
                self.suite_run_cleaner.clean(suite_name)
            if os.getcwd() != suite_dir:
                if opts.run_mode == "run":
                    self._run_init_dir(opts,
                                       suite_name,
                                       conf_tree,
                                       locs_conf=locs_conf)
                os.chdir(suite_dir)

            # Housekeep log files
            now_str = None
            if not opts.install_only_mode and not opts.local_install_only_mode:
                now_str = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
                self._run_init_dir_log(opts, now_str)
            self.fs_util.makedirs("log/suite")

            # Rose configuration and version logs
            self.fs_util.makedirs("log/rose-conf")
            run_mode = opts.run_mode
            if run_mode not in ["reload", "restart", "run"]:
                run_mode = "run"
            mode = run_mode
            if opts.validate_suite_only_mode:
                mode = "validate-suite-only"
            elif opts.install_only_mode:
                mode = "install-only"
            elif opts.local_install_only_mode:
                mode = "local-install-only"
            prefix = "rose-conf/%s-%s" % (strftime("%Y%m%dT%H%M%S"), mode)

            # Dump the actual configuration as rose-suite-run.conf
            ConfigDumper()(conf_tree.node, "log/" + prefix + ".conf")

            # Install version information file
            write_source_vc_info(suite_conf_dir, "log/" + prefix + ".version",
                                 self.popen)

            # If run through rose-stem, install version information
            # files for each source tree if they're a working copy
            if hasattr(opts, 'source') and hasattr(opts, 'project'):
                for i, url in enumerate(opts.source):
                    if os.path.isdir(url):
                        write_source_vc_info(
                            url, "log/" + opts.project[i] + "-" + str(i) +
                            ".version", self.popen)

            for ext in [".conf", ".version"]:
                self.fs_util.symlink(prefix + ext, "log/rose-suite-run" + ext)

            # Move temporary log to permanent log
            if hasattr(self.event_handler, "contexts"):
                log_file_path = os.path.abspath(
                    os.path.join("log", "rose-suite-run.log"))
                log_file = open(log_file_path, "ab")
                temp_log_file = self.event_handler.contexts[uuid].handle
                temp_log_file.seek(0)
                log_file.write(temp_log_file.read())
                self.event_handler.contexts[uuid].handle = log_file
                temp_log_file.close()

            # Process Files
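            # Ensure each file provided by an additional (non-cwd) config
            # directory has a "file:NAME" target with a source, so that the
            # "file" processor below installs it into the run directory.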
            cwd = os.getcwd()
            for rel_path, conf_dir in conf_tree.files.items():
                if (conf_dir == cwd or any(
                        fnmatchcase(os.sep + rel_path, exclude)
                        for exclude in self.SYNC_EXCLUDES)
                        or conf_tree.node.get([templ_scheme + ":" + rel_path
                                               ]) is not None):
                    continue
                # No sub-directories, very slow otherwise
                if os.sep in rel_path:
                    rel_path = rel_path.split(os.sep, 1)[0]
                target_key = self.config_pm.get_handler(
                    "file").PREFIX + rel_path
                target_node = conf_tree.node.get([target_key])
                if target_node is None:
                    conf_tree.node.set([target_key])
                    target_node = conf_tree.node.get([target_key])
                elif target_node.is_ignored():
                    continue
                source_node = target_node.get("source")
                if source_node is None:
                    target_node.set(["source"],
                                    os.path.join(conf_dir, rel_path))
                elif source_node.is_ignored():
                    continue
            self.config_pm(conf_tree,
                           "file",
                           no_overwrite_mode=opts.no_overwrite_mode)

            # Process suite configuration template header
            # (e.g. Jinja2:suite.rc, EmPy:suite.rc)
            self.config_pm(conf_tree, templ_scheme, environ=environ)

            # Ask suite engine to parse suite configuration
            # and determine if it is up to date (unchanged)
            if opts.validate_suite_only_mode:
                suite_conf_unchanged = self.suite_engine_proc.cmp_suite_conf(
                    suite_dir, None, opts.strict_mode, debug_mode=True)
            else:
                suite_conf_unchanged = self.suite_engine_proc.cmp_suite_conf(
                    suite_name, opts.run_mode, opts.strict_mode,
                    opts.debug_mode)
        finally:
            # Ensure the temporary directory created is cleaned up regardless
            # of success or failure
            if opts.validate_suite_only_mode and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)

        # Only validating so finish now
        if opts.validate_suite_only_mode:
            return

        # Install share/work directories (local)
        for name in ["share", "share/cycle", "work"]:
            self._run_init_dir_work(opts,
                                    suite_name,
                                    name,
                                    conf_tree,
                                    locs_conf=locs_conf)

        if opts.local_install_only_mode:
            return

        # Install suite files to each remote [user@]host
        for name in ["", "log/", "share/", "share/cycle/", "work/"]:
            uuid_file = os.path.abspath(name + uuid)
            open(uuid_file, "w").close()
            work_files.append(uuid_file)

        # Install items to user@host
        auths = self.suite_engine_proc.get_tasks_auths(suite_name)
        proc_queue = []  # [[proc, command, "ssh"|"rsync", auth], ...]
        for auth in sorted(auths):
            host = auth
            if "@" in auth:
                host = auth.split("@", 1)[1]
            # Remote shell
            command = self.popen.get_cmd("ssh", "-n", auth)
            # Provide ROSE_VERSION and CYLC_VERSION in the environment
            shcommand = "env ROSE_VERSION=%s %s=%s" % (
                my_rose_version, suite_engine_key, suite_engine_version)
            # Use login shell?
            no_login_shell = self._run_conf("remote-no-login-shell",
                                            host=host,
                                            conf_tree=conf_tree)
            if not no_login_shell or no_login_shell.lower() != "true":
                shcommand += r""" bash -l -c '"$0" "$@"'"""
            # Path to "rose" command, if applicable
            rose_bin = self._run_conf("remote-rose-bin",
                                      host=host,
                                      conf_tree=conf_tree,
                                      default="rose")
            # Build remote "rose suite-run" command
            shcommand += " %s suite-run -vv -n %s" % (rose_bin, suite_name)
            for key in ["new", "debug", "install-only"]:
                attr = key.replace("-", "_") + "_mode"
                if getattr(opts, attr, None) is not None:
                    shcommand += " --%s" % key
            if opts.log_keep:
                shcommand += " --log-keep=%s" % opts.log_keep
            if opts.log_name:
                shcommand += " --log-name=%s" % opts.log_name
            if not opts.log_archive_mode:
                shcommand += " --no-log-archive"
            shcommand += " --run=%s" % opts.run_mode
            # Build --remote= option
            shcommand += " --remote=uuid=%s" % uuid
            if now_str is not None:
                shcommand += ",now-str=%s" % now_str
            host_confs = [
                "root-dir", "root-dir{share}", "root-dir{share/cycle}",
                "root-dir{work}"
            ]
            locs_conf.set([auth])
            for key in host_confs:
                value = self._run_conf(key, host=host, conf_tree=conf_tree)
                if value is not None:
                    val = self.popen.list_to_shell_str([str(value)])
                    shcommand += ",%s=%s" % (key, pipes.quote(val))
                    locs_conf.set([auth, key], value)
            command.append(shcommand)
            proc = self.popen.run_bg(*command)
            proc_queue.append([proc, command, "ssh", auth])

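        # Poll the queued background processes: re-queue any still running,
        # fail on a non-zero return code, and follow each "ssh" job with an
        # "rsync" job unless the remote host already sees the local uuid file
        # (i.e. it shares the local filesystem, so no transfer is needed).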
        while proc_queue:
            sleep(self.SLEEP_PIPE)
            proc, command, command_name, auth = proc_queue.pop(0)
            if proc.poll() is None:  # put it back in proc_queue
                proc_queue.append([proc, command, command_name, auth])
                continue
            ret_code = proc.wait()
            out, err = proc.communicate()
            ret_code, out, err = [
                i.decode() if isinstance(i, bytes) else i
                for i in [ret_code, out, err]
            ]
            if ret_code:
                raise RosePopenError(command, ret_code, out, err)
            if command_name == "rsync":
                self.handle_event(out, level=Event.VV)
                continue
            else:
                self.handle_event(out, level=Event.VV, prefix="[%s] " % auth)
            for line in out.split("\n"):
                if "/" + uuid == line.strip():
                    locs_conf.unset([auth])
                    break
            else:
                filters = {"excludes": [], "includes": []}
                for name in ["", "log/", "share/", "share/cycle/", "work/"]:
                    filters["excludes"].append(name + uuid)
                target = auth + ":" + suite_dir_rel
                cmd = self._get_cmd_rsync(target, **filters)
                proc_queue.append(
                    [self.popen.run_bg(*cmd), cmd, "rsync", auth])

        # Install ends
        ConfigDumper()(locs_conf, os.path.join("log", "rose-suite-run.locs"))
        if opts.install_only_mode:
            return
        elif opts.run_mode == "reload" and suite_conf_unchanged:
            conf_name = self.suite_engine_proc.SUITE_CONF
            self.handle_event(SkipReloadEvent(suite_name, conf_name))
            return

        # Start the suite
        self.fs_util.chdir("log")
        self.suite_engine_proc.run(suite_name, opts.host, opts.run_mode, args)

        # Disconnect the log file handle, so that the monitoring tool command
        # will no longer be associated with the log file.
        self.event_handler.contexts[uuid].handle.close()
        self.event_handler.contexts.pop(uuid)

        return 0
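The remote-install section above keeps a queue of background ssh/rsync processes and checks them with poll() instead of blocking on each one in turn. Below is a minimal, standalone sketch of that polling pattern using only the standard library; the command lines and the SLEEP_PIPE value are placeholders, not the ones Rose builds.

import subprocess
import sys
from time import sleep

SLEEP_PIPE = 0.05  # placeholder polling interval, not Rose's SLEEP_PIPE

# Stand-ins for the ssh/rsync command lines built in the example above.
commands = [[sys.executable, "-c", "print('host-%d')" % i] for i in range(3)]
proc_queue = [
    (subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE), cmd)
    for cmd in commands
]
while proc_queue:
    sleep(SLEEP_PIPE)
    proc, cmd = proc_queue.pop(0)
    if proc.poll() is None:  # still running: re-queue and check again later
        proc_queue.append((proc, cmd))
        continue
    out, err = proc.communicate()
    if proc.returncode:  # non-zero exit, analogous to raising RosePopenError
        raise RuntimeError("command failed: %r\n%s" % (cmd, err.decode()))
    print(out.decode().strip())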
Example #2
    def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
        """ Run multiple instances of a command using sets of specified args"""

        # Counts for reporting purposes
        run_ok = 0
        run_fail = 0
        run_skip = 0
        notrun = 0

        # Allow naming of individual calls
        self.invocation_names = conf_tree.node.get_value([self.BUNCH_SECTION,
                                                         "names"])
        if self.invocation_names:
            self.invocation_names = shlex.split(
                metomi.rose.env.env_var_process(self.invocation_names))
            if len(set(self.invocation_names)) != len(self.invocation_names):
                raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                       self.invocation_names,
                                       "names must be unique")

        self.fail_mode = metomi.rose.env.env_var_process(
            conf_tree.node.get_value(
                [self.BUNCH_SECTION, "fail-mode"], self.TYPE_CONTINUE_ON_FAIL))

        if self.fail_mode not in self.FAIL_MODE_TYPES:
            raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                                   self.fail_mode,
                                   "not a valid setting")

        self.incremental = conf_tree.node.get_value([self.BUNCH_SECTION,
                                                    "incremental"],
                                                    "true")
        if self.incremental:
            self.incremental = metomi.rose.env.env_var_process(
                self.incremental)

        self.isformatted = True
        self.command = metomi.rose.env.env_var_process(
            conf_tree.node.get_value([self.BUNCH_SECTION, "command-format"]))

        if not self.command:
            self.isformatted = False
            self.command = app_runner.get_command(conf_tree, opts, args)

        if not self.command:
            raise CommandNotDefinedError()

        # Set up command-instances if needed
        instances = conf_tree.node.get_value([self.BUNCH_SECTION,
                                              "command-instances"])

        if instances:
            try:
                instances = range(
                    int(metomi.rose.env.env_var_process(instances)))
            except ValueError:
                raise ConfigValueError([self.BUNCH_SECTION,
                                        "command-instances"],
                                       instances,
                                       "not an integer value")

        # Argument lists
        multi_args = conf_tree.node.get_value([self.ARGS_SECTION], {})
        bunch_args_names = []
        bunch_args_values = []
        for key, val in multi_args.items():
            bunch_args_names.append(key)
            bunch_args_values.append(
                shlex.split(metomi.rose.env.env_var_process(val.value)))

        # Update the argument values based on the argument-mode
        argument_mode = conf_tree.node.get_value([self.BUNCH_SECTION,
                                                  "argument-mode"],
                                                 self.DEFAULT_ARGUMENT_MODE)
        if argument_mode == self.DEFAULT_ARGUMENT_MODE:
            pass
        elif argument_mode in self.ACCEPTED_ARGUMENT_MODES:
            # izip and izip_longest are special cases because:
            # * izip was removed in Python 3; use zip instead
            # * itertools.izip_longest was renamed to zip_longest and
            #     requires the fillvalue kwarg
            if argument_mode in ['zip', 'izip']:
                _permutations = zip(*bunch_args_values)
            elif argument_mode in ['zip_longest', 'izip_longest']:
                _permutations = itertools.zip_longest(*bunch_args_values,
                                                      fillvalue="")
            else:
                iteration_cmd = getattr(itertools, argument_mode)
                _permutations = iteration_cmd(*bunch_args_values)

            # Reconstruct the bunch_args_values
            _permutations = list(_permutations)
            for index, _ in enumerate(bunch_args_values):
                bunch_args_values[index] = [v[index] for v in _permutations]
        else:
            raise ConfigValueError([self.BUNCH_SECTION,
                                    "argument-mode"],
                                   argument_mode,
                                   "must be one of %s" %
                                   self.ACCEPTED_ARGUMENT_MODES)

        # Validate runlists
        if not self.invocation_names:
            if instances:
                arglength = len(instances)
            else:
                arglength = len(bunch_args_values[0])
            self.invocation_names = list(range(0, arglength))
        else:
            arglength = len(self.invocation_names)

        for item, vals in zip(bunch_args_names, bunch_args_values):
            if len(vals) != arglength:
                raise ConfigValueError([self.ARGS_SECTION, item],
                                       conf_tree.node.get_value(
                                       [self.ARGS_SECTION, item]),
                                       "inconsistent arg lengths")

        if conf_tree.node.get_value([self.ARGS_SECTION, "command-instances"]):
            raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                                   conf_tree.node.get_value(
                                   [self.ARGS_SECTION, "command-instances"]),
                                   "reserved keyword")

        if conf_tree.node.get_value([self.ARGS_SECTION, "COMMAND_INSTANCES"]):
            raise ConfigValueError([self.ARGS_SECTION, "COMMAND_INSTANCES"],
                                   conf_tree.node.get_value(
                                   [self.ARGS_SECTION, "COMMAND_INSTANCES"]),
                                   "reserved keyword")

        if instances and arglength != len(instances):
            raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                                   instances, "inconsistent arg lengths")

        # Set max number of processes to run at once
        max_procs = conf_tree.node.get_value([self.BUNCH_SECTION, "pool-size"])

        if max_procs:
            max_procs = int(metomi.rose.env.env_var_process(max_procs))
        else:
            max_procs = arglength

        if self.incremental == "true":
            self.dao = RoseBunchDAO(conf_tree)
        else:
            self.dao = None

        commands = {}
        for vals in zip(range(arglength), self.invocation_names,
                        *bunch_args_values):
            index, name, bunch_args_vals = vals[0], vals[1], vals[2:]
            argsdict = dict(zip(bunch_args_names, bunch_args_vals))
            if instances:
                if self.isformatted:
                    argsdict["command-instances"] = instances[index]
                else:
                    argsdict["COMMAND_INSTANCES"] = str(instances[index])
            commands[name] = RoseBunchCmd(name, self.command, argsdict,
                                          self.isformatted)

        procs = {}
        if 'ROSE_TASK_LOG_DIR' in os.environ:
            log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
        else:
            log_format = os.path.join(os.getcwd(), "%s")

        failed = {}
        abort = False

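        # Main scheduling loop: reap finished commands, then launch queued
        # ones until the pool-size limit is reached, unless aborting.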
        while procs or (commands and not abort):
            for key, proc in list(procs.items()):
                if proc.poll() is not None:
                    procs.pop(key)
                    if proc.returncode:
                        failed[key] = proc.returncode
                        run_fail += 1
                        app_runner.handle_event(RosePopenError(str(key),
                                                proc.returncode,
                                                None, None))
                        if self.dao:
                            self.dao.update_command_state(key, self.dao.S_FAIL)
                        if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                            abort = True
                            app_runner.handle_event(AbortEvent())
                    else:
                        run_ok += 1
                        app_runner.handle_event(SucceededEvent(key),
                                                prefix=self.PREFIX_OK)
                        if self.dao:
                            self.dao.update_command_state(key, self.dao.S_PASS)

            while len(procs) < max_procs and commands and not abort:
                key = self.invocation_names[0]
                command = commands.pop(key)
                self.invocation_names.pop(0)
                cmd = command.get_command()
                cmd_stdout = log_format % command.get_out_file()
                cmd_stderr = log_format % command.get_err_file()
                prefix = command.get_log_prefix()
                bunch_environ = os.environ
                if not command.isformatted:
                    bunch_environ.update(command.argsdict)
                bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

                if self.dao:
                    if self.dao.check_has_succeeded(key):
                        run_skip += 1
                        app_runner.handle_event(PreviousSuccessEvent(key),
                                                prefix=self.PREFIX_PASS)
                        continue
                    else:
                        self.dao.add_command(key)

                app_runner.handle_event(LaunchEvent(key, cmd))
                procs[key] = app_runner.popen.run_bg(
                    cmd,
                    shell=True,
                    stdout=open(cmd_stdout, 'w'),
                    stderr=open(cmd_stderr, 'w'),
                    env=bunch_environ)

            sleep(self.SLEEP_DURATION)

        if abort and commands:
            for key in self.invocation_names:
                notrun += 1
                cmd = commands.pop(key).get_command()
                app_runner.handle_event(NotRunEvent(key, cmd),
                                        prefix=self.PREFIX_NOTRUN)

        if self.dao:
            self.dao.close()

        # Report summary data in job.out file
        app_runner.handle_event(SummaryEvent(
                                run_ok, run_fail, run_skip, notrun))

        if failed:
            return 1
        else:
            return 0
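The argument-mode handling above reshapes the per-key argument lists with zip, zip_longest or another itertools function. Here is a small self-contained sketch of that reshaping; the names and sample values are invented for illustration.

import itertools

bunch_args_values = [["red", "green", "blue"], ["S", "M"]]  # invented sample data

def reshape(argument_mode, values):
    """Return per-key value lists reshaped according to argument_mode."""
    if argument_mode in ("zip", "izip"):
        permutations = zip(*values)  # truncates to the shortest list
    elif argument_mode in ("zip_longest", "izip_longest"):
        permutations = itertools.zip_longest(*values, fillvalue="")  # pads
    else:
        permutations = getattr(itertools, argument_mode)(*values)  # e.g. product
    permutations = list(permutations)
    return [[row[i] for row in permutations] for i in range(len(values))]

print(reshape("zip", bunch_args_values))
# [['red', 'green'], ['S', 'M']]
print(reshape("product", bunch_args_values))
# [['red', 'red', 'green', 'green', 'blue', 'blue'], ['S', 'M', 'S', 'M', 'S', 'M']]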
Example #3
    def _run_target_update(cls, dao, app_runner, compress_manager, target):
        """Helper for _run. Update a target."""
        if target.status == target.ST_OLD:
            app_runner.handle_event(RoseArchEvent(target))
            return
        if target.status in (target.ST_BAD, target.ST_NULL):
            # boolean to int
            target.command_rc = int(target.status == target.ST_BAD)
            if target.status == target.ST_BAD:
                level = Event.FAIL
            else:
                level = Event.DEFAULT
            event = RoseArchEvent(target)
            app_runner.handle_event(event)
            app_runner.handle_event(event, kind=Event.KIND_ERR, level=level)
            return
        target.command_rc = 1
        dao.insert(target)
        work_dir = mkdtemp()
        times = [time()] * 3  # init, transformed, archived
        ret_code = None
        try:
            # Rename/edit sources
            target.status = target.ST_BAD
            rename_required = False
            for source in target.sources.values():
                if source.name != source.orig_name:
                    rename_required = True
                    break
            if rename_required or target.source_edit_format:
                for source in target.sources.values():
                    source.path = os.path.join(work_dir, source.name)
                    app_runner.fs_util.makedirs(os.path.dirname(source.path))
                    if target.source_edit_format:
                        command = target.source_edit_format % {
                            "in": source.orig_path,
                            "out": source.path
                        }
                        app_runner.popen.run_ok(command, shell=True)
                    else:
                        app_runner.fs_util.symlink(source.orig_path,
                                                   source.path)
            # Compress sources
            if target.compress_scheme:
                handler = compress_manager.get_handler(target.compress_scheme)
                handler.compress_sources(target, work_dir)
            times[1] = time()  # transformed time
            # Run archive command
            sources = []
            if target.work_source_path:
                sources = [target.work_source_path]
            else:
                for source in target.sources.values():
                    sources.append(source.path)
            command = target.command_format % {
                "sources": app_runner.popen.list_to_shell_str(sources),
                "target": app_runner.popen.list_to_shell_str([target.name])
            }
            ret_code, out, err = app_runner.popen.run(command, shell=True)
            if isinstance(out, bytes):
                out, err = out.decode(), err.decode()
            times[2] = time()  # archived time
            if ret_code:
                app_runner.handle_event(
                    RosePopenError([command], ret_code, out, err))
            else:
                target.status = target.ST_NEW
                app_runner.handle_event(err, kind=Event.KIND_ERR)
            app_runner.handle_event(out)
            target.command_rc = ret_code
            dao.update_command_rc(target)
        finally:
            app_runner.fs_util.delete(work_dir)
            event = RoseArchEvent(target, times, ret_code)
            app_runner.handle_event(event)
            if target.status in (target.ST_BAD, target.ST_NULL):
                app_runner.handle_event(event,
                                        kind=Event.KIND_ERR,
                                        level=Event.FAIL)
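The archive command above is built by %-formatting command_format with shell-quoted "sources" and "target" strings and then running it through a shell. A rough standalone sketch of that substitution, using shlex.quote in place of Rose's list_to_shell_str helper; the format string and file names are invented.

import shlex

command_format = "tar -czf %(target)s %(sources)s"  # invented example format
sources = ["data file 1.nc", "data file 2.nc"]      # invented source names
target = "archive.tar.gz"

command = command_format % {
    "sources": " ".join(shlex.quote(source) for source in sources),
    "target": shlex.quote(target),
}
print(command)
# tar -czf archive.tar.gz 'data file 1.nc' 'data file 2.nc'
# The example runs its command with app_runner.popen.run(command, shell=True);
# subprocess.run(command, shell=True) would be the plain-library equivalent.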