def run_impl(self, opts, args, uuid, work_files):
    """Install a suite to its run location and (optionally) start it.

    Args:
        opts: parsed command line options (run mode, conf dir, etc.).
        args: remaining command line arguments, passed to the suite engine.
        uuid (str): unique token used to mark/verify installed locations.
        work_files (list): populated with temporary marker files created
            here, so the caller can clean them up.

    Returns:
        0 on a full run; None on the early-exit modes (remote delegation,
        validate-only, local-install-only, install-only, skipped reload).
    """
    # Log file, temporary
    if hasattr(self.event_handler, "contexts"):
        t_file = TemporaryFile()
        log_context = ReporterContext(None, self.event_handler.VV, t_file)
        self.event_handler.contexts[uuid] = log_context

    # Check suite engine specific compatibility
    self.suite_engine_proc.check_global_conf_compat()

    # Suite name from the current working directory
    if opts.conf_dir:
        self.fs_util.chdir(opts.conf_dir)
    opts.conf_dir = os.getcwd()

    # --remote=KEY=VALUE,...
    if opts.remote:
        # opts.name always set for remote.
        return self._run_remote(opts, opts.name)

    conf_tree = self.config_load(opts)
    self.fs_util.chdir(conf_tree.conf_dirs[0])

    suite_name = opts.name
    if not opts.name:
        suite_name = os.path.basename(os.getcwd())

    # Check suite.rc #! line for template scheme
    templ_scheme = "jinja2"
    if self.suite_engine_proc.SUITE_CONF in conf_tree.files:
        suiterc_path = os.path.join(
            conf_tree.files[self.suite_engine_proc.SUITE_CONF],
            self.suite_engine_proc.SUITE_CONF)
        with open(suiterc_path) as fh:
            line = fh.readline()
            if line.startswith("#!"):
                templ_scheme = line[2:].strip().lower()
    suite_section = (templ_scheme + ':' +
                     self.suite_engine_proc.SUITE_CONF)

    extra_defines = []
    if opts.defines_suite:
        for define in opts.defines_suite:
            extra_defines.append("[" + suite_section + "]" + define)

    # Automatic Rose constants
    # ROSE_ORIG_HOST: originating host
    # ROSE_VERSION: Rose version (not retained in run_mode=="reload")
    # Suite engine version
    my_rose_version = ResourceLocator.default().get_version()
    suite_engine_key = self.suite_engine_proc.get_version_env_name()
    if opts.run_mode in ["reload", "restart"]:
        # Reuse the engine version recorded by the original run
        prev_config_path = self.suite_engine_proc.get_suite_dir(
            suite_name, "log", "rose-suite-run.conf")
        prev_config = ConfigLoader()(prev_config_path)
        suite_engine_version = prev_config.get_value(
            ["env", suite_engine_key])
    else:
        suite_engine_version = \
            self.suite_engine_proc.get_version().decode()
    resloc = ResourceLocator.default()
    auto_items = [(suite_engine_key, suite_engine_version),
                  ("ROSE_ORIG_HOST", self.host_selector.get_local_host()),
                  ("ROSE_SITE", resloc.get_conf().get_value(['site'], '')),
                  ("ROSE_VERSION", resloc.get_version())]
    for key, val in auto_items:
        requested_value = conf_tree.node.get_value(["env", key])
        if requested_value:
            # A user-requested ROSE_VERSION must match the running version
            if key == "ROSE_VERSION" and val != requested_value:
                exc = VersionMismatchError(requested_value, val)
                raise ConfigValueError(["env", key], requested_value, exc)
            val = requested_value
        else:
            conf_tree.node.set(["env", key], val,
                               state=conf_tree.node.STATE_NORMAL)
        extra_defines.append('[%s]%s="%s"' % (suite_section, key, val))

    # Pass automatic Rose constants as suite defines
    self.conf_tree_loader.node_loader.load(extra_defines, conf_tree.node)

    # See if suite is running or not
    if opts.run_mode == "reload":
        # Check suite is running
        self.suite_engine_proc.get_suite_contact(suite_name)
    else:
        self.suite_engine_proc.check_suite_not_running(suite_name)

    # Install the suite to its run location
    suite_dir_rel = self._suite_dir_rel(suite_name)

    # Unfortunately a large try/finally block to ensure a temporary folder
    # created in validate only mode is cleaned up. Exceptions are not
    # caught here
    try:
        # Process Environment Variables
        environ = self.config_pm(conf_tree, "env")

        # NOTE(review): if validate_suite_only_mode is set and the
        # config_pm call above raises, temp_dir is unbound and the
        # finally clause raises NameError, masking the original error —
        # confirm and consider initialising temp_dir = None before try.
        if opts.validate_suite_only_mode:
            temp_dir = mkdtemp()
            suite_dir = os.path.join(temp_dir, suite_dir_rel)
            os.makedirs(suite_dir, 0o0700)
        else:
            suite_dir = os.path.join(os.path.expanduser("~"),
                                     suite_dir_rel)

        suite_conf_dir = os.getcwd()
        locs_conf = ConfigNode()
        if opts.new_mode:
            # --new is incompatible with running from the run dir and
            # with reload/restart modes
            if os.getcwd() == suite_dir:
                raise NewModeError("PWD", os.getcwd())
            elif opts.run_mode in ["reload", "restart"]:
                raise NewModeError("--run", opts.run_mode)
            self.suite_run_cleaner.clean(suite_name)
        if os.getcwd() != suite_dir:
            if opts.run_mode == "run":
                self._run_init_dir(opts, suite_name, conf_tree,
                                   locs_conf=locs_conf)
            os.chdir(suite_dir)

        # Housekeep log files
        now_str = None
        if not opts.install_only_mode and not opts.local_install_only_mode:
            now_str = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
            self._run_init_dir_log(opts, now_str)
        self.fs_util.makedirs("log/suite")

        # Rose configuration and version logs
        self.fs_util.makedirs("log/rose-conf")
        run_mode = opts.run_mode
        if run_mode not in ["reload", "restart", "run"]:
            run_mode = "run"
        mode = run_mode
        if opts.validate_suite_only_mode:
            mode = "validate-suite-only"
        elif opts.install_only_mode:
            mode = "install-only"
        elif opts.local_install_only_mode:
            mode = "local-install-only"
        prefix = "rose-conf/%s-%s" % (strftime("%Y%m%dT%H%M%S"), mode)

        # Dump the actual configuration as rose-suite-run.conf
        ConfigDumper()(conf_tree.node, "log/" + prefix + ".conf")

        # Install version information file
        write_source_vc_info(
            suite_conf_dir, "log/" + prefix + ".version", self.popen)

        # If run through rose-stem, install version information
        # files for each source tree if they're a working copy
        if hasattr(opts, 'source') and hasattr(opts, 'project'):
            for i, url in enumerate(opts.source):
                if os.path.isdir(url):
                    write_source_vc_info(
                        url,
                        "log/" + opts.project[i] + "-" + str(i) +
                        ".version",
                        self.popen)

        for ext in [".conf", ".version"]:
            self.fs_util.symlink(prefix + ext,
                                 "log/rose-suite-run" + ext)

        # Move temporary log to permanent log
        if hasattr(self.event_handler, "contexts"):
            log_file_path = os.path.abspath(
                os.path.join("log", "rose-suite-run.log"))
            log_file = open(log_file_path, "ab")
            temp_log_file = self.event_handler.contexts[uuid].handle
            temp_log_file.seek(0)
            log_file.write(temp_log_file.read())
            self.event_handler.contexts[uuid].handle = log_file
            temp_log_file.close()

        # Process Files
        cwd = os.getcwd()
        for rel_path, conf_dir in conf_tree.files.items():
            # Skip files already in the run dir, sync-excluded paths, and
            # files handled by the template scheme
            if (conf_dir == cwd or
                    any(fnmatchcase(os.sep + rel_path, exclude)
                        for exclude in self.SYNC_EXCLUDES) or
                    conf_tree.node.get(
                        [templ_scheme + ":" + rel_path]) is not None):
                continue
            # No sub-directories, very slow otherwise
            if os.sep in rel_path:
                rel_path = rel_path.split(os.sep, 1)[0]
            target_key = self.config_pm.get_handler(
                "file").PREFIX + rel_path
            target_node = conf_tree.node.get([target_key])
            if target_node is None:
                conf_tree.node.set([target_key])
                target_node = conf_tree.node.get([target_key])
            elif target_node.is_ignored():
                continue
            source_node = target_node.get("source")
            if source_node is None:
                target_node.set(["source"],
                                os.path.join(conf_dir, rel_path))
            elif source_node.is_ignored():
                continue
        self.config_pm(conf_tree, "file",
                       no_overwrite_mode=opts.no_overwrite_mode)

        # Process suite configuration template header
        # (e.g. Jinja2:suite.rc, EmPy:suite.rc)
        self.config_pm(conf_tree, templ_scheme, environ=environ)

        # Ask suite engine to parse suite configuration
        # and determine if it is up to date (unchanged)
        if opts.validate_suite_only_mode:
            suite_conf_unchanged = self.suite_engine_proc.cmp_suite_conf(
                suite_dir, None, opts.strict_mode, debug_mode=True)
        else:
            suite_conf_unchanged = self.suite_engine_proc.cmp_suite_conf(
                suite_name, opts.run_mode, opts.strict_mode,
                opts.debug_mode)
    finally:
        # Ensure the temporary directory created is cleaned up regardless
        # of success or failure
        if opts.validate_suite_only_mode and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

    # Only validating so finish now
    if opts.validate_suite_only_mode:
        return

    # Install share/work directories (local)
    for name in ["share", "share/cycle", "work"]:
        self._run_init_dir_work(opts, suite_name, name, conf_tree,
                                locs_conf=locs_conf)

    if opts.local_install_only_mode:
        return

    # Install suite files to each remote [user@]host:
    # drop a uuid marker file in each installed location so the remote
    # side can detect shared filesystems
    for name in ["", "log/", "share/", "share/cycle/", "work/"]:
        uuid_file = os.path.abspath(name + uuid)
        open(uuid_file, "w").close()
        work_files.append(uuid_file)

    # Install items to user@host
    auths = self.suite_engine_proc.get_tasks_auths(suite_name)
    proc_queue = []  # [[proc, command, "ssh"|"rsync", auth], ...]
    for auth in sorted(auths):
        host = auth
        if "@" in auth:
            host = auth.split("@", 1)[1]
        # Remote shell
        command = self.popen.get_cmd("ssh", "-n", auth)
        # Provide ROSE_VERSION and CYLC_VERSION in the environment
        shcommand = "env ROSE_VERSION=%s %s=%s" % (
            my_rose_version, suite_engine_key, suite_engine_version)
        # Use login shell?
        no_login_shell = self._run_conf(
            "remote-no-login-shell", host=host, conf_tree=conf_tree)
        if not no_login_shell or no_login_shell.lower() != "true":
            shcommand += r""" bash -l -c '"$0" "$@"'"""
        # Path to "rose" command, if applicable
        rose_bin = self._run_conf(
            "remote-rose-bin", host=host, conf_tree=conf_tree,
            default="rose")
        # Build remote "rose suite-run" command
        shcommand += " %s suite-run -vv -n %s" % (rose_bin, suite_name)
        for key in ["new", "debug", "install-only"]:
            attr = key.replace("-", "_") + "_mode"
            if getattr(opts, attr, None) is not None:
                shcommand += " --%s" % key
        if opts.log_keep:
            shcommand += " --log-keep=%s" % opts.log_keep
        if opts.log_name:
            shcommand += " --log-name=%s" % opts.log_name
        if not opts.log_archive_mode:
            shcommand += " --no-log-archive"
        shcommand += " --run=%s" % opts.run_mode
        # Build --remote= option
        shcommand += " --remote=uuid=%s" % uuid
        if now_str is not None:
            shcommand += ",now-str=%s" % now_str
        host_confs = ["root-dir",
                      "root-dir{share}",
                      "root-dir{share/cycle}",
                      "root-dir{work}"]
        locs_conf.set([auth])
        for key in host_confs:
            value = self._run_conf(key, host=host, conf_tree=conf_tree)
            if value is not None:
                val = self.popen.list_to_shell_str([str(value)])
                shcommand += ",%s=%s" % (key, pipes.quote(val))
                locs_conf.set([auth, key], value)
        command.append(shcommand)
        proc = self.popen.run_bg(*command)
        proc_queue.append([proc, command, "ssh", auth])

    # Poll the remote ssh/rsync processes until they all complete
    while proc_queue:
        sleep(self.SLEEP_PIPE)
        proc, command, command_name, auth = proc_queue.pop(0)
        if proc.poll() is None:  # put it back in proc_queue
            proc_queue.append([proc, command, command_name, auth])
            continue
        # NOTE(review): wait() before communicate() can deadlock if the
        # child fills its pipe buffers — confirm output volume is small.
        ret_code = proc.wait()
        out, err = proc.communicate()
        ret_code, out, err = [i.decode() if isinstance(i, bytes) else i
                              for i in [ret_code, out, err]]
        if ret_code:
            raise RosePopenError(command, ret_code, out, err)
        if command_name == "rsync":
            self.handle_event(out, level=Event.VV)
            continue
        else:
            self.handle_event(out, level=Event.VV, prefix="[%s] " % auth)
        # If the remote reports our uuid marker, it shares this
        # filesystem: no rsync needed, drop it from the locations conf
        for line in out.split("\n"):
            if "/" + uuid == line.strip():
                locs_conf.unset([auth])
                break
        else:
            filters = {"excludes": [], "includes": []}
            for name in ["", "log/", "share/", "share/cycle/", "work/"]:
                filters["excludes"].append(name + uuid)
            target = auth + ":" + suite_dir_rel
            cmd = self._get_cmd_rsync(target, **filters)
            proc_queue.append(
                [self.popen.run_bg(*cmd), cmd, "rsync", auth])

    # Install ends
    ConfigDumper()(locs_conf, os.path.join("log", "rose-suite-run.locs"))
    if opts.install_only_mode:
        return
    elif opts.run_mode == "reload" and suite_conf_unchanged:
        conf_name = self.suite_engine_proc.SUITE_CONF
        self.handle_event(SkipReloadEvent(suite_name, conf_name))
        return

    # Start the suite
    self.fs_util.chdir("log")
    self.suite_engine_proc.run(suite_name, opts.host, opts.run_mode, args)

    # Disconnect log file handle, so monitoring tool command will no longer
    # be associated with the log file.
    self.event_handler.contexts[uuid].handle.close()
    self.event_handler.contexts.pop(uuid)

    return 0
def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
    """Run multiple instances of a command using sets of specified args.

    Args:
        app_runner: application runner providing popen/event handling.
        conf_tree: loaded application configuration tree.
        opts: parsed command line options.
        args: extra command line arguments for the command.
        uuid: unique identifier for this run (unused here).
        work_files: temporary work files for this run (unused here).

    Returns:
        int: 1 if any invocation failed, else 0.

    Raises:
        ConfigValueError: on invalid names/fail-mode/argument-mode/
            command-instances settings or inconsistent arg lengths.
        CommandNotDefinedError: if no command can be determined.
    """
    # Counts for reporting purposes
    run_ok = 0
    run_fail = 0
    run_skip = 0
    notrun = 0

    # Allow naming of individual calls
    self.invocation_names = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "names"])
    if self.invocation_names:
        self.invocation_names = shlex.split(
            metomi.rose.env.env_var_process(self.invocation_names))
        if len(set(self.invocation_names)) != len(self.invocation_names):
            raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                   self.invocation_names,
                                   "names must be unique")

    self.fail_mode = metomi.rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "fail-mode"],
                                 self.TYPE_CONTINUE_ON_FAIL))
    if self.fail_mode not in self.FAIL_MODE_TYPES:
        raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                               self.fail_mode, "not a valid setting")

    self.incremental = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "incremental"], "true")
    if self.incremental:
        self.incremental = metomi.rose.env.env_var_process(
            self.incremental)

    # Either a command-format template or the app's regular command
    self.isformatted = True
    self.command = metomi.rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "command-format"]))
    if not self.command:
        self.isformatted = False
        self.command = app_runner.get_command(conf_tree, opts, args)
    if not self.command:
        raise CommandNotDefinedError()

    # Set up command-instances if needed
    instances = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "command-instances"])
    if instances:
        try:
            instances = range(
                int(metomi.rose.env.env_var_process(instances)))
        except ValueError:
            raise ConfigValueError(
                [self.BUNCH_SECTION, "command-instances"],
                instances, "not an integer value")

    # Argument lists
    multi_args = conf_tree.node.get_value([self.ARGS_SECTION], {})
    bunch_args_names = []
    bunch_args_values = []
    for key, val in multi_args.items():
        bunch_args_names.append(key)
        bunch_args_values.append(
            shlex.split(metomi.rose.env.env_var_process(val.value)))

    # Update the argument values based on the argument-mode
    argument_mode = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "argument-mode"],
        self.DEFAULT_ARGUMENT_MODE)
    if argument_mode == self.DEFAULT_ARGUMENT_MODE:
        pass
    elif argument_mode in self.ACCEPTED_ARGUMENT_MODES:
        # The behaviour of izip and izip_longest are special cases
        # because:
        # * izip was deprecated in Python3 use zip
        # * itertools.izip_longest was renamed and requires the fillvalue
        #   kwarg
        if argument_mode in ['zip', 'izip']:
            _permutations = zip(*bunch_args_values)
        elif argument_mode in ['zip_longest', 'izip_longest']:
            _permutations = itertools.zip_longest(*bunch_args_values,
                                                  fillvalue="")
        else:
            iteration_cmd = getattr(itertools, argument_mode)
            _permutations = iteration_cmd(*bunch_args_values)

        # Reconstruct the bunch_args_values
        _permutations = list(_permutations)
        for index, _ in enumerate(bunch_args_values):
            bunch_args_values[index] = [v[index] for v in _permutations]
    else:
        raise ConfigValueError(
            [self.BUNCH_SECTION, "argument-mode"], argument_mode,
            "must be one of %s" % self.ACCEPTED_ARGUMENT_MODES)

    # Validate runlists
    if not self.invocation_names:
        if instances:
            arglength = len(instances)
        else:
            arglength = len(bunch_args_values[0])
        self.invocation_names = list(range(0, arglength))
    else:
        arglength = len(self.invocation_names)

    for item, vals in zip(bunch_args_names, bunch_args_values):
        if len(vals) != arglength:
            raise ConfigValueError([self.ARGS_SECTION, item],
                                   conf_tree.node.get_value(
                                       [self.ARGS_SECTION, item]),
                                   "inconsistent arg lengths")

    # "command-instances"/"COMMAND_INSTANCES" are reserved for the
    # auto-generated instance index
    if conf_tree.node.get_value([self.ARGS_SECTION,
                                 "command-instances"]):
        raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                               conf_tree.node.get_value(
                                   [self.ARGS_SECTION,
                                    "command-instances"]),
                               "reserved keyword")

    if conf_tree.node.get_value([self.ARGS_SECTION,
                                 "COMMAND_INSTANCES"]):
        raise ConfigValueError([self.ARGS_SECTION, "COMMAND_INSTANCES"],
                               conf_tree.node.get_value(
                                   [self.ARGS_SECTION,
                                    "COMMAND_INSTANCES"]),
                               "reserved keyword")

    if instances and arglength != len(instances):
        raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                               instances, "inconsistent arg lengths")

    # Set max number of processes to run at once
    max_procs = conf_tree.node.get_value([self.BUNCH_SECTION,
                                          "pool-size"])
    if max_procs:
        max_procs = int(metomi.rose.env.env_var_process(max_procs))
    else:
        max_procs = arglength

    if self.incremental == "true":
        self.dao = RoseBunchDAO(conf_tree)
    else:
        self.dao = None

    commands = {}
    for vals in zip(range(arglength), self.invocation_names,
                    *bunch_args_values):
        index, name, bunch_args_vals = vals[0], vals[1], vals[2:]
        argsdict = dict(zip(bunch_args_names, bunch_args_vals))
        if instances:
            if self.isformatted:
                argsdict["command-instances"] = instances[index]
            else:
                argsdict["COMMAND_INSTANCES"] = str(instances[index])
        commands[name] = RoseBunchCmd(name, self.command, argsdict,
                                      self.isformatted)

    procs = {}
    if 'ROSE_TASK_LOG_DIR' in os.environ:
        log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
    else:
        log_format = os.path.join(os.getcwd(), "%s")

    failed = {}
    abort = False

    while procs or (commands and not abort):
        # Reap finished processes
        for key, proc in list(procs.items()):
            if proc.poll() is not None:
                procs.pop(key)
                if proc.returncode:
                    failed[key] = proc.returncode
                    run_fail += 1
                    app_runner.handle_event(
                        RosePopenError(str(key), proc.returncode,
                                       None, None))
                    if self.dao:
                        self.dao.update_command_state(key,
                                                      self.dao.S_FAIL)
                    if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                        abort = True
                        app_runner.handle_event(AbortEvent())
                else:
                    run_ok += 1
                    app_runner.handle_event(SucceededEvent(key),
                                            prefix=self.PREFIX_OK)
                    if self.dao:
                        self.dao.update_command_state(key,
                                                      self.dao.S_PASS)

        # Launch new processes up to the pool size
        while len(procs) < max_procs and commands and not abort:
            key = self.invocation_names[0]
            command = commands.pop(key)
            self.invocation_names.pop(0)
            cmd = command.get_command()
            cmd_stdout = log_format % command.get_out_file()
            cmd_stderr = log_format % command.get_err_file()
            prefix = command.get_log_prefix()

            # BUG FIX: copy os.environ rather than binding it directly;
            # otherwise per-invocation variables (argsdict entries and
            # ROSE_BUNCH_LOG_PREFIX) would be written into the parent
            # process's environment and leak across invocations.
            bunch_environ = dict(os.environ)
            if not command.isformatted:
                bunch_environ.update(command.argsdict)
            bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

            if self.dao:
                # Skip invocations that already succeeded in a previous
                # (incremental) run
                if self.dao.check_has_succeeded(key):
                    run_skip += 1
                    app_runner.handle_event(PreviousSuccessEvent(key),
                                            prefix=self.PREFIX_PASS)
                    continue
                else:
                    self.dao.add_command(key)

            app_runner.handle_event(LaunchEvent(key, cmd))
            procs[key] = app_runner.popen.run_bg(
                cmd, shell=True,
                stdout=open(cmd_stdout, 'w'),
                stderr=open(cmd_stderr, 'w'),
                env=bunch_environ)
        sleep(self.SLEEP_DURATION)

    # Report any invocations never launched because of an abort
    if abort and commands:
        for key in self.invocation_names:
            notrun += 1
            cmd = commands.pop(key).get_command()
            app_runner.handle_event(NotRunEvent(key, cmd),
                                    prefix=self.PREFIX_NOTRUN)

    if self.dao:
        self.dao.close()

    # Report summary data in job.out file
    app_runner.handle_event(SummaryEvent(
        run_ok, run_fail, run_skip, notrun))

    if failed:
        return 1
    else:
        return 0
def _run_target_update(cls, dao, app_runner, compress_manager, target):
    """Helper for _run. Update a target.

    Args:
        dao: database access object; records the target and its command
            return code.
        app_runner: application runner providing popen/fs_util/event
            handling.
        compress_manager: scheme handler manager for source compression.
        target: the archive target to update; its status, command_rc and
            source paths are mutated in place.
    """
    # Already up to date: just report it
    if target.status == target.ST_OLD:
        app_runner.handle_event(RoseArchEvent(target))
        return
    # Bad/null targets are reported (to stdout and stderr) without work
    if target.status in (target.ST_BAD, target.ST_NULL):
        # boolean to int
        target.command_rc = int(target.status == target.ST_BAD)
        if target.status == target.ST_BAD:
            level = Event.FAIL
        else:
            level = Event.DEFAULT
        event = RoseArchEvent(target)
        app_runner.handle_event(event)
        app_runner.handle_event(event, kind=Event.KIND_ERR, level=level)
        return
    # Pessimistic defaults until the archive command succeeds
    target.command_rc = 1
    dao.insert(target)
    work_dir = mkdtemp()
    times = [time()] * 3  # init, transformed, archived
    ret_code = None
    try:
        # Rename/edit sources
        target.status = target.ST_BAD
        rename_required = False
        for source in target.sources.values():
            if source.name != source.orig_name:
                rename_required = True
                break
        if rename_required or target.source_edit_format:
            # Materialise each source under the work dir, either via the
            # edit command or as a symlink
            for source in target.sources.values():
                source.path = os.path.join(work_dir, source.name)
                app_runner.fs_util.makedirs(
                    os.path.dirname(source.path))
                if target.source_edit_format:
                    command = target.source_edit_format % {
                        "in": source.orig_path,
                        "out": source.path}
                    app_runner.popen.run_ok(command, shell=True)
                else:
                    app_runner.fs_util.symlink(source.orig_path,
                                               source.path)
        # Compress sources
        if target.compress_scheme:
            handler = compress_manager.get_handler(
                target.compress_scheme)
            handler.compress_sources(target, work_dir)
        times[1] = time()  # transformed time
        # Run archive command
        sources = []
        if target.work_source_path:
            sources = [target.work_source_path]
        else:
            for source in target.sources.values():
                sources.append(source.path)
        command = target.command_format % {
            "sources": app_runner.popen.list_to_shell_str(sources),
            "target": app_runner.popen.list_to_shell_str([target.name])}
        ret_code, out, err = app_runner.popen.run(command, shell=True)
        if isinstance(out, bytes):
            out, err = out.decode(), err.decode()
        times[2] = time()  # archived time
        if ret_code:
            app_runner.handle_event(
                RosePopenError([command], ret_code, out, err))
        else:
            target.status = target.ST_NEW
            app_runner.handle_event(err, kind=Event.KIND_ERR)
        app_runner.handle_event(out)
        target.command_rc = ret_code
        dao.update_command_rc(target)
    finally:
        # Always remove the work dir and report the outcome
        app_runner.fs_util.delete(work_dir)
        event = RoseArchEvent(target, times, ret_code)
        app_runner.handle_event(event)
        if target.status in (target.ST_BAD, target.ST_NULL):
            app_runner.handle_event(
                event, kind=Event.KIND_ERR, level=Event.FAIL)