def _distribute(host, is_restart):
    """Re-invoke this command on a different host if requested."""
    # Check whether a run host is explicitly specified, else select one.
    if not host:
        host = select_suite_host()[0]
    if is_remote_host(host):
        # Prevent recursive host selection
        cmd = sys.argv[1:]
        cmd.append("--host=localhost")
        remote_cylc_cmd(cmd, host=host)
        sys.exit(0)

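
# Hypothetical usage sketch for _distribute(): a CLI entry point would call it
# before constructing the scheduler, so execution either continues on this
# host or the command is re-invoked remotely and this process exits via
# sys.exit(0). The entry-point name and option attributes are illustrative,
# not part of the source.
def _example_cli(options, args, is_restart=False):
    _distribute(options.host, is_restart)  # may re-invoke remotely and exit
    # Only reached when this host is the selected run host.
    Scheduler(is_restart, options, args).start()
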
def _get_metrics(hosts, metrics, data=None):
    """Retrieve host metrics using SSH if necessary.

    Note hosts will not appear in the returned results if:
    * They are not contactable.
    * There is an error in the command which returns the results.

    Args:
        hosts (list): List of host fqdns.
        metrics (list): List in the form [(function, arg1, arg2, ...), ...]
        data (dict): Used for logging success/fail outcomes of the form
            {host: {}}

    Examples:
        Command failure:
        >>> _get_metrics(['localhost'], [['elephant']])
        ({}, {'localhost': {'get_metrics': 'Command failed (exit: 1)'}})

    Returns:
        dict - {host: {(function, arg1, arg2, ...): result}}

    """
    host_stats = {}
    proc_map = {}
    if not data:
        data = {host: dict() for host in hosts}

    # Start up commands on hosts
    cmd = ['psutil']
    kwargs = {'stdin_str': json.dumps(metrics), 'capture_process': True}
    for host in hosts:
        if is_remote_host(host):
            proc_map[host] = remote_cylc_cmd(cmd, host=host, **kwargs)
        else:
            proc_map[host] = run_cmd(['cylc'] + cmd, **kwargs)

    # Collect results from commands
    while proc_map:
        for host, proc in list(proc_map.items()):
            if proc.poll() is None:
                continue
            del proc_map[host]
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                # Command failed; the warning is only shown in verbose/debug
                # mode.
                LOG.warning(
                    'Could not evaluate "%s" (return code %d)\n%s',
                    host, proc.returncode, err)
                data[host]['get_metrics'] = (
                    f'Command failed (exit: {proc.returncode})')
            else:
                host_stats[host] = dict(zip(
                    metrics,
                    # convert JSON dicts -> namedtuples
                    _deserialise(metrics, parse_dirty_json(out))))
        sleep(0.01)

    return host_stats, data

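
# A minimal usage sketch for _get_metrics(), under stated assumptions: each
# metric is a hashable tuple of the form (psutil_function, arg, ...), and the
# psutil protocol returns getloadavg's (1, 5, 15)-minute load averages as a
# sequence. The host list and the 1.0 threshold are illustrative values only.
def _example_pick_quiet_host(hosts):
    """Return the first contactable host whose 1-min load average is < 1.0."""
    host_stats, _data = _get_metrics(hosts, [('getloadavg',)])
    for host, results in host_stats.items():
        if results[('getloadavg',)][0] < 1.0:
            return host
    return None
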
def scheduler_cli(parser, options, args, is_restart=False):
    """CLI main."""
    reg = args[0]
    # Check suite is not already running before start of host selection.
    try:
        suite_files.detect_old_contact_file(reg)
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    suite_run_dir = get_suite_run_dir(reg)
    if not os.path.exists(suite_run_dir):
        sys.stderr.write(f'suite run directory not found '
                         f'at: {suite_run_dir}\n')
        sys.exit(1)

    # Create auth files if needed.
    suite_files.create_auth_files(reg)

    # Extract job.sh from library, for use in job scripts.
    extract_resources(suite_files.get_suite_srv_dir(reg), ['etc/job.sh'])

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flow.flags.debug:
                raise
            else:
                sys.exit(str(exc))
        if is_remote_host(host):
            if is_restart:
                base_cmd = ["restart"] + sys.argv[1:]
            else:
                base_cmd = ["run"] + sys.argv[1:]
            # Prevent recursive host selection
            base_cmd.append("--host=localhost")
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        suite_files.get_suite_source_dir(args[0], options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        suite_files.register(args[0], get_suite_run_dir(args[0]))

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()

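
# A runnable sketch of the re-invocation command built above, assuming an
# illustrative argv such as ['cylc-restart', 'my.suite', '--debug']:
def _example_base_cmd(is_restart, argv):
    """Rebuild the CLI command for re-invocation on the selected host."""
    base_cmd = (["restart"] if is_restart else ["run"]) + argv[1:]
    # "--host=localhost" stops the re-invoked command from selecting yet
    # another host and recursing.
    base_cmd.append("--host=localhost")
    return base_cmd

# _example_base_cmd(True, ['cylc-restart', 'my.suite', '--debug'])
# -> ['restart', 'my.suite', '--debug', '--host=localhost']
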
def _get_host_metrics(self):
    """Run "cylc get-host-metrics" commands on hosts.

    Return (dict): {host: host-metrics-dict, ...}
    """
    host_stats = {}
    # Run "cylc get-host-metrics" commands on hosts
    host_proc_map = {}
    cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
    # Start up commands on hosts
    for host in self.hosts:
        if is_remote_host(host):
            host_proc_map[host] = remote_cylc_cmd(
                cmd, stdin=None, host=host, capture_process=True)
        elif 'localhost' in host_proc_map:
            continue  # Don't duplicate localhost
        else:  # 1st instance of localhost
            host_proc_map['localhost'] = run_cmd(
                ['cylc'] + cmd, capture_process=True)
    # Collect results from commands
    while host_proc_map:
        for host, proc in list(host_proc_map.items()):
            if proc.poll() is None:
                continue
            del host_proc_map[host]
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                # Command failed; the warning is only shown in verbose/debug
                # mode.
                LOG.warning(
                    "can't get host metric from '%s': %s"
                    "  # returncode=%d, err=%s\n",
                    host, ' '.join(quote(item) for item in cmd),
                    proc.returncode, err)
            else:
                # Command OK.
                # Users may have profile scripts that write to STDOUT.
                # Drop all output lines until the first character of a line
                # is '{'. Hopefully this is enough to find the first line
                # that denotes the beginning of the expected JSON data
                # structure.
                out = ''.join(dropwhile(
                    lambda s: not s.startswith('{'), out.splitlines(True)))
                host_stats[host] = json.loads(out)
        sleep(0.01)
    return host_stats

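
# A self-contained demonstration of the profile-noise stripping above: drop
# stdout lines until the first line starting with '{', then parse the JSON.
# The sample banner and metrics dict are illustrative values.
import json
from itertools import dropwhile

def _example_strip_profile_noise(out):
    """Discard leading non-JSON stdout lines (e.g. from login scripts)."""
    return json.loads(''.join(
        dropwhile(lambda s: not s.startswith('{'), out.splitlines(True))))

# _example_strip_profile_noise('Welcome to hostA\n{"disk-space:/": 1024}\n')
# -> {'disk-space:/': 1024}
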
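
# A standalone sketch of the non-blocking collect loop used by the metric
# gatherers above, built on the standard library only (the command names are
# illustrative): poll each child process, harvest the finished ones, and
# sleep briefly to avoid a busy-wait.
import subprocess
from time import sleep

def _example_poll_all(commands):
    """Run commands concurrently; return {name: (returncode, stdout)}."""
    procs = {
        name: subprocess.Popen(
            argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        for name, argv in commands.items()}
    results = {}
    while procs:
        for name, proc in list(procs.items()):
            if proc.poll() is None:
                continue  # still running
            del procs[name]
            out, _err = (f.decode() for f in proc.communicate())
            results[name] = (proc.returncode, out)
        sleep(0.01)  # avoid spinning at full speed
    return results

# _example_poll_all({'date': ['date'], 'id': ['id']})
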
def scheduler_cli(parser, options, args, is_restart=False):
    """CLI main."""
    # Check suite is not already running before start of host selection.
    try:
        SuiteSrvFilesManager().detect_old_contact_file(args[0])
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    # Create auth files if needed.
    SuiteSrvFilesManager().create_auth_files(args[0])

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flow.flags.debug:
                raise
            else:
                sys.exit(str(exc))
        if is_remote_host(host):
            if is_restart:
                base_cmd = ["restart"] + sys.argv[1:]
            else:
                base_cmd = ["run"] + sys.argv[1:]
            # Prevent recursive host selection
            base_cmd.append("--host=localhost")
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        SuiteSrvFilesManager().get_suite_source_dir(args[0], options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        SuiteSrvFilesManager().register(args[0], get_suite_run_dir(args[0]))

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()

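
# A small sketch of the debug-aware error handling used in both scheduler_cli
# variants above: in debug mode re-raise for a full traceback, otherwise exit
# cleanly with just the message. The DEBUG flag name is illustrative.
import sys

DEBUG = False

def _example_fail(exc):
    """Degrade an exception to a clean exit unless debugging."""
    if DEBUG:
        raise exc
    sys.exit(str(exc))
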
def main(parser, options, *args, color=False):
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print,
    dir-list, cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account
    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands come from global config on the suite
        # host.
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        if logpath.startswith('$'):
            logpath = os.path.expandvars(logpath)
        elif logpath.startswith('~'):
            logpath = os.path.expanduser(logpath)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            batchview_cmd = None
        res = view_log(logpath, mode, tail_tmpl, batchview_cmd, remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    suite_name = args[0]
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if len(args) == 1:
        # Cat suite logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")
        logpath = get_suite_run_log_name(suite_name)
        if options.rotation_num:
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if len(args) == 2:
        # Cat task job logs, may be on suite or job host.
        if options.rotation_num is not None:
            raise UserInputError("only suite (not job) logs get rotated")
        task_id = args[1]
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            try:
                options.filename = JOB_LOG_OPTS[options.filename]
            except KeyError:
                # Is already long form (standard log, or custom).
                pass
        user_at_host, batch_sys_name, live_job_id = get_task_job_attrs(
            suite_name, point, task, options.submit_num)
        user, host = split_user_at_host(user_at_host)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special batch system log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                conf = glbl_cfg().get_host_item("batch systems", host, user)
                batchview_cmd_tmpl = None
                try:
                    batchview_cmd_tmpl = conf[batch_sys_name][conf_key]
                except KeyError:
                    pass
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)}
        log_is_remote = (is_remote(host, user)
                         and (options.filename not in JOB_LOGS_LOCAL))
        log_is_retrieved = (
            glbl_cfg().get_host_item('retrieve job logs', host)
            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(get_remote_suite_run_job_dir(
                host, user, suite_name, point, task, options.submit_num,
                options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item(
                "tail command template", host, user))
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % quote(batchview_cmd))
            cmd.append(suite_name)
            is_edit_mode = (mode == 'edit')
            try:
                proc = remote_cylc_cmd(
                    cmd, user, host,
                    capture_process=is_edit_mode,
                    manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C while tailing.
                pass
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in
                    # editor. Only BUFSIZE bytes at a time in case huge
                    # stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(get_suite_run_job_dir(
                suite_name, point, task, options.submit_num,
                options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
            out = view_log(logpath, mode, tail_tmpl, batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)

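
# A standalone sketch of the chunked stdout spooling used in edit mode above:
# read a bounded number of bytes at a time so a huge remote log is never held
# in memory whole. The 8192-byte default is illustrative; the source uses its
# own BUFSIZE constant.
from tempfile import NamedTemporaryFile

def _example_spool_stream(stream, bufsize=8192):
    """Copy a byte stream to a temp file in fixed-size chunks; rewind it."""
    tmp = NamedTemporaryFile()
    data = stream.read(bufsize)
    while data:
        tmp.write(data)
        data = stream.read(bufsize)
    tmp.seek(0, 0)
    return tmp

# Usage: tmp = _example_spool_stream(proc.stdout); tmpfile_edit(tmp, editor)
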
def main(
    parser: COP,
    options: 'Values',
    reg: str,
    task_id: Optional[str] = None,
    color: bool = False,
) -> None:
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print,
    dir-list, cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account
    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands come from global config on the workflow
        # host.
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        logpath = expand_path(logpath)
        tail_tmpl = expand_path(tail_tmpl)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            batchview_cmd = None
        res = view_log(logpath, mode, tail_tmpl, batchview_cmd, remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    workflow_name, _ = parse_reg(reg)
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if not task_id:
        # Cat workflow logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")
        logpath = get_workflow_run_log_name(workflow_name)
        if options.rotation_num:
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = os.path.expandvars(
            get_platform()["tail command template"])
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if task_id:
        # Cat task job logs, may be on workflow or job host.
        if options.rotation_num is not None:
            raise UserInputError("only workflow (not job) logs get rotated")
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            with suppress(KeyError):
                options.filename = JOB_LOG_OPTS[options.filename]
                # KeyError: Is already long form (standard log, or custom).
        platform_name, job_runner_name, live_job_id = get_task_job_attrs(
            workflow_name, point, task, options.submit_num)
        platform = get_platform(platform_name)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special job runner log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                batchview_cmd_tmpl = None
                with suppress(KeyError):
                    batchview_cmd_tmpl = platform[conf_key]
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)}
        log_is_remote = (is_remote_platform(platform)
                         and (options.filename != JOB_LOG_ACTIVITY))
        log_is_retrieved = (platform['retrieve job logs']
                            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(get_remote_workflow_run_job_dir(
                workflow_name, point, task, options.submit_num,
                options.filename))
            tail_tmpl = platform["tail command template"]
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log', *verbosity_to_opts(cylc.flow.flags.verbosity)]
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % shlex.quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % shlex.quote(batchview_cmd))
            cmd.append(workflow_name)
            is_edit_mode = (mode == 'edit')
            # TODO: Add Intelligent Host selection to this
            try:
                proc = remote_cylc_cmd(
                    cmd, platform,
                    capture_process=is_edit_mode,
                    manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C while tailing.
                pass
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in
                    # editor. Only BUFSIZE bytes at a time in case huge
                    # stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(get_workflow_run_job_dir(
                workflow_name, point, task, options.submit_num,
                options.filename))
            tail_tmpl = os.path.expandvars(platform["tail command template"])
            out = view_log(logpath, mode, tail_tmpl, batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)

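
# A minimal sketch of the viewer/tailer template substitution above, assuming
# an illustrative platform entry such as {"out tailer": "qcat -f %(job_id)s"}
# (qcat is the example viewer named in the source comments):
def _example_batchview_cmd(tmpl, live_job_id):
    """Fill a job-runner viewer command template with the live job id."""
    return tmpl % {"job_id": str(live_job_id)}

# _example_batchview_cmd("qcat -f %(job_id)s", 123456) -> 'qcat -f 123456'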