def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
    """Queue the configured command that retrieves remote task job logs.

    Args:
        schd_ctx: Scheduler context; ``schd_ctx.suite`` selects the job log
            directories and the object is passed on to the callback.
        ctx: Retrieval context providing ``user_at_host`` and ``max_size``.
        id_keys: Iterable of 4-item keys; the last three items of each are
            the (point, name, submit_num) of a job whose logs are wanted.
    """
    user_at_host = ctx.user_at_host
    if user_at_host and "@" in user_at_host:
        s_user, s_host = user_at_host.split("@", 1)
    else:
        s_user, s_host = None, user_at_host
    ssh_cmd = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
    retrieve_cmd = str(glbl_cfg().get_host_item(
        "retrieve job logs command", s_host, s_user))

    cmd = shlex.split(retrieve_cmd)
    cmd.append("--rsh=" + ssh_cmd)
    if LOG.isEnabledFor(DEBUG):
        cmd.append("-v")
    if ctx.max_size:
        cmd.append("--max-size=%s" % (ctx.max_size,))

    # Include every directory level leading to each job, plus everything
    # below the job directory itself; a set removes shared parents.
    patterns = set()
    for _, point, name, submit_num in id_keys:
        patterns.update((
            "/%s" % (point),
            "/%s/%s" % (point, name),
            "/%s/%s/%02d" % (point, name, submit_num),
            "/%s/%s/%02d/**" % (point, name, submit_num),
        ))
    cmd.extend("--include=%s" % (pattern) for pattern in sorted(patterns))
    # Anything not explicitly included above is excluded.
    cmd.append("--exclude=/**")

    # Remote source, then local target (both with a trailing slash).
    cmd.append(
        user_at_host + ":" + glbl_cfg().get_derived_host_item(
            schd_ctx.suite, "suite job log directory", s_host, s_user) + "/")
    cmd.append(
        glbl_cfg().get_derived_host_item(
            schd_ctx.suite, "suite job log directory") + "/")

    proc_ctx = SubProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys)
    self.proc_pool.put_command(
        proc_ctx, self._job_logs_retrieval_callback, [schd_ctx])
def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
    """Queue the configured command that retrieves remote task job logs.

    Args:
        schd_ctx: Scheduler context; ``schd_ctx.suite`` selects the job log
            directories and the object is passed on to the callback.
        ctx: Retrieval context providing ``user_at_host`` and ``max_size``.
        id_keys: Iterable of 4-item keys; the last three items of each are
            the (point, name, submit_num) of a job whose logs are wanted.
    """
    target = ctx.user_at_host
    if target and "@" in target:
        s_user, s_host = target.split("@", 1)
    else:
        s_user, s_host = None, target
    ssh_cmd = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
    retrieve_cmd = str(glbl_cfg().get_host_item(
        "retrieve job logs command", s_host, s_user))

    cmd = shlex.split(retrieve_cmd)
    cmd.append("--rsh=" + ssh_cmd)
    if cylc.flags.debug:
        cmd.append("-v")
    if ctx.max_size:
        cmd.append("--max-size=%s" % (ctx.max_size, ))

    # Collect include patterns for every directory level of each job; the
    # set collapses parents shared between jobs.
    wanted = set()
    for _, point, name, submit_num in id_keys:
        wanted.update((
            "/%s" % (point),
            "/%s/%s" % (point, name),
            "/%s/%s/%02d" % (point, name, submit_num),
            "/%s/%s/%02d/**" % (point, name, submit_num),
        ))
    for pattern in sorted(wanted):
        cmd.append("--include=%s" % (pattern))
    # Exclude everything that was not explicitly included.
    cmd.append("--exclude=/**")

    # Remote source directory.
    cmd.append(
        target + ":" + glbl_cfg().get_derived_host_item(
            schd_ctx.suite, "suite job log directory", s_host, s_user) + "/")
    # Local target directory.
    cmd.append(
        glbl_cfg().get_derived_host_item(
            schd_ctx.suite, "suite job log directory") + "/")

    self.proc_pool.put_command(
        SuiteProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys),
        self._job_logs_retrieval_callback,
        [schd_ctx])
def get_scan_items_from_fs(owner_pattern=None, reg_pattern=None):
    """Yield contact details of suites found under users' run directories.

    Args:
        owner_pattern: Compiled pattern matched against account names; when
            None only the current user's run directory is walked.
        reg_pattern: Optional compiled pattern matched against the suite
            name (path relative to the run directory).

    Yields:
        tuple - (reg, host, port)
    """
    srv_files_mgr = SuiteSrvFilesManager()
    if owner_pattern is None:
        # Current user only.
        run_dirs = [(glbl_cfg().get_host_item('run directory'), None)]
    else:
        # Every matching account, except those whose shell ends with
        # /false or /nologin.
        no_login_shells = ('/false', '/nologin')
        run_dirs = []
        for pwent in getpwall():
            if pwent.pw_shell.endswith(no_login_shells):
                continue
            if not owner_pattern.match(pwent.pw_name):
                continue
            user_run_dir = glbl_cfg().get_host_item(
                'run directory',
                owner=pwent.pw_name,
                owner_home=pwent.pw_dir)
            run_dirs.append((user_run_dir, pwent.pw_name))
    if cylc.flags.debug:
        owners = [entry[1] for entry in run_dirs if entry[1] is not None]
        sys.stderr.write(
            'Listing suites:%s%s\n' % (DEBUG_DELIM, DEBUG_DELIM.join(owners)))
    for run_d, owner in run_dirs:
        for dirpath, dnames, _ in os.walk(run_d, followlinks=True):
            # Always descend from the top directory, but prune the walk
            # below any directory holding a .service/ or log/ directory.
            is_top = dirpath == run_d
            if not is_top and (
                    srv_files_mgr.DIR_BASE_SRV in dnames or 'log' in dnames):
                del dnames[:]
            # Filter by suite name.
            reg = os.path.relpath(dirpath, run_d)
            if reg_pattern and not reg_pattern.match(reg):
                continue
            # Only directories with a loadable contact file are reported.
            try:
                contact_data = srv_files_mgr.load_contact_file(reg, owner)
            except (SuiteServiceFileError, IOError, TypeError, ValueError):
                continue
            yield (
                reg,
                contact_data[srv_files_mgr.KEY_HOST],
                contact_data[srv_files_mgr.KEY_PORT],
            )
def get_scan_items_from_fs(owner_pattern=None, updater=None):
    """Collect (host, port) of suites found under users' run directories.

    Args:
        owner_pattern: Compiled pattern matched against account names; when
            None only the current user's run directory is walked.
        updater: Optional object; when its ``quit`` attribute becomes true
            the walk is abandoned.

    Return (list):
        List of (host, port) read from loadable contact files. Note that
        None (not a list) is returned when the walk is abandoned.
    """
    srv_files_mgr = SuiteSrvFilesManager()
    if owner_pattern is None:
        # Current user only.
        run_dirs = [(glbl_cfg().get_host_item('run directory'), None)]
    else:
        # Every matching account, except those whose shell ends with
        # /false or /nologin.
        no_login_shells = ('/false', '/nologin')
        run_dirs = []
        for pwent in getpwall():
            if pwent.pw_shell.endswith(no_login_shells):
                continue
            if not owner_pattern.match(pwent.pw_name):
                continue
            user_run_dir = glbl_cfg().get_host_item(
                'run directory',
                owner=pwent.pw_name,
                owner_home=pwent.pw_dir)
            run_dirs.append((user_run_dir, pwent.pw_name))
    if cylc.flags.debug:
        owners = [entry[1] for entry in run_dirs if entry[1] is not None]
        sys.stderr.write(
            'Listing suites:%s%s\n' % (DEBUG_DELIM, DEBUG_DELIM.join(owners)))
    items = []
    for run_d, owner in run_dirs:
        for dirpath, dnames, fnames in os.walk(run_d, followlinks=True):
            if updater and updater.quit:
                return
            # Always descend from the top directory, but prune the walk
            # below a directory that has:
            # * .service/ or log/
            # * cylc-suite.db: (pre-cylc-7 suites don't have ".service/").
            looks_like_suite = (
                srv_files_mgr.DIR_BASE_SRV in dnames or
                'log' in dnames or
                'cylc-suite.db' in fnames)
            if dirpath != run_d and looks_like_suite:
                del dnames[:]
            # Only directories with a loadable contact file are reported.
            reg = os.path.relpath(dirpath, run_d)
            try:
                contact_data = srv_files_mgr.load_contact_file(reg, owner)
            except (SuiteServiceFileError, IOError, TypeError, ValueError):
                continue
            items.append((
                contact_data[srv_files_mgr.KEY_HOST],
                contact_data[srv_files_mgr.KEY_PORT]))
    return items
def get_scan_items_from_fs(owner_pattern=None, updater=None):
    """Collect (host, port) of suites found under users' run directories.

    Args:
        owner_pattern: Compiled pattern matched against account names; when
            None only the current user's run directory is walked.
        updater: Optional object; when its ``quit`` attribute becomes true
            the walk is abandoned.

    Return (list):
        List of (host, port) read from loadable contact files. Note that
        None (not a list) is returned when the walk is abandoned.
    """
    srv_files_mgr = SuiteSrvFilesManager()
    run_dirs = []
    if owner_pattern is None:
        # Current user only.
        run_dirs.append((glbl_cfg().get_host_item('run directory'), None))
    else:
        # Every matching account, except those whose shell ends with
        # /false or /nologin.
        skips = ('/false', '/nologin')
        for pwent in getpwall():
            if pwent.pw_shell.endswith(skips):
                continue
            if owner_pattern.match(pwent.pw_name):
                owner_run_dir = glbl_cfg().get_host_item(
                    'run directory',
                    owner=pwent.pw_name,
                    owner_home=pwent.pw_dir)
                run_dirs.append((owner_run_dir, pwent.pw_name))
    if cylc.flags.debug:
        names = DEBUG_DELIM.join(
            owner for _, owner in run_dirs if owner is not None)
        sys.stderr.write('Listing suites:%s%s\n' % (DEBUG_DELIM, names))
    items = []
    for run_d, owner in run_dirs:
        for dirpath, dnames, fnames in os.walk(run_d, followlinks=True):
            if updater and updater.quit:
                return
            # Always descend from the top directory, but prune the walk
            # below a directory that has:
            # * .service/ or log/
            # * cylc-suite.db: (pre-cylc-7 suites don't have ".service/").
            if dirpath != run_d:
                if (srv_files_mgr.DIR_BASE_SRV in dnames or
                        'log' in dnames or
                        'cylc-suite.db' in fnames):
                    dnames[:] = []
            # Only directories with a loadable contact file are reported.
            reg = os.path.relpath(dirpath, run_d)
            try:
                contact_data = srv_files_mgr.load_contact_file(reg, owner)
            except (SuiteServiceFileError, IOError, TypeError, ValueError):
                continue
            host = contact_data[srv_files_mgr.KEY_HOST]
            port = contact_data[srv_files_mgr.KEY_PORT]
            items.append((host, port))
    return items
def get_scan_items_from_fs(owner_pattern=None, reg_pattern=None):
    """Yield contact details of suites found under users' run directories.

    Args:
        owner_pattern: Compiled pattern matched against account names; when
            None only the current user's run directory is walked.
        reg_pattern: Optional compiled pattern matched against the suite
            name (path relative to the run directory).

    Yields:
        tuple - (reg, host, port)
    """
    srv_files_mgr = SuiteSrvFilesManager()
    run_dirs = []
    if owner_pattern is None:
        # Current user only.
        run_dirs.append((glbl_cfg().get_host_item('run directory'), None))
    else:
        # Every matching account, except those whose shell ends with
        # /false or /nologin.
        skips = ('/false', '/nologin')
        for pwent in getpwall():
            if pwent.pw_shell.endswith(skips):
                continue
            if owner_pattern.match(pwent.pw_name):
                owner_run_dir = glbl_cfg().get_host_item(
                    'run directory',
                    owner=pwent.pw_name,
                    owner_home=pwent.pw_dir)
                run_dirs.append((owner_run_dir, pwent.pw_name))
    if cylc.flags.debug:
        names = DEBUG_DELIM.join(
            owner for _, owner in run_dirs if owner is not None)
        sys.stderr.write('Listing suites:%s%s\n' % (DEBUG_DELIM, names))
    for run_d, owner in run_dirs:
        for dirpath, dnames, _ in os.walk(run_d, followlinks=True):
            # Always descend from the top directory, but prune the walk
            # below any directory holding a .service/ or log/ directory.
            if dirpath != run_d:
                if srv_files_mgr.DIR_BASE_SRV in dnames or 'log' in dnames:
                    dnames[:] = []
            # Filter by suite name.
            reg = os.path.relpath(dirpath, run_d)
            if reg_pattern and not reg_pattern.match(reg):
                continue
            # Only directories with a loadable contact file are reported.
            try:
                contact_data = srv_files_mgr.load_contact_file(reg, owner)
            except (SuiteServiceFileError, IOError, TypeError, ValueError):
                continue
            host = contact_data[srv_files_mgr.KEY_HOST]
            port = contact_data[srv_files_mgr.KEY_PORT]
            yield (reg, host, port)
def __init__(self, size=None):
    """Create an empty process pool.

    Args:
        size: Pool size; when falsy the 'process pool size' global
            configuration item is used instead.
    """
    # A falsy size is still handed to the configuration lookup as its
    # second argument, exactly as given by the caller.
    self.size = size or glbl_cfg().get(['process pool size'], size)
    self.proc_pool_timeout = glbl_cfg().get(['process pool timeout'])
    # Close queue
    self.closed = False
    # No more job submit if True
    self.stopping = False
    # .stopping may be set by an API command in a different thread
    self.stopping_lock = RLock()
    self.queuings = deque()
    self.runnings = []
def __init__(self):
    """Create an empty process pool sized from the global configuration."""
    cfg_size = glbl_cfg().get(['process pool size'])
    cfg_timeout = glbl_cfg().get(['process pool timeout'])
    self.size = cfg_size
    self.proc_pool_timeout = cfg_timeout
    # Close queue
    self.closed = False
    # No more job submit if True
    self.stopping = False
    # .stopping may be set by an API command in a different thread
    self.stopping_lock = RLock()
    self.queuings = deque()
    self.runnings = []
    try:
        poller = select.poll()
    except AttributeError:
        # select.poll not implemented for this OS
        poller = None
    self.pipepoller = poller
def __init__(self, suite):
    """Configure and start the suite's HTTP(S) server.

    Reads the allowed port range, digest hash algorithm and communication
    method from the global configuration, loads the suite passphrase (and
    the SSL certificate/key unless the method is 'http'), then calls
    ``self.start()``.

    Args:
        suite: Suite name, used to look up the suite's auth files.

    Raises:
        CylcError: If the SSL certificate or private key cannot be located
            while a method other than 'http' is configured.
    """
    # Suite only needed for back-compat with old clients (see below):
    self.suite = suite
    self.engine = None
    self.port = None

    # Figure out the ports we are allowed to use.
    base_port = int(glbl_cfg().get(['communication', 'base port']))
    max_ports = int(
        glbl_cfg().get(['communication', 'maximum number of ports']))
    # random.shuffle() needs a mutable sequence: a bare range object cannot
    # be shuffled on Python 3, so always materialise a list.
    self.ok_ports = list(range(base_port, base_port + max_ports))
    random.shuffle(self.ok_ports)

    comms_options = glbl_cfg().get(['communication', 'options'])
    # HTTP Digest Auth uses MD5 - pretty secure in this use case.
    # Extending it with extra algorithms is allowed, but won't be
    # supported by most browsers. requests and urllib2 are OK though.
    self.hash_algorithm = "MD5"
    if "SHA1" in comms_options:
        # Note 'SHA' rather than 'SHA1'.
        self.hash_algorithm = "SHA"

    self.srv_files_mgr = SuiteSrvFilesManager()
    self.comms_method = glbl_cfg().get(['communication', 'method'])
    self.get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(
        {
            'cylc': self.srv_files_mgr.get_auth_item(
                self.srv_files_mgr.FILE_BASE_PASSPHRASE,
                suite, content=True),
            'anon': NO_PASSPHRASE
        },
        algorithm=self.hash_algorithm)
    if self.comms_method == 'http':
        self.cert = None
        self.pkey = None
    else:  # if self.comms_method in [None, 'https']:
        try:
            self.cert = self.srv_files_mgr.get_auth_item(
                self.srv_files_mgr.FILE_BASE_SSL_CERT, suite)
            self.pkey = self.srv_files_mgr.get_auth_item(
                self.srv_files_mgr.FILE_BASE_SSL_PEM, suite)
        except SuiteServiceFileError:
            ERR.error("no HTTPS/OpenSSL support. Aborting...")
            raise CylcError("No HTTPS support. "
                            "Configure user's global.rc to use HTTP.")
    self.start()
def prompt(question, force=False, gui=False, no_force=False, no_abort=False,
           keep_above=True):
    """Ask a Yes/No question on behalf of a cylc CLI script.

    Returns True without asking when ``force`` is set (or prompts are
    disabled in the global configuration) and ``no_force`` is not set.
    Otherwise asks via a GTK dialog (``gui``) or on the terminal. On "No"
    the process exits with status 0, unless ``no_abort`` is set, in which
    case False is returned.
    """
    skip_prompt = (
        force or glbl_cfg().get(['disable interactive command prompts']))
    if skip_prompt and not no_force:
        return True

    if gui:
        import gtk
        dialog = gtk.MessageDialog(
            None, gtk.DIALOG_DESTROY_WITH_PARENT, gtk.MESSAGE_QUESTION,
            gtk.BUTTONS_YES_NO, question)
        dialog.set_keep_above(keep_above)
        answered_yes = dialog.run() == gtk.RESPONSE_YES
    else:
        answered_yes = raw_input('%s (y/n)? ' % question) in ['y', 'Y']

    if answered_yes:
        return True
    if no_abort:
        return False
    sys.exit(0)
def _append_job_status_file(suite, task_job, event_time, messages): """Write messages to job status file.""" job_log_name = os.getenv('CYLC_TASK_LOG_ROOT') if not job_log_name: job_log_name = os.path.join( glbl_cfg().get_derived_host_item(suite, 'suite job log directory'), 'job') try: job_status_file = open(job_log_name + '.status', 'a') except IOError: if cylc.flags.debug: import traceback traceback.print_exc() return for severity, message in messages: if message == TASK_OUTPUT_STARTED: job_id = os.getppid() if job_id > 1: # If os.getppid() returns 1, the original job process # is likely killed already job_status_file.write('%s=%s\n' % (CYLC_JOB_PID, job_id)) job_status_file.write('%s=%s\n' % (CYLC_JOB_INIT_TIME, event_time)) elif message == TASK_OUTPUT_SUCCEEDED: job_status_file.write( ('%s=%s\n' % (CYLC_JOB_EXIT, TASK_OUTPUT_SUCCEEDED.upper())) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(FAIL_MESSAGE_PREFIX): job_status_file.write( ('%s=%s\n' % (CYLC_JOB_EXIT, message[len(FAIL_MESSAGE_PREFIX):])) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(ABORT_MESSAGE_PREFIX): job_status_file.write( ('%s=%s\n' % (CYLC_JOB_EXIT, message[len(ABORT_MESSAGE_PREFIX):])) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(VACATION_MESSAGE_PREFIX): # Job vacated, remove entries related to current job job_status_file_name = job_status_file.name job_status_file.close() lines = [] for line in open(job_status_file_name): if not line.startswith('CYLC_JOB_'): lines.append(line) job_status_file = open(job_status_file_name, 'w') for line in lines: job_status_file.write(line) job_status_file.write( '%s=%s|%s|%s\n' % (CYLC_MESSAGE, event_time, severity, message)) else: job_status_file.write( '%s=%s|%s|%s\n' % (CYLC_MESSAGE, event_time, severity, message)) try: job_status_file.close() except IOError: if cylc.flags.debug: import traceback traceback.print_exc()
def list_suites(self, regfilter=None):
    """Return [name, source dir, title] for each registered suite.

    Args:
        regfilter: Optional regular expression string; only suites whose
            name matches it (``re.search``) are returned.

    Raises:
        ValueError: If ``regfilter`` is not a valid regular expression.
    """
    rec_regfilter = None
    if regfilter:
        try:
            rec_regfilter = re.compile(regfilter)
        except re.error as exc:
            raise ValueError("%s: %s" % (regfilter, exc))
    run_d = glbl_cfg().get_host_item('run directory')
    results = []
    for dirpath, dnames, fnames in os.walk(run_d, followlinks=True):
        # Always descend from the top directory, but prune the walk
        # below a directory that has:
        # * .service/
        # * cylc-suite.db: (pre-cylc-7 suites don't have ".service/").
        looks_like_suite = (
            self.DIR_BASE_SRV in dnames or "cylc-suite.db" in fnames)
        if dirpath != run_d and looks_like_suite:
            del dnames[:]
        # Keep only directories with a located source item and a name
        # accepted by the filter.
        reg = os.path.relpath(dirpath, run_d)
        srv_d = os.path.join(dirpath, self.DIR_BASE_SRV)
        if not self._locate_item(self.FILE_BASE_SOURCE, srv_d):
            continue
        if rec_regfilter and not rec_regfilter.search(reg):
            continue
        try:
            entry = [
                reg,
                self.get_suite_source_dir(reg),
                self.get_suite_title(reg),
            ]
        except (IOError, SuiteServiceFileError) as exc:
            LOG.error('%s: %s', reg, exc)
        else:
            results.append(entry)
    return results
def list_suites(self, regfilter=None):
    """Return [name, source dir, title] for each registered suite.

    Args:
        regfilter: Optional regular expression string; only suites whose
            name matches it (``re.search``) are returned.

    Raises:
        ValueError: If ``regfilter`` is not a valid regular expression.
    """
    rec_regfilter = None
    if regfilter:
        try:
            rec_regfilter = re.compile(regfilter)
        except re.error as exc:
            raise ValueError("%s: %s" % (regfilter, exc))
    run_d = glbl_cfg().get_host_item('run directory')
    results = []
    for dirpath, dnames, _ in os.walk(run_d, followlinks=True):
        # Always descend from the top directory, but prune the walk
        # below any directory holding a .service/ directory.
        if dirpath != run_d and self.DIR_BASE_SRV in dnames:
            del dnames[:]
        # Keep only directories with a located source item and a name
        # accepted by the filter.
        reg = os.path.relpath(dirpath, run_d)
        srv_d = os.path.join(dirpath, self.DIR_BASE_SRV)
        if not self._locate_item(self.FILE_BASE_SOURCE, srv_d):
            continue
        if rec_regfilter and not rec_regfilter.search(reg):
            continue
        try:
            entry = [
                reg,
                self.get_suite_source_dir(reg),
                self.get_suite_title(reg),
            ]
        except (IOError, SuiteServiceFileError) as exc:
            LOG.error('%s: %s', reg, exc)
        else:
            results.append(entry)
    return results
def _run_job_cmd(self, cmd_key, suite, itasks, callback): """Run job commands, e.g. poll, kill, etc. Group itasks with their user@host. Put a job command for each user@host to the multiprocess pool. """ if not itasks: return auth_itasks = {} for itask in itasks: if (itask.task_host, itask.task_owner) not in auth_itasks: auth_itasks[(itask.task_host, itask.task_owner)] = [] auth_itasks[(itask.task_host, itask.task_owner)].append(itask) for (host, owner), itasks in sorted(auth_itasks.items()): cmd = ["cylc", cmd_key] if LOG.isEnabledFor(DEBUG): cmd.append("--debug") if is_remote_host(host): cmd.append("--host=%s" % (host)) if is_remote_user(owner): cmd.append("--user=%s" % (owner)) cmd.append("--") cmd.append(glbl_cfg().get_derived_host_item( suite, "suite job log directory", host, owner)) job_log_dirs = [] for itask in sorted(itasks, key=lambda itask: itask.identity): job_log_dirs.append(get_task_job_id( itask.point, itask.tdef.name, itask.submit_num)) cmd += job_log_dirs self.proc_pool.put_command( SubProcContext(cmd_key, cmd), callback, [suite, itasks])
def __init__(self, cached=False):
    """Read run host settings from the global configuration.

    Args:
        cached: Passed to ``glbl_cfg``; the original author notes that
            False returns a new, up-to-date configuration instance.
    """
    global_config = glbl_cfg(cached=cached)

    # Condemned hosts may carry a `!` suffix; compare by FQDN.
    condemned_hosts = []
    for name in global_config.get(['suite servers', 'condemned hosts']):
        condemned_hosts.append(get_fqdn_by_host(name.split('!')[0]))

    # Configured run hosts (default: localhost), leaving out any that
    # cannot be resolved or that are condemned.
    configured = global_config.get(['suite servers', 'run hosts'])
    self.hosts = []
    for host in (configured or ['localhost']):
        try:
            fqdn = get_fqdn_by_host(host)
        except socket.gaierror:
            continue
        if fqdn not in condemned_hosts:
            self.hosts.append(host)

    # Server ranking method and acceptance thresholds, if configured.
    self.rank_method = global_config.get(
        ['suite servers', 'run host select', 'rank'])
    raw_thresholds = global_config.get(
        ['suite servers', 'run host select', 'thresholds'])
    self.parsed_thresholds = self.parse_thresholds(raw_thresholds)
def prompt(question, force=False, gui=False, no_force=False, no_abort=False,
           keep_above=True):
    """Ask a Yes/No question on behalf of a cylc CLI script.

    Returns True without asking when ``force`` is set (or prompts are
    disabled in the global configuration) and ``no_force`` is not set.
    Otherwise asks on the terminal; ``gui`` is not implemented. On "No"
    the process exits with status 0, unless ``no_abort`` is set, in which
    case False is returned.
    """
    skip_prompt = (
        force or glbl_cfg().get(['disable interactive command prompts']))
    if skip_prompt and not no_force:
        return True
    if gui:
        raise NotImplementedError
    answered_yes = input('%s (y/n)? ' % question) in ['y', 'Y']
    if answered_yes:
        return True
    if no_abort:
        return False
    sys.exit(0)
def _append_job_status_file(suite, task_job, event_time, messages): """Write messages to job status file.""" job_log_name = os.getenv('CYLC_TASK_LOG_ROOT') if not job_log_name: job_log_name = os.path.join( glbl_cfg().get_derived_host_item(suite, 'suite job log directory'), 'job') try: job_status_file = open(job_log_name + '.status', 'ab') except IOError: if cylc.flags.debug: import traceback traceback.print_exc() return for severity, message in messages: if message == TASK_OUTPUT_STARTED: job_id = os.getppid() if job_id > 1: # If os.getppid() returns 1, the original job process # is likely killed already job_status_file.write('%s=%s\n' % (CYLC_JOB_PID, job_id)) job_status_file.write('%s=%s\n' % (CYLC_JOB_INIT_TIME, event_time)) elif message == TASK_OUTPUT_SUCCEEDED: job_status_file.write( ('%s=%s\n' % (CYLC_JOB_EXIT, TASK_OUTPUT_SUCCEEDED.upper())) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(FAIL_MESSAGE_PREFIX): job_status_file.write( ('%s=%s\n' % ( CYLC_JOB_EXIT, message[len(FAIL_MESSAGE_PREFIX):])) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(ABORT_MESSAGE_PREFIX): job_status_file.write( ('%s=%s\n' % ( CYLC_JOB_EXIT, message[len(ABORT_MESSAGE_PREFIX):])) + ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time))) elif message.startswith(VACATION_MESSAGE_PREFIX): # Job vacated, remove entries related to current job job_status_file_name = job_status_file.name job_status_file.close() lines = [] for line in open(job_status_file_name): if not line.startswith('CYLC_JOB_'): lines.append(line) job_status_file = open(job_status_file_name, 'wb') for line in lines: job_status_file.write(line) job_status_file.write('%s=%s|%s|%s\n' % ( CYLC_MESSAGE, event_time, severity, message)) else: job_status_file.write('%s=%s|%s|%s\n' % ( CYLC_MESSAGE, event_time, severity, message)) try: job_status_file.close() except IOError: if cylc.flags.debug: import traceback traceback.print_exc()
def _remote_init_items(self, host, owner):
    """Return the files to install on a task remote.

    Which files are listed depends on the 'task communication method'
    configured for (host, owner): the contact file for ssh/http/https,
    plus the passphrase for http/https, plus the SSL certificate for https.

    Returns:
        list of (path, name) pairs, where name is the relative path under
        the suite run directory.
    """
    mgr = self.suite_srv_files_mgr
    comm_meth = glbl_cfg().get_host_item(
        'task communication method', host, owner)
    LOG.debug('comm_meth=%s' % comm_meth)
    items = []
    if comm_meth in ['ssh', 'http', 'https']:
        # Contact file
        contact_src = mgr.get_contact_file(self.suite)
        contact_dest = os.path.join(mgr.DIR_BASE_SRV, mgr.FILE_BASE_CONTACT)
        items.append((contact_src, contact_dest))
    if comm_meth in ['http', 'https']:
        # Passphrase file
        passphrase_src = mgr.get_auth_item(
            mgr.FILE_BASE_PASSPHRASE, self.suite)
        passphrase_dest = os.path.join(
            mgr.DIR_BASE_SRV, mgr.FILE_BASE_PASSPHRASE)
        items.append((passphrase_src, passphrase_dest))
    if comm_meth in ['https']:
        # SSL cert file
        cert_src = mgr.get_auth_item(mgr.FILE_BASE_SSL_CERT, self.suite)
        cert_dest = os.path.join(mgr.DIR_BASE_SRV, mgr.FILE_BASE_SSL_CERT)
        items.append((cert_src, cert_dest))
    return items
def __init__(self, suite, no_detach=False):
    """Open a file handler on the suite's log file.

    Args:
        suite: Suite name; the log path is the derived 'suite log' item.
        no_detach: Stored on the handler as ``self.no_detach``.
    """
    log_path = glbl_cfg().get_derived_host_item(suite, 'suite log')
    logging.FileHandler.__init__(self, log_path)
    # Set after the base initialiser, which also assigns ``formatter``.
    self.formatter = CylcLogFormatter()
    self.no_detach = no_detach
    self.stamp = None
    self.header_records = []
def remote_tidy(self):
    """Remove suite contact files from initialised remotes.

    Call "cylc remote-tidy".
    This method is called on suite shutdown, so we want nothing to hang.
    Timeout any incomplete commands after 10 seconds.

    Also remove UUID file on suite host ".service/uuid".

    Commands that exit with a non-zero status are reported as warnings;
    nothing is raised for them.
    """
    def _reap(host, owner, cmd, proc):
        """Collect output of a finished command; warn if it failed."""
        # Decode here so both the normal and the terminated path log text.
        out, err = (f.decode() for f in proc.communicate())
        if proc.wait():
            LOG.warning(TaskRemoteMgmtError(
                TaskRemoteMgmtError.MSG_TIDY,
                (host, owner), ' '.join(quote(item) for item in cmd),
                proc.returncode, out, err))

    # Remove UUID file
    uuid_fname = os.path.join(
        self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
        FILE_BASE_UUID)
    try:
        os.unlink(uuid_fname)
    except OSError:
        pass
    # Issue all SSH commands in parallel
    procs = {}
    # Share one null-device handle between all commands and close it once
    # they are launched, instead of leaving one open handle per command.
    with open(os.devnull) as devnull:
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flags.debug:
                cmd.append('--debug')
            cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                self.suite, 'suite run directory', host, owner)))
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=devnull))
    # Wait for commands to complete for a max of 10 seconds
    timeout = time() + 10.0
    while procs and time() < timeout:
        # Iterate over a copy: finished entries are removed as we go.
        for (host, owner), (cmd, proc) in procs.copy().items():
            if proc.poll() is None:
                continue
            del procs[(host, owner)]
            _reap(host, owner, cmd, proc)
    # Terminate any remaining commands
    for (host, owner), (cmd, proc) in procs.items():
        try:
            proc.terminate()
        except OSError:
            pass
        _reap(host, owner, cmd, proc)
def _get_priv_level(self, auth_user): """Get the privilege level for this authenticated user.""" if auth_user == "cylc": return PRIVILEGE_LEVELS[-1] elif self.schd.config.cfg['cylc']['authentication']['public']: return self.schd.config.cfg['cylc']['authentication']['public'] else: return glbl_cfg().get(['authentication', 'public'])
def get_task_job_log(suite, point, name, submit_num=None, suffix=None):
    """Return the full path of a task job log.

    The path is the suite job log directory joined with the task job ID
    and, when ``suffix`` is not None, with ``suffix`` as a final component.
    """
    log_dir = glbl_cfg().get_derived_host_item(
        suite, "suite job log directory")
    parts = [log_dir, get_task_job_id(point, name, submit_num)]
    if suffix is not None:
        parts.append(suffix)
    return os.path.join(*parts)
def get_task_job_log(
        self, suite, point, name, submit_num=None, tail=None):
    """Return the path of a task job log.

    The path is the suite job log directory joined with the task job ID
    and, when ``tail`` is truthy, with ``tail`` as a final component.
    """
    log_dir = glbl_cfg().get_derived_host_item(
        suite, "suite job log directory")
    parts = [log_dir, self.get_task_job_id(point, name, submit_num)]
    if tail:
        parts.append(tail)
    return os.path.join(*parts)
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the service directory of a suite.

    $CYLC_SUITE_RUN_DIR is used as the run directory only when the
    environment describes this very suite (same $CYLC_SUITE_NAME and
    $CYLC_SUITE_OWNER); otherwise the run directory is derived from the
    global configuration.

    Args:
        reg: Suite name.
        suite_owner: Suite owner; defaults to the current user.
    """
    if not suite_owner:
        suite_owner = get_user()
    run_d = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_this_suite = (
        run_d and
        os.getenv("CYLC_SUITE_NAME") == reg and
        os.getenv("CYLC_SUITE_OWNER") == suite_owner)
    if not env_is_this_suite:
        run_d = glbl_cfg().get_derived_host_item(
            reg, 'suite run directory')
    return os.path.join(run_d, self.DIR_BASE_SRV)
def run(self):
    """Launch the command and start tailing its output into the text view.

    The command runs through the shell with stdin read from the null
    device and stderr merged into ``self.stdoutfile``. A 40 ms timer drives
    ``self.pulse_proc_progress`` and a Tailer follows the output file.
    """
    child = Popen(
        self.command,
        stdin=open(os.devnull),
        stdout=self.stdoutfile,
        stderr=STDOUT,
        shell=True)
    self.proc = child
    gobject.timeout_add(40, self.pulse_proc_progress)
    template = glbl_cfg().get_host_item("tail command template")
    tail_cmd = template % {'filename': self.stdoutfile.name}
    updater = Tailer(self.textview, tail_cmd, pollable=child)
    self.stdout_updater = updater
    updater.start()
def __init__(self, size=None):
    """Create an empty process pool.

    Args:
        size: Pool size; when falsy the 'process pool size' global
            configuration item is used instead.
    """
    # A falsy size is still handed to the configuration lookup as its
    # second argument, exactly as given by the caller.
    self.size = size or glbl_cfg().get(['process pool size'], size)
    # Close queue
    self.closed = False
    # No more job submit if True
    self.stopping = False
    # .stopping may be set by an API command in a different thread
    self.stopping_lock = RLock()
    self.queuings = deque()
    self.runnings = []
def should_rollover(self, record):
    """Return True if the log should roll over before writing ``record``.

    Always true when no stamp or stream is set yet. Otherwise true when
    the current end of the stream plus the UTF-8 encoded size of the
    formatted record would reach the configured maximum size, which is
    never taken to be below ``self.MIN_BYTES``.
    """
    if self.stamp is None or self.stream is None:
        return True
    configured = glbl_cfg().get([self.GLBL_KEY, 'maximum size in bytes'])
    # No silly value
    max_bytes = max(configured, self.MIN_BYTES)
    encoded = ("%s\n" % self.format(record)).encode('utf8')
    # due to non-posix-compliant Windows feature
    self.stream.seek(0, 2)
    return self.stream.tell() + len(encoded) >= max_bytes
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the service directory of a suite.

    $CYLC_SUITE_RUN_DIR is used as the run directory only when the
    environment describes this very suite (same $CYLC_SUITE_NAME and
    $CYLC_SUITE_OWNER); otherwise the run directory is derived from the
    global configuration.

    Args:
        reg: Suite name.
        suite_owner: Suite owner; defaults to the current user.
    """
    owner = suite_owner if suite_owner else get_user()
    run_d = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_this_suite = (
        run_d and
        os.getenv("CYLC_SUITE_NAME") == reg and
        os.getenv("CYLC_SUITE_OWNER") == owner)
    if not env_is_this_suite:
        run_d = glbl_cfg().get_derived_host_item(reg, 'suite run directory')
    return os.path.join(run_d, self.DIR_BASE_SRV)
def main(is_restart=False):
    """Entry point of the suite run/restart command.

    Registers the suite when no name is given, checks for an existing
    contact file, creates auth files, then either re-invokes the command on
    an appointed remote host or starts the scheduler locally.

    Args:
        is_restart: True for "restart", False for "run".
    """
    options, args = parse_commandline(is_restart)
    if not args:
        # Auto-registration: "cylc run" (no args) in source dir.
        try:
            reg = SuiteSrvFilesManager().register()
        except SuiteServiceFileError as exc:
            sys.exit(exc)
        # Replace this process with "cylc run REG ..." for 'ps -f'.
        os.execv(sys.argv[0], [sys.argv[0]] + [reg] + sys.argv[1:])

    suite_name = args[0]

    # Check suite is not already running before start of host selection.
    try:
        SuiteSrvFilesManager().detect_old_contact_file(suite_name)
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    # Create auth files if needed. On a shared FS if the suite host changes
    # this may (will?) renew the ssl.cert to reflect the change in host name.
    SuiteSrvFilesManager().create_auth_files(suite_name)

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flags.debug:
                raise
            sys.exit(str(exc))
        if is_remote_host(host):
            sub_command = "restart" if is_restart else "run"
            # "--host=localhost" prevents recursive host selection.
            base_cmd = [sub_command] + sys.argv[1:] + ["--host=localhost"]
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        SuiteSrvFilesManager().get_suite_source_dir(suite_name, options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        run_dir = glbl_cfg().get_derived_host_item(
            suite_name, 'suite run directory')
        SuiteSrvFilesManager().register(suite_name, run_dir)

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()
def remote_host_select(self, host_str):
    """Evaluate a task host string.

    Arguments:
        host_str (str):
            An explicit host name, a command in back-tick or $(command)
            format, or an environment variable holding a hostname.

    Return (str):
        None if a host selection command has just been launched or has not
        yet completed.
        'localhost' if host_str is empty or if the evaluated host name is
        not remote.
        Otherwise, the evaluated host name.

    Raise TaskRemoteMgmtError if the stored result of a host selection
    command is such an error.
    """
    if not host_str:
        return 'localhost'

    # Host selection command: $(command) or `command`
    match = REC_COMMAND.match(host_str)
    if match:
        cmd_str = match.groups()[1]
        if cmd_str not in self.remote_host_str_map:
            # Command not launched (or already reset)
            timeout = glbl_cfg().get(['task host select command timeout'])
            cmd = ['bash', '-c', cmd_str]
            if timeout:
                cmd = ['timeout', str(int(timeout))] + cmd
            self.proc_pool.put_command(
                SuiteProcContext(
                    'remote-host-select', cmd, env=dict(os.environ)),
                self._remote_host_select_callback, [cmd_str])
            self.remote_host_str_map[cmd_str] = None
            return self.remote_host_str_map[cmd_str]
        # Command recently launched
        value = self.remote_host_str_map[cmd_str]
        if isinstance(value, TaskRemoteMgmtError):
            raise value  # command failed
        if value is None:
            return  # command not yet ready
        host_str = value  # command succeeded

    # Environment variable substitution
    host_str = os.path.expandvars(host_str)
    # Remote?
    if is_remote_host(host_str):
        return host_str
    return 'localhost'
def main(is_restart=False):
    """Entry point of the suite run/restart command.

    Registers the suite when no name is given, checks for an existing
    contact file, creates auth files, then either re-invokes the command on
    an appointed remote host or starts the scheduler locally.

    Args:
        is_restart: True for "restart", False for "run".
    """
    options, args = parse_commandline(is_restart)
    if not args:
        # Auto-registration: "cylc run" (no args) in source dir.
        try:
            reg = SuiteSrvFilesManager().register()
        except SuiteServiceFileError as exc:
            sys.exit(exc)
        # Replace this process with "cylc run REG ..." for 'ps -f'.
        os.execv(sys.argv[0], [sys.argv[0]] + [reg] + sys.argv[1:])

    suite_name = args[0]

    # Check suite is not already running before start of host selection.
    try:
        SuiteSrvFilesManager().detect_old_contact_file(suite_name)
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    # Create auth files if needed.
    SuiteSrvFilesManager().create_auth_files(suite_name)

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flags.debug:
                raise
            sys.exit(str(exc))
        if is_remote_host(host):
            sub_command = "restart" if is_restart else "run"
            # "--host=localhost" prevents recursive host selection.
            base_cmd = [sub_command] + sys.argv[1:] + ["--host=localhost"]
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        SuiteSrvFilesManager().get_suite_source_dir(suite_name, options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        run_dir = glbl_cfg().get_derived_host_item(
            suite_name, 'suite run directory')
        SuiteSrvFilesManager().register(suite_name, run_dir)

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()
def __str__(self):
    """Return an error message quoting the relevant global settings.

    The message lists the current values of the 'suite servers' items
    'run hosts', 'run host select -> rank' and
    'run host select -> thresholds' from the global configuration.

    """
    msg = ("\nERROR: No hosts currently compatible with this global "
           "configuration:")
    item_tails = (
        ['run hosts'],
        ['run host select', 'rank'],
        ['run host select', 'thresholds'],
    )
    for tail in item_tails:
        keys = ['suite servers'] + tail
        heading = ' -> '.join(keys) + ':'
        # The value is indented one space further than its heading.
        setting = ' ' + str(glbl_cfg().get(keys))
        msg = '\n '.join([msg, heading, setting])
    return msg
def remote_host_select(self, host_str):
    """Work out the host name that a task host setting refers to.

    Arguments:
        host_str (str):
            An explicit host name, a host selection command in back-tick
            or $(command) form, or a string with environment variables
            to expand.

    Return (str):
        None if a host selection command has been launched but has not
        yet produced a result.
        'localhost' if host_str is empty, or if the evaluated name is not
        remote according to is_remote_host().
        Otherwise, the evaluated host name.

    Raise the stored TaskRemoteMgmtError if the host selection command
    failed.

    """
    if not host_str:
        return 'localhost'

    # Host selection command: $(command) or `command`
    found = REC_COMMAND.match(host_str)
    if found:
        cmd_str = found.groups()[1]
        if cmd_str in self.remote_host_str_map:
            # Command recently launched
            result = self.remote_host_str_map[cmd_str]
            if isinstance(result, TaskRemoteMgmtError):
                raise result  # command failed
            if result is None:
                return None  # command not yet ready
            host_str = result  # command succeeded
        else:
            # Command not launched (or already reset)
            timeout = glbl_cfg().get(['task host select command timeout'])
            shell_cmd = ['bash', '-c', cmd_str]
            if timeout:
                shell_cmd = ['timeout', str(int(timeout))] + shell_cmd
            ctx = SubProcContext(
                'remote-host-select', shell_cmd, env=dict(os.environ))
            self.proc_pool.put_command(
                ctx, self._remote_host_select_callback, [cmd_str])
            # Register the command as pending.
            self.remote_host_str_map[cmd_str] = None
            return self.remote_host_str_map[cmd_str]

    # Environment variable substitution
    host_str = os.path.expandvars(host_str)
    # Remote?
    if not is_remote_host(host_str):
        return 'localhost'
    return host_str
def __init__(self, suite, test_params=None):
    """Initialise the singleton suite log settings.

    Arguments:
        suite:
            Suite identifier used to look up the suite log directory in
            the global configuration (ignored if test_params is given).
        test_params (dict):
            Optional test settings. If given, it must provide
            'max_bytes', 'roll_at_startup' and 'ldir', which are used in
            place of the global configuration.

    Raise Exception if an instance has already been registered.

    """
    if SuiteLog.__INSTANCE:
        # Single literal: adjacent literals previously joined without a
        # space, giving "singletoninstance.".
        raise Exception(
            "Attempting to initiate a second singleton instance.")

    self._group = None

    if test_params:
        self.is_test = True
        self.max_bytes = test_params['max_bytes']
        self.roll_at_startup = test_params['roll_at_startup']
        self.archive_length = 4
        self.ldir = test_params['ldir']
    else:
        self.is_test = False
        self.max_bytes = glbl_cfg().get(
            ['suite logging', 'maximum size in bytes'])
        self.roll_at_startup = glbl_cfg().get(
            ['suite logging', 'roll over at start-up'])
        self.archive_length = glbl_cfg().get(
            ['suite logging', 'rolling archive length'])
        self.ldir = glbl_cfg().get_derived_host_item(
            suite, 'suite log directory')

    log_names = (SUITE_LOG, SUITE_OUT, SUITE_ERR)

    # Log paths.
    self.log_paths = {
        name: os.path.join(self.ldir, name) for name in log_names}

    # The loggers: none are created here.
    self.loggers = {name: None for name in log_names}

    # File streams
    self.streams = []

    SuiteLog.__INSTANCE = self
def __init__(self, suite, test_params=None):
    """Initialise the singleton suite log settings.

    Arguments:
        suite:
            Suite identifier used to look up the suite log directory in
            the global configuration (ignored if test_params is given).
        test_params (dict):
            Optional test settings. If given, it must provide
            'max_bytes', 'roll_at_startup' and 'ldir', which are used in
            place of the global configuration.

    Raise Exception if an instance has already been registered.

    """
    if SuiteLog.__INSTANCE:
        # Single literal: adjacent literals previously joined without a
        # space, giving "singletoninstance.".
        raise Exception(
            "Attempting to initiate a second singleton instance.")

    self._group = None

    if test_params:
        self.is_test = True
        self.max_bytes = test_params['max_bytes']
        self.roll_at_startup = test_params['roll_at_startup']
        self.archive_length = 4
        self.ldir = test_params['ldir']
    else:
        self.is_test = False
        self.max_bytes = glbl_cfg().get(
            ['suite logging', 'maximum size in bytes'])
        self.roll_at_startup = glbl_cfg().get(
            ['suite logging', 'roll over at start-up'])
        self.archive_length = glbl_cfg().get(
            ['suite logging', 'rolling archive length'])
        self.ldir = glbl_cfg().get_derived_host_item(
            suite, 'suite log directory')

    log_names = (self.LOG, self.OUT, self.ERR)

    # Log paths.
    self.log_paths = {
        name: os.path.join(self.ldir, name) for name in log_names}

    # The loggers: none are created here.
    self.loggers = {name: None for name in log_names}

    # File streams
    self.streams = []

    SuiteLog.__INSTANCE = self
def get_events_conf(config, key, default=None):
    """Look up a [cylc][[events]] setting.

    The suite configuration is consulted first, then the global
    configuration. The first value that is present and not None is
    returned; if there is none, default is returned.

    """
    sources = (
        config.cfg['cylc']['events'],
        glbl_cfg().get(['cylc', 'events']),
    )
    for source in sources:
        try:
            found = source[key]
        except KeyError:
            continue
        if found is not None:
            return found
    return default
def _get_events_conf(self, itask, key, default=None):
    """Look up a task events setting.

    Sources are consulted in order: the "events" broadcast for the task,
    the "events" section of the task runtime configuration, then the
    global "task events" settings. The first value that is not None is
    returned; if there is none, default is returned.

    """
    sources = (
        self.broadcast_mgr.get_broadcast(itask.identity).get("events"),
        itask.tdef.rtconfig["events"],
        glbl_cfg().get()["task events"],
    )
    for source in sources:
        try:
            found = source.get(key)
        except (AttributeError, ItemNotFoundError, KeyError):
            # E.g. the source is None: move on to the next one.
            continue
        if found is not None:
            return found
    return default
def get_host_conf(self, itask, key, default=None, skey="remote"):
    """Look up a host setting for a task.

    Sources are consulted in order: the skey section of the broadcast
    settings for the task, the skey section of the task runtime
    configuration, then the global configuration for the task's host and
    owner. If none provides the item, default is returned.

    """
    broadcast = self.broadcast_mgr.get_broadcast(itask.identity)
    if skey in broadcast and broadcast[skey].get(key) is not None:
        return broadcast[skey][key]

    suite_section = itask.tdef.rtconfig[skey]
    if suite_section.get(key) is not None:
        return suite_section[key]

    try:
        return glbl_cfg().get_host_item(
            key, itask.task_host, itask.task_owner)
    except (KeyError, ItemNotFoundError):
        return default
def _call_server_get_url(self, function, **kwargs):
    """Build the request URL for a server function.

    Arguments:
        function (str): name of the function; used as the URL path.
        **kwargs: if any, encoded as the URL query string. A sequence
            value gives one "key=value" pair per element.

    Return (str): a URL of the form scheme://host:port/function[?query].

    """
    scheme = self.comms1.get(self.srv_files_mgr.KEY_COMMS_PROTOCOL)
    if scheme is None:
        # Use standard setting from global configuration
        scheme = glbl_cfg().get(['communication', 'method'])
    url = '%s://%s:%s/%s' % (scheme, self.host, self.port, function)
    # If there are any parameters left in the dict after popping,
    # append them to the url.
    if kwargs:
        # "urllib.urlencode" only exists on Python 2; on Python 3 the
        # function lives in "urllib.parse".
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        url += "?" + urlencode(kwargs, doseq=True)
    return url
def get_host_conf(self, itask, key, default=None, skey="remote"):
    """Return a host setting for a task, or default if it is not set.

    The skey section of the task's broadcast settings takes precedence,
    followed by the skey section of the task runtime configuration, and
    finally the global configuration for the task's host and owner.

    """
    overrides = self.broadcast_mgr.get_broadcast(itask.identity)
    if skey in overrides and overrides[skey].get(key) is not None:
        return overrides[skey][key]

    rtconfig_section = itask.tdef.rtconfig[skey]
    if rtconfig_section.get(key) is not None:
        return rtconfig_section[key]

    try:
        value = glbl_cfg().get_host_item(
            key, itask.task_host, itask.task_owner)
    except (KeyError, ItemNotFoundError):
        # Not a known global item either: fall back to the default.
        return default
    return value
def should_rollover(self, record):
    """Return True if the log should roll over before writing record.

    This is the case if no log file has been started yet (no stamp or no
    stream), or if appending the formatted record would bring the file to
    the configured maximum size or beyond. A configured maximum below
    MIN_BYTES is replaced by MIN_BYTES.

    Raise SystemExit if the stream cannot be positioned because of a
    ValueError (e.g. the file is closed).

    """
    if self.stamp is None or self.stream is None:
        return True

    size_limit = glbl_cfg().get([self.GLBL_KEY, 'maximum size in bytes'])
    if size_limit < self.MIN_BYTES:
        # No silly value
        size_limit = self.MIN_BYTES

    line = "%s\n" % self.format(record)
    try:
        # due to non-posix-compliant Windows feature
        self.stream.seek(0, 2)
    except ValueError as exc:
        # intended to catch - ValueError: I/O operation on closed file
        raise SystemExit(exc)

    projected_size = self.stream.tell() + len(line.encode('utf8'))
    return projected_size >= size_limit
def _call_server_get_url(self, function, **kwargs):
    """Build the request URL for a server function.

    Arguments:
        function (str): name of the function; used as the URL path.
        **kwargs: if any, encoded as the URL query string. A sequence
            value gives one "key=value" pair per element.

    Return (str): a URL of the form scheme://host:port/function[?query].

    """
    comms_protocol = self.comms_protocol
    if comms_protocol is None:
        # Use standard setting from global configuration
        comms_protocol = glbl_cfg().get(['communication', 'method'])
    url = '%s://%s:%s/%s' % (
        comms_protocol, self.host, self.port, function)
    # If there are any parameters left in the dict after popping,
    # append them to the url.
    if kwargs:
        # "urllib.urlencode" only exists on Python 2; on Python 3 the
        # function lives in "urllib.parse".
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        url += "?" + urlencode(kwargs, doseq=True)
    return url
def _call_server_get_url(self, function, **kwargs):
    """Build the request URL for a server function.

    Arguments:
        function (str): name of the function; used as the URL path.
        **kwargs: if any, encoded as the URL query string. A sequence
            value gives one "key=value" pair per element.

    Return (str): a URL of the form scheme://host:port/function[?query].

    """
    scheme = self.comms1.get(self.srv_files_mgr.KEY_COMMS_PROTOCOL)
    if scheme is None:
        # Use standard setting from global configuration
        scheme = glbl_cfg().get(['communication', 'method'])
    url = '%s://%s:%s/%s' % (
        scheme, self.host, self.port, function)
    # If there are any parameters left in the dict after popping,
    # append them to the url.
    if kwargs:
        # "urllib.urlencode" only exists on Python 2; on Python 3 the
        # function lives in "urllib.parse".
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        url += "?" + urlencode(kwargs, doseq=True)
    return url
def do_rollover(self):
    """Start a new time-stamped log file and switch logging over to it.

    Steps: create "<baseFilename>.<time stamp>", point the baseFilename
    symbolic link at it, remove the oldest stamped files beyond the
    configured rolling archive length, reopen the stream (duplicating it
    onto STDOUT and STDERR unless no_detach is set), then re-emit the
    header records.

    """
    # Generate new file name
    self.stamp = get_current_time_string(use_basic_format=True)
    new_path = self.baseFilename + '.' + self.stamp
    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    # Touch file
    with open(new_path, 'w+'):
        os.utime(new_path, None)

    # Update symlink (lexists also catches a dangling link)
    link_path = self.baseFilename
    if os.path.exists(link_path) or os.path.lexists(link_path):
        os.unlink(link_path)
    os.symlink(os.path.basename(new_path), link_path)

    # Housekeep log files: remove the first in sorted order until within
    # the archive length.
    keep_count = glbl_cfg().get([self.GLBL_KEY, 'rolling archive length'])
    if keep_count:
        stamped_logs = sorted(glob(self.baseFilename + '.*'))
        while len(stamped_logs) > keep_count:
            os.unlink(stamped_logs.pop(0))

    # Reopen stream, redirect STDOUT and STDERR to log
    if self.stream:
        self.stream.close()
        self.stream = None
    self.stream = self._open()
    # Dup STDOUT and STDERR in detach mode
    if not self.no_detach:
        for std_stream in (sys.stdout, sys.stderr):
            os.dup2(self.stream.fileno(), std_stream.fileno())

    # Emit header records (should only do this for subsequent log files)
    for header_record in self.header_records:
        attrs = header_record.__dict__
        if self.FILE_NUM in attrs:
            # Increment log file number; it is also the last format arg.
            attrs[self.FILE_NUM] += 1
            header_record.args = (
                header_record.args[0:-1] + (attrs[self.FILE_NUM],))
        logging.FileHandler.emit(self, header_record)
def prompt(question, force=False, gui=False, no_force=False,
           no_abort=False, keep_above=True):
    """Ask a Yes/No question on the command line.

    Return True without asking if force is set or interactive prompts are
    disabled in the global configuration, unless no_force is set.
    Otherwise return True on a "y" or "Y" answer. On any other answer,
    return False if no_abort is set, else exit the program with status 0.

    Raise NotImplementedError if a GUI prompt is requested.

    """
    skip_prompt = (
        force or glbl_cfg().get(['disable interactive command prompts']))
    if skip_prompt and not no_force:
        return True
    if gui:
        raise NotImplementedError
    answer = input('%s (y/n)? ' % question)
    if answer in ['y', 'Y']:
        return True
    if no_abort:
        return False
    sys.exit(0)
def suite_state(suite, task, point, offset=None, status='succeeded',
                message=None, cylc_run_dir=None, debug=False):
    """Query a suite database for the state of a task.

    The task is looked up at the given point (shifted by offset if one is
    given, and re-formatted to the point format reported by the database
    if there is one). If message is given, the query is by message,
    otherwise by status.

    Return (tuple):
        (False, None) if the database cannot be opened, e.g. because the
        target suite has not been started.
        Otherwise (satisfied, results), where satisfied is the outcome of
        the query and results is a dict of the arguments used, to pass on
        to triggering tasks.

    """
    if not cylc_run_dir:
        cylc_run_dir = glbl_cfg().get_host_item('run directory')
    cylc_run_dir = os.path.expandvars(os.path.expanduser(cylc_run_dir))

    if offset is not None:
        point = str(add_offset(point, offset))

    try:
        checker = CylcSuiteDBChecker(cylc_run_dir, suite)
    except (OSError, sqlite3.Error):
        # Failed to connect to DB; target suite may not be started.
        return (False, None)

    point_format = checker.get_remote_point_format()
    if point_format:
        point = str(
            TimePointParser().parse(point, dump_format=point_format))

    if message is None:
        criterion = {'status': status}
    else:
        criterion = {'message': message}
    satisfied = checker.task_state_met(task, point, **criterion)

    results = {
        'suite': suite,
        'task': task,
        'point': point,
        'offset': offset,
        'status': status,
        'message': message,
        'cylc_run_dir': cylc_run_dir,
    }
    return (satisfied, results)
def _get_host_item(job_conf, key):
    """Look up a global configuration host item for a job.

    The item is looked up for the "host" and "owner" of job_conf.

    """
    host = job_conf["host"]
    owner = job_conf["owner"]
    return glbl_cfg().get_host_item(key, host, owner)
def __init__(self, cfg, updater, theme, info_bar, xdot):
    """Initialise the graph updater state.

    The arguments are stored as attributes of the same names. The graph
    starts fully ungrouped if "graph" is in cfg.ungrouped_views, else
    fully grouped.

    """
    super(GraphUpdater, self).__init__()

    # Control flags.
    self.quit = False
    self.cleared = False
    self.ignore_suicide = True
    self.first_update = False
    self.graph_disconnect = False
    self.action_required = True

    # Cycle point focus and range.
    self.focus_start_point_string = None
    self.focus_stop_point_string = None
    self.oldest_point_string = None
    self.newest_point_string = None

    self.xdot = xdot

    # Layout options.
    self.orientation = "TB"  # Top to Bottom ordering of nodes
    self.best_fit = True  # zoom to page size
    self.normal_fit = False  # zoom to 1.0 scale
    self.crop = False
    self.subgraphs_on = False  # organise by cycle point.

    self.descendants = {}
    self.all_families = []
    self.write_dot_frames = False
    self.prev_graph_id = ()

    self.cfg = cfg
    self.updater = updater
    self.theme = theme
    self.info_bar = info_bar

    # Latest data.
    self.state_summary = {}
    self.fam_state_summary = {}
    self.global_summary = {}
    self.last_update_time = None
    self.god = None
    self.mode = "waiting..."
    self.update_time_str = "waiting..."

    # empty graphw object:
    self.graphw = CGraphPlain(self.cfg.suite)

    # lists of nodes to newly group or ungroup (not of all currently
    # grouped and ungrouped nodes - still held server side)
    self.group = []
    self.ungroup = []
    self.have_leaves_and_feet = False
    self.leaves = []
    self.feet = []

    self.ungroup_recursive = False
    # Exactly one of ungroup_all and group_all is set.
    self.ungroup_all = "graph" in self.cfg.ungrouped_views
    self.group_all = not self.ungroup_all

    self.graph_frame_count = 0
    self.suite_share_dir = glbl_cfg().get_derived_host_item(
        self.cfg.suite, 'suite share directory')
def _get_public_priv(self):
    """Return the parsed public privilege level of this suite.

    The suite's own [cylc][[authentication]]public setting is used if it
    is set, otherwise the global configuration setting.

    """
    level = self.schd.config.cfg['cylc']['authentication']['public']
    if not level:
        level = glbl_cfg().get(['authentication', 'public'])
    return Priv.parse(level)
def _get_derived_host_item(job_conf, key):
    """Look up a derived global configuration host item for a job.

    The item is looked up for the "suite_name", "host" and "owner" of
    job_conf.

    """
    suite_name = job_conf['suite_name']
    host = job_conf["host"]
    owner = job_conf["owner"]
    return glbl_cfg().get_derived_host_item(suite_name, key, host, owner)
def get_dir_for_suite(suite):
    """Return the log directory of a suite.

    This only queries the global configuration; it does not set up suite
    logging.

    """
    log_dir = glbl_cfg().get_derived_host_item(suite, 'suite log directory')
    return log_dir