def test_sprocess_communicate_with_process(self):
    foo = ' foo'
    bar = ' bar'
    cmd = ["echo", "this is a command" + foo + bar]
    p = procopen(cmd, stdoutpipe=True)
    stdout, _ = p.communicate()
    compare(stdout, b"this is a command foo bar\n")
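# The test above and the helpers below all go through "procopen", cylc's thin
# wrapper around subprocess.Popen (defined in cylc_subproc). A minimal sketch
# of the keyword mapping assumed throughout this file -- illustrative only;
# the real wrapper also logs every call that opens a shell:

import subprocess


def _procopen_sketch(cmd, stdin=None, stdoutpipe=False, stderrpipe=False,
                     usesh=False, env=None, preexec_fn=None):
    """Illustrative stand-in for cylc_subproc.procopen (not the real one)."""
    return subprocess.Popen(
        cmd,
        stdin=stdin,
        stdout=subprocess.PIPE if stdoutpipe else None,
        stderr=subprocess.PIPE if stderrpipe else None,
        shell=usesh, env=env, preexec_fn=preexec_fn)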
def job_kill(self, st_file_path):
    """Ask batch system to terminate the job specified in "st_file_path".

    Return a (ret_code, err) tuple: (0, "") on success, or a non-zero
    ret_code and an error message on failure.

    """
    # SUITE_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status
    self.configure_suite_run_dir(st_file_path.rsplit(os.sep, 6)[0])
    try:
        st_file = open(st_file_path)
        for line in st_file:
            if line.startswith(self.CYLC_BATCH_SYS_NAME + "="):
                batch_sys = self._get_sys(line.strip().split("=", 1)[1])
                break
        else:
            return (1, "Cannot determine batch system from %s file" % (
                JOB_LOG_STATUS))
        st_file.seek(0, 0)  # rewind
        if getattr(batch_sys, "SHOULD_KILL_PROC_GROUP", False):
            for line in st_file:
                if line.startswith(CYLC_JOB_PID + "="):
                    pid = line.strip().split("=", 1)[1]
                    try:
                        os.killpg(os.getpgid(int(pid)), SIGKILL)
                    except (OSError, ValueError) as exc:
                        traceback.print_exc()
                        return (1, str(exc))
                    else:
                        return (0, "")
        st_file.seek(0, 0)  # rewind
        if hasattr(batch_sys, "KILL_CMD_TMPL"):
            for line in st_file:
                if not line.startswith(self.CYLC_BATCH_SYS_JOB_ID + "="):
                    continue
                job_id = line.strip().split("=", 1)[1]
                command = shlex.split(
                    batch_sys.KILL_CMD_TMPL % {"job_id": job_id})
                try:
                    proc = procopen(command, stdin=open(os.devnull),
                                    stderrpipe=True)
                except OSError as exc:
                    # subprocess.Popen has a bad habit of not setting the
                    # filename of the executable when it raises an OSError.
                    if not exc.filename:
                        exc.filename = command[0]
                    traceback.print_exc()
                    return (1, str(exc))
                else:
                    return (proc.wait(), proc.communicate()[1].decode())
        return (1, "Cannot determine batch job ID from %s file" % (
            JOB_LOG_STATUS))
    except IOError as exc:
        return (1, str(exc))
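# Example use of job_kill (hypothetical paths; "manager" is a configured
# instance of the class this method belongs to):
#
#     ret_code, err = manager.job_kill(
#         "/path/to/suite-run/log/job/20200101T0000Z/mytask/01/job.status")
#     if ret_code != 0:
#         sys.stderr.write("kill failed: %s\n" % err)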
def _run_command_init(cls, ctx, callback=None, callback_args=None):
    """Prepare and launch shell command in ctx."""
    try:
        if ctx.cmd_kwargs.get('stdin_files'):
            if len(ctx.cmd_kwargs['stdin_files']) > 1:
                stdin_file = TemporaryFile()
                for file_ in ctx.cmd_kwargs['stdin_files']:
                    if hasattr(file_, 'read'):
                        stdin_file.write(file_.read())
                    else:
                        stdin_file.write(open(file_, 'rb').read())
                stdin_file.seek(0)
            elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                stdin_file = ctx.cmd_kwargs['stdin_files'][0]
            else:
                stdin_file = open(ctx.cmd_kwargs['stdin_files'][0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            stdin_file = TemporaryFile('bw+')
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            stdin_file = open(os.devnull)
        proc = procopen(
            ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            usesh=ctx.cmd_kwargs.get('shell'))
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        # with logging for what is calling it and the commands given
    except (IOError, OSError) as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        cls._run_command_exit(ctx, callback, callback_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
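# The "ctx" consumed by _run_command_init is a command context object. A
# minimal hypothetical version, showing only the attributes read or written
# above (the real class carries more state):

class SubProcContextSketch(object):
    """Illustrative stand-in for the real command context class."""

    def __init__(self, cmd, **cmd_kwargs):
        self.cmd = cmd                # list of args, or a string with shell
        self.cmd_kwargs = cmd_kwargs  # e.g. stdin_str, stdin_files, env, shell
        self.ret_code = None          # set on launch failure (or on exit)
        self.err = None               # stderr / error message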
def run_get_stdout(command, timeout=None, poll_delay=None):
    """Get standard output from a shell command.

    If "timeout" is specified, it should be the number of seconds before
    timeout. On timeout, the command will be killed. The argument
    "poll_delay" is only relevant if "timeout" is specified. It specifies
    the interval in seconds between polls for the completion of the
    command.

    Return (True, [stdoutline1, ...]) on success.
    Return (False, [err_msg, command]) on failure.

    """
    try:
        proc = procopen(command, usesh=True, preexec_fn=setpgrp,
                        stdin=open(devnull), stderrpipe=True,
                        stdoutpipe=True)
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        is_killed_after_timeout = False
        if timeout:
            if poll_delay is None:
                poll_delay = POLL_DELAY
            timeout_time = time() + timeout
            while proc.poll() is None:
                if time() > timeout_time:
                    killpg(proc.pid, SIGTERM)
                    is_killed_after_timeout = True
                    break
                sleep(poll_delay)
        out, err = (f.decode() for f in proc.communicate())
        res = proc.wait()
        if res < 0 and is_killed_after_timeout:
            return (False, [ERR_TIMEOUT % (timeout, -res, err), command])
        elif res < 0:
            return (False, [ERR_SIGNAL % (-res, err), command])
        elif res > 0:
            return (False, [ERR_RETCODE % (res, err), command])
    except OSError:
        # should never happen with shell=True
        return (False, [ERR_OS, command])
    else:
        return (True, out.strip().splitlines())
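# Example use of run_get_stdout (a minimal sketch):
#
#     ok, lines = run_get_stdout("echo hello; echo world", timeout=10)
#     if ok:
#         print(lines)   # -> ['hello', 'world']
#     else:
#         err_msg, command = lines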
def _job_submit_impl(self, job_file_path, batch_sys_name, submit_opts):
    """Helper for self.jobs_submit() and self.job_submit()."""
    # Create NN symbolic link, if necessary
    self._create_nn(job_file_path)
    for name in JOB_LOG_ERR, JOB_LOG_OUT:
        try:
            os.unlink(os.path.join(job_file_path, name))
        except OSError:
            pass
    # Start new status file
    job_status_file = open(job_file_path + ".status", "w")
    job_status_file.write(
        "%s=%s\n" % (self.CYLC_BATCH_SYS_NAME, batch_sys_name))
    job_status_file.close()
    # Submit job
    batch_sys = self._get_sys(batch_sys_name)
    proc_stdin_arg = None
    proc_stdin_value = open(os.devnull)
    if hasattr(batch_sys, "get_submit_stdin"):
        proc_stdin_arg, proc_stdin_value = batch_sys.get_submit_stdin(
            job_file_path, submit_opts)
        if isinstance(proc_stdin_arg, str):
            proc_stdin_arg = proc_stdin_arg.encode()
        if isinstance(proc_stdin_value, str):
            proc_stdin_value = proc_stdin_value.encode()
    if hasattr(batch_sys, "submit"):
        # batch_sys.submit should handle OSError, if relevant.
        ret_code, out, err = batch_sys.submit(job_file_path, submit_opts)
    else:
        env = None
        if hasattr(batch_sys, "SUBMIT_CMD_ENV"):
            env = dict(os.environ)
            env.update(batch_sys.SUBMIT_CMD_ENV)
        batch_submit_cmd_tmpl = submit_opts.get("batch_submit_cmd_tmpl")
        if batch_submit_cmd_tmpl:
            # No need to catch OSError when using shell. It is unlikely
            # that we do not have a shell, and still manage to get as far
            # as here.
            batch_sys_cmd = batch_submit_cmd_tmpl % {"job": job_file_path}
            proc = procopen(batch_sys_cmd, stdin=proc_stdin_arg,
                            stdoutpipe=True, stderrpipe=True, usesh=True,
                            env=env)
            # calls to open a shell are aggregated in
            # cylc_subproc.procopen()
        else:
            command = shlex.split(
                batch_sys.SUBMIT_CMD_TMPL % {"job": job_file_path})
            try:
                proc = procopen(command, stdin=proc_stdin_arg,
                                stdoutpipe=True, stderrpipe=True, env=env)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = command[0]
                return 1, "", str(exc), ""
        out, err = (f.decode() for f in proc.communicate(proc_stdin_value))
        ret_code = proc.wait()
    # Filter submit command output, if relevant
    # Get job ID, if possible
    job_id = None
    if out or err:
        try:
            out, err, job_id = self._filter_submit_output(
                job_file_path + ".status", batch_sys, out, err)
        except OSError:
            ret_code = 1
            self.job_kill(job_file_path + ".status")
    return ret_code, out, err, job_id
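# _job_submit_impl, job_kill and _jobs_poll_batch_sys all duck-type against
# a batch system handler object. A minimal hypothetical handler showing the
# attributes they look up -- the command templates are Slurm-like examples,
# not a definitive handler:

class MyBatchSysHandlerSketch(object):
    """Illustrative stand-in for a cylc batch system handler."""

    POLL_CMD = "squeue"                        # run with a list of job IDs
    SUBMIT_CMD_TMPL = "sbatch '%(job)s'"       # "%(job)s" -> job file path
    KILL_CMD_TMPL = "scancel '%(job_id)s'"     # "%(job_id)s" -> batch job ID
    POLL_CANT_CONNECT_ERR = "slurm_load_jobs"  # hypothetical marker string
    SHOULD_KILL_PROC_GROUP = False             # kill by process group instead?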
def _jobs_poll_batch_sys(self, job_log_root, batch_sys_name, my_ctx_list):
    """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs)."""
    exp_job_ids = [ctx.batch_sys_job_id for ctx in my_ctx_list]
    bad_job_ids = list(exp_job_ids)
    exp_pids = []
    bad_pids = []
    items = [[self._get_sys(batch_sys_name), exp_job_ids, bad_job_ids]]
    if getattr(items[0][0], "SHOULD_POLL_PROC_GROUP", False):
        exp_pids = [ctx.pid for ctx in my_ctx_list if ctx.pid is not None]
        bad_pids.extend(exp_pids)
        items.append([self._get_sys("background"), exp_pids, bad_pids])
    debug_messages = []
    for batch_sys, exp_ids, bad_ids in items:
        if hasattr(batch_sys, "get_poll_many_cmd"):
            # Some poll commands may not be as simple
            cmd = batch_sys.get_poll_many_cmd(exp_ids)
        else:  # if hasattr(batch_sys, "POLL_CMD"):
            # Simple poll command that takes a list of job IDs
            cmd = [batch_sys.POLL_CMD] + exp_ids
        try:
            proc = procopen(cmd, stdin=open(os.devnull),
                            stderrpipe=True, stdoutpipe=True)
        except OSError as exc:
            # subprocess.Popen has a bad habit of not setting the
            # filename of the executable when it raises an OSError.
            if not exc.filename:
                exc.filename = cmd[0]
            sys.stderr.write(str(exc) + "\n")
            return
        # Read output before waiting, to avoid blocking on a full pipe.
        out, err = (f.decode() for f in proc.communicate())
        ret_code = proc.wait()
        debug_messages.append('%s - %s' % (
            batch_sys, len(out.split('\n'))))
        sys.stderr.write(err)
        if (ret_code and hasattr(batch_sys, "POLL_CANT_CONNECT_ERR") and
                batch_sys.POLL_CANT_CONNECT_ERR in err):
            # Poll command failed because it cannot connect to batch system
            # Assume jobs are still healthy until the batch system is back.
            bad_ids[:] = []
        elif hasattr(batch_sys, "filter_poll_many_output"):
            # Allow custom filter
            for id_ in batch_sys.filter_poll_many_output(out):
                try:
                    bad_ids.remove(id_)
                except ValueError:
                    pass
        else:
            # Just about all poll commands return a table, with column 1
            # being the job ID. The logic here should be sufficient to
            # ensure that any table header is ignored.
            for line in out.splitlines():
                try:
                    head = line.split(None, 1)[0]
                except IndexError:
                    continue
                if head in exp_ids:
                    try:
                        bad_ids.remove(head)
                    except ValueError:
                        pass
    debug_flag = False
    for ctx in my_ctx_list:
        ctx.batch_sys_exit_polled = int(
            ctx.batch_sys_job_id in bad_job_ids)
        # Exited batch system, but process still running
        # This can happen to jobs in some "at" implementations
        if ctx.batch_sys_exit_polled and ctx.pid in exp_pids:
            if ctx.pid not in bad_pids:
                ctx.batch_sys_exit_polled = 0
            else:
                debug_flag = True
        # Add information to "job.status"
        if ctx.batch_sys_exit_polled:
            try:
                handle = open(os.path.join(
                    job_log_root, ctx.job_log_dir, JOB_LOG_STATUS), "a")
                handle.write("%s=%s\n" % (
                    self.CYLC_BATCH_SYS_EXIT_POLLED,
                    get_current_time_string()))
                handle.close()
            except IOError as exc:
                sys.stderr.write(str(exc) + "\n")
        if debug_flag:
            ctx.batch_sys_call_no_lines = ', '.join(debug_messages)
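# The fallback parser above expects poll output shaped like a table whose
# first column is the job ID, e.g. (hypothetical "squeue"-style output):
#
#     JOBID PARTITION NAME      ST
#     12345 debug     mytask    R
#     12346 debug     othertask PD
#
# Header lines are ignored automatically because "JOBID" never appears in
# exp_ids; any job ID still missing from the table is treated as exited.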
def main(argv):
    if len(argv) != 2:
        print("Incorrect number of args", file=sys.stderr)
        sys.exit(1)
    sname = argv[0]
    rundir = argv[1]
    command = "cylc cat-state " + sname
    p = procopen(command, usesh=True, stdoutpipe=True, stderrpipe=True)
    state, err = (f.decode() for f in p.communicate())
    if p.returncode > 0:
        print(err, file=sys.stderr)
        sys.exit(1)
    db = (os.sep).join([rundir, sname, "log", "db"])
    cnx = sqlite3.connect(db)
    cur = cnx.cursor()
    state = state.split("\n")
    states_begun = False
    qbase = "select status from task_states where name==? and cycle==?"
    error_states = []
    for line in state:
        if states_begun and line != '':
            line2 = line.split(':')
            task_and_cycle = line2[0].strip().split(".")
            status = line2[1].split(',')[0].strip().split("=")[1]
            # query db and compare result
            res = []
            try:
                cur.execute(qbase, [task_and_cycle[0], task_and_cycle[1]])
                next_ = cur.fetchmany()
                while next_:
                    res.append(next_[0])
                    next_ = cur.fetchmany()
            except Exception:
                sys.stderr.write("unable to query suite database\n")
                sys.exit(1)
            if not res[0][0] == status:
                error_states.append(
                    line + ": state retrieved " + str(res[0][0]))
        elif line == "Begin task states":
            states_begun = True
    cnx.close()
    if error_states:
        print(
            "The following task states were not consistent with the "
            "database:", file=sys.stderr)
        for line in error_states:
            print(line, file=sys.stderr)
        sys.exit(1)
    else:
        sys.exit(0)
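# Example invocation of this consistency check (hypothetical script name,
# suite name and run directory), assuming the usual entry point:
#
#     if __name__ == "__main__":
#         main(sys.argv[1:])
#
#     $ python compare_states.py my.suite $HOME/cylc-run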
if event != 'shutdown':
    raise SystemExit("ERROR: run this as a shutdown event handler")
try:
    log_dir = os.path.expandvars(os.environ['CYLC_SUITE_LOG_DIR'])
    suite_dir = os.path.expandvars(os.environ['CYLC_SUITE_DEF_PATH'])
except KeyError as exc:
    raise SystemExit(exc)
ref = os.path.join(suite_dir, 'broadcast.ref')
log = os.path.join(suite_dir, 'broadcast.log')
fref = open(ref, 'r')
flog = open(log, 'r')
reflines = fref.readlines()
loglines = flog.readlines()
reflines.sort()
loglines.sort()
if reflines != loglines:
    sys.exit("ERROR: broadcast logs do not compare")
else:
    print("broadcast logs compare OK")
res = procopen(["cylc check-triggering " + event + " " + suite], usesh=True)
status = res.wait()
if status != 0:
    sys.exit(1)
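# Note: "event" and "suite" are assumed to be parsed from this handler's
# command line earlier in the script; cylc calls event handlers with the
# event name and suite name as arguments, e.g.:
#
#     event, suite = sys.argv[1], sys.argv[2]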