def run_diagnostics(preset_xml, output_dir):
    """Run the Hello World pipeline, submitting to the cluster if configured.

    :param preset_xml: path to a pipeline preset XML file
    :param output_dir: directory in which job files are created
    :return: exit code of the pbsmrtpipe command
    """
    precord = parse_pipeline_preset_xml(preset_xml)
    wopts = precord.to_workflow_level_opt()

    to_p = _to_path(output_dir)

    # Mock entry-point input for the dev_dist pipeline
    input_txt = to_p("e-01_input.txt")
    with open(input_txt, 'w') as f:
        f.write("Mock data\n")

    # Keep a copy of the preset alongside the job output
    job_preset_xml = to_p("preset.xml")
    shutil.copyfile(preset_xml, job_preset_xml)

    cmd_args = dict(f=input_txt, p=job_preset_xml, d=output_dir)
    cmd_str = "pbsmrtpipe pipeline-id pbsmrtpipe.pipelines.dev_dist -e \"e_01:{f}\" --preset-xml={p} --output-dir={d}"
    cmd = cmd_str.format(**cmd_args)
    print("Running command {c}".format(c=cmd))
    rcode, stdout, stderr, run_time = run_command(cmd, sys.stdout, sys.stderr)
    if rcode == 0:
        print("Successfully submitted cluster job using templates {p}".format(p=wopts.cluster_manager_path))
    return rcode
def run_simple_diagnostics(preset_xml, output_dir):
    """Set up and submit a trivial hello-world job via the cluster templates.

    :param preset_xml: path to a pipeline preset XML file
    :param output_dir: directory in which job files are created
    :return: exit code of the rendered cluster submission command
    """
    precord = parse_pipeline_preset_xml(preset_xml)
    wopts = precord.to_workflow_level_opt()

    to_p = _to_path(output_dir)

    ts = load_cluster_templates(wopts.cluster_manager_path)

    run_sh = to_p('run.sh')
    cluster_sh = to_p('cluster.sh')
    output_file = to_p('hello-world-output.txt')

    # run.sh echoes hello-world into output_file
    _write_echo_hello_world(output_file, run_sh)

    cluster_stderr = to_p("cluster.stderr")
    cluster_stdout = to_p("cluster.stdout")
    cluster_cmd = ts.render("start", run_sh, "job.dev-diagnostic-hello-world",
                            stdout=cluster_stdout, stderr=cluster_stderr)

    # Persist the rendered submission command for inspection
    with open(cluster_sh, 'w') as f:
        f.write(cluster_cmd)

    print("Run.sh command {r}".format(r=run_sh))
    print("Exe'ing Cluster command {c}".format(c=cluster_cmd))
    rcode, stdout, stderr, run_time = run_command(cluster_cmd, sys.stdout, sys.stderr)
    if rcode == 0:
        print("Successfully submitted cluster job using templates {p}".format(p=wopts.cluster_manager_path))
    return rcode
def test_run(self):
    """Build mock entry-point inputs, run the integration job command and
    assert it exits with code 0."""
    root_output_dir = self._get_root_temp_dir()
    i = random.randint(1, 10000)
    name = "{n}_{i}".format(n=self.JOB_NAME, i=i)
    output_dir = os.path.join(root_output_dir, name)
    os.mkdir(output_dir)

    # One mock input file per entry point (ep_name renamed to avoid
    # shadowing the outer `name`; Py2.7 dict-comps scope it separately anyway)
    ep_d = {ep_id: get_temp_file(suffix=ep_name)
            for ep_id, ep_name in self.ENTRY_POINTS.iteritems()}
    for ep_id, file_name in ep_d.iteritems():
        with open(file_name, 'w') as x:
            x.write("Mock data for {i} \n".format(i=ep_id))

    cmd = self.TO_CMD_FUNC(output_dir, self.WORKFLOW_XML, self.PRESET_XML, ep_d)

    stderr_path = os.path.join(output_dir, 'job.stderr')
    stdout_path = os.path.join(output_dir, 'job.stdout')
    log.debug(cmd)

    with open(stdout_path, 'w') as wo:
        with open(stderr_path, 'w') as we:
            rcode, stdout_results, stderr_results, run_time = run_command(cmd, wo, we)

    log.debug("Integration Job {i} state {s} in {t:.2f} sec.".format(i=self.JOB_NAME, s=rcode, t=run_time))
    if rcode != 0:
        log.error("Integration Job {i} failed.".format(i=self.JOB_NAME))
        log.error(stdout_results)
        log.error(stderr_results)
        # Echo the job stderr file into the test log for easier triage
        if os.path.exists(stderr_path):
            with open(stderr_path, 'r') as f:
                log.error(f.read())

    emsg = "Failed Integration Job {i} with exit code {r} in {d}. {w}".format(i=self.JOB_NAME, r=rcode, d=output_dir, w=self.WORKFLOW_XML)
    self.assertEqual(rcode, 0, emsg)
def test_run(self):
    """Build mock entry-point inputs, run the integration job command and
    assert the exit code matches ``self.EXPECTED_EXIT_CODE``."""
    root_output_dir = self._get_root_temp_dir()
    i = random.randint(1, 10000)
    name = "{n}_{i}".format(n=self.JOB_NAME, i=i)
    output_dir = os.path.join(root_output_dir, name)
    os.mkdir(output_dir)

    # One mock input file per entry point (ep_name renamed to avoid
    # shadowing the outer `name`; Py2.7 dict-comps scope it separately anyway)
    ep_d = {ep_id: get_temp_file(suffix=ep_name)
            for ep_id, ep_name in self.ENTRY_POINTS.iteritems()}
    for ep_id, file_name in ep_d.iteritems():
        with open(file_name, 'w') as x:
            x.write("Mock data for {i} \n".format(i=ep_id))

    cmd = self.TO_CMD_FUNC(output_dir, self.WORKFLOW_XML, self.PRESET_JSON, self.PRESET_XML, ep_d)

    stderr_path = os.path.join(output_dir, 'job.stderr')
    stdout_path = os.path.join(output_dir, 'job.stdout')
    log.debug(cmd)

    with open(stdout_path, 'w') as wo:
        with open(stderr_path, 'w') as we:
            rcode, stdout_results, stderr_results, run_time = run_command(cmd, wo, we)

    log.debug("Integration Job {i} state {s} in {t:.2f} sec.".format(i=self.JOB_NAME, s=rcode, t=run_time))
    # Only log failure details when a zero exit code was expected
    if rcode != 0 and self.EXPECTED_EXIT_CODE == 0:
        log.error("Integration Job {i} failed.".format(i=self.JOB_NAME))
        log.error(stdout_results)
        log.error(stderr_results)
        if os.path.exists(stderr_path):
            with open(stderr_path, 'r') as f:
                log.error(f.read())

    emsg = "Failed Integration Job {i} with exit code {r} in {d}. {w}".format(i=self.JOB_NAME, r=rcode, d=output_dir, w=self.WORKFLOW_XML)
    self.assertEqual(rcode, self.EXPECTED_EXIT_CODE, emsg)
def test_hello_world_job(self):
    """Submit a trivial shell script through the SGE cluster templates and
    assert a zero exit code.

    Bug fix: the original reused the ``stdout``/``stderr`` path variables
    for ``run_command``'s returned captured-output strings, so the later
    ``os.path.exists(stderr)`` / ``open(stderr)`` operated on the captured
    text instead of the qsub stderr file. Paths now keep distinct names.
    """
    r = C.load_installed_cluster_templates_by_name('sge')
    log.debug(r)

    job_name = "int_job_hello"
    output_dir = get_temp_cluster_dir(job_name)

    cmd = "pbsmrtpipe --help"

    def _to_p(x_):
        return os.path.join(output_dir, x_)

    sh_script = _to_p('qsub_test.sh')
    with open(sh_script, 'w') as f:
        f.write(cmd + "\n")

    # qsub output files (pre-created so qsub can append to them)
    stdout_path = _to_p('stdout')
    stderr_path = _to_p('stderr')
    for x in [stdout_path, stderr_path]:
        with open(x, 'w') as f:
            f.write("")

    log.info(sh_script)
    cluster_cmd = r.render("start", sh_script, 'test_job_01',
                           stdout=stdout_path, stderr=stderr_path, nproc=1)
    log.debug("Running qsub command '{c}'".format(c=cluster_cmd))

    time_out = 60 * 5
    rcode, stdout, stderr, run_time = run_command(cluster_cmd, None, None, time_out=time_out)
    log.debug((rcode, stdout, stderr, run_time))
    if rcode != 0:
        log.info(stdout)
        log.error(stderr)
        log.error("Failed Integration Job {i} with exit code {r}".format(
            i=job_name, r=rcode))
        # Now correctly inspects the qsub stderr FILE, not the captured text
        if os.path.exists(stderr_path):
            with open(stderr_path, 'r') as f:
                log.error(f.read())
    else:
        # Best-effort cleanup on success only, to keep failure artifacts
        try:
            shutil.rmtree(output_dir)
        except Exception as e:
            log.warn("Unable to cleanup testdir {o}. {m}".format(
                o=output_dir, m=e.message))

    self.assertEqual(rcode, 0, stderr)
def test_hello_world_job(self):
    """Submit a trivial shell script through the SGE cluster templates,
    capturing process output in temporary files, and assert a zero exit code.

    Bug fix: the original reused the ``stdout``/``stderr`` path variables
    for ``run_command``'s returned captured-output strings, so the later
    ``os.path.exists(stderr)`` / ``open(stderr)`` operated on the captured
    text instead of the qsub stderr file. Paths now keep distinct names.
    """
    r = C.load_installed_cluster_templates_by_name('sge')
    log.debug(r)

    job_name = "int_job_hello"
    output_dir = get_temp_cluster_dir(job_name)

    cmd = "pbsmrtpipe --help"

    def _to_p(x_):
        return os.path.join(output_dir, x_)

    sh_script = _to_p('qsub_test.sh')
    with open(sh_script, 'w') as f:
        f.write(cmd + "\n")

    # qsub output files (pre-created so qsub can append to them)
    stdout_path = _to_p('stdout')
    stderr_path = _to_p('stderr')
    for x in [stdout_path, stderr_path]:
        with open(x, 'w') as f:
            f.write("")

    log.info(sh_script)
    cluster_cmd = r.render("start", sh_script, 'test_job_01',
                           stdout=stdout_path, stderr=stderr_path, nproc=1)
    log.debug("Running qsub command '{c}'".format(c=cluster_cmd))

    time_out = 60 * 5
    with tempfile.TemporaryFile() as stdout_tmp:
        with tempfile.TemporaryFile() as stderr_tmp:
            rcode, stdout, stderr, run_time = run_command(
                cluster_cmd, stdout_tmp, stderr_tmp, time_out=time_out)

    log.debug((rcode, stdout, stderr, run_time))
    if rcode != 0:
        log.info(stdout)
        log.error(stderr)
        log.error("Failed Integration Job {i} with exit code {r}".format(i=job_name, r=rcode))
        # Now correctly inspects the qsub stderr FILE, not the captured text
        if os.path.exists(stderr_path):
            with open(stderr_path, 'r') as f:
                log.error(f.read())
    else:
        # Best-effort cleanup on success only, to keep failure artifacts
        try:
            shutil.rmtree(output_dir)
        except Exception as e:
            log.warn("Unable to cleanup testdir {o}. {m}".format(o=output_dir, m=e.message))

    self.assertEqual(rcode, 0, stderr)
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally: validate inputs, execute each command,
    validate outputs, and write a task-report.json summary.

    :param runnable_task:
    :type runnable_task: RunnableTask
    :return: exit code, run_time
    :rtype: (int, int)
    """
    started_at = time.time()
    rcode = -1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()
    total_cmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, '.env.json')
    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))
            stderr_fh.write("Running task in {o}\n".format(o=output_dir))

            # Validate Inputs; stop at the first missing file
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write("Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break

            # Create resources if necessary
            # if runnable_task.task.resources:
            #     create_tmp_resources_ignore_error(runnable_task.task.resources)

            # Run commands sequentially; bail out on the first failure
            for cmd_index, cmd in enumerate(runnable_task.task.cmds):
                log.debug("Running command \n" + cmd)
                rcode, cmd_out, cmd_err, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)
                if rcode != 0:
                    fail_msg = "Failed task {i} exit code {r} in {s:.2f} sec".format(
                        i=runnable_task.task.task_id, r=rcode, s=run_time)
                    stderr_tail = _extract_last_nlines(task_stderr)
                    err_msg = "\n".join([fail_msg, stderr_tail])
                    log.error(err_msg)
                    log.error(cmd_err)
                    stderr_fh.write(str(cmd_err) + "\n")
                    sys.stderr.write(str(cmd_err) + "\n")
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break
                else:
                    stdout_fh.write(
                        "completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(
                            x=rcode, s=run_time, h=host, i=cmd_index + 1, n=total_cmds))

            summary_msg = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(summary_msg)

            if rcode == 0:
                # Validate output files of a successful task.
                for idx, output_file in enumerate(runnable_task.task.output_files):
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated {i} output file '{o}' on {h} \n".format(
                            o=output_file, i=idx, h=host))
                    else:
                        # A missing output demotes a "successful" run to failed
                        err_msg = "Unable to find {i} output file '{x}'. Marking task as failed.".format(
                            x=output_file, i=idx)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        sys.stderr.write(err_msg + "\n")
                        rcode = -1

            total_run_time = time.time() - started_at

            # FIXME. There should be a better way to communicate warnings
            warn_msg = ""

            task_report = to_task_report(host, runnable_task.task.task_id, total_run_time,
                                         rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')
            msg = "Writing task id {i} task report to {r}".format(
                r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            task_report.write_json(task_report_path)
            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (kept in debug mode for post-mortem inspection)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(
                n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(
                f=runnable_task.task.resources))

    run_time = time.time() - started_at
    return rcode, run_time
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally: validate inputs, execute each command,
    validate outputs, and write a task-report.json summary.

    :param runnable_task:
    :type runnable_task: RunnableTask
    :return: exit code, run_time
    :rtype: (int, int)
    """
    started_at = time.time()
    rcode = -1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()
    total_cmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, 'env.json')
    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))

            # Validate Inputs; stop at the first missing file
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write("Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break

            # Create resources if necessary
            # if runnable_task.task.resources:
            #     create_tmp_resources_ignore_error(runnable_task.task.resources)

            # Run commands sequentially; bail out on the first failure
            for cmd_index, cmd in enumerate(runnable_task.task.cmds):
                log.debug("Running command \n" + cmd)
                rcode, cmd_out, cmd_err, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)
                if rcode != 0:
                    err_msg = "Failed task {i} exit code {r} in {s:.2f} sec".format(
                        i=runnable_task.task.task_id, r=rcode, s=run_time)
                    log.error(err_msg)
                    log.error(cmd_err)
                    stderr_fh.write(str(cmd_err) + "\n")
                    sys.stderr.write(str(cmd_err) + "\n")
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break
                else:
                    stdout_fh.write(
                        "completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(
                            x=rcode, s=run_time, h=host, i=cmd_index + 1, n=total_cmds))

            summary_msg = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(summary_msg)

            if rcode == 0:
                # Validate output files of a successful task.
                for output_file in runnable_task.task.output_files:
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated file '{o}'\n".format(o=output_file))
                    else:
                        # A missing output demotes a "successful" run to failed
                        err_msg = "Unable to find file '{x}'".format(x=output_file)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        sys.stderr.write(err_msg + "\n")
                        rcode = -1

            total_run_time = time.time() - started_at
            warn_msg = ""

            task_report = to_task_report(host, runnable_task.task.task_id, total_run_time,
                                         rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')
            msg = "Writing task id {i} task report to {r}".format(
                r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            task_report.write_json(task_report_path)
            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (kept in debug mode for post-mortem inspection)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(
                n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(
                f=runnable_task.task.resources))

    run_time = time.time() - started_at
    return rcode, run_time
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally.

    Bug fix: in the command-failure branch the original wrote the stale
    ``err_msg`` (empty at that point) to the task stderr file instead of the
    just-built failure message ``err_msg_``; the failure message now reaches
    the file before the stderr tail is extracted.

    :param debug_mode: Enabling debug mode will not cleanup temp resources upon failure
    :type debug_mode: bool
    :param runnable_task: Runnable task instance
    :type runnable_task: RunnableTask
    :param output_dir: Path to output dir
    :type output_dir: str
    :param task_stderr: Absolute path to task stderr file
    :type task_stderr: str
    :param task_stdout: Absolute path to task stdout file
    :type task_stdout: str
    :return: (exit code, error message, run_time)
    :rtype: (int, str, int)
    """
    started_at = time.time()

    def get_run_time():
        return time.time() - started_at

    # Default general catch all
    rcode = 1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()
    ncmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, '.env.json')
    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))
            stdout_fh.write("Running task in {o}\n".format(o=output_dir))

            # Validate Inputs; stop at the first missing file
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write("Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    log.error(err_msg)
                    break

            # Create resources if necessary
            # if runnable_task.task.resources:
            #     create_tmp_resources_ignore_error(runnable_task.task.resources)

            stdout_fh.write("Starting to run {n} cmds.".format(n=len(runnable_task.task.cmds)))
            stdout_fh.flush()
            stderr_fh.flush()

            for i, cmd in enumerate(runnable_task.task.cmds):
                log.info("Running command \n" + cmd)
                # see run_command API for future fixes
                rcode, _, _, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)
                if rcode != 0:
                    err_msg_ = "Failed task {i} exit code {r} in {s:.2f} sec (See file '{f}'.)".format(
                        i=runnable_task.task.task_id, r=rcode, s=run_time, f=task_stderr)
                    # BUG FIX: was `stderr_fh.write(err_msg + "\n")`, which wrote
                    # the stale (usually empty) err_msg. Write the new failure
                    # message, then flush so _extract_last_nlines can see it.
                    stderr_fh.write(err_msg_ + "\n")
                    stderr_fh.flush()
                    t_error_msg = _extract_last_nlines(task_stderr)
                    err_msg = "\n".join([err_msg_, "Extracted from stderr", t_error_msg])
                    log.error(err_msg)
                    stdout_fh.write("breaking out. Unable to run remaining task commands.")
                    break
                else:
                    stdout_fh.write(
                        "completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(
                            x=rcode, s=run_time, h=host, i=i + 1, n=ncmds))

            smsg_ = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(smsg_)

            if rcode == 0:
                log.info("Core RTC runner was successful. Validating output files.")
                # Validate output files of a successful task.
                for ix, output_file in enumerate(runnable_task.task.output_files):
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated {i} output file '{o}' on {h} \n".format(
                            o=output_file, i=ix, h=host))
                    else:
                        # A missing output demotes a "successful" run to failed
                        rcode = 127
                        err_msg = "Unable to find {i} output file '{x}'. Marking task as failed. Setting exit code to {r}".format(
                            x=output_file, i=ix, r=rcode)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        log.error(err_msg)

            # FIXME. There should be a better way to communicate warnings
            warn_msg = ""

            # Write the task summary to a pbcommand Report object
            r = to_task_report(host, runnable_task.task.task_id, get_run_time(), rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')
            msg = "Writing task id {i} task report to {r}".format(
                r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            r.write_json(task_report_path)
            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (kept in debug mode for post-mortem inspection)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(
                n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(
                f=runnable_task.task.resources))

    return rcode, err_msg, get_run_time()