def do_submit(command, submit_path, stdoutdir, mp_params, job_name, dry_run=False):
  """Write a submission script for ``command`` and submit it via ``mp_params.method``.

  Args:
    command: the dispatcher command line to run.
    submit_path: path where the submission script is written.
    stdoutdir: directory receiving the job's stdout/stderr logs.
    mp_params: multiprocessing phil params (``method``, etc.).
    job_name: name passed to the queueing system.
    dry_run: if True, only print the command; nothing is submitted.

  Returns:
    The queueing-system submission id (or the child pid for ``method='local'``),
    or None for a dry run.
  """
  submit_command = get_submit_command_chooser(command, submit_path, stdoutdir,
                                              mp_params, job_name=job_name)
  if mp_params.method in ['lsf', 'sge', 'pbs']:
    # The chooser writes a wrapper script whose second-to-last line is the
    # actual run command; recover it for display. Open in text mode under a
    # context manager: "rb" yields bytes, and bytes.split("\n") raises
    # TypeError on Python 3 -- it also leaked the file handle.
    parts = submit_command.split(" ")
    with open(parts.pop(-1)) as script:
      run_command = script.read().split("\n")[-2]
    command = " ".join(parts + [run_command])
  else:
    command = submit_command
  print(command)

  submit_command = str(submit_command) # unicode workaround

  if dry_run:
    print("Dry run: job not submitted. Trial directory created here:", os.path.dirname(submit_path))
    print("Execute this command to submit the job:")
    print(submit_command)
    # Bug fix: previously fell through to get_submission_id with ``result``
    # unbound, raising NameError. A dry run has no submission id.
    return None
  elif mp_params.method == 'local':
    submission_id = os.fork()
    if submission_id > 0:
      # Parent: the child's pid serves as the submission id.
      return submission_id
    else:
      # Child: redirect stdout/stderr into the log directory, then replace
      # this process image with the job command.
      stdout = os.open(os.path.join(stdoutdir, 'log.out'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stdout, 1)
      stderr = os.open(os.path.join(stdoutdir, 'log.err'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stderr, 2)
      os.execv(command.split()[0], command.split())
  else:
    try:
      result = easy_run.fully_buffered(command=submit_command)
      result.raise_if_errors()
    except Exception as e:
      # AFS token warnings are benign at LCLS; anything else is fatal.
      if not "Warning: job being submitted without an AFS token." in str(e):
        raise e
    return get_submission_id(result, mp_params.method)
def run(self):
  '''Execute the script.

  Allocates the selected integration results into chunks/stripes, then for
  each chunk builds a dials.combine_experiments -> dials.refine ->
  recompute_mosaicity [-> dials.integrate -> frame_extractor] command
  sequence and submits it as one queued job.
  '''
  if self.params.striping.run:
    print "processing runs " + ", ".join(["r%04d" % r for r in self.params.striping.run])
  if self.params.striping.rungroup:
    print "processing rungroups " + ", ".join(["rg%03d" % rg for rg in self.params.striping.rungroup])
  # Partition the trial's results into batches of chunks (or stripes),
  # bounded by chunk_size and optionally respecting rungroup barriers.
  batch_chunks = allocate_chunks(
    self.params.striping.results_dir,
    self.params.striping.trial,
    rgs_selected=["rg%03d" % rg for rg in self.params.striping.rungroup],
    respect_rungroup_barriers=self.params.striping.respect_rungroup_barriers,
    runs_selected=["r%04d" % r for r in self.params.striping.run],
    stripe=self.params.striping.stripe,
    max_size=self.params.striping.chunk_size,
    integrated=self.params.combine_experiments.keep_integrated)
  # Working directories: intermediates hold per-chunk inputs and scripts,
  # final_extracted receives the post-processed pickles.
  self.dirname = "combine_experiments_t%03d" % self.params.striping.trial
  self.intermediates = os.path.join(self.dirname, "intermediates")
  self.extracted = os.path.join(self.dirname, "final_extracted")
  for d in self.dirname, self.intermediates, self.extracted:
    if not os.path.isdir(d):
      os.mkdir(d)
  self.cwd = os.getcwd()
  tag = "stripe" if self.params.striping.stripe else "chunk"
  for batch, ch_list in batch_chunks.iteritems():
    for idx in xrange(len(ch_list)):
      chunk = ch_list[idx]
      # reset for this chunk/stripe
      self.filename = "t%03d_%s_%s%03d" % (self.params.striping.trial, batch, tag, idx)
      self.command_sequence = []
      # set up the file containing input expts and refls (logging)
      chunk_path = os.path.join(self.cwd, self.intermediates, self.filename)
      if os.path.isfile(chunk_path):
        os.remove(chunk_path)
      with open(chunk_path, "wb") as outfile:
        for i in (0, 1): # expts then refls
          outfile.write("\n".join(chunk[i]) + "\n")
      # set up the params for dials.combine_experiments
      custom_parts = [" input {"]
      for expt_path in chunk[0]:
        custom_parts.append(" experiments = %s" % expt_path)
      for refl_path in chunk[1]:
        custom_parts.append(" reflections = %s" % refl_path)
      custom_parts.append(" }")
      self.set_up_section("combine_experiments", "dials.combine_experiments",
        clustering=False, custom_parts=custom_parts)
      # refinement of the grouped experiments
      self.set_up_section("refinement", "dials.refine", clustering=self.clustering)
      # recompute mosaic parameters on the refined experiments
      self.set_up_section("recompute_mosaicity", "cctbx.xfel.recompute_mosaicity", clustering=self.clustering)
      # reintegration
      if self.params.reintegration.enable:
        custom_parts = [" integration.mp.nproc = %d" % self.params.mp.nproc]
        self.set_up_section("reintegration", "dials.integrate",
          custom_parts=custom_parts, clustering=self.clustering)
      # extract results to integration pickles for merging
      if self.params.postprocessing.enable:
        # Rewrites the section's diff string to point output at
        # final_extracted and to number iterations with %04d.
        lambda_diff_str = lambda diff_str: (diff_str % (os.path.join("..", "final_extracted"))).replace("ITER", "%04d")
        self.set_up_section("postprocessing", "cctbx.xfel.frame_extractor",
          lambda_diff_str=lambda_diff_str, clustering=self.clustering)
      # submit queued job from appropriate directory
      os.chdir(self.intermediates)
      command = " && ".join(self.command_sequence)
      if self.params.combine_experiments.clustering.dendrogram:
        # Interactive dendrogram display: run in-process rather than queueing.
        easy_run.fully_buffered(command).raise_if_errors().show_stdout()
      else:
        submit_path = os.path.join(self.cwd, self.intermediates, "combine_%s.sh" % self.filename)
        submit_command = get_submit_command_chooser(command, submit_path, self.intermediates, self.params.mp,
          log_name=(submit_path.split(".sh")[0] + ".out"))
        print "executing command: %s" % submit_command
        try:
          easy_run.fully_buffered(submit_command).raise_if_errors().show_stdout()
        except Exception as e:
          # AFS token warnings are benign; re-raise anything else.
          if not "Warning: job being submitted without an AFS token." in str(e):
            raise e
      # Restore the working directory before handling the next chunk.
      os.chdir(self.cwd)
class Script(object):
  """ Script to submit XFEL data at LCLS for processing"""
  def __init__(self):
    pass

  def run(self, argv=None):
    """ Set up run folder and submit the job.

    Parses phil arguments, creates the r####/trial[_rg###] directory tree,
    copies any psana config / phil target files into it, builds the
    dispatcher command line, and submits it via get_submit_command_chooser.
    Returns the queue submission id for mpi/lsf jobs, otherwise None.
    """
    if argv is None:
      argv = sys.argv[1:]

    if len(argv) == 0 or "-h" in argv or "--help" in argv or "-c" in argv:
      print help_str
      print "Showing phil parameters:"
      print phil_scope.as_str(attributes_level=2)
      return

    # Split the command line into phil sources and pass-through dispatcher args.
    user_phil = []
    dispatcher_args = []
    for arg in argv:
      if (os.path.isfile(arg)):
        user_phil.append(parse(file_name=arg))
      else:
        try:
          user_phil.append(parse(arg))
        except RuntimeError, e:
          # Not valid phil: forward verbatim to the dispatcher.
          dispatcher_args.append(arg)
    scope, unused = phil_scope.fetch(sources=user_phil, track_unused_definitions=True)
    params = scope.extract()
    # Unused definitions are also forwarded to the dispatcher.
    dispatcher_args = ["%s=%s" % (u.path, u.object.words[0].value) for u in unused]

    assert params.input.experiment is not None
    assert params.input.run_num is not None

    print "Submitting run %d of experiment %s" % (params.input.run_num, params.input.experiment)

    if not os.path.exists(params.output.output_dir):
      os.makedirs(params.output.output_dir)
    rundir = os.path.join(params.output.output_dir, "r%04d" % params.input.run_num)
    if not os.path.exists(rundir):
      os.mkdir(rundir)

    # If a trial number wasn't included, find the next available, up to 999 trials
    if params.input.trial is None:
      found_one = False
      for i in xrange(1000):
        trialdir = os.path.join(rundir, "%03d" % i)
        if params.input.rungroup is not None:
          trialdir += "_rg%03d" % params.input.rungroup
        if not os.path.exists(trialdir):
          found_one = True
          break
      if found_one:
        params.input.trial = i
      else:
        raise Sorry("All trial numbers in use")
    else:
      trialdir = os.path.join(rundir, "%03d" % params.input.trial)
      if params.input.rungroup is not None:
        trialdir += "_rg%03d" % params.input.rungroup
      if os.path.exists(trialdir):
        raise Sorry("Trial %d already in use" % params.input.trial)

    print "Using trial", params.input.trial
    os.mkdir(trialdir)

    # log file will live here
    stdoutdir = os.path.join(trialdir, "stdout")
    os.mkdir(stdoutdir)
    if params.output.split_logs: # test parameter for split_log then open and close log file and loop over nprocs
      for i in xrange(params.mp.nproc):
        error_files = os.path.join(stdoutdir, "error_rank%04d.out" % i)
        log_files = os.path.join(stdoutdir, "log_rank%04d.out" % i)
        open(log_files, 'a').close()
        open(error_files, 'a').close()
      logging_str = "output.logging_dir=%s" % stdoutdir
    else:
      logging_str = ""

    # Copy any config or phil files specified
    target_num = 1
    has_config = False
    redone_args = []
    for arg in dispatcher_args:
      # NOTE(review): non key=value args are silently dropped here (no
      # redone_args.append before continue) -- the py3 twin of this method
      # preserves them; confirm whether that is intended.
      if not len(arg.split('=')) == 2:
        continue
      name, value = arg.split('=')
      if "cfg" in name and os.path.splitext(value)[1].lower() == ".cfg":
        cfg = value
        if not os.path.exists(cfg):
          raise Sorry("Config file doesn't exist: %s" % cfg)
        if has_config:
          raise Sorry("Multiple config files found")
        has_config = True
        # copy_config may bump target_num if the config references phil targets.
        target_num = copy_config(cfg, trialdir, "psana", params, target_num)
        redone_args.append("%s=%s" % (name, os.path.join(trialdir, "psana.cfg")))
      elif "target" in name or os.path.splitext(value)[1].lower() == ".phil":
        phil = value
        if not os.path.exists(phil):
          raise Sorry("Phil file doesn't exist: %s" % phil)
        copy_target(phil, trialdir, "params_%d" % target_num)
        redone_args.append("%s=%s" % (name, os.path.join(trialdir, "params_%d.phil" % target_num)))
        target_num += 1
      else:
        redone_args.append(arg)
    dispatcher_args = redone_args

    # If additional phil params are provided, copy them over too
    if params.input.target is not None:
      if not os.path.exists(params.input.target):
        raise Sorry("Target file doesn't exist: %s" % params.input.target)
      copy_target(params.input.target, trialdir, "params_%d" % target_num)
      params.input.target = os.path.join(trialdir, "params_%d.phil" % target_num)
      target_num += 1

    # Some configs files will specify out_dirname. If not, we want to explicitly create one
    # so the dispatcher will have an output directory.
    output_dir = os.path.join(trialdir, "out")
    if not os.path.exists(output_dir):
      os.makedirs(output_dir)

    # Write out a script for submitting this job and submit it
    submit_path = os.path.join(trialdir, "submit.sh")

    extra_str = ""
    for arg in dispatcher_args:
      extra_str += " %s" % arg

    if params.input.target is not None:
      extra_str += " %s" % params.input.target

    if params.input.rungroup is not None:
      extra_str += " input.rungroup=%d" % params.input.rungroup

    command = "%s input.experiment=%s input.run_num=%d input.trial=%d output.output_dir=%s %s %s" % (
      params.input.dispatcher, params.input.experiment, params.input.run_num, params.input.trial,
      output_dir, logging_str, extra_str)

    submit_command = get_submit_command_chooser(command, submit_path, stdoutdir, params.mp)
    if params.mp.method in "lsf sge pbs".split(" "):
      # Recover the actual run command (second-to-last line) from the
      # generated wrapper script, for display.
      parts = submit_command.split(" ")
      script = open(parts.pop(-1), "rb")
      run_command = script.read().split("\n")[-2]
      command = " ".join(parts + [run_command])
    else:
      command = submit_command
    print command

    if params.dry_run:
      print "Dry run: job not submitted. Trial directory created here:", trialdir
      print "Execute this command to submit the job:"
      print submit_command
    else:
      try:
        result = easy_run.fully_buffered(command=submit_command)
        result.raise_if_errors()
      except Exception, e:
        # AFS token warnings are benign; re-raise anything else.
        if not "Warning: job being submitted without an AFS token." in str(e):
          raise e
      print "Job submitted. Output in", trialdir

      if params.mp.method == "mpi" or params.mp.method == "lsf":
        submission_id = None
        for line in result.stdout_lines:
          # example for lsf: 'Job <XXXXXX> is submitted to queue <YYYYYYY>.'
          if len(line.split()) < 2: continue
          s = line.split()[1].lstrip('<').rstrip('>')
          try:
            s = int(s)
          except ValueError:
            pass
          else:
            submission_id = str(s)
        return submission_id
def run(self, argv = None):
  """ Set up run folder and submit the job.

  Parses phil arguments, creates the r####/trial[_rg###] directory tree,
  copies any psana config / phil target / locator files into it, builds the
  dispatcher command line and submits it via get_submit_command_chooser.

  Returns:
    The queue submission id (mpi/lsf/pbs), the forked child pid for
    method='local', or None (including for dry runs).
  """
  if argv is None:
    argv = sys.argv[1:]

  if len(argv) == 0 or "-h" in argv or "--help" in argv or "-c" in argv:
    print(help_str)
    print("Showing phil parameters:")
    print(phil_scope.as_str(attributes_level = 2))
    return

  # Split the command line into phil sources and pass-through dispatcher args.
  user_phil = []
  dispatcher_args = []
  for arg in argv:
    if (os.path.isfile(arg)):
      try:
        user_phil.append(parse(file_name=arg))
      except Exception as e:
        # A file that fails to parse is forwarded to the dispatcher, unless
        # its extension claims it is phil -- then the parse error is real.
        if os.path.splitext(arg)[1] == ".phil": raise e
        dispatcher_args.append(arg)
    else:
      try:
        user_phil.append(parse(arg))
      except RuntimeError as e:
        dispatcher_args.append(arg)
  scope, unused = phil_scope.fetch(sources=user_phil, track_unused_definitions=True)
  params = scope.extract()
  # Unused definitions are also forwarded to the dispatcher.
  dispatcher_args.extend(["%s=%s"%(u.path, u.object.words[0].value) for u in unused])

  assert params.input.run_num is not None
  if params.input.dispatcher in ["cxi.xtc_process", "cctbx.xfel.xtc_process"]:
    # processing XTC streams at LCLS -- dispatcher will locate raw data
    assert params.input.experiment is not None or params.input.locator is not None
    print("Submitting run %d of experiment %s"%(int(params.input.run_num), params.input.experiment))
    rundir = os.path.join(params.output.output_dir, "r%04d"%int(params.input.run_num))
  else:
    print("Submitting run %s"%(params.input.run_num))
    try:
      rundir = os.path.join(params.output.output_dir, "r%04d"%int(params.input.run_num))
    except ValueError:
      # Non-numeric run identifier: use it verbatim as the directory name.
      rundir = os.path.join(params.output.output_dir, params.input.run_num)

  if not os.path.exists(params.output.output_dir):
    os.makedirs(params.output.output_dir)
  if not os.path.exists(rundir):
    os.mkdir(rundir)

  # If a trial number wasn't included, find the next available, up to 999 trials
  if params.input.trial is None:
    found_one = False
    for i in range(1000):
      trialdir = os.path.join(rundir, "%03d"%i)
      if params.input.rungroup is not None:
        trialdir += "_rg%03d"%params.input.rungroup
      if not os.path.exists(trialdir):
        found_one = True
        break
    if found_one:
      params.input.trial = i
    else:
      raise Sorry("All trial numbers in use")
  else:
    trialdir = os.path.join(rundir, "%03d"%params.input.trial)
    if params.input.rungroup is not None:
      trialdir += "_rg%03d"%params.input.rungroup
    if os.path.exists(trialdir):
      raise Sorry("Trial %d already in use"%params.input.trial)

  print("Using trial", params.input.trial)
  os.mkdir(trialdir)

  # log file will live here
  stdoutdir = os.path.join(trialdir, "stdout")
  os.mkdir(stdoutdir)
  logging_str = ""
  if params.output.split_logs: # test parameter for split_log then open and close log file and loop over nprocs
    for i in range(params.mp.nproc):
      error_files = os.path.join(stdoutdir,"error_rank%04d.out"%i)
      log_files = os.path.join(stdoutdir,"log_rank%04d.out"%i)
      open(log_files,'a').close()
      open(error_files,'a').close()
    logging_str = "output.logging_dir=%s"%stdoutdir
  else:
    logging_str = ""

  # Copy any config or phil files specified
  target_num = 1
  has_config = False
  redone_args = []
  for arg in dispatcher_args:
    if not len(arg.split('=')) == 2:
      # Not a key=value pair: keep it for the dispatcher untouched.
      redone_args.append(arg)
      continue
    name, value = arg.split('=')
    if "cfg" in name and os.path.splitext(value)[1].lower() == ".cfg":
      cfg = value
      if not os.path.exists(cfg):
        raise Sorry("Config file doesn't exist: %s"%cfg)
      if has_config:
        raise Sorry("Multiple config files found")
      has_config = True
      # copy_config may bump target_num if the config references phil targets.
      target_num = copy_config(cfg, trialdir, "psana", params, target_num)
      redone_args.append("%s=%s"%(name, os.path.join(trialdir, "psana.cfg")))
    elif "target" in name or os.path.splitext(value)[1].lower() == ".phil":
      phil = value
      if not os.path.exists(phil):
        raise Sorry("Phil file doesn't exist: %s"%phil)
      copy_target(phil, trialdir, "params_%d"%target_num)
      redone_args.append("%s=%s"%(name, os.path.join(trialdir, "params_%d.phil"%target_num)))
      target_num += 1
    else:
      redone_args.append(arg)
  dispatcher_args = redone_args

  # If additional phil params are provided, copy them over too
  if params.input.target is not None:
    if not os.path.exists(params.input.target):
      raise Sorry("Target file doesn't exist: %s"%params.input.target)
    copy_target(params.input.target, trialdir, "params_%d"%target_num)
    params.input.target = os.path.join(trialdir, "params_%d.phil"%target_num)
    target_num += 1

  # Some configs files will specify out_dirname. If not, we want to explicitly create one
  # so the dispatcher will have an output directory.
  output_dir = os.path.join(trialdir, "out")
  if not os.path.exists(output_dir):
    os.makedirs(output_dir)

  # Write out a script for submitting this job and submit it
  submit_path = os.path.join(trialdir, "submit.sh")

  extra_str = ""
  data_str = ""

  # At most one of locator/experiment may be given (at least one must be None).
  assert [params.input.locator, params.input.experiment].count(None) != 0

  if params.input.locator is not None:
    locator_file = os.path.join(trialdir, "data.loc")
    shutil.copyfile(params.input.locator, locator_file)
    data_str += locator_file

  if params.input.experiment is None:
    from xfel.ui import known_dials_dispatchers
    if params.input.dispatcher in known_dials_dispatchers:
      import importlib
      dispatcher_params = importlib.import_module(known_dials_dispatchers[params.input.dispatcher]).phil_scope.extract()
      # Only forward trial/run_num if the dispatcher's own phil accepts them.
      if hasattr(dispatcher_params, 'input') and hasattr(dispatcher_params.input, 'trial'):
        assert hasattr(dispatcher_params.input, 'run_num')
        data_str += " input.trial=%s input.run_num=%s" % ( # pass along for logging
          params.input.trial, params.input.run_num)
  else:
    data_str += " input.trial=%s input.experiment=%s input.run_num=%s" % (
      params.input.trial, params.input.experiment, params.input.run_num)

  for arg in dispatcher_args:
    extra_str += " %s" % arg

  if params.input.target is not None:
    extra_str += " %s" % params.input.target

  if params.input.rungroup is not None:
    data_str += " input.rungroup=%d" % params.input.rungroup

  command = "%s %s output.output_dir=%s %s %s" % (
    params.input.dispatcher, data_str, output_dir, logging_str, extra_str
  )

  job_name = "r%s"%params.input.run_num
  submit_command = get_submit_command_chooser(command, submit_path, stdoutdir, params.mp, job_name=job_name)
  if params.mp.method in ['lsf', 'sge', 'pbs']:
    # The chooser writes a wrapper script whose second-to-last line is the
    # actual run command; recover it for display. Bug fix: the script was
    # opened with "rb" and never closed -- bytes.split("\n") raises
    # TypeError on Python 3. Open in text mode under a context manager.
    parts = submit_command.split(" ")
    with open(parts.pop(-1)) as script:
      run_command = script.read().split("\n")[-2]
    command = " ".join(parts + [run_command])
  else:
    command = submit_command
  print(command)

  if params.dry_run:
    print("Dry run: job not submitted. Trial directory created here:", trialdir)
    print("Execute this command to submit the job:")
    print(submit_command)
  elif params.mp.method == 'local':
    submission_id = os.fork()
    if submission_id > 0:
      # Parent: the child's pid serves as the submission id.
      return submission_id
    else:
      # Child: redirect stdout/stderr into the log directory, then replace
      # this process image with the job command.
      stdout = os.open(os.path.join(stdoutdir, 'log.out'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stdout, 1)
      stderr = os.open(os.path.join(stdoutdir, 'log.err'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stderr, 2)
      os.execv(command.split()[0], command.split())
  else:
    try:
      result = easy_run.fully_buffered(command=submit_command)
      result.raise_if_errors()
    except Exception as e:
      # AFS token warnings are benign; re-raise anything else.
      if not "Warning: job being submitted without an AFS token." in str(e):
        raise e
    print("Job submitted. Output in", trialdir)

    if params.mp.method == "mpi" or params.mp.method == "lsf":
      submission_id = None
      for line in result.stdout_lines:
        # example for lsf: 'Job <XXXXXX> is submitted to queue <YYYYYYY>.'
        if len(line.split()) < 2: continue
        s = line.split()[1].lstrip('<').rstrip('>')
        try:
          s = int(s)
        except ValueError:
          pass
        else:
          submission_id = str(s)
      print(submission_id)
      return submission_id
    elif params.mp.method == 'pbs':
      submission_id = "".join(result.stdout_lines).strip()
      print(submission_id)
      return submission_id
  return None