Пример #1
0
def do_submit(command, submit_path, stdoutdir, mp_params, job_name, dry_run=False):
  """Write a submission script for ``command`` and submit it via ``mp_params.method``.

  Args:
    command: dispatcher command line to run.
    submit_path: path where the generated submission script is written.
    stdoutdir: directory that receives the job's log files.
    mp_params: multiprocessing phil parameters (``method`` is read here).
    job_name: name given to the queued job.
    dry_run: if True, only print the submission command without submitting.

  Returns:
    The queue submission id (for queued methods), the child pid (for
    method 'local'), or None for a dry run.
  """
  submit_command = get_submit_command_chooser(command, submit_path, stdoutdir, mp_params, job_name=job_name)
  if mp_params.method in ['lsf', 'sge', 'pbs']:
    # The last token of the submit command is the generated script path;
    # recover the actual run command (second-to-last line) for display.
    parts = submit_command.split(" ")
    # Open in text mode with a context manager: "rb" yields bytes on
    # Python 3, so bytes.split("\n") raised TypeError, and the handle leaked.
    with open(parts.pop(-1)) as script:
      run_command = script.read().split("\n")[-2]
    command = " ".join(parts + [run_command])
  else:
    command = submit_command
  print(command)
  submit_command = str(submit_command) # unicode workaround

  if dry_run:
    print("Dry run: job not submitted. Trial directory created here:", os.path.dirname(submit_path))
    print("Execute this command to submit the job:")
    print(submit_command)
  elif mp_params.method == 'local':
    # Fork: the parent returns the child pid as the submission id; the child
    # redirects stdout/stderr to log files and replaces itself with the job.
    submission_id = os.fork()
    if submission_id > 0:
      return submission_id
    else:
      stdout = os.open(os.path.join(stdoutdir, 'log.out'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stdout, 1)
      stderr = os.open(os.path.join(stdoutdir, 'log.err'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stderr, 2)
      os.execv(command.split()[0], command.split())
  else:
    try:
      result = easy_run.fully_buffered(command=submit_command)
      result.raise_if_errors()
    except Exception as e:
      # A missing AFS token is only a warning at LCLS; anything else is fatal.
      if not "Warning: job being submitted without an AFS token." in str(e):
        raise  # bare raise preserves the original traceback

    return get_submission_id(result, mp_params.method)
Пример #2
0
    def run(self):
        '''Combine, refine and reintegrate processing results in batches.

        For each chunk/stripe allocated from the results directory, write the
        input expt/refl lists, build a chained shell command sequence
        (combine_experiments, refine, recompute_mosaicity, optional
        reintegration and frame extraction) and either run it directly or
        submit it to the queue.

        NOTE(review): this method uses Python 2 constructs
        (``print`` statements, ``dict.iteritems``, ``xrange``).
        '''
        if self.params.striping.run:
            print "processing runs " + ", ".join(
                ["r%04d" % r for r in self.params.striping.run])
        if self.params.striping.rungroup:
            print "processing rungroups " + ", ".join(
                ["rg%03d" % rg for rg in self.params.striping.rungroup])
        # Partition the selected results into batches of chunks or stripes.
        batch_chunks = allocate_chunks(
            self.params.striping.results_dir,
            self.params.striping.trial,
            rgs_selected=[
                "rg%03d" % rg for rg in self.params.striping.rungroup
            ],
            respect_rungroup_barriers=self.params.striping.
            respect_rungroup_barriers,
            runs_selected=["r%04d" % r for r in self.params.striping.run],
            stripe=self.params.striping.stripe,
            max_size=self.params.striping.chunk_size,
            integrated=self.params.combine_experiments.keep_integrated)
        # Create the output directory tree (idempotent).
        self.dirname = "combine_experiments_t%03d" % self.params.striping.trial
        self.intermediates = os.path.join(self.dirname, "intermediates")
        self.extracted = os.path.join(self.dirname, "final_extracted")
        for d in self.dirname, self.intermediates, self.extracted:
            if not os.path.isdir(d):
                os.mkdir(d)
        self.cwd = os.getcwd()
        tag = "stripe" if self.params.striping.stripe else "chunk"
        for batch, ch_list in batch_chunks.iteritems():
            for idx in xrange(len(ch_list)):
                chunk = ch_list[idx]

                # reset for this chunk/stripe
                self.filename = "t%03d_%s_%s%03d" % (
                    self.params.striping.trial, batch, tag, idx)
                self.command_sequence = []

                # set up the file containing input expts and refls (logging)
                chunk_path = os.path.join(self.cwd, self.intermediates,
                                          self.filename)
                if os.path.isfile(chunk_path):
                    os.remove(chunk_path)
                with open(chunk_path, "wb") as outfile:
                    for i in (0, 1):  # expts then refls
                        outfile.write("\n".join(chunk[i]) + "\n")

                # set up the params for dials.combine_experiments
                custom_parts = ["  input {"]
                for expt_path in chunk[0]:
                    custom_parts.append("    experiments = %s" % expt_path)
                for refl_path in chunk[1]:
                    custom_parts.append("    reflections = %s" % refl_path)
                custom_parts.append("  }")
                self.set_up_section("combine_experiments",
                                    "dials.combine_experiments",
                                    clustering=False,
                                    custom_parts=custom_parts)

                # refinement of the grouped experiments
                self.set_up_section("refinement",
                                    "dials.refine",
                                    clustering=self.clustering)

                # recompute mosaicity of the refined experiments
                self.set_up_section("recompute_mosaicity",
                                    "cctbx.xfel.recompute_mosaicity",
                                    clustering=self.clustering)

                # reintegration
                if self.params.reintegration.enable:
                    custom_parts = [
                        "  integration.mp.nproc = %d" % self.params.mp.nproc
                    ]
                    self.set_up_section("reintegration",
                                        "dials.integrate",
                                        custom_parts=custom_parts,
                                        clustering=self.clustering)

                # extract results to integration pickles for merging
                if self.params.postprocessing.enable:
                    # Substitute the extraction output dir into the diff
                    # string; "ITER" becomes a %04d placeholder filled later.
                    lambda_diff_str = lambda diff_str: (diff_str % \
                      (os.path.join("..", "final_extracted"))).replace("ITER", "%04d")
                    self.set_up_section("postprocessing",
                                        "cctbx.xfel.frame_extractor",
                                        lambda_diff_str=lambda_diff_str,
                                        clustering=self.clustering)

                # submit queued job from appropriate directory
                os.chdir(self.intermediates)
                command = " && ".join(self.command_sequence)
                if self.params.combine_experiments.clustering.dendrogram:
                    # Interactive dendrogram display: run in-process instead
                    # of submitting to the queue.
                    easy_run.fully_buffered(
                        command).raise_if_errors().show_stdout()
                else:
                    submit_path = os.path.join(self.cwd, self.intermediates,
                                               "combine_%s.sh" % self.filename)
                    submit_command = get_submit_command_chooser(
                        command,
                        submit_path,
                        self.intermediates,
                        self.params.mp,
                        log_name=(submit_path.split(".sh")[0] + ".out"))
                    print "executing command: %s" % submit_command
                    try:
                        easy_run.fully_buffered(
                            submit_command).raise_if_errors().show_stdout()
                    except Exception as e:
                        # A missing AFS token is only a warning at LCLS.
                        if not "Warning: job being submitted without an AFS token." in str(
                                e):
                            raise e
                os.chdir(self.cwd)
Пример #3
0
class Script(object):
    """Script to submit XFEL data at LCLS for processing.

    Builds a per-run trial directory, copies config/phil files into it,
    assembles the dispatcher command line, and submits it through
    ``get_submit_command_chooser``.

    NOTE(review): this class uses Python 2 syntax (``print`` statements,
    ``except RuntimeError, e``, ``xrange``).
    """
    def __init__(self):
        pass

    def run(self, argv=None):
        """ Set up run folder and submit the job. """
        if argv is None:
            argv = sys.argv[1:]

        # Show help and the full phil parameter tree when asked (or no args).
        if len(argv) == 0 or "-h" in argv or "--help" in argv or "-c" in argv:
            print help_str
            print "Showing phil parameters:"
            print phil_scope.as_str(attributes_level=2)
            return

        # Split the command line into phil overrides and raw dispatcher args.
        user_phil = []
        dispatcher_args = []
        for arg in argv:
            if (os.path.isfile(arg)):
                user_phil.append(parse(file_name=arg))
            else:
                try:
                    user_phil.append(parse(arg))
                except RuntimeError, e:
                    # Not parseable as phil: pass through to the dispatcher.
                    dispatcher_args.append(arg)
        scope, unused = phil_scope.fetch(sources=user_phil,
                                         track_unused_definitions=True)
        params = scope.extract()
        # Definitions unknown to this scope are forwarded to the dispatcher.
        dispatcher_args = [
            "%s=%s" % (u.path, u.object.words[0].value) for u in unused
        ]

        assert params.input.experiment is not None
        assert params.input.run_num is not None

        print "Submitting run %d of experiment %s" % (params.input.run_num,
                                                      params.input.experiment)

        if not os.path.exists(params.output.output_dir):
            os.makedirs(params.output.output_dir)

        rundir = os.path.join(params.output.output_dir,
                              "r%04d" % params.input.run_num)
        if not os.path.exists(rundir):
            os.mkdir(rundir)

        # If a trial number wasn't included, find the next available, up to 999 trials
        if params.input.trial is None:
            found_one = False
            for i in xrange(1000):
                trialdir = os.path.join(rundir, "%03d" % i)
                if params.input.rungroup is not None:
                    trialdir += "_rg%03d" % params.input.rungroup
                if not os.path.exists(trialdir):
                    found_one = True
                    break
            if found_one:
                params.input.trial = i
            else:
                raise Sorry("All trial numbers in use")
        else:
            trialdir = os.path.join(rundir, "%03d" % params.input.trial)
            if params.input.rungroup is not None:
                trialdir += "_rg%03d" % params.input.rungroup
            if os.path.exists(trialdir):
                raise Sorry("Trial %d already in use" % params.input.trial)

        print "Using trial", params.input.trial
        os.mkdir(trialdir)

        # log file will live here
        stdoutdir = os.path.join(trialdir, "stdout")
        os.mkdir(stdoutdir)
        if params.output.split_logs:  # test parameter for split_log then open and close log file and loop over nprocs
            # Pre-create one log/error file pair per MPI rank.
            for i in xrange(params.mp.nproc):
                error_files = os.path.join(stdoutdir, "error_rank%04d.out" % i)
                log_files = os.path.join(stdoutdir, "log_rank%04d.out" % i)
                open(log_files, 'a').close()
                open(error_files, 'a').close()
            logging_str = "output.logging_dir=%s" % stdoutdir
        else:
            logging_str = ""

        # Copy any config or phil files specified
        target_num = 1
        has_config = False
        redone_args = []
        for arg in dispatcher_args:
            # NOTE(review): args that are not name=value pairs are silently
            # dropped here (not appended to redone_args) -- confirm intended;
            # a later revision of this method keeps them.
            if not len(arg.split('=')) == 2:
                continue
            name, value = arg.split('=')

            if "cfg" in name and os.path.splitext(value)[1].lower() == ".cfg":
                # psana config file: copy into the trial dir as psana.cfg.
                cfg = value
                if not os.path.exists(cfg):
                    raise Sorry("Config file doesn't exist: %s" % cfg)
                if has_config:
                    raise Sorry("Multiple config files found")
                has_config = True
                target_num = copy_config(cfg, trialdir, "psana", params,
                                         target_num)
                redone_args.append("%s=%s" %
                                   (name, os.path.join(trialdir, "psana.cfg")))
            elif "target" in name or os.path.splitext(
                    value)[1].lower() == ".phil":
                # phil target file: copy into the trial dir with a sequence number.
                phil = value
                if not os.path.exists(phil):
                    raise Sorry("Phil file doesn't exist: %s" % phil)
                copy_target(phil, trialdir, "params_%d" % target_num)
                redone_args.append(
                    "%s=%s" %
                    (name, os.path.join(trialdir,
                                        "params_%d.phil" % target_num)))
                target_num += 1
            else:
                redone_args.append(arg)
        dispatcher_args = redone_args

        # If additional phil params are provided, copy them over too
        if params.input.target is not None:
            if not os.path.exists(params.input.target):
                raise Sorry("Target file doesn't exist: %s" %
                            params.input.target)
            copy_target(params.input.target, trialdir,
                        "params_%d" % target_num)
            params.input.target = os.path.join(trialdir,
                                               "params_%d.phil" % target_num)
            target_num += 1

        # Some configs files will specify out_dirname. If not, we want to explicitly create one
        # so the dispatcher will have an output directory.
        output_dir = os.path.join(trialdir, "out")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Write out a script for submitting this job and submit it
        submit_path = os.path.join(trialdir, "submit.sh")

        extra_str = ""
        for arg in dispatcher_args:
            extra_str += " %s" % arg

        if params.input.target is not None:
            extra_str += " %s" % params.input.target

        if params.input.rungroup is not None:
            extra_str += " input.rungroup=%d" % params.input.rungroup

        command = "%s input.experiment=%s input.run_num=%d input.trial=%d output.output_dir=%s %s %s" % (
            params.input.dispatcher, params.input.experiment,
            params.input.run_num, params.input.trial, output_dir, logging_str,
            extra_str)

        submit_command = get_submit_command_chooser(command, submit_path,
                                                    stdoutdir, params.mp)
        if params.mp.method in "lsf sge pbs".split(" "):
            # The last token is the generated script; recover the actual run
            # command (second-to-last line) for display.
            # NOTE(review): "rb" + str split works on Python 2 only (bytes is
            # str there); the file handle is never closed.
            parts = submit_command.split(" ")
            script = open(parts.pop(-1), "rb")
            run_command = script.read().split("\n")[-2]
            command = " ".join(parts + [run_command])
        else:
            command = submit_command
        print command

        if params.dry_run:
            print "Dry run: job not submitted. Trial directory created here:", trialdir
            print "Execute this command to submit the job:"
            print submit_command
        else:
            try:
                result = easy_run.fully_buffered(command=submit_command)
                result.raise_if_errors()
            except Exception, e:
                # A missing AFS token is only a warning at LCLS.
                if not "Warning: job being submitted without an AFS token." in str(
                        e):
                    raise e

            print "Job submitted.  Output in", trialdir

            if params.mp.method == "mpi" or params.mp.method == "lsf":
                # Parse the queue submission id out of the scheduler output.
                submission_id = None
                for line in result.stdout_lines:
                    # example for lsf: 'Job <XXXXXX> is submitted to queue <YYYYYYY>.'
                    if len(line.split()) < 2: continue
                    s = line.split()[1].lstrip('<').rstrip('>')
                    try:
                        s = int(s)
                    except ValueError:
                        pass
                    else:
                        submission_id = str(s)
                return submission_id
  def run(self, argv = None):
    """Set up a trial folder for the given run and submit the processing job.

    Parses phil overrides and dispatcher pass-through args from ``argv``,
    creates the run/trial/stdout directory tree, copies config/phil/locator
    files into the trial directory, assembles the dispatcher command line and
    submits it (or forks it for method 'local').

    Args:
      argv: argument list; defaults to ``sys.argv[1:]``.

    Returns:
      The queue submission id (mpi/lsf/pbs), the child pid (local), or None.
    """
    if argv is None:
      argv = sys.argv[1:]

    # Show help and the full phil parameter tree when asked (or no args).
    if len(argv) == 0 or "-h" in argv or "--help" in argv or "-c" in argv:
      print(help_str)
      print("Showing phil parameters:")
      print(phil_scope.as_str(attributes_level = 2))
      return

    # Split the command line into phil overrides and raw dispatcher args.
    user_phil = []
    dispatcher_args = []
    for arg in argv:
      if (os.path.isfile(arg)):
        try:
          user_phil.append(parse(file_name=arg))
        except Exception:
          # Only a file explicitly named .phil must parse; other files are
          # passed through to the dispatcher.
          if os.path.splitext(arg)[1] == ".phil": raise
          dispatcher_args.append(arg)
      else:
        try:
          user_phil.append(parse(arg))
        except RuntimeError:
          # Not parseable as phil: pass it through to the dispatcher.
          dispatcher_args.append(arg)
    scope, unused = phil_scope.fetch(sources=user_phil, track_unused_definitions=True)
    params = scope.extract()
    # Definitions unknown to this scope are forwarded to the dispatcher.
    dispatcher_args.extend(["%s=%s"%(u.path,u.object.words[0].value) for u in unused])

    assert params.input.run_num is not None
    if params.input.dispatcher in ["cxi.xtc_process", "cctbx.xfel.xtc_process"]:
      # processing XTC streams at LCLS -- dispatcher will locate raw data
      assert params.input.experiment is not None or params.input.locator is not None
      print("Submitting run %d of experiment %s"%(int(params.input.run_num), params.input.experiment))
      rundir = os.path.join(params.output.output_dir, "r%04d"%int(params.input.run_num))
    else:
      print("Submitting run %s"%(params.input.run_num))
      try:
        # Numeric run identifiers get the zero-padded rNNNN directory name.
        rundir = os.path.join(params.output.output_dir, "r%04d"%int(params.input.run_num))
      except ValueError:
        # Non-numeric run identifiers are used verbatim.
        rundir = os.path.join(params.output.output_dir, params.input.run_num)

    if not os.path.exists(params.output.output_dir):
      os.makedirs(params.output.output_dir)

    if not os.path.exists(rundir):
      os.mkdir(rundir)

    # If a trial number wasn't included, find the next available, up to 999 trials
    if params.input.trial is None:
      found_one = False
      for i in range(1000):
        trialdir = os.path.join(rundir, "%03d"%i)
        if params.input.rungroup is not None:
          trialdir += "_rg%03d"%params.input.rungroup
        if not os.path.exists(trialdir):
          found_one = True
          break
      if found_one:
        params.input.trial = i
      else:
        raise Sorry("All trial numbers in use")
    else:
      trialdir = os.path.join(rundir, "%03d"%params.input.trial)
      if params.input.rungroup is not None:
        trialdir += "_rg%03d"%params.input.rungroup
      if os.path.exists(trialdir):
        raise Sorry("Trial %d already in use"%params.input.trial)

    print("Using trial", params.input.trial)
    os.mkdir(trialdir)

    # log file will live here
    stdoutdir = os.path.join(trialdir, "stdout")
    os.mkdir(stdoutdir)
    logging_str = ""
    if params.output.split_logs:# test parameter for split_log then open and close log file and loop over nprocs
      # Pre-create one log/error file pair per MPI rank.
      for i in range(params.mp.nproc):
        error_files = os.path.join(stdoutdir,"error_rank%04d.out"%i)
        log_files = os.path.join(stdoutdir,"log_rank%04d.out"%i)
        open(log_files,'a').close()
        open(error_files,'a').close()
      logging_str = "output.logging_dir=%s"%stdoutdir
    else:
      logging_str = ""

    # Copy any config or phil files specified
    target_num = 1
    has_config = False
    redone_args = []
    for arg in dispatcher_args:
      if not len(arg.split('=')) == 2:
        # Not a name=value pair: keep it verbatim for the dispatcher.
        redone_args.append(arg)
        continue
      name, value = arg.split('=')

      if "cfg" in name and os.path.splitext(value)[1].lower() == ".cfg":
        # psana config file: copy into the trial dir as psana.cfg.
        cfg = value
        if not os.path.exists(cfg):
          raise Sorry("Config file doesn't exist: %s"%cfg)
        if has_config:
          raise Sorry("Multiple config files found")
        has_config = True
        target_num = copy_config(cfg, trialdir, "psana", params, target_num)
        redone_args.append("%s=%s"%(name, os.path.join(trialdir, "psana.cfg")))
      elif "target" in name or os.path.splitext(value)[1].lower() == ".phil":
        # phil target file: copy into the trial dir with a sequence number.
        phil = value
        if not os.path.exists(phil):
          raise Sorry("Phil file doesn't exist: %s"%phil)
        copy_target(phil, trialdir, "params_%d"%target_num)
        redone_args.append("%s=%s"%(name, os.path.join(trialdir, "params_%d.phil"%target_num)))
        target_num += 1
      else:
        redone_args.append(arg)
    dispatcher_args = redone_args

    # If additional phil params are provided, copy them over too
    if params.input.target is not None:
      if not os.path.exists(params.input.target):
        raise Sorry("Target file doesn't exist: %s"%params.input.target)
      copy_target(params.input.target, trialdir, "params_%d"%target_num)
      params.input.target = os.path.join(trialdir, "params_%d.phil"%target_num)
      target_num += 1

    # Some configs files will specify out_dirname. If not, we want to explicitly create one
    # so the dispatcher will have an output directory.
    output_dir = os.path.join(trialdir, "out")
    if not os.path.exists(output_dir):
      os.makedirs(output_dir)

    # Write out a script for submitting this job and submit it
    submit_path = os.path.join(trialdir, "submit.sh")

    extra_str = ""
    data_str = ""

    # At least one of locator/experiment must be None (they are alternative
    # ways of pointing the dispatcher at the data).
    assert [params.input.locator, params.input.experiment].count(None) != 0
    if params.input.locator is not None:
      locator_file = os.path.join(trialdir, "data.loc")
      shutil.copyfile(params.input.locator, locator_file)
      data_str += locator_file
    if params.input.experiment is None:
      from xfel.ui import known_dials_dispatchers
      if params.input.dispatcher in known_dials_dispatchers:
        import importlib
        dispatcher_params = importlib.import_module(known_dials_dispatchers[params.input.dispatcher]).phil_scope.extract()
        if hasattr(dispatcher_params, 'input') and hasattr(dispatcher_params.input, 'trial'):
          assert hasattr(dispatcher_params.input, 'run_num')
          data_str += " input.trial=%s input.run_num=%s" % ( # pass along for logging
            params.input.trial, params.input.run_num)
    else:
      data_str += " input.trial=%s input.experiment=%s input.run_num=%s" % (
        params.input.trial, params.input.experiment, params.input.run_num)

    for arg in dispatcher_args:
      extra_str += " %s" % arg

    if params.input.target is not None:
      extra_str += " %s" % params.input.target

    if params.input.rungroup is not None:
      data_str += " input.rungroup=%d" % params.input.rungroup

    command = "%s %s output.output_dir=%s %s %s" % (
      params.input.dispatcher, data_str, output_dir,
      logging_str, extra_str
    )

    job_name = "r%s"%params.input.run_num

    submit_command = get_submit_command_chooser(command, submit_path, stdoutdir, params.mp, job_name=job_name)
    if params.mp.method in ['lsf', 'sge', 'pbs']:
      # The last token is the generated script; recover the actual run
      # command (second-to-last line) for display. Open in text mode with a
      # context manager: "rb" yields bytes on Python 3, so
      # bytes.split("\n") raised TypeError, and the handle leaked.
      parts = submit_command.split(" ")
      with open(parts.pop(-1)) as script:
        run_command = script.read().split("\n")[-2]
      command = " ".join(parts + [run_command])
    else:
      command = submit_command
    print(command)

    if params.dry_run:
      print("Dry run: job not submitted. Trial directory created here:", trialdir)
      print("Execute this command to submit the job:")
      print(submit_command)
    elif params.mp.method == 'local':
      # Fork: the parent returns the child pid as the submission id; the
      # child redirects stdout/stderr and replaces itself with the job.
      submission_id = os.fork()
      if submission_id > 0:
        return submission_id
      else:
        stdout = os.open(os.path.join(stdoutdir, 'log.out'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stdout, 1)
        stderr = os.open(os.path.join(stdoutdir, 'log.err'), os.O_WRONLY|os.O_CREAT|os.O_TRUNC); os.dup2(stderr, 2)
        os.execv(command.split()[0], command.split())
    else:
      try:
        result = easy_run.fully_buffered(command=submit_command)
        result.raise_if_errors()
      except Exception as e:
        # A missing AFS token is only a warning at LCLS; anything else is fatal.
        if not "Warning: job being submitted without an AFS token." in str(e):
          raise  # bare raise preserves the original traceback

      print("Job submitted.  Output in", trialdir)

      if params.mp.method == "mpi" or params.mp.method == "lsf":
        # Parse the queue submission id out of the scheduler output.
        submission_id = None
        for line in result.stdout_lines:
          # example for lsf: 'Job <XXXXXX> is submitted to queue <YYYYYYY>.'
          if len(line.split()) < 2: continue
          s = line.split()[1].lstrip('<').rstrip('>')
          try:
            s = int(s)
          except ValueError:
            pass
          else:
            submission_id = str(s)
        print(submission_id)
        return submission_id
      elif params.mp.method == 'pbs':
        submission_id = "".join(result.stdout_lines).strip()
        print(submission_id)
        return submission_id
    return None