示例#1
0
 def check_for_existing_output(self, r, rname):
     """
     Delete grid output files that clash with the seeds about to be submitted.

     The listing of ``grid_output_dir`` is fetched once. If the
     ``<r>-<rname>`` tag appears in it, every seed in
     ``[baseSeed, baseSeed + producRun)`` (both taken from
     ``pyHepGrid.src.header``) is checked, and each output file found in the
     listing is deleted. ``self._press_yes_to_continue`` is called once,
     right before the first deletion.
     """
     from pyHepGrid.src.header import grid_output_dir, logger
     logger.debug(f"Checking whether runcard {rname} has output for seeds "
                  "that you are trying to submit...")
     run_tag = r + "-" + rname
     remote_listing = self.gridw.get_dir_contents(grid_output_dir)
     if run_tag not in remote_listing:
         # No trace of this runcard/runfolder pair in the output directory.
         return

     from pyHepGrid.src.header import baseSeed, producRun
     already_confirmed = False
     for seed in range(baseSeed, baseSeed + producRun):
         stale_output = self.output_name(r, rname, seed)
         if stale_output not in remote_listing:
             continue
         if not already_confirmed:
             # Prompt and warn only once, however many files are removed.
             self._press_yes_to_continue(
                 "This runcard already has at least one output file "
                 "at gfal:output with a seed you are trying to "
                 f"submit (looked for '{run_tag}').\n"
                 "If you continue, it will be removed now.")
             logger.warning(
                 f"Runcard {r} has at least one file at output")
             already_confirmed = True
         self.gridw.delete(stale_output, grid_output_dir)
     logger.info("Output check complete")
 def check_for_existing_output(self, r, rname):
     """
     Remove pre-existing grid output for the seeds of the upcoming submission.

     Reads ``grid_output_dir``, ``baseSeed`` and ``producRun`` from
     ``pyHepGrid.src.header``. When the ``<r>-<rname>`` tag is found in the
     directory listing, each seed in the submission range is looked up and
     any matching output file is deleted. ``self._press_yes_to_continue`` is
     invoked a single time, before the first file is deleted.
     """
     from pyHepGrid.src.header import grid_output_dir, logger
     debug_template = "Checking whether runcard {0} has output for seeds that you are trying to submit..."
     logger.debug(debug_template.format(rname))
     tag = r + "-" + rname
     listing = self.gridw.get_dir_contents(grid_output_dir)
     if tag not in listing:
         # Nothing to clean up for this runcard/runfolder pair.
         return

     from pyHepGrid.src.header import baseSeed, producRun
     prompt_template = "It seems this runcard already has at least one file at lfn:output with a seed you are trying to submit (looked for {}). Do you want to remove it/them?"
     needs_prompt = True
     for seed in range(baseSeed, baseSeed + producRun):
         candidate = self.output_name(r, rname, seed)
         if candidate not in listing:
             continue
         if needs_prompt:
             # Only the first hit triggers the prompt and the warning.
             self._press_yes_to_continue(prompt_template.format(tag))
             logger.warning(
                 "Runcard {0} has at least one file at output".format(r))
             needs_prompt = False
         self.gridw.delete(candidate, grid_output_dir)
     logger.info("Output check complete")
示例#3
0
 def check_for_existing_output_local(self, r, rname, baseSeed, producRun):
     """
     Warn when local log files already exist for the seeds being submitted.

     Log files are the entries of the local run directory that end in
     ``.log`` and contain the runcard id. The seed is parsed from the
     ``s<digits>.<ext>`` tail of each name; names without such a tail are
     ignored instead of aborting the check.

     Args:
         r: runcard file name, resolved inside the local run directory.
         rname: run folder name, used for the directory lookup and logging.
         baseSeed: first seed of the submission.
         producRun: number of consecutive seeds in the submission.

     If any existing seed falls in ``[baseSeed, baseSeed + producRun)`` a
     warning listing the seeds is logged and
     ``self._press_yes_to_continue(None)`` is called.
     """
     import re
     logger.debug(
         "Checking whether runcard {0} has output for seeds that you are "
         "trying to submit...".format(rname))
     local_dir_name = self.get_local_dir_name(r, rname)
     files = os.listdir(local_dir_name)
     runcard = PROGRAMruncard(runcard_file=os.path.join(local_dir_name, r),
                              logger=logger,
                              grid_run=False,
                              use_cvmfs=header.use_cvmfs_lhapdf,
                              cvmfs_loc=header.cvmfs_lhapdf_location)
     runcard_id = runcard.runcard_dict_case_preserving["id"]
     logs = [f for f in files if f.endswith(".log") and runcard_id in f]
     # NOTE(review): the leading "." is unescaped and matches any character;
     # left unchanged because the exact log naming scheme is not visible here.
     logseed_regex = re.compile(r".s([0-9]+)\.[^\.]+$")
     # search() returns None for names without a seed suffix; skip those
     # rather than failing with AttributeError on .group().
     seed_matches = (logseed_regex.search(name) for name in logs)
     existing_seeds = {
         int(match.group(1)) for match in seed_matches if match is not None
     }
     submission_seeds = set(range(baseSeed, baseSeed + producRun))
     overlap = existing_seeds & submission_seeds
     if overlap:
         # Sorted so the message is stable between runs.
         logger.warning(
             "Log files for seed(s) {0} already exist in run folders. "
             "Running will overwrite the logfiles already present.".format(
                 " ".join(str(seed) for seed in sorted(overlap))))
         self._press_yes_to_continue(None)
示例#4
0
 def __check_pulled_warmup(self, success, tmpnm, warmup_extensions):
     """
     Validate a pulled warmup archive.

     When ``success`` is falsy it is returned unchanged. Otherwise
     ``self.tarw.check_filesizes(tmpnm, warmup_extensions)`` is queried and
     ``False`` is returned (with a warning) if it reports no matches or any
     size equal to zero. In every other case ``success`` is returned.
     """
     if not success:
         return success

     matches, sizes = self.tarw.check_filesizes(tmpnm, warmup_extensions)
     if len(matches) == 0:
         logger.warning("No warmup file found on main Grid Storage")
         return False
     for size in sizes:
         if size == 0:
             logger.warning("Empty warmup file found on Grid Storage")
             return False
     return success
示例#5
0
    def check_warmup_files(self, db_id, rcard, resubmit=False):
        """
        Report whether a warmup file exists for a run and optionally resubmit.

        The check runs inside a scratch directory. The original working
        directory is always restored and the scratch directory removed, even
        if the check raises.

        Args:
            db_id: database id of the run; used to look up its runcard and
                runfolder and to query the job status.
            rcard: passed through to ``runWrapper`` when resubmitting.
            resubmit: if true and the warmup is missing or corrupted,
                resubmit the job unless it is still running or waiting.

        The status (Present / Missing / Corrupted) is logged at info level.
        A ``tarfile.ReadError`` from ``_bring_warmup_files`` is reported as
        "Corrupted"; any other exception propagates.
        """
        import shutil
        import tarfile
        import tempfile

        origdir = os.path.abspath(os.getcwd())
        tmpdir = tempfile.mkdtemp()
        warmup_present = False
        try:
            os.chdir(tmpdir)
            logger.debug("Temporary directory: {0}".format(tmpdir))
            # NOTE(review): the result was never used; the call is kept in
            # case it has side effects or validates the setup -- confirm.
            util.expandCard()
            tags = ["runcard", "runfolder"]
            runcard_info = self.dbase.list_data(self.table, tags, db_id)[0]
            runcard = runcard_info["runcard"]
            rname = runcard_info["runfolder"]
            try:
                warmup_files = self._bring_warmup_files(runcard,
                                                        rname,
                                                        check_only=True)
            except tarfile.ReadError:
                status = "\033[91mCorrupted\033[0m"
            else:
                if warmup_files == []:
                    status = "\033[93mMissing\033[0m"
                else:
                    status = "\033[92mPresent\033[0m"
                    warmup_present = True
            run_id = "{0}-{1}:".format(runcard, rname)
            logger.info("[{0}] {2:55} {1:>20}".format(db_id, status, run_id))
        finally:
            # mkdtemp() leaves cleanup to the caller; leave the directory
            # before deleting it so the process never sits in a removed cwd.
            os.chdir(origdir)
            shutil.rmtree(tmpdir, ignore_errors=True)

        if resubmit and not warmup_present:
            _done, wait, run, _fail, _unk = self.stats_job(db_id,
                                                           do_print=False)
            if run + wait > 0:  # Be more careful in case of unknown status
                logger.warning("Job still running. Not resubmitting for now")
            else:
                # Need to override dictCard for single job submission
                expandedCard = ([runcard], {runcard: rname})
                logger.info(
                    "Warmup not found and job ended. Resubmitting to ARC")
                from pyHepGrid.src.runArcjob import runWrapper
                runWrapper(rcard, expandedCard=expandedCard)