Example #1
    def __get_job_options(self, runNumbers):
        if not self.engine().submit_hook():
            logging.warning(
                "A job with the name {j} has already been submitted.".format(
                    j=self.engine().job_name()))
            return

        CreateDirectory(self.engine().config_dir(), True)

        for r in runNumbers:
            jobFolder = os.path.join(self.__joboptions_dir,
                                     "{ddd}xxx".format(ddd=str(r)[:3]))
            if not os.path.isdir(jobFolder):
                logging.warning(
                    "Job option folder {f} for DSID {r} does not exist. Skipping {r}..."
                    .format(f=jobFolder, r=r))
                continue
            dir_to_copy = os.path.join(jobFolder, str(r))
            if not os.path.isdir(dir_to_copy):
                continue
            shutil.copytree(dir_to_copy,
                            os.path.join(self.engine().config_dir(), str(r)))

            # assemble the config file for the job option
            seeds = []
            while len(seeds) < self.__nJobs:
                s = random.randint(100000, 500000)  # integer seeds; they are written out with "%d" below
                if s not in seeds:
                    seeds += [s]

            jo = os.path.join(self.engine().config_dir(), str(r))
            out_dir = os.path.join(self.evgen_dir(), str(r))

            WriteList(
                (ReadListFromFile(self.seed_file()) if os.path.exists(
                    self.seed_file()) else []) + ["%d" % (i) for i in seeds],
                self.seed_file(),
            )
            WriteList(
                (ReadListFromFile(self.run_file())
                 if os.path.exists(self.run_file()) else []) +
                ["%d" % (r) for i in range(self.__nJobs)],
                self.run_file(),
            )
            WriteList(
                (ReadListFromFile(self.job_file())
                 if os.path.exists(self.job_file()) else []) +
                [jo for i in range(self.__nJobs)],
                self.job_file(),
            )
            WriteList(
                (ReadListFromFile(self.out_file())
                 if os.path.exists(self.out_file()) else []) +
                [out_dir for i in range(self.__nJobs)],
                self.out_file(),
            )

            # account for the jobs that will go into the job array
            self.__n_scheduled += self.__nJobs
            logging.info("<__get_job_options> Found %s" % (jo))
Example #2
 def submit_job(self):
     if self.__submitted: return False
     job_array = WriteList(
         self.merge_lists(),
         "%s/%s.txt" % (self.engine().config_dir(), id_generator(31)))
     final_merge_name = WriteList(
         self.temporary_files(),
         "%s/%s.txt" % (self.engine().config_dir(), id_generator(30)))
     if not self.engine().submit_array(
             script="ClusterSubmission/Merge.sh",
             sub_job=self.job_name(),
             mem=self.engine().merge_mem(),
             env_vars=[
                 ("JobConfigList", job_array),
                 ("OutFileList", final_merge_name),
                 ("ALRB_rootVersion", ROOTVERSION),
             ],
             hold_jobs=self.hold_jobs(),
             run_time=self.engine().merge_time(),
             array_size=len(self.merge_lists())):
         return False
     self.__submitted = True
     if not self.child(): return True
     if not self.child().submit_job(): return False
     return self.engine().submit_clean_job(
         hold_jobs=[self.engine().subjob_name(self.child().job_name())],
         to_clean=self.temporary_files(),
         sub_job=self.job_name())
Example #3
    def __extract_seeds(self, run):
        try:
            EVNT_DIR = [
                os.path.join(self.evgen_dir(), R)
                for R in os.listdir(self.evgen_dir()) if R.startswith(str(run))
            ][0]
        except IndexError:
            return
        logging.info(
            "<__extract_seeds> Searching {evntdir} for EVNT files not already processed in derivation format {d}."
            .format(evntdir=EVNT_DIR, d=self.__derivation))

        DERIVATION_DIR = os.path.join(self.aod_dir(),
                                      EVNT_DIR[EVNT_DIR.rfind("/") + 1:])
        CreateDirectory(DERIVATION_DIR, False)
        Evnt_Seeds = [
            int(E[E.find("EVNT") + 5:E.find(".pool")])
            for E in os.listdir(EVNT_DIR) if E.endswith(".root")
        ]
        DAOD_Seeds = [
            int(A.split(".")[-2]) for A in os.listdir(DERIVATION_DIR)
            if A.find(self.__derivation) != -1 and A.endswith(".root")
        ]
        Non_ProcSeeds = [seed for seed in Evnt_Seeds if seed not in DAOD_Seeds]
        if len(Non_ProcSeeds) == 0:
            return
        logging.info("Extracted seeds for run {r}:".format(r=run))
        logging.info("   +-=- {s}".format(
            s=", ".join([str(seed) for seed in Non_ProcSeeds])))

        WriteList(
            (ReadListFromFile(self.seed_file())
             if os.path.exists(self.seed_file()) else []) +
            [str(seed) for seed in Non_ProcSeeds],
            self.seed_file(),
        )
        WriteList(
            (ReadListFromFile(self.run_file())
             if os.path.exists(self.run_file()) else []) +
            [str(run) for seed in Non_ProcSeeds],
            self.run_file(),
        )
        WriteList(
            (ReadListFromFile(self.in_file()) if os.path.exists(self.in_file())
             else []) + [EVNT_DIR for seed in Non_ProcSeeds],
            self.in_file(),
        )
        self.__n_scheduled += len(Non_ProcSeeds)
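
Examples #1, #3 and #18 repeat the same idiom: read the current content of a list file (if it exists), append new entries, and rewrite the whole file with WriteList. Assuming the helper semantics sketched after Example #1, that idiom could be collected into one small hypothetical helper:

import os

def append_to_list_file(entries, file_path):
    # Re-read the existing list (if any), append the new entries as strings,
    # and rewrite the whole file, mirroring the pattern used above.
    existing = ReadListFromFile(file_path) if os.path.exists(file_path) else []
    return WriteList(existing + [str(entry) for entry in entries], file_path)

With such a helper, the three WriteList calls in __extract_seeds would reduce to append_to_list_file(Non_ProcSeeds, self.seed_file()) plus two analogous calls for the run and input files.
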
Example #4
def main():
    CheckRucioSetup()
    CheckRemainingProxyTime()
    """"""
    RunOptions = getArgumentParser().parse_args()

    all_files = []
    if RunOptions.single_out_file and len(RunOptions.out_file_name) == 0:
        logging.error("Please provide a file name if you run with --single-out_file")
        exit(1)
    # Do we have one dataset, or a file with a list of them?
    if os.path.exists(RunOptions.dataset):
        with open(RunOptions.dataset) as dsfile:
            for line in dsfile:
                # Ignore comment lines and empty lines
                if line.startswith('#'): continue
                realline = line.strip()
                if realline.find("_tid") > -1: realline = realline[0:realline.find("_tid")]
                if not realline: continue  # Ignore whitespace

                if not RunOptions.single_out_file:
                    createFileList(realline, RunOptions)
                else:
                    all_files += GetDataSetFiles(realline, RunOptions.RSE, RunOptions.protocols)

    else:
        createFileList(RunOptions.dataset, RunOptions)

    if len(all_files) > 0:
        WriteList(all_files, RunOptions.out_file_name)
Example #5
def downloadDataSets(InputDatasets, Destination, RSE="", use_singularity=False):
    ### Apparently rucio no longer works in combination with AthAnalysis. So let's
    ### execute it from a singularity container
    Cmds = []
    image_to_choose = setupBatchSubmitArgParser().get_default("SingularityImage")
    home_dir = setupBatchSubmitArgParser().get_default("BaseFolder") + "/TMP/.singularity/"
    CreateDirectory(Destination, False)
    if use_singularity:
        CreateDirectory(home_dir, False)
    to_clean = []
    for DS in InputDatasets:
        if not use_singularity:
            Cmds += ["rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination)]
        else:
            singularity_dir = home_dir + "/" + id_generator(21)
            to_clean += [singularity_dir]
            singularity_script = WriteList([
                "#!/bin/bash",
                "export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase",
                "source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh",
                "lsetup rucio",
                "echo 'rucio download %s --ndownloader 32 %s --dir %s'" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
                "rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
            ], "%s/to_exec.sh" % (singularity_dir))
            os.system("chmod 0777 " + singularity_script)
            Cmds += [
                "singularity exec --cleanenv -H %s:/alrb -B %s:/srv  %s/%s %s" %
                (singularity_dir, Destination, SINGULARITY_DIR, image_to_choose, singularity_script)
            ]
    ExecuteCommands(ListOfCmds=Cmds, MaxCurrent=8)

    for c in to_clean:
        os.system("rm -rf %s" % (c))
Example #6
 def write_ship_file(self, env_vars):
     ship_file_name = WriteList(["#!/bin/bash"] + [
         "export %s='%s'" % (var, val)
         for var, val in env_vars + self.common_env_vars()
     ], "%s/%s.sh" % (self.config_dir(), id_generator(74)))
     os.system("chmod 0700 %s" % (ship_file_name))
     return ship_file_name
Example #7
 def submit_move_job(self,
                     hold_jobs=[],
                     to_move=[],
                     destination="",
                     source_dir="",
                     sub_job=""):
     move_cfg = ""
     if len(to_move) > 0:
         move_cfg = "%s/Move_%s.txt" % (self.config_dir(), id_generator(35))
         WriteList(to_move, move_cfg)
     elif len(source_dir) > 0:
         move_cfg = source_dir
     else:
         logging.error("<submit_move_job> Nothing to move")
         return False
     if len(destination) == 0:
         logging.error(
             "<submit_move_job> No destination where to move provided")
         return False
     return self.submit_job(script="ClusterSubmission/Move.sh",
                            mem=100,
                            env_vars=[
                                ("DestinationDir", destination),
                                ("FromDir", move_cfg),
                            ],
                            hold_jobs=hold_jobs,
                            sub_job="Move%s%s" %
                            ("" if len(sub_job) == 0 else "-", sub_job),
                            run_time="01:00:00")
Example #8
def updateBlackList(black_list):
    current_black = getPRWblackList()
    if IsListIn(black_list, current_black): return
    current_black = ClearFromDuplicates(current_black + black_list)
    current_dir = os.getcwd()
    FileName = os.path.realpath(
        ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    #      Find out the current branch to propagate only                          #
    #      the updated List to the main repository. Other changes regarding       #
    #      side developments of the package should not be propagated yet          #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    new_branch = "PRW_%s_%s" % (time.strftime("%Y%m%d"), USERNAME)
    if current_branch:
        os.system(
            "git commit -am \"Commit changes of all files in order to push the 'BlackListedPRWdatasets.txt'\""
        )
    print "INFO: Create new branch %s to update the BlackListedPRWdatasets " % (
        new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    WriteList(sorted(current_black), FileName)
    os.system("git add BlackListedPRWdatasets.txt")
    os.system(
        "git commit BlackListedPRWdatasets.txt -m \"Updated the list of black prw files\""
    )
    os.system("git push %s %s" % (upstream, new_branch))
    if current_branch: os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
Example #9
def insertPRWUser(user):
    Users = getUsersSubmittedPRW()
    if user in Users: return
    Users += [user]
    current_dir = os.getcwd()
    FileName = os.path.realpath(
        ResolvePath("XAMPPbase/UsersWhoSubmittedPRW.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    #      Find out the current branch to propagate only                          #
    #      the updated List to the main repository. Other changes regarding       #
    #      side developments of the package should not be propagated yet          #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    new_branch = "PRW_" + user.replace(".", "_")
    if current_branch:
        os.system(
            "git commit -am \"Commit changes of all files in order to push the 'UsersWhoSubmittedPRW.txt'\""
        )
    print "INFO: Create new branch %s to update the UsersWhoSubmittedPRW " % (
        new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    print "INFO: %s submitted to the grid prw_config jobs. Add him to the common list such that others can download his files" % (
        user)
    WriteList(sorted(Users), FileName)
    os.system("git add UsersWhoSubmittedPRW.txt")
    os.system(
        "git commit UsersWhoSubmittedPRW.txt -m \"Added %s to the list of users who submitted a prw config creation job\""
        % (user))
    os.system("git push %s %s" % (upstream, new_branch))
    if current_branch: os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
Example #10
    def run(self):
        CreateDirectory(self.download_dir(), False)
        CreateDirectory(self.final_directory(), False)
        #self.__datasets = sorted(self.__datasets, key=lambda x: GetPRW_datasetID(x))
        DownloadList = [
            'rucio download --ndownloader 5 --dir %s %s' %
            (self.download_dir(), ds) for ds in self.__datasets
            if ds not in self.__already_on_disk
        ]
        ExecuteCommands(ListOfCmds=DownloadList, MaxCurrent=16)
        self.clearFromDuplicates(self.download_dir())
        Files = []
        for dir in os.listdir(self.download_dir()):
            dir_path = "%s/%s" % (self.download_dir(), dir)
            if not os.path.isdir(dir_path): continue
            if not self.hasDataset(dir): continue
            Files += [
                "%s/%s" % (dir_path, F) for F in os.listdir(dir_path)
                if IsROOTFile(F)
            ]

        WriteList(sorted(Files), "%s/temp_in.txt" % (self.download_dir()))
        #   only 1 entry in the MCPileupReweighting tree per Channel/RunNumber combination is actually needed
        #   thus, remove all others but one in order to significantly reduce the file size!
        #   This is done by the SlimPRWFile macro in XAMPPbase/utils/
        MergeCmd = "SlimPRWFile --InList %s/temp_in.txt --outFile %s" % (
            self.download_dir(), self.final_file())
        print(MergeCmd)
        os.system(MergeCmd)
        print("INFO: Clean up the temporary file")
        os.system("rm %s/temp_in.txt " % (self.download_dir()))

        self.standaloneCheck()

        print("INFO: Done")
Example #11
def write_status_log(file_location, grid_jobs):
    if len(grid_jobs) > 0:
        max_task_letters = max([len(str(job.jediTaskID)) for job in grid_jobs])
        max_status_letters = max(
            [len(str(job.taskStatus)) for job in grid_jobs])
        max_job_name_letters = max([len(job.jobName) for job in grid_jobs])
        max_progress_letters = max(
            [len(make_progess(job)) for job in grid_jobs])
        log_file = []
        for job in sorted(grid_jobs,
                          key=lambda x: get_progress(x)[0],
                          reverse=True):
            proc_frac, done_jobs, all_jobs = get_progress(job)
            log_file += [
                "https://bigpanda.cern.ch/task/%s/ %s %s %s %s %s %s %s(%.2f%%) %s"
                %
                (job.jediTaskID,
                 FillWhiteSpaces(max_task_letters - len(str(job.jediTaskID))),
                 job.jobName,
                 FillWhiteSpaces(max_job_name_letters - len(job.jobName)),
                 job.taskStatus,
                 FillWhiteSpaces(max_status_letters - len(job.taskStatus)),
                 make_progess(job),
                 FillWhiteSpaces(max_progress_letters -
                                 len(make_progess(job))), proc_frac, job.inDS)
            ]
        WriteList(log_file, file_location)
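
FillWhiteSpaces(n) is presumably a helper returning n blanks, so the columns above are padded by hand. Python's str.ljust gives the same alignment directly; a self-contained sketch of the idea with made-up rows (not the panda job objects used above):

rows = [("MyShortTask", "done"), ("AVeryMuchLongerTaskName", "running")]
name_width = max(len(name) for name, _ in rows)
status_width = max(len(status) for _, status in rows)
aligned = [
    # ljust pads each field to its column width, like the manual FillWhiteSpaces padding.
    "%s %s" % (name.ljust(name_width), status.ljust(status_width))
    for name, status in rows
]
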
Example #12
 def submit_copy_job(
         self,
         hold_jobs=[],
         to_copy=[],  ### Give particular files to copy
         destination="",
         source_dir="",  ### Optional
         sub_job=""):
     copy_cfg = ""
     if len(to_copy) > 0:
         copy_cfg = "%s/Copy_%s.txt" % (self.config_dir(), id_generator(35))
         WriteList(to_copy, copy_cfg)
     elif len(source_dir) > 0:
         copy_cfg = source_dir
     else:
         print "<submit_copy_job> Nothing to copy"
         return False
     if len(destination) == 0:
         print "<Submit_copy_job> Where to copy everything?"
         return False
     return self.submit_job(script="ClusterSubmission/Copy.sh",
                            mem=100,
                            env_vars=[
                                ("DestinationDir", destination),
                                ("FromDir", copy_cfg),
                            ],
                            hold_jobs=hold_jobs,
                            sub_job="Copy%s%s" %
                            ("" if len(sub_job) == 0 else "-", sub_job),
                            run_time="01:00:00")
Example #13
 def submit_clean_job(self, hold_jobs=[], to_clean=[], sub_job=""):
     clean_cfg = "%s/Clean_%s.txt" % (self.config_dir(), id_generator(35))
     WriteList(to_clean, clean_cfg)
     return self.submit_job(script="ClusterSubmission/Clean.sh",
                            mem=100,
                            env_vars=[("ToClean", clean_cfg)],
                            hold_jobs=hold_jobs,
                            sub_job="Clean%s%s" %
                            ("" if len(sub_job) == 0 else "-", sub_job),
                            run_time="01:00:00")
Example #14
    def __assemble_merge_list(self, files_to_merge):
        copied_in = [x for x in files_to_merge]
        if self.__shuffle_files: shuffle(copied_in)
        merge_lists = []
        merge_in = []
        for i, fi in enumerate(copied_in):
            if i > 0 and i % self.__files_per_job == 0:
                merge_name = "%s/%s.txt" % (self.engine().config_dir(),
                                            id_generator(85))
                WriteList(merge_in, merge_name)
                merge_lists += [merge_name]
                merge_in = []
            merge_in += [fi]

        ### Pack the last remnants into a final merge job
        if len(merge_in) > 0:
            merge_name = "%s/%s.txt" % (self.engine().config_dir(),
                                        id_generator(85))
            WriteList(merge_in, merge_name)
            merge_lists += [merge_name]
        return merge_lists
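
The loop in __assemble_merge_list chunks the (optionally shuffled) input into groups of __files_per_job files and writes each group to its own list file. The same chunking can be expressed with a stepped range; a hedged sketch that assumes the WriteList and id_generator helpers used throughout these examples:

def assemble_merge_list(files_to_merge, files_per_job, config_dir):
    # Hypothetical equivalent: slice the input into chunks of files_per_job
    # entries and write each chunk to its own merge list file.
    merge_lists = []
    for start in range(0, len(files_to_merge), files_per_job):
        chunk = files_to_merge[start:start + files_per_job]
        merge_lists.append(WriteList(chunk, "%s/%s.txt" % (config_dir, id_generator(85))))
    return merge_lists
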
Example #15
def createFileList(dsname, options):
    logging.info('Creating file list for ' + dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if len(DS) == 0:
        logging.error("No datasets found")
        return
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:len(dsname)]
    CreateDirectory(options.OutDir, False)
    filelistname = options.OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        logging.info("Remove the old FileList")
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
Example #16
def createFileList(dsname, options):
    prettyPrint('Creating file list for', dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if len(DS) == 0:
        print "No datasets found"
        return
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:len(dsname)]
    OutDir = options.OutDir
    if not os.path.exists(OutDir):
        print("mkdir -p " + OutDir)
        os.system("mkdir -p " + OutDir)
    filelistname = OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        print("Remove the old FileList")
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
Example #17
    def pack_environment(self, env_vars, script):
        exec_script = self.link_to_copy_area(script)
        if not exec_script: return False
        ship_file = self.write_ship_file(env_vars)
        if self.run_singularity():
            ship_file = self.write_ship_file([
                ("CONTAINER_SCRIPT", exec_script),
                ("CONTAINER_IMAGE", self.singularity_container()),
                ("CONTAINER_SHIPING_FILE", ship_file),
            ])
            exec_script = self.link_to_copy_area(
                ResolvePath("ClusterSubmission/Singularity.sh"))

        env_script = WriteList([
            "#!/bin/bash",
            "source %s" % (ship_file),
            "source %s" % (exec_script)
        ], "%s/EnvScript_%s.sh" % (self.config_dir(), id_generator(50)))
        os.system("chmod 0700 %s" % (env_script))
        return env_script
Example #18
    def submit_job(self):
        WriteList(
            [
                D.replace(self.evgen_dir(), self.aod_dir())
                for D in ReadListFromFile(self.in_file())
            ],
            self.out_file(),
        )

        extra_args = ""
        if len(self.__preExec) > 0:
            extra_args += ' --preExec "%s" ' % (self.__preExec)
        if len(self.__preInclude) > 0:
            extra_args += ' --preInclude "%s" ' % (self.__preInclude)
        if len(self.__postExec) > 0:
            extra_args += ' --postExec "%s" ' % (self.__postExec)
        if len(self.__postInclude) > 0:
            extra_args += ' --postInclude "%s" ' % (self.__postInclude)

        if not self.engine().submit_array(
                sub_job=self.__derivation,
                script="SubmitMC/batch_derivation.sh",
                mem=self.__mem,
                env_vars=[
                    ("SeedFile", self.seed_file()),
                    ("RunFile", self.run_file()),
                    ("InFile", self.in_file()),
                    ("OutFile", self.out_file()),
                    ("DERIVATION_DIR", self.aod_dir()),
                    ("DerivationRelease", self.__derivRelease),
                    ("DerivationCache", self.__derivCache),
                    ("ReductionConf", self.__derivation),
                    ("ExtraArgs", extra_args),
                ],
                hold_jobs=self.hold_jobs(),
                run_time=self.__run_time,
                array_size=self.n_scheduled(),
        ):
            return False
        return True
Example #19
    def submit_build_job(self):
        if self.__submitted_build:
            print "ERROR <submit_build_job>: Build job is already submitted"
            return False
        if not self.submit_hook(): return False
        if self.send_build_job() and not self.submit_job(
                script="ClusterSubmission/Build.sh",
                sub_job="Build",
                mem=self.__buildMem,
                env_vars=[("CleanOut", self.out_dir()),
                          ("CleanLogs", self.log_dir()),
                          ("CleanTmp", self.tmp_dir()),
                          ("nCoresToUse", self.__buildCores),
                          ("COPYAREA", self.build_dir())],
                run_time=self.__buildTime,
                hold_jobs=self.__holdBuild):
            return False
        elif not self.send_build_job():
            if not CreateDirectory(self.log_dir(),
                                   False) or not CreateDirectory(
                                       self.out_dir(), False):
                return False
            Dummy_Job = "%s/%s.sh" % (self.config_dir(), id_generator(35))
            WriteList([
                "#!/bin/bash",
                "echo \"I'm a dummy build job. Will wait 60 seconds until everything is scheduled\"",
                "sleep 120"
            ], Dummy_Job)
            if not self.submit_job(script=Dummy_Job,
                                   sub_job="Build",
                                   mem=100,
                                   env_vars=[],
                                   run_time="00:05:00",
                                   hold_jobs=self.__holdBuild):
                return False

        self.__submitted_build = True
        self.lock_area()
        return True
Example #20
    def finish(self):
        if len(self.__job_dependency_dict) == 0:
            logging.debug("Nothing has been scheduled")
            return False

        dag_content = []
        for job in self.__job_dependency_dict:
            dag_content += job.get_job_config_str()
        dag_content += ["\n\n\n"]

        for job in self.__job_dependency_dict:
            dag_content += job.get_dependency_str()

        dag_dir = self.log_dir() + "/DAG/"
        dag_location = WriteList(dag_content,
                                 "%s/%s.dag" % (dag_dir, self.job_name()))
        os.chdir(dag_dir)
        cmd = "condor_submit_dag -verbose -maxidle %d %s %s.dag" % (
            self.max_running_per_array(),
            ("-append '+MyProject = \"%s\"'" % self.accountinggroup()
             if self.accountinggroup() else ""), self.job_name())
        return not os.system(cmd)
Example #21
    def submit_build_job(self):
        if self.check_submitted_build():
            logging.warning(
                "<submit_build_job>: Build job is already submitted")
            return True
        if not self.submit_hook(): return False
        ### Few cluster engines go crazy if the log files of the own jobs are deleted
        ### Make sure that the build job deletes the log dir before submission
        if not CreateDirectory(self.log_dir(), True): return False
        if self.send_build_job() and not self.submit_job(
                script="ClusterSubmission/Build.sh",
                sub_job="Build",
                mem=self.get_build_mem(),
                n_cores=self.get_build_cores(),
                env_vars=[("CleanOut", self.out_dir()),
                          ("CleanTmp", self.tmp_dir()),
                          ("nCoresToUse", self.get_build_cores()),
                          ("COPYAREA", self.build_dir())],
                run_time=self.get_build_time(),
                hold_jobs=self.get_build_hold_jobs()):
            return False
        elif not self.send_build_job():
            if not CreateDirectory(self.out_dir(), False): return False
            Dummy_Job = WriteList([
                "#!/bin/bash",
                "echo \"I'm a dummy build job. Will wait 15 seconds until everything is scheduled\"",
                "sleep 15"
            ], "%s/%s.sh" % (self.config_dir(), id_generator(35)))
            if not self.submit_job(script=Dummy_Job,
                                   sub_job="Build",
                                   mem=100,
                                   run_time="00:05:00",
                                   hold_jobs=self.__holdBuild):
                return False

        self.__submitted_build = True
        self.lock_area()
        return True
Example #22
def download_ci_files(options):
    ### Retrieve first the EOS token
    getEOS_token(options)
    ### Check first whether the CI dir actually exists
    smp_dir = "%s/datasamples/" % (options.ciDir)
    if not os.path.isdir(smp_dir):
        print "ERROR: The path to look up for the data samples %s does not exists. Where is my data" % (
            smp_dir)
        exit(1)

    ### Create first the directory to store the temporary files in there
    ### Clean the old remnants
    CreateDirectory(options.TEMPdir, True)
    downloaded_smp = []
    for smp in os.listdir(smp_dir):
        smp_name = smp[:smp.rfind(".")]
        print "INFO: Download the files from sample %s" % (smp_name)
        download_to = "%s/%s" % (options.TEMPdir, smp_name)
        CreateDirectory(download_to, False)
        ### Download the files first
        for file_to_load in ReadListFromFile("%s/%s" % (smp_dir, smp)):
            destination_file = "%s/%s" % (
                download_to, file_to_load[file_to_load.rfind("/") + 1:])
            CopyCmd = "xrdcp %s/%s %s" % (options.EOSpath, file_to_load,
                                          destination_file)
            if os.path.exists(destination_file):
                print "INFO: Omit do download %s" % (file_to_load)
            elif os.system(CopyCmd) != 0:
                print "ERROR: Failed to download %s" % (file_to_load)
                exit(1)
        ### Write the file list for the analysis
        file_list = "%s/FileList_%s.txt" % (options.TEMPdir, smp_name)
        WriteList([
            "%s/%s" % (download_to, f[f.rfind("/") + 1:])
            for f in ReadListFromFile("%s/%s" % (smp_dir, smp))
        ], file_list)
        downloaded_smp += [smp_name]
    return downloaded_smp
Example #23
def write_broken_log(options, broken_jobs):
    if len(broken_jobs) > 0:
        logging.info("Found %d unhealthy jobs will prompt them below" %
                     (len(broken_jobs)))
        max_task_letters = max(
            [len(str(job.jediTaskID)) for job in broken_jobs])
        max_status_letters = max(
            [len(str(job.taskStatus)) for job in broken_jobs])
        max_job_name_letters = max([len(job.jobName) for job in broken_jobs])
        log_file = []
        for job in sorted(broken_jobs, key=lambda x: x.jobName):
            log_file += [
                "https://bigpanda.cern.ch/task/%s/ %s %s %s %s %s %s" %
                (job.jediTaskID,
                 FillWhiteSpaces(max_task_letters - len(str(job.jediTaskID))),
                 job.taskStatus,
                 FillWhiteSpaces(max_status_letters - len(job.taskStatus)),
                 job.jobName,
                 FillWhiteSpaces(max_job_name_letters - len(job.jobName)),
                 job.inDS)
            ]
            logging.info(log_file[-1])
        WriteList(log_file, options.broken_log_file)
        chan_in_old = readPRWchannels("%s/%s" % (RunOptions.oldPRWDir, new))
        chan_in_new = readPRWchannels("%s/%s" % (RunOptions.newPRWDir, new))
        messages = []
        AnythingNew = False
        ### Compare the prw channels of both files
        for c in chan_in_new:
            if not c in chan_in_old:
                messages += ["INFO: Channel %d (%s) has been added through the last iteration to %s" % (c, MyxSecDB.name(c), new)]
                AnythingNew = True
            else:
                chan_in_old.remove(c)

        ### The old file somehow contains additional channels. We need to double check
        if len(chan_in_old) > 0:
            messages += [
                "WARNING: The following channels were merged into the old file %s/%s but are no longer present in %s/%s" %
                (RunOptions.oldPRWDir, new, RunOptions.newPRWDir, new)
            ]

            for c in chan_in_old:
                messages += ["     -=-=- %d (%s)" % (c, MyxSecDB.name(c))]
        if not AnythingNew:
            messages += ["INFO: Nothing new has been added to %s/%s w.r.t %s/%s" % (RunOptions.oldPRWDir, new, RunOptions.newPRWDir, new)]

        WriteList(messages, "PRWcheck_%s.log" % (new[:new.rfind(".")]))
        if len(chan_in_old) > 0 and AnythingNew and RunOptions.uniteFiles:
            Cmd = "SlimPRWFile --inFile %s/%s --inFile %s/%s --outFile %s/%s --InIsSlimmed" % (
                RunOptions.newPRWDir, new, RunOptions.oldPRWDir, new, RunOptions.newPRWDir, new)
            os.system(Cmd)
Example #25
    def _write_submission_file(self,
                               sub_job,
                               exec_script,
                               env_vars=[],
                               mem=1,
                               run_time='00:00:01',
                               nproc=1,
                               arraylength=-1):
        self.set_cluster_control_module(
            "ClusterSubmission/ClusterControlHTCONDOR.sh")
        if not exec_script:
            logging.error("<_write_submission_file> No exec_script was given!")
            return False
        if mem < 0:
            logging.error(
                "<_write_submission_file> No memory requirement for the job was specified."
            )
            return False
        job_name = self.subjob_name(sub_job)
        if len([
                x for x in self.__job_dependency_dict
                if x.getJobName() == job_name
        ]):
            logging.error(
                "The job %s has already been defined. Please ensure unique job names"
                % (job_name))
            return False

        log_string = "%s/%s%s" % (
            self.log_dir(), sub_job if len(sub_job) else job_name,
            "_$(CONDOR_TASK_ID)" if arraylength > 0 else "")

        exec_script = self.pack_environment(env_vars, exec_script)
        submission_content = []

        submission_content += [
            "universe                = vanilla",
            "executable              = %s" % (exec_script),
            "output                  = %s.out" % (log_string),
            "error                   = %s.err" % (log_string),
            "log                     = %s.log" % (log_string),
            #"transfer_executable     = True",
            "notification            = Error",
            "notify_user             = %s" % (self.mail_user()),
            "request_memory          = %d" % (mem),
            "on_exit_remove          = (ExitBySignal == False) && (ExitCode == 0)",
            "request_cpus            = %d" % (nproc),
            #### Extra attributes
            "+MaxRuntime             = %d" %
            (TimeToSeconds(run_time)),  ### CERN cloud
            "+RequestRuntime         = %d" %
            (TimeToSeconds(run_time)),  ### DESY cloud
            "+MyProject              = %s" %
            (self.accountinggroup()) if self.accountinggroup() else "",
        ]

        if arraylength > 0:
            submission_content += [
                "environment = CONDOR_TASK_ID=$(CONDOR_TASK_ID)"
            ]
        submission_content += [
            "queue",
        ]
        self.__job_dependency_dict += [
            HTCondorJob(job_name=job_name,
                        submission_file=WriteList(
                            submission_content, "%s/%s.sub" %
                            (self.config_dir(), id_generator(25))),
                        arraylength=arraylength,
                        engine=self,
                        abbreviation=self.__assign_abb_letter())
        ]
        self.__submitted_jobs += 1 if arraylength <= 1 else arraylength
        return True
Example #26
 def lock_area(self):
     WriteList(
         ["###Hook file to prevent double submission of the same job"],
         "%s/.job.lck" % (self.tmp_dir()))
Example #27
    while AOD.rfind("_r") != AOD.find("_r"):
        AOD = AOD[:AOD.rfind("_r")]
    ### Remove the double e-tag
    while AOD.rfind(".e") < AOD.rfind("_e"):
        uscore_pos = AOD.find("_", AOD.rfind(".e"))
        AOD = AOD[:uscore_pos] + AOD[AOD.find("_", uscore_pos + 1):]
    return AOD


if __name__ == '__main__':

    OutDir = os.getcwd()

    parser = argparse.ArgumentParser(
        description='This script converts DAOD filelists to AOD filelists which then can be used for creating pileup reweighting files.',
        prog='CreateAODFromDAODList',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--datasets', '-d', '-D', help='DAOD filelist to be converted into AOD', required=True)
    parser.add_argument('--outFile', help="pipe the output into a script into a file", default='')
    RunOptions = parser.parse_args()

    logging.info('The following DAODs are converted into AODs:\n')
    DAODsToConvert = [convertToAOD(daod) for daod in ReadListFromFile(RunOptions.datasets)]

    logging.info('\nThe AODs are:\n')

    for daod in DAODsToConvert:
        logging.info("   --- %s" % (daod))

    if len(RunOptions.outFile) > 0: WriteList(DAODsToConvert, RunOptions.outFile)
Example #28
def main():
    """List datasets located at a RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()

    Today = time.strftime("%Y-%m-%d")
    Patterns = RunOptions.pattern
    OutDir = RunOptions.OutDir
    RSE = RunOptions.RSE
    if ',' in RSE:
        RSE = RSE.split(
            ','
        )[0]  # in case people have more than one RSE in their environment variable for grid submits

    Prefix = ''
    if RunOptions.MyRequests:
        Prefix = 'MyRequestTo_'
        DS = ListUserRequests(RSE, RunOptions.rucio)
    else:
        DS = ListDisk(RSE)


###    MetaFile = open("Content_%s.txt"%(RSE), 'w')
###    for DataSet, Size in ListDiskWithSize(RSE):
###           Owner, ID = GetDataSetInfo(DataSet,RSE)
###           line = "%s  |   %s   | %s  | %.2f GB"%(ID, Owner,DataSet, Size)
###           MetaFile.write("%s\n"%(line))
###           print line
###    MetaFile.close()
###    exit(0)

    if len(DS) == 0:
        logging.warning("Disk is empty.")
        exit(0)
    CreateDirectory(OutDir, False)

    ###########
    #   Define the file list name
    ###########
    FileList = "%s%s_%s" % (Prefix, RSE, Today)
    if len(Patterns) > 0: FileList += "_%s" % ('_'.join(Patterns))
    if len(RunOptions.exclude) > 0:
        FileList += "_exl_%s" % ('_'.join(RunOptions.exclude))
    FileList += '.txt'
    Write = []
    for d in sorted(DS):
        allPatternsFound = True
        for Pattern in Patterns:
            if not Pattern in d:
                allPatternsFound = False
                break
        for Pattern in RunOptions.exclude:
            if Pattern in d:
                allPatternsFound = False
                break
        if allPatternsFound:
            IsInWrite = False
            if d.split(".")[-1].isdigit(): d = d[:d.rfind(".")]
            if d.find("_tid") != -1: d = d[0:d.rfind("_tid")]
            if len([w for w in Write if w.find(d) != -1]) > 0: continue
            logging.info("Write dataset %s" % (d))
            Write.append(d)
    if len(Write) == 0:
        logging.error("No datasets containing given pattern(s) found!")
        exit(0)

    WriteList(Write, "%s/%s" % (OutDir, FileList))
    logging.info("Datasets written to file %s/%s" % (OutDir, FileList))
    OutDir = os.getcwd()

    parser = argparse.ArgumentParser(
        description=
        'This script converts DAOD filelists to AOD filelists which then can be used for creating pileup reweighting files.',
        prog='CreateAODFromDAODList',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--datasets',
                        '-d',
                        '-D',
                        help='DAOD filelist to be converted into AOD',
                        required=True)
    parser.add_argument('--outFile',
                        help="pipe the output into a script into a file",
                        default='')
    RunOptions = parser.parse_args()

    print('The following DAODs are converted into AODs:\n')
    DAODsToConvert = [
        convertToAOD(daod) for daod in ReadListFromFile(RunOptions.datasets)
    ]

    print('\nThe AODs are:\n')

    for daod in DAODsToConvert:
        print "   --- %s" % (daod)

    if len(RunOptions.outFile) > 0:
        WriteList(DAODsToConvert, RunOptions.outFile)
Example #30
    def __prepare_input(self, in_ds=""):
        print "INFO <_prepare_input>: Assemble configuration for %s" % (in_ds)
        ### Name to be piped to the job
        out_name = in_ds[in_ds.rfind("/") + 1:in_ds.rfind(".")] if IsTextFile(
            in_ds) or IsROOTFile(in_ds) else in_ds
        split_dir = "%s/Datasets/%s" % (self.split_cfg_dir(), out_name)
        root_files = []
        ### Now we need to find the corresponding ROOT files
        ### 1) The dataset is a root file itself
        if IsROOTFile(in_ds):
            root_files += [in_ds]
        ### 2) The given dataset is a .txt file
        elif IsTextFile(in_ds):
            ### Find the root files from there
            root_files = self.__extract_root_files(in_ds)
            if len(root_files) == 0: return False
        ### 3) The given dataset is a directory
        elif os.path.isdir(in_ds):
            if in_ds.endswith("/"):
                in_ds = in_ds[:in_ds.rfind("/")]
                out_name = in_ds[in_ds.rfind("/") + 1:]
            split_dir = "%s/Directory/%s" % (self.split_cfg_dir(), out_name)
            root_files = [
                "%s/%s" % (in_ds, F) for F in os.listdir(in_ds)
                if IsROOTFile(F)
            ]
        ### 4) It's a logical dataset stored on d-cache
        else:
            root_files = self.__find_on_dcache(in_ds)
        if len(root_files) == 0:
            print "ERROR: Could not associate anything to %s" % (in_ds)
            return False
        if len(out_name) == 0:
            print "ERROR: How should the output be called %s" % (in_ds)
            return False

        ### Assemble the splitting of the jobs
        main_list = "%s/AllROOTFiles.main" % (split_dir)
        files_in_main = ReadListFromFile(main_list) if os.path.exists(
            main_list) else []
        ### The list is unknown or the set of ROOT files has changed
        ### Redo the splitting again ;-)
        if len(files_in_main) != len(root_files) or not IsListIn(
                files_in_main, root_files):
            print "INFO: Assemble new split for %s" % (in_ds)
            CreateDirectory(split_dir, True)
            WriteList(root_files, main_list)
            os.system("CreateBatchJobSplit -I %s -O %s -EpJ %i" %
                      (main_list, split_dir, self.__events_per_job))
        ### Each of the lists contains the ROOT files to process per each sub job
        split_lists = [
            "%s/%s" % (split_dir, F) for F in os.listdir(split_dir)
            if IsTextFile(F)
        ]
        n_jobs = len(split_lists)
        subjob_outs = [
            "%s/%s_%d.root" % (self.engine().tmp_dir(), out_name, d)
            for d in range(n_jobs)
        ]

        assembled_in = [] if not os.path.exists(
            self.job_input()) else ReadListFromFile(self.job_input())
        assembled_out = [] if not os.path.exists(
            self.job_out_names()) else ReadListFromFile(self.job_out_names())
        start_reg = len(assembled_in)

        ### Write what we've assembled so far
        WriteList(assembled_in + split_lists, self.job_input())
        WriteList(assembled_out + subjob_outs, self.job_out_names())
        #### Submit the merge jobs
        self.__merge_interfaces += [
            self.engine().create_merge_interface(
                out_name=out_name,
                files_to_merge=subjob_outs,
                hold_jobs=[(self.engine().job_name(),
                            [start_reg + i + 1 for i in range(n_jobs)])],
                files_per_job=self.__files_per_merge_itr,
                final_split=self.__final_split)
        ]
        self.__nsheduled += n_jobs
        return True