Example #1
    def __get_job_options(self, runNumbers):
        if not self.engine().submit_hook():
            logging.warning(
                "A job with the name {j} has already been submitted.".format(
                    j=self.engine().job_name()))
            return

        CreateDirectory(self.engine().config_dir(), True)

        for r in runNumbers:
            jobFolder = os.path.join(self.__joboptions_dir,
                                     "{ddd}xxx".format(ddd=str(r)[:3]))
            if not os.path.isdir(jobFolder):
                logging.warning(
                    "Job option folder {f} for DSID {r} does not exist. Skipping {r}..."
                    .format(f=jobFolder, r=r))
                continue
            dir_to_copy = os.path.join(jobFolder, str(r))
            if not os.path.isdir(dir_to_copy):
                continue
            shutil.copytree(dir_to_copy,
                            os.path.join(self.engine().config_dir(), str(r)))

            # assemble the config file for the job option
            seeds = []
            while len(seeds) < self.__nJobs:
                # integer seeds, so the duplicate check below is meaningful
                s = random.randint(100000, 500000)
                if s not in seeds:
                    seeds += [s]

            jo = os.path.join(self.engine().config_dir(), str(r))
            out_dir = os.path.join(self.evgen_dir(), str(r))

            WriteList(
                (ReadListFromFile(self.seed_file()) if os.path.exists(
                    self.seed_file()) else []) + ["%d" % (i) for i in seeds],
                self.seed_file(),
            )
            WriteList(
                (ReadListFromFile(self.run_file())
                 if os.path.exists(self.run_file()) else []) +
                ["%d" % (r) for i in range(self.__nJobs)],
                self.run_file(),
            )
            WriteList(
                (ReadListFromFile(self.job_file())
                 if os.path.exists(self.job_file()) else []) +
                [jo for i in range(self.__nJobs)],
                self.job_file(),
            )
            WriteList(
                (ReadListFromFile(self.out_file())
                 if os.path.exists(self.out_file()) else []) +
                [out_dir for i in range(self.__nJobs)],
                self.out_file(),
            )

            # submit the job array
            self.__n_scheduled += self.__nJobs
            logging.info("INFO <__get_job_options> Found %s" % (jo))
Example #2
    def __extract_seeds(self, run):
        try:
            EVNT_DIR = [
                os.path.join(self.evgen_dir(), R)
                for R in os.listdir(self.evgen_dir()) if R.startswith(str(run))
            ][0]
        except (IndexError, OSError):
            return
        logging.info(
            "<__extract_seeds> Searching {evntdir} for EVNT files not already processed in derivation format {d}."
            .format(evntdir=EVNT_DIR, d=self.__derivation))

        DERIVATION_DIR = os.path.join(self.aod_dir(),
                                      EVNT_DIR[EVNT_DIR.rfind("/") + 1:])
        CreateDirectory(DERIVATION_DIR, False)
        Evnt_Seeds = [
            int(E[E.find("EVNT") + 5:E.find(".pool")])
            for E in os.listdir(EVNT_DIR) if E.endswith(".root")
        ]
        DAOD_Seeds = [
            int(A.split(".")[-2]) for A in os.listdir(DERIVATION_DIR)
            if A.find(self.__derivation) != -1 and A.endswith(".root")
        ]
        Non_ProcSeeds = [seed for seed in Evnt_Seeds if seed not in DAOD_Seeds]
        if len(Non_ProcSeeds) == 0:
            return
        logging.info("Extracted seeds for run {r}:".format(r=run))
        logging.info("   +-=- {s}".format(
            s=", ".join([str(seed) for seed in Non_ProcSeeds])))

        WriteList(
            (ReadListFromFile(self.seed_file())
             if os.path.exists(self.seed_file()) else []) +
            [str(seed) for seed in Non_ProcSeeds],
            self.seed_file(),
        )
        WriteList(
            (ReadListFromFile(self.run_file())
             if os.path.exists(self.run_file()) else []) +
            [str(run) for seed in Non_ProcSeeds],
            self.run_file(),
        )
        WriteList(
            (ReadListFromFile(self.in_file()) if os.path.exists(self.in_file())
             else []) + [EVNT_DIR for seed in Non_ProcSeeds],
            self.in_file(),
        )
        self.__n_scheduled += len(Non_ProcSeeds)
Example #3
def getPRWblackList():
    FileName = ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt")
    if not FileName:
        print "ERROR: The file XAMPPbase/data/BlackListedPRWdatasets.txt could not be found in the repository"
        print "ERROR: Did you delete it by accident? Please check!!!!"
        sys.exit(1)
    return sorted(ReadListFromFile(FileName))
Example #4
def getUsersSubmittedPRW():
    FileName = ResolvePath("XAMPPbase/UsersWhoSubmittedPRW.txt")
    if not FileName:
        print "ERROR: The file XAMPPbase/data/UsersWhoSubmittedPRW.txt could not be found in the repository"
        print "ERROR: Did you delete it by accident? Please check!!!!"
        sys.exit(1)
    return sorted(ReadListFromFile(FileName))
Example #5
 def __init__(self,
              campaign,
              stype,
              rtag,
              datasets,
              temp_dir,
              outdir,
              check_consistency=False,
              notDownloadAgain=True):
     threading.Thread.__init__(self)
     self.__campaign = campaign
     self.__stype = stype
     self.__rtag = rtag
     self.__datasets = datasets
     self.__dsids = ClearFromDuplicates(
         [GetPRW_datasetID(ds) for ds in self.__datasets])
     self.__purged = []
     self.__tempdir = temp_dir
     self.__outdir = outdir
     self.__check_consistency = check_consistency
     self.__to_black_list = []
     self.__ds_to_submit = []
     self.__inconsistent_log = []
     self.__already_on_disk = [] if not notDownloadAgain or not os.path.exists(
         "%s/Finished.txt" %
         (self.download_dir())) else ReadListFromFile("%s/Finished.txt" %
                                                      (self.download_dir()))
     if check_consistency:
         getAMIDataBase().getMCDataSets(channels=self.dsids(),
                                        campaign="%s" % (self.campaign()),
                                        derivations=[])
Example #6
def OpenFiles(MyList):
    ROOTFiles = []
    for Entry in MyList:
        if IsROOTFile(Entry): ROOTFiles.append(ROOT.TFile.Open(Entry))
        elif IsTextFile(Entry):
            #### Adapt for the possibility that someone passes a XAMPPplotting config
            if Entry.endswith(".conf"):
                ROOTFiles += [ROOT.TFile.Open(File) for File in readXAMPPplottingInputConfig(Entry)]
            else:
                ROOTFiles += [ROOT.TFile.Open(Line) for Line in ReadListFromFile(Entry)]
    return ROOTFiles
Example #7
 def print_log_file(self, last_lines=10):
     if not os.path.exists(self.log_file()): return
     log_content = ReadListFromFile(self.log_file())
     n_lines = len(log_content)
     for i in range(max(0, n_lines - last_lines), n_lines):
         if self.thread_number() == -1:
             logging.info("<%s> %s" % (self.name(), log_content[i]))
         else:
             logging.info("<%s - %d/%d> %s" %
                          (self.name(), self.thread_number(),
                           self.thread_engine().get_array_size(
                               task_name=self.name()), log_content[i]))
Example #8
def download_ci_files(options):
    ### Retrieve first the EOS token
    getEOS_token(options)
    ### Check first whether the CI dir actually exists
    smp_dir = "%s/datasamples/" % (options.ciDir)
    if not os.path.isdir(smp_dir):
        print "ERROR: The data sample directory %s does not exist. Where is my data?" % (
            smp_dir)
        exit(1)

    ### Create first the directory to store the temporary files in there
    ### Clean up the old remnants
    CreateDirectory(options.TEMPdir, True)
    downloaded_smp = []
    for smp in os.listdir(smp_dir):
        smp_name = smp[:smp.rfind(".")]
        print "INFO: Download the files from sample %s" % (smp_name)
        download_to = "%s/%s" % (options.TEMPdir, smp_name)
        CreateDirectory(download_to, False)
        ### Download the files first
        for file_to_load in ReadListFromFile("%s/%s" % (smp_dir, smp)):
            destination_file = "%s/%s" % (
                download_to, file_to_load[file_to_load.rfind("/") + 1:])
            CopyCmd = "xrdcp %s/%s %s" % (options.EOSpath, file_to_load,
                                          destination_file)
            if os.path.exists(destination_file):
                print "INFO: Omit do download %s" % (file_to_load)
            elif os.system(CopyCmd) != 0:
                print "ERROR: Failed to download %s" % (file_to_load)
                exit(1)
        ### Write the file list for the analysis
        file_list = "%s/FileList_%s.txt" % (options.TEMPdir, smp_name)
        WriteList([
            "%s/%s" % (download_to, f[f.rfind("/") + 1:])
            for f in ReadListFromFile("%s/%s" % (smp_dir, smp))
        ], file_list)
        downloaded_smp += [smp_name]
    return downloaded_smp
Example #9
def main():
    """Request datasets to RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    List = ClearFromDuplicates(ReadListFromFile(RunOptions.list))

    ### Start replication of the datasets
    initiateReplication(ListOfDataSets=List,
                        Rucio=RunOptions.rucio,
                        RSE=RunOptions.RSE,
                        lifeTime=RunOptions.lifetime,
                        approve=RunOptions.askapproval,
                        comment=RunOptions.comment)
Example #10
 def __extract_root_files(self, file_list=""):
     content = ReadListFromFile(file_list)
     if len(content) == 0:
         print "ERROR: The file %s is empty" % (in_ds)
         return []
     n_files_in_cont = len(content) - len(
         [c for c in content if IsROOTFile(c)])
     ### The list contains a list of root_files
     if n_files_in_cont == 0:
         return content
     ### It's a mixture
     elif n_files_in_cont != len(content):
         print "ERROR: You've a mixture of ROOT files and other stuff in %s" % (
             file_list)
         return []
     root_files = []
     for ds in content:
         root_files += self.__find_on_dcache(ds)
     return root_files
Example #11
    def submit_job(self):
        WriteList(
            [
                D.replace(self.evgen_dir(), self.aod_dir())
                for D in ReadListFromFile(self.in_file())
            ],
            self.out_file(),
        )

        extra_args = ""
        if len(self.__preExec) > 0:
            extra_args += ' --preExec "%s" ' % (self.__preExec)
        if len(self.__preInclude) > 0:
            extra_args += ' --preInclude "%s" ' % (self.__preInclude)
        if len(self.__postExec) > 0:
            extra_args += ' --postExec "%s" ' % (self.__postExec)
        if len(self.__postInclude) > 0:
            extra_args += ' --postInclude "%s" ' % (self.__postInclude)

        if not self.engine().submit_array(
                sub_job=self.__derivation,
                script="SubmitMC/batch_derivation.sh",
                mem=self.__mem,
                env_vars=[
                    ("SeedFile", self.seed_file()),
                    ("RunFile", self.run_file()),
                    ("InFile", self.in_file()),
                    ("OutFile", self.out_file()),
                    ("DERIVATION_DIR", self.aod_dir()),
                    ("DerivationRelease", self.__derivRelease),
                    ("DerivationCache", self.__derivCache),
                    ("ReductionConf", self.__derivation),
                    ("ExtraArgs", extra_args),
                ],
                hold_jobs=self.hold_jobs(),
                run_time=self.__run_time,
                array_size=self.n_scheduled(),
        ):
            return False
        return True
Example #12
def main():
    Options = setupScriptSubmitParser().parse_args()
    submit_engine = setup_engine(Options)

    list_of_cmds = submit_engine.link_to_copy_area(Options.ListOfCmds)

    if not list_of_cmds:
        logging.error(
            "Please give a valid file with list of commands to execute")
        exit(1)

    if not submit_engine.submit_build_job():
        logging.error("Submission failed")
        exit(1)
    submit_engine.submit_array(script="ClusterSubmission/Run.sh",
                               mem=Options.vmem,
                               env_vars=[("ListOfCmds", list_of_cmds)],
                               hold_jobs=Options.HoldJob,
                               run_time=Options.RunTime,
                               array_size=len(ReadListFromFile(list_of_cmds)))
    submit_engine.submit_clean_all(hold_jobs=[submit_engine.job_name()])
    submit_engine.finish()
Example #13
    Sample_Dir = ResolvePath(RunOptions.ListDir)
    No_AOD = []
    TO_REQUEST = []

    if not Sample_Dir:
        logging.error("ERROR: Please give a valid  directory")
        exit(1)

    for File in os.listdir(Sample_Dir):
        if os.path.isdir("%s/%s" % (Sample_Dir, File)): continue
        logging.info("Update file list %s" % (File))

        DataSets = sorted(
            ClearFromDuplicates([
                GetPRW_datasetID(DS)
                for DS in ReadListFromFile("%s/%s" % (Sample_Dir, File))
                if DS.find("data") == -1
            ]))
        if len(DataSets) == 0: continue
        logging.info("Call the AMI database")

        DERIVATIONS = []
        NO_DERIVATION = []
        AODs = []
        getAMIDataBase().getMCDataSets(
            channels=DataSets,
            derivations=["DAOD_%s" % (RunOptions.derivation)])
        #### Find the AODs for each DSID first
        for DSID in DataSets:
            Found_MC16a = False
            Found_MC16d = False
Example #14
    parser.add_argument(
        "--log_file",
        help="Define the location of the log-file from the consistency check",
        default="%s/Merged_NTUP.log" % (os.getcwd()))
    parser.add_argument("--mergeAllTags",
                        help="Merge everything which is available",
                        default=False,
                        action='store_true')

    RunOptions = parser.parse_args()

    Required_DS = []
    if len(RunOptions.requestedDataSets) > 0:
        for requestedDS in RunOptions.requestedDataSets:
            Required_DS.extend(
                [convertToAOD(DS) for DS in ReadListFromFile(requestedDS)])

    Datasets = []
    if RunOptions.readFromList:
        if len(RunOptions.inputFile) == 0:
            print 'ERROR: Please give a file containing PRW files list when using --readFromList option!'
            sys.exit(1)

        datasetsInList = []
        for inputFile in RunOptions.inputFile:
            datasetsInList.extend(ReadListFromFile(inputFile))
        Datasets.extend(datasetsInList)

    else:
        print 'INFO: Looking for NTUP_PILEUP datasets in rucio...'
        for c in RunOptions.campaign:
            Datasets += GetDatasets(campaign=c)
Example #15
    parser.add_argument("--remainingSplit",
                        help="Specify a remaining split of the files",
                        default=1)
    parser.add_argument("--nFilesPerJob",
                        help="Specify number of files per merge job",
                        default=10)
    parser.add_argument("--HoldJob",
                        help="Specify a list of jobs to hold on",
                        default=[])
    RunOptions = parser.parse_args()
    submit_engine = setup_engine(RunOptions)
    merging = [
        submit_engine.create_merge_interface(
            out_name=L[L.rfind("/") + 1:L.rfind(".")],
            files_to_merge=ReadListFromFile(L),
            files_per_job=RunOptions.nFilesPerJob,
            hold_jobs=RunOptions.HoldJob,
            final_split=RunOptions.remainingSplit)
        for L in RunOptions.fileLists
    ]
    for merge in merging:
        merge.submit_job()
    clean_hold = [
        submit_engine.subjob_name("merge-%s" % (merge.outFileName()))
        for merge in merging
    ]

    submit_engine.submit_clean_all(clean_hold)
Example #16
def AssembleAthenaOptions(RunOptions, Parser=None, IsRemote=False):
    """
    @brief      Assemble athena options from run options and argument parser.
                The athena arguments work like this (as documented here:
                https://gitlab.cern.ch/atlas/athena/blob/21.2/Control/AthenaCommon/python/AthArgumentParser.py#L2)
    
                The command line arguments in the athena call are first passed
                to athena. Every argument that should be passed to the user code
                needs to be prepended by a single additional `-`.
    
                Example:
    
                athena.py XAMPPbase/runXAMPPbase.py  --evtMax 100 - --noSys
                -----------------------------------------------------------
                         | job option              | athena arg  | user arg
    
    @param      RunOptions  The run options
    @param      Parser      The parser
    @param      IsRemote    Flag to toggle option parsing for pathena instead of
                            athena for running on the grid
    
    @return     List with athena command line options
    """
    Options = []
    if not IsRemote and RunOptions.testJob:
        RunOptions.noSyst = True
        RunOptions.parseFilesForPRW = True
    athena_args = ["skipEvents", "evtMax", "filesInput"]
    local_only = ["outFile", "parseFilesForPRW"] + athena_args
    from XAMPPbase.SubmitToBatch import exclusiveBatchOpt
    from XAMPPbase.SubmitToGrid import exclusiveGridOpts

    black_listed = ["jobOptions", "valgrind"] + [
        x.dest for x in exclusiveBatchOpt()._actions
    ] + [x.dest for x in exclusiveGridOpts()._actions]
    attributes = [
        att for att in dir(RunOptions)
        if not att.startswith("_") and att not in black_listed
    ]
    attributes.sort(key=lambda x: x not in athena_args)
    ath_delimiter = False
    l_delim = -1
    for att in attributes:
        if ath_delimiter and att in athena_args: ath_delimiter = False
        if not ath_delimiter and att not in athena_args:
            ath_delimiter = True
            Options += ["-"]
            l_delim = len(Options)
        ### Skip all arguments which are default from the parser
        if IsArgumentDefault(getattr(RunOptions, att), att, Parser): continue
        if IsRemote and att in local_only: continue
        ### Translate the remaining attributes into command line options
        if att == "filesInput" and (os.path.isfile(RunOptions.filesInput)
                                    and not IsROOTFile(RunOptions.filesInput)
                                    or os.path.isdir(RunOptions.filesInput)):
            Options += [
                "--%s '%s'" % (att, ",".join(
                    ReadListFromFile(RunOptions.filesInput)
                    if not os.path.isdir(RunOptions.filesInput) else [
                        "%s/%s" % (RunOptions.filesInput, item)
                        for item in os.listdir(RunOptions.filesInput)
                        if IsROOTFile(item)
                    ]))
            ]
        elif isinstance(getattr(RunOptions, att), bool):
            Options += ["--%s" % (att)]
        elif isinstance(getattr(RunOptions, att), list):
            Options += ["--%s %s" % (att, " ".join(getattr(RunOptions, att)))]
        else:
            Options += ["--%s %s" % (att, getattr(RunOptions, att))]
    ### No extra options were parsed. Get rid of the trailing -
    if len(Options) == l_delim:
        Options.pop()
    return Options
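For illustration only: the list returned by AssembleAthenaOptions follows the calling convention described in the docstring, so joining it after the job option reproduces an athena call in which everything after the single "-" is handed to the user code. The command assembly below is a hypothetical usage sketch, not code from the source; RunOptions and Parser are assumed to come from the XAMPP argument parser set up elsewhere.

# Hypothetical usage sketch of AssembleAthenaOptions
opts = AssembleAthenaOptions(RunOptions, Parser)
cmd = "athena.py XAMPPbase/runXAMPPbase.py " + " ".join(opts)
# e.g. "athena.py XAMPPbase/runXAMPPbase.py --evtMax 100 - --noSys"
print cmd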
Example #17
    def __prepare_input(self, in_ds=""):
        print "INFO <_prepare_input>: Assemble configuration for %s" % (in_ds)
        ### Name to be piped to the job
        out_name = in_ds[in_ds.rfind("/") + 1:in_ds.rfind(".")] if IsTextFile(
            in_ds) or IsROOTFile(in_ds) else in_ds
        split_dir = "%s/Datasets/%s" % (self.split_cfg_dir(), out_name)
        root_files = []
        ### Now we need to find the corresponding ROOT files
        ### 1) The dataset is a root file itself
        if IsROOTFile(in_ds):
            root_files += [in_ds]
        ### 2) The given dataset is a .txt file
        elif IsTextFile(in_ds):
            ### Find the root files from there
            root_files = self.__extract_root_files(in_ds)
            if len(root_files) == 0: return False
        ### 3) The given dataset is a directory
        elif os.path.isdir(in_ds):
            if in_ds.endswith("/"):
                in_ds = in_ds[:in_ds.rfind("/")]
                out_name = in_ds[in_ds.rfind("/") + 1:]
            split_dir = "%s/Directory/%s" % (self.split_cfg_dir(), out_name)
            root_files = [
                "%s/%s" % (in_ds, F) for F in os.listdir(in_ds)
                if IsROOTFile(F)
            ]
        ### 4) It's a logical dataset stored on d-cache
        else:
            root_files = self.__find_on_dcache(in_ds)
        if len(root_files) == 0:
            print "ERROR: Could not associate anything to %s" % (in_ds)
            return False
        if len(out_name) == 0:
            print "ERROR: How should the output be called %s" % (in_ds)
            return False

        ### Assemble the splitting of the jobs
        main_list = "%s/AllROOTFiles.main" % (split_dir)
        files_in_main = ReadListFromFile(main_list) if os.path.exists(
            main_list) else []
        ### The list is unknown or the content of ROOT files has changed
        ### Redo the splitting again ;-)
        if len(files_in_main) != len(root_files) or not IsListIn(
                files_in_main, root_files):
            print "INFO: Assemble new split for %s" % (in_ds)
            CreateDirectory(split_dir, True)
            WriteList(root_files, main_list)
            os.system("CreateBatchJobSplit -I %s -O %s -EpJ %i" %
                      (main_list, split_dir, self.__events_per_job))
        ### Each of the lists contains the ROOT files to process per each sub job
        split_lists = [
            "%s/%s" % (split_dir, F) for F in os.listdir(split_dir)
            if IsTextFile(F)
        ]
        n_jobs = len(split_lists)
        subjob_outs = [
            "%s/%s_%d.root" % (self.engine().tmp_dir(), out_name, d)
            for d in range(n_jobs)
        ]

        assembled_in = [] if not os.path.exists(
            self.job_input()) else ReadListFromFile(self.job_input())
        assembled_out = [] if not os.path.exists(
            self.job_out_names()) else ReadListFromFile(self.job_out_names())
        start_reg = len(assembled_in)

        ### Write what we've got so far
        WriteList(assembled_in + split_lists, self.job_input())
        WriteList(assembled_out + subjob_outs, self.job_out_names())
        #### Submit the merge jobs
        self.__merge_interfaces += [
            self.engine().create_merge_interface(
                out_name=out_name,
                files_to_merge=subjob_outs,
                hold_jobs=[(self.engine().job_name(),
                            [start_reg + i + 1 for i in range(n_jobs)])],
                files_per_job=self.__files_per_merge_itr,
                final_split=self.__final_split)
        ]
        self.__nsheduled += n_jobs
        return True
Example #18
def AssembleIO():
    #--------------------------------------------------------------
    # Reduce the event loop spam a bit
    #--------------------------------------------------------------
    from AthenaCommon.Logging import logging
    recoLog = logging.getLogger('MuonAnalysis I/O')
    recoLog.info('****************** STARTING the job *****************')

    if os.path.exists("%s/athfile-cache.ascii.gz" % (os.getcwd())):
        recoLog.info(
            "Old athfile-cache found. Will delete it, otherwise athena just freaks out."
        )
        os.system("rm %s/athfile-cache.ascii.gz" % (os.getcwd()))
    from GaudiSvc.GaudiSvcConf import THistSvc
    from AthenaCommon.JobProperties import jobproperties
    import AthenaPoolCnvSvc.ReadAthenaPool
    from AthenaCommon.AthenaCommonFlags import athenaCommonFlags as acf
    from AthenaServices.AthenaServicesConf import AthenaEventLoopMgr
    from AthenaCommon.AppMgr import ServiceMgr
    from ClusterSubmission.Utils import ReadListFromFile, ResolvePath, IsROOTFile
    from MuonAnalysis.Utils import IsTextFile
    ServiceMgr += AthenaEventLoopMgr(EventPrintoutInterval=1000000)

    ServiceMgr += THistSvc()
    OutFileName = "AnalysisOutput.root" if "outFile" not in globals() else outFile
    ServiceMgr.THistSvc.Output += [
        "MuonAnalysis DATAFILE='{}' OPT='RECREATE'".format(OutFileName)
    ]
    recoLog.info("Will save the job's output to " + OutFileName)
    ROOTFiles = []

    if "inputFile" in globals():
        recoLog.info("Use the following %s as input" % (inputFile))
        ROOTFiles = []
        ResolvedInFile = ResolvePath(inputFile)

        if inputFile.startswith('root://'):
            ROOTFiles.append(inputFile)

        elif ResolvedInFile and os.path.isfile(ResolvedInFile):
            if IsTextFile(ResolvedInFile):
                ROOTFiles = ReadListFromFile(ResolvedInFile)
            else:
                ROOTFiles.append(ResolvedInFile)

        elif ResolvedInFile and os.path.isdir(ResolvedInFile):
            for DirEnt in os.listdir(ResolvedInFile):
                if IsROOTFile(DirEnt):
                    if DirEnt.find(ResolvedInFile) != -1:
                        ROOTFiles.append(DirEnt)
                    else:
                        ROOTFiles.append("%s/%s" % (ResolvedInFile, DirEnt))
        else:
            raise RuntimeError("Invalid input " + inputFile)
        if len(ROOTFiles) == 0:
            raise RuntimeError("No ROOT files could be loaded as input")
        ServiceMgr.EventSelector.InputCollections = ROOTFiles
        acf.FilesInput = ROOTFiles

    if "nevents" in globals():
        recoLog.info("Only run on %i events" % (int(nevents)))
        theApp.EvtMax = int(nevents)
    if "nskip" in globals():
        recoLog.info("Skip the first %i events" % (int(nskip)))
        ServiceMgr.EventSelector.SkipEvents = int(nskip)
    """if isData(): recoLog.info("We're running over data today")
    parser.add_argument(
        '-l',
        '--list',
        help='specify a list containing the datasets to be requested',
        required=True)
    parser.add_argument("--rucio",
                        help="With this option you can set the rucio_account",
                        default=RUCIO_ACCOUNT)
    parser.add_argument(
        "--lifetime",
        help=
        "Defines a lifetime after which the rules are automatically deleted",
        type=int,
        default=-1)
    parser.add_argument("--askapproval",
                        help="Asks for approval of the request",
                        default=False,
                        action="store_true")
    parser.add_argument("--comment", help="Comment", default="")

    RunOptions = parser.parse_args()
    List = ReadListFromFile(RunOptions.list)

    ### Start replication of the datasets
    initiateReplication(ListOfDataSets=List,
                        Rucio=RunOptions.rucio,
                        RSE=RunOptions.RSE,
                        lifeTime=RunOptions.lifetime,
                        approve=RunOptions.askapproval,
                        comment=RunOptions.comment)
Example #20
                        help='Changes the RunTime of the analysis Jobs',
                        default='07:59:59')
    parser.add_argument('--vmem',
                        help='Changes the virtual memory needed by each jobs',
                        type=int,
                        default=2000)
    return parser


if __name__ == '__main__':
    Options = setupScriptSubmitParser().parse_args()
    submit_engine = setup_engine(Options)

    list_of_cmds = submit_engine.link_to_copy_area(Options.ListOfCmds)

    if not list_of_cmds:
        print "ERROR: Please give a valid file with list of commands to execute"
        exit(1)

    if not submit_engine.submit_build_job():
        print "ERROR: Submission failed"
        exit(1)
    submit_engine.submit_array(script="ClusterSubmission/Run.sh",
                               mem=Options.vmem,
                               env_vars=[("ListOfCmds", list_of_cmds)],
                               hold_jobs=Options.HoldJob,
                               run_time=Options.RunTime,
                               array_size=len(ReadListFromFile(list_of_cmds)))
    submit_engine.submit_clean_all(hold_jobs=[submit_engine.job_name()])
    submit_engine.finish()
Example #21
    OutDir = os.getcwd()

    parser = argparse.ArgumentParser(
        description=
        'This script converts DAOD filelists to AOD filelists which then can be used for creating pileup reweighting files.',
        prog='CreateAODFromDAODList',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--datasets',
                        '-d',
                        '-D',
                        help='DAOD filelist to be converted into AOD',
                        required=True)
    parser.add_argument('--outFile',
                        help="pipe the output into a script into a file",
                        default='')
    RunOptions = parser.parse_args()

    print 'The following DAODs are converted into AODs:\n'
    DAODsToConvert = [
        convertToAOD(daod) for daod in ReadListFromFile(RunOptions.datasets)
    ]

    print '\nThe AODs are:\n'

    for daod in DAODsToConvert:
        print "   --- %s" % (daod)

    if len(RunOptions.outFile) > 0:
        WriteList(DAODsToConvert, RunOptions.outFile)
Example #22
def main():
    """Merge files from a list using the MergeClass in ClusterEngine."""
    RunOptions = getArgumentParser().parse_args()
    if RunOptions.fileListsFolder != "":
        if len(RunOptions.fileLists) > 0:
            logging.warning(
                'You gave both a folder containing filelists and separate filelists, will merge both!'
            )
        if not os.path.isdir(RunOptions.fileListsFolder):
            logging.error(' %s is not a directory, exiting...' %
                          RunOptions.fileListsFolder)
            sys.exit(1)
        for l in os.listdir(RunOptions.fileListsFolder):
            if not os.path.isdir('%s/%s' % (RunOptions.fileListsFolder, l)):
                RunOptions.fileLists.append('%s/%s' %
                                            (RunOptions.fileListsFolder, l))
    submit_engine = setup_engine(RunOptions)
    merging = [
        submit_engine.create_merge_interface(
            out_name=L[L.rfind("/") + 1:L.rfind(".")],
            files_to_merge=ReadListFromFile(L),
            files_per_job=RunOptions.nFilesPerJob,
            hold_jobs=RunOptions.HoldJob,
            final_split=RunOptions.remainingSplit)
        for L in RunOptions.fileLists
    ]
    ### Rucio lists
    if len(RunOptions.RucioDSList) > 0:
        CheckRucioSetup()
        CheckRemainingProxyTime()
        #### Check that we can actually obtain the datasets
        if len(RunOptions.RucioRSE) == 0 and not RunOptions.download:
            logging.error(
                "Please specifiy either the RSE on which the datasets are stored via --RucioRSE or activate the download option"
            )
            exit(1)

        ds_to_merge = ReadListFromFile(RunOptions.RucioDSList)
        download_dir = submit_engine.tmp_dir() + "TMP_DOWNLOAD/"
        if RunOptions.download:
            downloadDataSets(InputDatasets=ds_to_merge,
                             Destination=download_dir,
                             RSE=RunOptions.RucioRSE,
                             use_singularity=False)

        to_wait = []
        hold_jobs = []
        for ds in ds_to_merge:
            ds_name = ds[ds.find(":") + 1:]
            if RunOptions.batch_size <= 0:
                merging += [
                    submit_engine.create_merge_interface(
                        out_name=ds_name,
                        files_to_merge=GetDataSetFiles(dsname=ds,
                                                       RSE=RunOptions.RucioRSE,
                                                       protocols="root")
                        if not RunOptions.download else [
                            download_dir + ds_name + "/" + x
                            for x in os.listdir(download_dir + ds_name)
                        ],
                        files_per_job=RunOptions.nFilesPerJob,
                        hold_jobs=RunOptions.HoldJob + hold_jobs,
                        final_split=RunOptions.remainingSplit)
                ]

            else:
                merging += [
                    DataSetFileHandler(rucio_container=ds,
                                       dest_rse=RunOptions.RucioRSE,
                                       download=RunOptions.download,
                                       merge=True,
                                       download_dir=download_dir,
                                       destination_dir=submit_engine.out_dir(),
                                       cluster_engine=submit_engine,
                                       max_merged_size=RunOptions.batch_size *
                                       1024 * 1024 * 1024,
                                       hold_jobs=RunOptions.HoldJob +
                                       hold_jobs,
                                       files_per_merge_job=2)
                ]
            to_wait += [submit_engine.subjob_name(merging[-1].job_name())]
            if len(to_wait) % 5 == 0:
                hold_jobs = [w for w in to_wait]
                to_wait = []
    for merge in merging:
        merge.submit_job()

    clean_hold = [
        submit_engine.subjob_name(merge.job_name()) for merge in merging
    ]

    submit_engine.submit_clean_all(clean_hold)
    submit_engine.finish()