total_summary = {}
nsamples = 0
for year, sample_map in year_sample_map.items():
    merged_dir = "/nfs-7/userdata/{}/tupler_babies/merged/FT/{}/output/year_{}".format(
        os.getenv("USER"), tag, year)
    for dsname, shortname in sample_map.items():
        if "/hadoop/" in dsname:
            sample = DirectorySample(
                dataset=dsname.split("|", 1)[0].strip(),
                location=dsname.split("|", 1)[1].strip(),
            )
        else:
            sample = SNTSample(
                dataset=dsname,
                exclude_tag_pattern="CMS4_V08-*",  # ignore new samples by sicheng for 2016
                tag=tag_match,
                # tag="CMS4_V09-04-13",  # if not specified, get latest tag
            )
        skip_tail = "/SMS" in dsname
        # skip_tail = False
        task = CondorTask(
            sample=sample,
            files_per_output=split_func(dsname),
            output_name="output.root",
            tag=tag,
            min_completion_fraction=0.90 if skip_tail else 1.0,
            condor_submit_params={
                # "sites": "T2_US_UCSD,UCSB",  # I/O is hella faster
                "sites": "T2_US_UCSD",  # I/O is hella faster
            },
        )
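        # A hedged sketch of the usual bookkeeping (assumes a `tasks` list was
        # initialized above alongside `total_summary`):
        nsamples += 1
        tasks.append(task)

# Minimal driver-loop sketch following the usual Metis pattern; the StatsParser
# webdir and the polling interval are placeholders, not from the original script.
import time
from metis.StatsParser import StatsParser

for _ in range(100):
    for task in tasks:
        task.process()  # submit new jobs, resubmit failures, harvest finished outputs
        total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()
    # publish a monitoring dashboard built from the per-task summaries
    StatsParser(data=total_summary, webdir="~/public_html/dump/metis/").do()
    if all(t.complete() for t in tasks):
        break
    time.sleep(30 * 60)  # poll every 30 minutes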
for babyname, dsname in snt_samples.items():
    cmsswver = "CMSSW_10_1_0"
    scramarch = "slc6_amd64_gcc700"
    tag = "v30_1"
    tarfile = "tarfiles/input_" + tag + ".tar.gz"
    cms3tag = "CMS4_V08-00-06" if "80X" in dsname else None
    excltag = "CMS4_V09*"
    samptyp = "1" if "SMS" in babyname else "2" if ("GJets" in dsname or "Photon" in dsname or "Gamma" in dsname) else "0"
    extrarg = ""
    # extrarg = " topcandTree=topcands" if "TTJets" in dsname else ""
    maker_task = CondorTask(
        sample=SNTSample(dataset=dsname, tag=cms3tag, exclude_tag_pattern=excltag),
        files_per_output=20 if "data" in babyname else 1 if "SMS" in babyname else 2,
        tag=tag,
        outdir_name="stopBaby_" + babyname,
        output_name="stopbaby.root",
        executable="condor_executable.sh",
        cmssw_version=cmsswver,
        scram_arch=scramarch,
        arguments=samptyp + extrarg,
        tarfile=tarfile,
        # condor_submit_params={"sites": "T2_US_UCSD"},
        # condor_submit_params={"sites": "UAF"},
        condor_submit_params={"use_xrootd": True},
        # max_jobs=1,  # temporary for submission test
    )
"TTBAR_PH", "/TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM": "TTTTnew", "/TTTW_TuneCP5_13TeV-madgraph-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM": "TTTW", } # submission tag tag = "v1_PMtest" merged_dir = "/nfs-7/userdata/{}/tupler_babies/merged/FT/{}/output/".format( os.getenv("USER"), tag) for dsname, shortname in sample_map.items(): task = CondorTask( sample=SNTSample( dataset=dsname, # tag="CMS4_V09-04-13", # if not specified, get latest CMS4 tag ), files_per_output=split_func(dsname), output_name="output.root", tag=tag, condor_submit_params={"use_xrootd": True}, cmssw_version="CMSSW_9_2_8", input_executable= "inputs/condor_executable_metis.sh", # your condor executable here tarfile= "inputs/package.tar.xz", # your tarfile with assorted goodies here special_dir= "FTbabies/", # output files into /hadoop/cms/store/<user>/<special_dir> ) # When babymaking task finishes, fire off a task that takes outputs and merges them locally (hadd) # into a file that ends up on nfs (specified by `merged_dir` above)
multiboson_map = {
    "/WW_TuneCP5_13TeV-pythia8/RunIIFall17MiniAOD-94X_mc2017_realistic_v10-v1/MINIAODSIM": "ww",
    "/WZ_TuneCP5_13TeV-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM": "wz",
}

# sample_map = ttbar_map + singletop_map + ttX_map + zinv_map + qcd_map + dy_map + wjets_map + gjets_map + multiboson_map
sample_map = multiboson_map

# submission tag
tag = "CMS4_V09-04-19"

merged_dir = "/nfs-6/userdata/dpgilber/{}/".format(tag)

tasks = {}
merge_tasks = {}
for dsname, shortname in sample_map.items():
    task = CondorTask(
        sample=SNTSample(
            dataset=dsname,
            # tag="CMS4_V09-04-13",  # uncomment this and set it to the desired tag, otherwise Metis will run on the most recent version
        ),
        files_per_output=1,
        output_name="output.root",
        tag=tag,
        condor_submit_params={"use_xrootd": True},
        cmssw_version="CMSSW_9_4_9",
        input_executable=mt2home + "/babymaker/batchsubmit/metis_executable.sh",  # your condor executable here
        tarfile=mt2home + "/babymaker/batchsubmit/job_input/input.tar.xz",  # your tarfile with assorted goodies here
        special_dir="mt2babies",  # output files into /hadoop/cms/store/<user>/<special_dir>
    )
    # When the babymaking task finishes, fire off a task that takes its outputs and merges
    # them locally (hadd) into a file that ends up on nfs (specified by `merged_dir` above)
    merge_task = LocalMergeTask(
        input_filenames=task.get_outputs(),
        output_filename="{}/{}.root".format(merged_dir, shortname),
    )
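    # Register both tasks so a driver loop can pick them up (a sketch, assuming the
    # `tasks` and `merge_tasks` dicts above are meant to be filled here):
    tasks[dsname] = task
    merge_tasks[dsname] = merge_task

# Sketch of the typical driver for this layout (the loop itself is an assumption):
for dsname, task in tasks.items():
    task.process()
    # merge only once every babymaking job for this dataset has finished
    if task.complete() and not merge_tasks[dsname].complete():
        merge_tasks[dsname].process()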
]

# Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
# The globber must be customized (by default, it is *.root) in order to pick up the text files

# Make a CondorTask (3 in total, one for each input)
maker_tasks = []
merge_tasks = []
for dsname in dataset_names:
    cmsswver = "CMSSW_9_4_0_pre2"
    scramarch = "slc6_amd64_gcc700"
    tarfile = "input.tar.gz"
    tag = "v25_3"
    maker_task = CondorTask(
        sample=SNTSample(dataset=dsname),
        files_per_output=1,
        tag=tag,
        outdir_name="stopBaby_" + dsname[5:34].strip("_"),
        output_name="stopbaby.root",
        executable="condor_executable.sh",
        cmssw_version=cmsswver,
        scram_arch=scramarch,
        arguments="1" if dsname[:4] == "/SMS" else "0",  # isFastsim
        tarfile=tarfile,
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
    )
    merge_task = CondorTask(
        sample=DirectorySample(
            dataset=dsname.replace("MINIAODSIM", "MERGE"),
            location=maker_task.get_outputdir(),
        ),
    )
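    maker_tasks.append(maker_task)
    merge_tasks.append(merge_task)

# Typical driver loop for chained maker/merge tasks, assuming the standard Metis
# pattern: once a maker task completes, refresh the merge task's input mapping so
# it picks up the freshly produced baby files. The retry count and sleep interval
# are placeholders.
import time

for _ in range(100):
    for maker_task, merge_task in zip(maker_tasks, merge_tasks):
        maker_task.process()
        if maker_task.complete():
            merge_task.reset_io_mapping()  # clear the stale input->output mapping
            merge_task.update_mapping()    # re-glob the maker outputs as merge inputs
            merge_task.process()
    if all(t.complete() for t in merge_tasks):
        break
    time.sleep(30 * 60)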
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False, files_per_output_func=UNITY):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir = os.path.dirname(os.path.abspath(__file__))
    metis_path = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path = os.path.join(metis_path, "package_{}.tar".format(job_tag))
    tar_gz_path = tar_path + ".gz"
    exec_path = os.path.join(main_dir, "metis.sh")
    merge_exec_path = os.path.join(main_dir, "merge.sh")
    hadoop_path = "metis/{}/{}".format(hadoop_dirname, job_tag)  # the output goes to /hadoop/cms/store/user/$USER/<hadoop_path>

    # Extra arguments passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    args = ""
    if job_tag.find("WVZ") != -1:
        args = "1"
    if job_tag.find("Dilep") != -1:
        args = "2"
    if job_tag.find("Trilep") != -1:
        args = "3"
    if job_tag.find("WVZMVA") != -1:
        args = "4"
    if job_tag.find("Truth") != -1:
        args = "5"
    if job_tag.find("WVZAll") != -1:
        args = "6"

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
            sample=SNTSample(
                dataset=sample,
                # exclude_tag_pattern="CMS4_V08-*",  # ignore new samples by sicheng for 2016
                exclude_tag_pattern="*v516*",
            ),
            tag=job_tag,
            arguments=args,
            executable=exec_path,
            tarfile=tar_gz_path,
            special_dir=hadoop_path,
            output_name="output.root",
            files_per_output=files_per_output_func(sample),
            # files_per_output=1,
            condor_submit_params={"sites": "T2_US_UCSD"},
            # condor_submit_params={"sites": "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT,T2_US_Purdue"},
            # condor_submit_params={"sites": "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT"},
            # condor_submit_params={"sites": "UAF"},
            open_dataset=False,
            flush=True,
            max_jobs=5 if dotestrun else 0,
            # min_completion_fraction=1.0 if "Run201" in sample else 0.9,
            # min_completion_fraction=0.9,
            # no_load_from_backup=True,
        )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:

            merge_sample_name = "/MERGE_" + sample[1:]

            # merge_task = CondorTask(
            #     sample=DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
            #     # open_dataset=True, flush=True,
            #     executable=merge_exec_path,
            #     tarfile=tar_gz_path,
            #     files_per_output=1,
            #     output_dir=maker_task.get_outputdir() + "/merged",
            #     output_name=samples_dictionary[sample] + ".root",
            #     condor_submit_params={"sites": "T2_US_UCSD"},
            #     output_is_tree=True,
            #     # check_expectedevents=True,
            #     tag=job_tag,
            #     cmssw_version="CMSSW_9_2_0",
            #     scram_arch="slc6_amd64_gcc530",
            #     # no_load_from_backup=True,
            #     max_jobs=1,
            # )
            # merge_task.reset_io_mapping()
            # merge_task.update_mapping()
            # tasks.append(merge_task)

            # merge_task = LocalMergeTask(
            #     input_filenames=maker_task.get_outputs(),
            #     output_filename="{}/{}.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample]),
            #     ignore_bad=False,
            # )

            input_arg = maker_task.get_outputs()[0].name.replace("_1.root", "")
            hadd_command = "sh ../rooutil/addHistos.sh /tmp/{}_1 {}".format(samples_dictionary[sample], input_arg)
            hadoop_output = "{}/{}_1.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample])
            cp_command = "cp /tmp/{}_1.root {}".format(samples_dictionary[sample], hadoop_output)

            print ""
            print ""
            print ""
            print ""
            print ""
            print hadoop_output
            if not os.path.exists(hadoop_output):
                print hadd_command
                # os.system(hadd_command)
                print cp_command
                # os.system(cp_command)
            print ""
            print ""
            print ""
            print ""

            # if not merge_task.complete():
            #     merge_task.process()

    # <------ END Sample Loop

    return tasks
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir = os.path.dirname(os.path.abspath(__file__))
    metis_path = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path = os.path.join(metis_path, "package.tar")
    tar_gz_path = tar_path + ".gz"
    exec_path = os.path.join(main_dir, "metis.sh")
    merge_exec_path = os.path.join(main_dir, "merge.sh")
    hadoop_path = "metis/wwwbaby/{}".format(job_tag)  # the output goes to /hadoop/cms/store/user/$USER/<hadoop_path>

    # Extra argument passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    args = ""  # fallback if job_tag matches none of the baby types below
    if job_tag.find("WWW") != -1:
        args = "0"  # WWWBaby
    elif job_tag.find("FR") != -1:
        args = "1"  # FRBaby
    elif job_tag.find("OS") != -1:
        args = "2"  # OSBaby
    elif job_tag.find("TnP") != -1:
        args = "3"  # TnPBaby
    elif job_tag.find("All") != -1:
        args = "4"  # AllBaby
    elif job_tag.find("POG") != -1:
        args = "5"  # POGBaby
    elif job_tag.find("Loose") != -1:
        args = "6"  # LooseBaby

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
            sample=SNTSample(
                dataset=sample,
                exclude_tag_pattern="CMS4_V08-*",  # ignore new samples by sicheng for 2016
            ),
            tag=job_tag,
            arguments=args,
            executable=exec_path,
            tarfile=tar_gz_path,
            special_dir=hadoop_path,
            output_name="output.root",
            files_per_output=4,
            condor_submit_params={"sites": "T2_US_UCSD"},
            open_dataset=False,
            flush=True,
            max_jobs=5 if dotestrun else 0,
            # no_load_from_backup=True,
        )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:
            merge_sample_name = "/MERGE_" + sample[1:]
            merge_task = CondorTask(
                sample=DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
                # open_dataset=True, flush=True,
                executable=merge_exec_path,
                tarfile=tar_gz_path,
                files_per_output=100000,
                output_dir=maker_task.get_outputdir() + "/merged",
                output_name=samples_dictionary[sample] + ".root",
                condor_submit_params={"sites": "T2_US_UCSD"},
                output_is_tree=True,
                # check_expectedevents=True,
                tag=job_tag,
                cmssw_version="CMSSW_9_2_0",
                scram_arch="slc6_amd64_gcc530",
                # no_load_from_backup=True,
            )
            merge_task.reset_io_mapping()
            merge_task.update_mapping()
            tasks.append(merge_task)

    # <------ END Sample Loop

    return tasks
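# A hedged usage sketch: `samples_dictionary` maps dataset names to short output
# names, and the returned tasks are driven with the usual Metis process loop. The
# dictionary entry and version tag below are hypothetical, not from the original.
if __name__ == "__main__":
    samples = {"/WWW_4F_TuneCP5_13TeV-amcatnlo-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM": "www"}
    for task in get_tasks(samples, 2017, "WWW", "v1_test", dotestrun=True):
        task.process()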
def test_everything(self):
    nfiles = 5
    tag = "v1"
    dsname = "/DummyDataset/Dummy/TEST"
    basedir = "/tmp/{0}/metis/sntsample_test/".format(os.getenv("USER"))

    # make a directory, touch <nfiles> files
    Utils.do_cmd("mkdir -p {0} ; rm {0}/*.root".format(basedir))
    for i in range(1, nfiles + 1):
        Utils.do_cmd("touch {0}/output_{1}.root".format(basedir, i))

    # push a dummy dataset to DIS using the dummy location
    # and make sure we updated the sample without problems
    dummy = SNTSample(
        dataset=dsname,
        tag=tag,
        read_only=True,  # note that this is the default!
    )
    dummy.info["location"] = basedir
    dummy.info["nevents"] = 123
    dummy.info["gtag"] = "stupidtag"

    # will fail the first time, since it's read only
    updated = dummy.do_update_dis()
    self.assertEqual(updated, False)

    # flip the bool and updating should succeed
    dummy.read_only = False
    updated = dummy.do_update_dis()
    self.assertEqual(updated, True)

    # make a new sample, retrieve from DIS, and check
    # that the location was written properly
    check = SNTSample(
        dataset=dsname,
        tag=tag,
    )
    self.assertEqual(len(check.get_files()), nfiles)
    self.assertEqual(check.get_globaltag(), dummy.info["gtag"])
    self.assertEqual(check.get_nevents(), dummy.info["nevents"])
    self.assertEqual(check.get_location(), basedir)
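# A minimal sketch for running the test standalone, assuming the method above is
# bound into a unittest.TestCase subclass (the class name here is hypothetical):
import unittest

class SNTSampleTest(unittest.TestCase):
    test_everything = test_everything  # adopt the function above as a test method

if __name__ == "__main__":
    unittest.main()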