Example #1
 total_summary = {}
 nsamples = 0
 for year, sample_map in year_sample_map:
     merged_dir = "/nfs-7/userdata/{}/tupler_babies/merged/FT/{}/output/year_{}".format(
         os.getenv("USER"), tag, year)
     for dsname, shortname in sample_map:
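         # dsname entries containing "/hadoop/" are assumed to be encoded as
         # "<dataset name>|<location on /hadoop>", hence the split on "|" below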
         if "/hadoop/" in dsname:
             sample = DirectorySample(
                 dataset=dsname.split("|", 1)[0].strip(),
                 location=dsname.split("|", 1)[1].strip(),
             )
         else:
             sample = SNTSample(
                 dataset=dsname,
                 exclude_tag_pattern="CMS4_V08-*",  # ignore new samples by sicheng for 2016
                 tag=tag_match,
                 # tag="CMS4_V09-04-13", # if not specified, get latest tag
             )
         skip_tail = "/SMS" in dsname
         # skip_tail = False
         task = CondorTask(
             sample=sample,
             files_per_output=split_func(dsname),
             output_name="output.root",
             tag=tag,
             min_completion_fraction=0.90 if skip_tail else 1.0,
             condor_submit_params={
                 # "sites":"T2_US_UCSD,UCSB",  # I/O is hella faster
                 "sites":
                 "T2_US_UCSD",  # I/O is hella faster
Example #2
    for babyname, dsname in snt_samples.items():
        cmsswver = "CMSSW_10_1_0"
        scramarch = "slc6_amd64_gcc700"
        tag = "v30_1"
        tarfile = "tarfiles/input_" + tag + ".tar.gz"
        cms3tag = "CMS4_V08-00-06" if "80X" in dsname else None
        excltag = "CMS4_V09*"
        samptyp = "1" if "SMS" in babyname else "2" if (
            "GJets" in dsname or "Photon" in dsname
            or "Gamma" in dsname) else "0"
        extrarg = ""
        # extrarg = " topcandTree=topcands" if "TTJets" in dsname else ""
        maker_task = CondorTask(
            sample=SNTSample(dataset=dsname,
                             tag=cms3tag,
                             exclude_tag_pattern=excltag),
            files_per_output=20 if "data" in babyname else 1 if "SMS" in babyname else 2,
            tag=tag,
            outdir_name="stopBaby_" + babyname,
            output_name="stopbaby.root",
            executable="condor_executable.sh",
            cmssw_version=cmsswver,
            scram_arch=scramarch,
            arguments=samptyp + extrarg,
            tarfile=tarfile,
            # condor_submit_params = {"sites": "T2_US_UCSD"},
            # condor_submit_params = {"sites": "UAF"},
            condor_submit_params={"use_xrootd": True},
            # max_jobs = 1,      # temporary for submission test
Example #3
        "TTBAR_PH",
        "/TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM":
        "TTTTnew",
        "/TTTW_TuneCP5_13TeV-madgraph-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM":
        "TTTW",
    }

    # submission tag
    tag = "v1_PMtest"

    merged_dir = "/nfs-7/userdata/{}/tupler_babies/merged/FT/{}/output/".format(
        os.getenv("USER"), tag)
    for dsname, shortname in sample_map.items():
        task = CondorTask(
            sample=SNTSample(
                dataset=dsname,
                # tag="CMS4_V09-04-13", # if not specified, get latest CMS4 tag
            ),
            files_per_output=split_func(dsname),
            output_name="output.root",
            tag=tag,
            condor_submit_params={"use_xrootd": True},
            cmssw_version="CMSSW_9_2_8",
            input_executable="inputs/condor_executable_metis.sh",  # your condor executable here
            tarfile="inputs/package.tar.xz",  # your tarfile with assorted goodies here
            special_dir="FTbabies/",  # output files into /hadoop/cms/store/<user>/<special_dir>
        )
        # When babymaking task finishes, fire off a task that takes outputs and merges them locally (hadd)
        # into a file that ends up on nfs (specified by `merged_dir` above)
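        # A sketch of the local merge described above, following the LocalMergeTask
        # pattern used in Example #4 (assumes LocalMergeTask is imported from Metis):
        merge_task = LocalMergeTask(
            input_filenames=task.get_outputs(),
            output_filename="{}/{}.root".format(merged_dir, shortname),
        )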
Example #4
    multiboson_map = {
        "/WW_TuneCP5_13TeV-pythia8/RunIIFall17MiniAOD-94X_mc2017_realistic_v10-v1/MINIAODSIM"                    : "ww",
        "/WZ_TuneCP5_13TeV-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM" : "wz"
        }
#    sample_map = ttbar_map + singletop_map + ttX_map + zinv_map + qcd_map + dy_map + wjets_map + gjets_map + multiboson_map
    sample_map = multiboson_map
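    # Note: the commented-out sum above would not work as written, because Python dicts
    # do not support "+"; combining all of the maps would require merging them, e.g.:
    #     sample_map = {}
    #     for m in (ttbar_map, singletop_map, ttX_map, zinv_map,
    #               qcd_map, dy_map, wjets_map, gjets_map, multiboson_map):
    #         sample_map.update(m)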

    # submission tag
    tag = "CMS4_V09-04-19"
    merged_dir = "/nfs-6/userdata/dpgilber/{}/".format(tag)
    tasks = {}
    merge_tasks = {}
    for dsname,shortname in sample_map.items():
        task = CondorTask(
                sample = SNTSample(
                    dataset=dsname,
                    # tag="CMS4_V09-04-13", # uncomment this and set it to the desired tag, otherwise Metis will run on the most recent version
                    ),
                files_per_output = 1,
                output_name = "output.root",
                tag = tag,
                condor_submit_params = {"use_xrootd":True},
                cmssw_version = "CMSSW_9_4_9",
                input_executable = mt2home+"/babymaker/batchsubmit/metis_executable.sh", # your condor executable here
                tarfile = mt2home+"/babymaker/batchsubmit/job_input/input.tar.xz", # your tarfile with assorted goodies here
                special_dir = "mt2babies", # output files into /hadoop/cms/store/<user>/<special_dir>
        )
        # When babymaking task finishes, fire off a task that takes outputs and merges them locally (hadd)
        # into a file that ends up on nfs (specified by `merged_dir` above)
        merge_task = LocalMergeTask(
                input_filenames=task.get_outputs(),
                output_filename="{}/{}.root".format(merged_dir,shortname)
Example #5
    ]

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
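    # A sketch of such a sample; the dataset and location strings here are placeholders,
    # and the text files are picked up by overriding the default globber (assuming
    # DirectorySample exposes it as the `globber` keyword):
    # text_sample = DirectorySample(
    #     dataset="/DummyTextFiles/Dummy/TEST",    # bookkeeping name (placeholder)
    #     location="/path/to/directory/with/txt",  # placeholder location
    #     globber="*.txt",                         # pick up .txt files instead of *.root
    # )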

    # Make a CondorTask (3 in total, one for each input)
    maker_tasks = []
    merge_tasks = []

    for dsname in dataset_names:
        cmsswver = "CMSSW_9_4_0_pre2"
        scramarch = "slc6_amd64_gcc700"
        tarfile = "input.tar.gz"
        tag = "v25_3"
        maker_task = CondorTask(
            sample=SNTSample(dataset=dsname),
            files_per_output=1,
            tag=tag,
            outdir_name="stopBaby_" + dsname[5:34].strip("_"),
            output_name="stopbaby.root",
            executable="condor_executable.sh",
            cmssw_version=cmsswver,
            scram_arch=scramarch,
            arguments="1" if dsname[:4] == "/SMS" else "0",  # isFastsim
            tarfile=tarfile,
            condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        )
        merge_task = CondorTask(
            sample=DirectorySample(
                dataset=dsname.replace("MINIAODSIM", "MERGE"),
                location=maker_task.get_outputdir(),
Example #6
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False, files_per_output_func=UNITY):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir             = os.path.dirname(os.path.abspath(__file__))
    metis_path           = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path             = os.path.join(metis_path, "package_{}.tar".format(job_tag))
    tar_gz_path          = tar_path + ".gz"
    exec_path            = os.path.join(main_dir, "metis.sh")
    merge_exec_path      = os.path.join(main_dir, "merge.sh")
    hadoop_path          = "metis/{}/{}".format(hadoop_dirname, job_tag) # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Extra arguments that will be passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    args = ""
    if job_tag.find("WVZ") != -1:
        args = "1"
    if job_tag.find("Dilep") != -1:
        args = "2"
    if job_tag.find("Trilep") != -1:
        args = "3"
    if job_tag.find("WVZMVA") != -1:
        args = "4"
    if job_tag.find("Truth") != -1:
        args = "5"
    if job_tag.find("WVZAll") != -1:
        args = "6"

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
                sample               = SNTSample(dataset=sample,
                                                 # exclude_tag_pattern="CMS4_V08-*", # ignore new samples by sicheng for 2016 
                                                 exclude_tag_pattern="*v516*", # exclude *v516* samples
                                                 ),
                tag                  = job_tag,
                arguments            = args,
                executable           = exec_path,
                tarfile              = tar_gz_path,
                special_dir          = hadoop_path,
                output_name          = "output.root",
                files_per_output     = files_per_output_func(sample),
                # files_per_output     = 1,
                condor_submit_params = {"sites" : "T2_US_UCSD"},
                # condor_submit_params = {"sites" : "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT,T2_US_Purdue"},
                # condor_submit_params = {"sites" : "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT"},
                # condor_submit_params = {"sites" : "UAF"},
                open_dataset         = False,
                flush                = True,
                max_jobs             = 5 if dotestrun else 0,
                # min_completion_fraction = 1.0 if "Run201" in sample else 0.9,
                # min_completion_fraction = 0.9,
                #no_load_from_backup  = True,
                )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:

            merge_sample_name = "/MERGE_"+sample[1:]

            #merge_task = CondorTask(
            #        sample                 = DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
            #        # open_dataset         = True, flush = True,
            #        executable             = merge_exec_path,
            #        tarfile                = tar_gz_path,
            #        files_per_output       = 1,
            #        output_dir             = maker_task.get_outputdir() + "/merged",
            #        output_name            = samples_dictionary[sample] + ".root",
            #        condor_submit_params   = {"sites":"T2_US_UCSD"},
            #        output_is_tree         = True,
            #        # check_expectedevents = True,
            #        tag                    = job_tag,
            #        cmssw_version          = "CMSSW_9_2_0",
            #        scram_arch             = "slc6_amd64_gcc530",
            #        #no_load_from_backup    = True,
            #        max_jobs               = 1,
            #        )
            # merge_task.reset_io_mapping()
            # merge_task.update_mapping()
            # tasks.append(merge_task)

            # merge_task = LocalMergeTask(
            #         input_filenames=maker_task.get_outputs(),
            #         output_filename="{}/{}.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample]),
            #         ignore_bad = False,
            #         )

            input_arg = maker_task.get_outputs()[0].name.replace("_1.root", "")
            hadd_command = "sh ../rooutil/addHistos.sh /tmp/{}_1 {}".format(samples_dictionary[sample], input_arg)
            hadoop_output = "{}/{}_1.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample])
            cp_command = "cp /tmp/{}_1.root {}".format(samples_dictionary[sample], hadoop_output)

            print ""
            print ""
            print ""
            print ""
            print ""

            print hadoop_output

            if not os.path.exists(hadoop_output):

                print hadd_command
                # os.system(hadd_command)
                print cp_command
                # os.system(cp_command)

            print ""
            print ""
            print ""
            print ""

            # if not merge_task.complete():
            #     merge_task.process()


    # <------ END Sample Loop

    return tasks
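
# A usage sketch for get_tasks above; the sample map, year, and tag are placeholders,
# and the driver loop relies on the standard Metis task interface (process() submits
# and monitors a task's jobs, complete() reports whether it has finished).
if __name__ == "__main__":
    import time

    # placeholder map: dataset name -> baby short name
    samples = {"/TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM": "tttt"}
    tasks = get_tasks(samples, year=2017, baby_type="WVZ", baby_version_tag="v0.1.0")

    while not all(task.complete() for task in tasks):
        for task in tasks:
            task.process()
        time.sleep(30 * 60)  # wait before the next submit/monitor cycle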
Example #7
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir             = os.path.dirname(os.path.abspath(__file__))
    metis_path           = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path             = os.path.join(metis_path, "package.tar")
    tar_gz_path          = tar_path + ".gz"
    exec_path            = os.path.join(main_dir, "metis.sh")
    merge_exec_path      = os.path.join(main_dir, "merge.sh")
    hadoop_path          = "metis/wwwbaby/{}".format(job_tag) # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Extra arguments that will be passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    if job_tag.find("WWW") != -1:
        args = "0" # WWWBaby
    elif job_tag.find("FR") != -1:
        args = "1" # FRBaby
    elif job_tag.find("OS") != -1:
        args = "2" # OSBaby
    elif job_tag.find("TnP") != -1:
        args = "3" # TnPBaby
    elif job_tag.find("All") != -1:
        args = "4" # AllBaby
    elif job_tag.find("POG") != -1:
        args = "5" # POGBaby
    elif job_tag.find("Loose") != -1:
        args = "6" # LooseBaby

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
                sample               = SNTSample(dataset=sample,
                                                 exclude_tag_pattern="CMS4_V08-*", # ignore new samples by sicheng for 2016 
                                                 ),
                tag                  = job_tag,
                arguments            = args,
                executable           = exec_path,
                tarfile              = tar_gz_path,
                special_dir          = hadoop_path,
                output_name          = "output.root",
                files_per_output     = 4,
                condor_submit_params = {"sites" : "T2_US_UCSD"},
                open_dataset         = False,
                flush                = True,
                max_jobs             = 5 if dotestrun else 0
                #no_load_from_backup  = True,
                )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:

            merge_sample_name = "/MERGE_"+sample[1:]

            merge_task = CondorTask(
                    sample                 = DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
                    # open_dataset         = True, flush = True,
                    executable             = merge_exec_path,
                    tarfile                = tar_gz_path,
                    files_per_output       = 100000,
                    output_dir             = maker_task.get_outputdir() + "/merged",
                    output_name            = samples_dictionary[sample] + ".root",
                    condor_submit_params   = {"sites":"T2_US_UCSD"},
                    output_is_tree         = True,
                    # check_expectedevents = True,
                    tag                    = job_tag,
                    cmssw_version          = "CMSSW_9_2_0",
                    scram_arch             = "slc6_amd64_gcc530",
                    #no_load_from_backup    = True,
                    )
            merge_task.reset_io_mapping()
            merge_task.update_mapping()

            tasks.append(merge_task)

    # <------ END Sample Loop

    return tasks
Example #8
    def test_everything(self):
        nfiles = 5
        tag = "v1"
        dsname = "/DummyDataset/Dummy/TEST"
        basedir = "/tmp/{0}/metis/sntsample_test/".format(os.getenv("USER"))

        # make a directory, touch <nfiles> files
        Utils.do_cmd("mkdir -p {0} ; rm {0}/*.root".format(basedir))
        for i in range(1, nfiles + 1):
            Utils.do_cmd("touch {0}/output_{1}.root".format(basedir, i))

        # push a dummy dataset to DIS using the dummy location
        # and make sure we updated the sample without problems
        dummy = SNTSample(
            dataset=dsname,
            tag=tag,
            read_only=True,  # note that this is the default!
        )
        dummy.info["location"] = basedir
        dummy.info["nevents"] = 123
        dummy.info["gtag"] = "stupidtag"

        # will fail the first time, since it's read only
        updated = dummy.do_update_dis()
        self.assertEqual(updated, False)

        # flip the bool and updating should succeed
        dummy.read_only = False
        updated = dummy.do_update_dis()
        self.assertEqual(updated, True)

        # make a new sample, retrieve from DIS, and check
        # that the location was written properly
        check = SNTSample(
            dataset=dsname,
            tag=tag,
        )
        self.assertEqual(len(check.get_files()), nfiles)
        self.assertEqual(check.get_globaltag(), dummy.info["gtag"])
        self.assertEqual(check.get_nevents(), dummy.info["nevents"])
        self.assertEqual(check.get_location(), basedir)