Example #1
def get_tasks():

    pds = [
        "MuonEG", "SingleElectron", "MET", "SinglePhoton", "SingleMuon",
        "DoubleMuon", "JetHT", "DoubleEG", "HTMHT"
    ]
    out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD")
    all_names = out["response"]["payload"]
    all_names = sorted([ds for ds in all_names if any("/{}/".format(pd) in ds for pd in pds)])

    out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD,cms3tag=*17 | grep dataset_name",typ="snt")
    already_done = out["response"]["payload"]
    already_done = sorted([ds for ds in already_done if any("/{}/".format(pd) in ds for pd in pds)])
    dataset_names = list(set(all_names)-set(already_done))

    # dataset_names = [ds for ds in dataset_names if ds not in ["/MET/Run2016H-17Jul2018-v1/MINIAOD"]]

    # dataset_names = []
    # dataset_names += ["/MET/Run2016B-17Jul2018_ver1-v1/MINIAOD"]
    # dataset_names += ["/SingleMuon/Run2016B-17Jul2018_ver2-v1/MINIAOD"] # deleted a corruption and want to make sure this gets redone
    # dataset_names += ["/SinglePhoton/Run2016E-17Jul2018-v1/MINIAOD"] # deleted a corruption and want to make sure this gets redone

    # dataset_names = []
    # dataset_names += ["/JetHT/Run2016B-17Jul2018_ver2-v2/MINIAOD"] # deleted a corruption and want to make sure this gets redone

    tasks = []
    for dsname in dataset_names:

        cmsswver = "CMSSW_9_4_9"
        tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V09-04-17_949.tar.gz"
        pset = "psets/pset_data2016_94x_v2.py"
        scramarch = "slc6_amd64_gcc630"

        task = CMSSWTask(
                sample = DBSSample(dataset=dsname),
                open_dataset = False,
                events_per_output = 400e3,
                output_name = "merged_ntuple.root",
                tag = "CMS4_V09-04-17",
                pset = pset,
                pset_args = "data=True prompt=False name=DQM",
                scram_arch = scramarch,
                cmssw_version = cmsswver,
                condor_submit_params = {"use_xrootd":True},
                tarfile = tarfile,
                is_data = True,
                publish_to_dis = True,
                snt_dir = True,
                special_dir = "run2_data2016_94x/",
        )
        tasks.append(task)
    return tasks
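
Note: these get_tasks() snippets feed the usual ProjectMetis driver loop. A minimal sketch of that loop, assuming the standard Task.process()/Task.complete() interface (details vary by Metis version):

import time

if __name__ == "__main__":
    for _ in range(100):
        tasks = get_tasks()
        for task in tasks:
            task.process()  # make tarballs, submit/resubmit condor jobs as needed
        if all(task.complete() for task in tasks):
            break
        time.sleep(60 * 60)  # wait an hour between iterations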
Example #2
    def load_from_dis(self):

        (status, val) = self.check_params_for_dis_query()
        if not status:
            self.logger.error(
                "[Dataset] Failed to load info for dataset %s from DIS because parameter %s is missing."
                % (self.info["dataset"], val))
            return False

        query_str = "status=%s, dataset_name=%s, sample_type=%s" % (
            Constants.VALID_STR, self.info["dataset"], self.info["type"])
        if self.info["type"] != "CMS3":
            query_str += ", analysis=%s" % (self.info["analysis"])
        if self.info["tag"]:
            query_str += ", cms3tag=%s" % (self.info["tag"])

        response = {}
        try:
            response = dis.query(query_str, typ='snt', detail=True)
            response = response["payload"]
            if len(response) == 0:
                self.logger.error(
                    "Query found no matching samples for: status = %s, dataset = %s, type = %s, analysis = %s"
                    % (self.info["status"], self.info["dataset"],
                       self.info["type"], self.info["analysis"]))
                return False

            if len(response) > 1:
                # response = self.sort_query_by_key(response,"timestamp")
                response = self.sort_query_by_key(response, "cms3tag")

            if hasattr(self, "exclude_tag_pattern") and self.exclude_tag_pattern:
                new_response = []
                for samp in response:
                    tag = samp.get("tag", samp.get("cms3tag", ""))
                    if fnmatch.fnmatch(tag, self.exclude_tag_pattern): continue
                    new_response.append(samp)
                response = new_response

            self.info["gtag"] = response[0]["gtag"]
            self.info["kfact"] = response[0]["kfactor"]
            self.info["xsec"] = response[0]["xsec"]
            self.info["filtname"] = response[0].get("filter_name", "NoFilter")
            self.info["efact"] = response[0]["filter_eff"]
            self.info["analysis"] = response[0].get("analysis", "")
            self.info["tag"] = response[0].get("tag",
                                               response[0].get("cms3tag"))
            self.info["version"] = response[0].get("version", "v1.0")
            self.info["nevts_in"] = response[0]["nevents_in"]
            self.info["nevts"] = response[0]["nevents_out"]
            self.info["location"] = response[0]["location"]
            self.info["status"] = response[0].get("status",
                                                  Constants.VALID_STR)
            self.info["twiki"] = response[0].get("twiki_name", "")
            self.info["files"] = response[0].get("files", [])
            self.info["comments"] = response[0].get("comments", "")
            return True
        except Exception:
            # a malformed or partial DIS response is treated as a failed load
            return False
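
Note: sort_query_by_key() is not shown in any of these snippets. A plausible sketch (an assumption, not the actual helper), ordering samples so the entry with the largest value of the given key lands at index 0, where the caller reads it:

    def sort_query_by_key(self, response, key):
        # hypothetical: put the newest/highest entry (e.g. by cms3tag or
        # timestamp) first, since the caller only looks at response[0]
        return sorted(response, key=lambda samp: samp.get(key, ""), reverse=True)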
Example #3
    def do_update_dis(self):

        if hasattr(self, "read_only") and self.read_only:
            self.logger.debug(
                "Not updating DIS since this sample has read_only=True")
            return False

        self.logger.debug("Updating DIS")
        query_str = "dataset_name={},sample_type={},cms3tag={},gtag={},location={},nevents_in={},nevents_out={},xsec={},kfactor={},filter_eff={},timestamp={}".format(
            self.info["dataset"], self.info["tier"], self.info["tag"],
            self.info["gtag"], self.info["location"], self.info["nevents_in"],
            self.info["nevents"], self.info["xsec"], self.info["kfact"],
            self.info["efact"], int(time.time()))

        response = {}
        succeeded = False
        try:
            response = dis.query(query_str, typ='update_snt')
            response = response["payload"]
            if "updated" in response and str(response["updated"]).lower() == "true":
                succeeded = True
                self.logger.debug("Updated DIS")
        except Exception:
            pass

        if not succeeded:
            self.logger.debug(
                "WARNING: failed to update sample using DIS with query_str: {}"
                .format(query_str))
            self.logger.debug("WARNING: got response: {}".format(response))

        return succeeded
Example #4
def get_file_replicas(dsname):
    rawresponse = dis.query(dsname, typ="sites", detail=True)
    info = rawresponse["payload"]["block"]
    file_replicas = {}
    for block in info:
        for fd in block["file"]:
            filesizeGB = round(fd["bytes"] / (1.0e9), 2)  # bytes -> GB
            fname = fd["name"]
            nodes = []
            for node in fd["replica"]:
                name = str(node["node"])
                if node.get("se", None) and "TAPE" in node["se"]:
                    continue  # no tape
                if "_US_" not in name: continue  # only US
                if "FNAL" in name:  # can't run directly at fnal, but purdue is basically next to fnal
                    name = "T2_US_Purdue"
                    # though if it's already at purdue anyway, no need to duplicate the node name
                    if name in nodes: continue
                nodes.append(name)
            file_replicas[fname] = {
                "name": fname,
                "nodes": nodes,
                "filesizeGB": filesizeGB
            }
    return file_replicas
Example #5
def get_file_replicas_uncached(dsname, dasgoclient=False):
    if os.getenv("USEDASGOCLIENT", False):
        dasgoclient = True
    if dasgoclient:
        # NB: despite the flag name, this branch queries the PhEDEx data service directly
        url = "https://cmsweb.cern.ch/phedex/datasvc/json/prod/fileReplicas?dataset={}".format(
            dsname)
        response = urllib.urlopen(url).read()
        info = json.loads(response)["phedex"]["block"]
    else:
        rawresponse = dis.query(dsname, typ="sites", detail=True)
        info = rawresponse["payload"]["block"]
    file_replicas = {}
    for block in info:
        for fd in block["file"]:
            filesizeGB = round(fd["bytes"] / (1.0e9), 2)  # bytes -> GB
            fname = fd["name"]
            nodes = []
            for node in fd["replica"]:
                name = str(node["node"])
                if node.get("se", None) and "TAPE" in node["se"]:
                    continue  # no tape
                if "_US_" not in name: continue  # only US
                if "FNAL" in name:  # can't run directly at fnal, but purdue is basically next to fnal
                    name = "T2_US_Purdue"
                    # though if it's already at purdue anyway, no need to duplicate the node name
                    if name in nodes: continue
                nodes.append(name)
            file_replicas[fname] = {
                "name": fname,
                "nodes": nodes,
                "filesizeGB": filesizeGB
            }
    return file_replicas
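
Note: the _uncached suffix suggests a memoized wrapper elsewhere in the codebase. A minimal in-memory sketch of such a cache (hypothetical; the real cache may well live on disk):

_replica_cache = {}

def get_file_replicas_cached(dsname, dasgoclient=False):
    # hypothetical memoization of the replica query above
    key = (dsname, dasgoclient)
    if key not in _replica_cache:
        _replica_cache[key] = get_file_replicas_uncached(dsname, dasgoclient)
    return _replica_cache[key]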
Example #6
    def do_dis_query(self, ds, typ="files"):

        self.logger.debug("Doing DIS query of type {0} for {1}".format(
            typ, ds))

        rawresponse = dis.query(ds, typ=typ, detail=True)
        response = rawresponse["payload"]
        if not len(response):
            self.logger.error("Query failed with response:" + str(rawresponse))

        return response
Example #7
    def load_from_dis(self):

        (status, val) = self.check_params_for_dis_query()
        if not status:
            self.logger.error(
                "[Dataset] Failed to load info for dataset %s from DIS because parameter %s is missing."
                % (self.info["dataset"], val))
            return False

        query_str = "status=%s, dataset_name=%s, sample_type=%s" % (
            Constants.VALID_STR, self.info["dataset"], self.info["type"])
        if self.info["type"] != "CMS3":
            query_str += ", analysis=%s" % (self.info["analysis"])

        response = {}
        try:
            response = dis.query(query_str, typ='snt')
            response = response["response"]["payload"]
            if len(response) == 0:
                self.logger.error(
                    "Query found no matching samples for: status = %s, dataset = %s, type = %s, analysis = %s"
                    % (self.info["status"], self.info["dataset"],
                       self.info["type"], self.info["analysis"]))
                return False

            if len(response) > 1:
                response = self.sort_query_by_timestamp(response)

            self.info["gtag"] = response[0]["gtag"]
            self.info["kfact"] = response[0]["kfactor"]
            self.info["xsec"] = response[0]["xsec"]
            self.info["filtname"] = response[0]["filter_name"]
            self.info["efilt"] = response[0]["filter_eff"]
            self.info["analysis"] = response[0]["analysis"]
            self.info["tag"] = response[0].get("tag",
                                               response[0].get("cms3tag"))
            self.info["version"] = response[0].get("version", "v1.0")
            self.info["nevts_in"] = response[0]["nevents_in"]
            self.info["nevts"] = response[0]["nevents_out"]
            self.info["location"] = response[0]["location"]
            self.info["creator"] = response[0]["assigned_to"]
            self.info["status"] = response[0].get("status",
                                                  Constants.VALID_STR)
            self.info["twiki"] = response[0]["twiki_name"]
            self.info["siblings"] = response[0].get("siblings", [])
            self.info["files"] = response[0].get("files", [])
            self.info["comments"] = response[0]["comments"]
            return True
        except Exception:
            # a malformed or partial DIS response is treated as a failed load
            return False
Example #8
def get_tasks():

    pds = [
        "SingleElectron", "MET", "SinglePhoton", "SingleMuon", "DoubleMuon",
        "JetHT", "DoubleEG", "HTMHT"
    ]
    out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD")
    dataset_names = out["response"]["payload"]
    dataset_names = sorted([
        ds for ds in dataset_names
        if any("/{}/".format(pd) in ds for pd in pds)
    ])

    tasks = []
    for dsname in dataset_names:

        cmsswver = "CMSSW_9_4_9"
        tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V09-04-17_949.tar.gz"
        pset = "psets_cms4/main_pset_V09-04-17.py"
        scramarch = "slc6_amd64_gcc630"

        task = CMSSWTask(
            sample=DBSSample(dataset=dsname),
            open_dataset=False,
            events_per_output=400e3,
            output_name="merged_ntuple.root",
            tag="CMS4_V09-04-17",
            pset=pset,
            pset_args="data=True prompt=False name=DQM",
            scram_arch=scramarch,
            cmssw_version=cmsswver,
            condor_submit_params={"use_xrootd": True},
            tarfile=tarfile,
            is_data=True,
            publish_to_dis=True,
            snt_dir=True,
            special_dir="run2_data2016_94x/",
        )
        tasks.append(task)
    return tasks
Example #9
def get_tasks():

    pds = ["MuonEG", "DoubleMuon", "EGamma", "JetHT", "MET", "SingleMuon"]
    # pds = ["MuonEG","DoubleMuon","EGamma"] #, "JetHT", "MET", "SingleMuon"]
    # pds = ["EGamma"]
    proc_vers = [
        ("Run2018A", "v1"),
        ("Run2018A", "v2"),
        ("Run2018A", "v3"),
        ("Run2018B", "v1"),
        ("Run2018B", "v2"),
        ("Run2018C", "v1"),
        ("Run2018C", "v2"),
        ("Run2018C", "v3"),

        # ("Run2018C","v1"),

        # ("Run2018D","v1"), # very short, not in golden json, most PDs are missing on DAS
        # ("Run2018D","v2"),
    ]
    dataset_names = [
        "/{0}/{1}-17Sep2018-{2}/MINIAOD".format(x[0], x[1][0], x[1][1])
        for x in itertools.product(pds, proc_vers)
    ]

    # skip datasets that aren't on das
    # out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD")
    # NOTE Screw it, just get all the datasets and pretend they are open. Comp/ops people allow production->valid flag if
    # the dataset is 99% complete to not "block their distribution" even though it's stupid.
    # See https://hypernews.cern.ch/HyperNews/CMS/get/physics-validation/3267/1/1/1/1/2/2.html
    out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD,all")
    dis_names = out["response"]["payload"]
    dis_names = [
        ds for ds in dis_names if any("/{}/".format(pd) in ds for pd in pds)
    ]
    dataset_names = list(set(dataset_names) & set(dis_names))

    # print dataset_names
    # blah

    tasks = []
    for dsname in dataset_names:

        scram_arch = "slc6_amd64_gcc700"
        cmsswver = "CMSSW_10_2_4_patch1"
        pset = "psets/pset_data2018_102x.py"
        tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10-02-01_1024p1.tar.xz"

        task = CMSSWTask(
            sample=DBSSample(dataset=dsname),
            open_dataset=True,
            # flush = True,
            # flush = ((i+1)%48==0),
            # flush = ((i)%48==0),
            events_per_output=300e3,
            output_name="merged_ntuple.root",
            tag="CMS4_V10-02-01",
            scram_arch=scram_arch,
            pset=pset,
            pset_args="data=True prompt=True",  # NOTE: not actually prompt data; see the NtupleMaker 10x branch README
            cmssw_version=cmsswver,
            # condor_submit_params = {"use_xrootd":True},
            tarfile=tarfile,
            is_data=True,
            publish_to_dis=True,
            snt_dir=True,
            special_dir="run2_data2018/",
        )
        tasks.append(task)
    return tasks
Example #10
def get_tasks():

    pds = ["MuonEG", "DoubleMuon", "EGamma", "JetHT", "MET", "SingleMuon"]
    proc_vers = [
        ("Run2018A", "v1"),
        ("Run2018A", "v2"),
        ("Run2018A", "v3"),
        ("Run2018B", "v1"),
        ("Run2018B", "v2"),
        ("Run2018C", "v1"),
        ("Run2018C", "v2"),
        ("Run2018C", "v3"),
        ("Run2018D",
         "v1"),  # very short, not in golden json, most PDs are missing on DAS
        ("Run2018D", "v2"),
    ]
    dataset_names = [
        "/{0}/{1}-PromptReco-{2}/MINIAOD".format(x[0], x[1][0], x[1][1])
        for x in itertools.product(pds, proc_vers)
    ]

    # skip datasets that aren't on das
    out = dis.query("/*/Run2018*-PromptReco*/MINIAOD")
    dis_names = out["response"]["payload"]
    dis_names = [
        ds for ds in dis_names if any("/{}/".format(pd) in ds for pd in pds)
    ]
    dataset_names = list(set(dataset_names) & set(dis_names))

    tasks = []
    for dsname in dataset_names:

        cmsswver, tarfile = None, None
        scram_arch = "slc6_amd64_gcc630"
        pset = "psets/pset_prompt10x_data_1.py"

        if "Run2018A-PromptReco-v1" in dsname:
            cmsswver = "CMSSW_10_1_2_patch2"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1012p2.tar.gz"
        elif "Run2018A-PromptReco-v2" in dsname:
            cmsswver = "CMSSW_10_1_5"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"
        elif "Run2018A-PromptReco-v3" in dsname:
            cmsswver = "CMSSW_10_1_5"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"
        elif "Run2018B-PromptReco-v1" in dsname:
            cmsswver = "CMSSW_10_1_5"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"
        elif "Run2018B-PromptReco-v2" in dsname:
            cmsswver = "CMSSW_10_1_7"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1017.tar.gz"
        elif ("Run2018C-PromptReco-" in dsname) or ("Run2018D-PromptReco-"
                                                    in dsname):
            cmsswver = "CMSSW_10_2_1"
            tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1021_nodeepak8.tar.gz"
            scram_arch = "slc6_amd64_gcc700"
            pset = "psets/pset_prompt10x_data_2.py"

        task = CMSSWTask(
            sample=DBSSample(dataset=dsname),
            open_dataset=False,
            # flush = ((i+1)%48==0),
            # flush = ((i)%48==0),
            events_per_output=350e3,
            output_name="merged_ntuple.root",
            recopy_inputs=False,
            tag="CMS4_V10-01-00",
            scram_arch=scram_arch,
            # global_tag = "", # if global tag blank, one from DBS is used
            pset=pset,
            pset_args="data=True prompt=True",
            cmssw_version=cmsswver,
            # condor_submit_params = {"use_xrootd":True},
            tarfile=tarfile,
            is_data=True,
            publish_to_dis=True,
            snt_dir=True,
            special_dir="run2_data2018_prompt/",
        )
        tasks.append(task)
    return tasks
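
Note: the elif ladder above pins each 2018 PromptReco era to the CMSSW release it was taken with. The same mapping as a lookup table (same data, hypothetical restructuring; the Run2018C/D scram_arch and pset overrides would be handled the same way):

# first matching substring wins; Run2018C and Run2018D share the 10_2_1 entry
era_to_config = [
    ("Run2018A-PromptReco-v1", "CMSSW_10_1_2_patch2", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1012p2.tar.gz"),
    ("Run2018A-PromptReco-v2", "CMSSW_10_1_5", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"),
    ("Run2018A-PromptReco-v3", "CMSSW_10_1_5", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"),
    ("Run2018B-PromptReco-v1", "CMSSW_10_1_5", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz"),
    ("Run2018B-PromptReco-v2", "CMSSW_10_1_7", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1017.tar.gz"),
    ("Run2018C-PromptReco-", "CMSSW_10_2_1", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1021_nodeepak8.tar.gz"),
    ("Run2018D-PromptReco-", "CMSSW_10_2_1", "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1021_nodeepak8.tar.gz"),
]

def lookup_era_config(dsname):
    for pattern, cmsswver, tarfile in era_to_config:
        if pattern in dsname:
            return cmsswver, tarfile
    return None, None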
Example #11
 "/WWW_4F_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/WWZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/WW_TuneCUETP8M1_13TeV-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v2/MINIAODSIM",
 "/WZTo3LNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/WZ_TuneCUETP8M1_13TeV-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v2/MINIAODSIM",
 "/WpWpJJ_EWK-QCD_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM",
 "/WpWpJJ_EWK_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM",
 "/WpWpJJ_QCD_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM",
 "/ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v1/MINIAODSIM",
 "/ZZTo4L_13TeV_powheg_pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM",
 "/ZZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/tZq_ll_4f_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v1/MINIAODSIM",
 "/ttHToNonbb_M125_TuneCUETP8M2_ttHtranche3_13TeV-powheg-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM",
 "/ttZJets_13TeV_madgraphMLM-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM",
 ]

# Now query SNT samples with the same PD, get the latest, then fetch the xsecs to print out
# dsname|xsec|kfactor|efactor
# which is the format we use in all the metis submission scripts
import scripts.dis_client as dis
for ds in dsnames:
    q = "/{}/*80X*/MINIAODSIM".format(ds.split("/")[1])
    ret = sorted(dis.query(q, typ="snt")["response"]["payload"],
                 key=lambda x: -x.get("timestamp", -1))
    if not ret: 
        print "ERROR with {}".format(ds)
        continue
    ret = ret[0]
    print "{}|{}|{}|{}".format(ds, ret["xsec"], ret["kfactor"], ret["filter_eff"])

Example #12
    for ds in sorted(dsnew & dsold, key=old.get):
        oldloc = old[ds]
        newloc = new[ds]
        if not os.path.exists(oldloc): continue
        nmiss = num_missing_files(newloc)
        if nmiss > 0:
            print "# Can't delete {} because there's {} missing files still".format(oldloc,nmiss)
            continue
        print get_cmd(oldloc)
    print


if __name__ == "__main__":

    # 2017 MC
    old = {x["dataset_name"]:x["location"] for x in dis.query("*Fall17MiniAOD*,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    new = {x["dataset_name"]:x["location"] for x in dis.query("*Fall17MiniAOD*,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    dsold, dsnew = set(old), set(new)
    print_cmds(old,new,label="2017 MC")

    # 2017 Data
    old = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2017*/MINIAOD,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    new = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2017*/MINIAOD,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    dsold, dsnew = set(old), set(new)
    print_cmds(old,new,label="2017 data")

    # 2016 Data
    old = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2016*/MINIAOD,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    new = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2016*/MINIAOD,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]}
    dsold, dsnew = set(old), set(new)
    print_cmds(old,new,label="2016 94X data")
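
Note: num_missing_files() is referenced above but not defined in this snippet. A rough sketch of what it might do, assuming merged ntuples are numbered merged_ntuple_1.root through merged_ntuple_N.root:

import glob, os, re

def num_missing_files(loc):
    # hypothetical: count gaps in the merged_ntuple_<i>.root numbering
    idxs = set()
    for f in glob.glob(os.path.join(loc, "merged_ntuple_*.root")):
        m = re.search(r"_(\d+)\.root$", f)
        if m:
            idxs.add(int(m.group(1)))
    return max(idxs) - len(idxs) if idxs else 0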
Example #13
# Check write permissions
print BLUE, "Checking write permissions to UCSD...", ENDC
out = crabCommand('checkwrite', site="T2_US_UCSD", **proxy_file_dict)
print "Done. Status: %s" % out["status"]
print

# Take first dataset name in instructions.txt
print BLUE, "Taking the first sample in instructions.txt. If it's not a FullSim MC sample, then you're going to have a bad time!", ENDC
sample = u.read_samples()[0]
dataset_name = sample["dataset"]
gtag = sample["gtag"]
print "  --> %s" % dataset_name
print

# Find the smallest MINIAOD file
filelist = dis.query(dataset_name, detail=True, typ="files")
filelist = filelist["response"]["payload"]
filelist = sorted(filelist, key=lambda x: x.get("sizeGB", 999.0))
smallest_filename = filelist[0]["name"]
print BLUE, "Smallest file", ENDC
print "  --> %s" % smallest_filename
print

# Use xrootd to get that file
ntuple_name = "input.root"
print BLUE, "Using xrootd to download the file", ENDC
os.system("xrdcp -f root://xrootd.unl.edu/%s %s" %
          (smallest_filename, ntuple_name))
if os.path.isfile(ntuple_name): print "Success!"
else: print "ERROR: failed to download using xrootd"
print
Example #14
    def do_dis_query(self, typ="files"):

        ds = self.info["dataset"]

        self.logger.debug("Doing DIS query of type {0} for {1}".format(
            typ, ds))

        if not ds:
            self.logger.error("No dataset name declared!")
            return False

        response = {}

        do_test = False  # set True to use the canned payload below instead of querying DIS
        if do_test:
            if typ in ["files"]:
                response = [{
                    u'nevents': 95999,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/C243580A-534C-E711-97A2-02163E01A1FE.root',
                    u'sizeGB': 2.1000000000000001
                }, {
                    u'nevents': 104460,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/36C47789-564C-E711-B555-02163E019C8A.root',
                    u'sizeGB': 2.3500000000000001
                }, {
                    u'nevents': 140691,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/685FF878-554C-E711-8E35-02163E01A4E3.root',
                    u'sizeGB': 3.1200000000000001
                }, {
                    u'nevents': 107552,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/C2147D89-5B4C-E711-843F-02163E01415B.root',
                    u'sizeGB': 2.4100000000000001
                }, {
                    u'nevents': 119678,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/0E34AC81-5A4C-E711-9961-02163E01A549.root',
                    u'sizeGB': 2.6800000000000002
                }, {
                    u'nevents': 182253,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/6ED7F30F-594C-E711-8DD3-02163E01A2A9.root',
                    u'sizeGB': 4.0499999999999998
                }, {
                    u'nevents': 120161,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/68E6D0F8-5C4C-E711-A50F-02163E019DD2.root',
                    u'sizeGB': 2.6699999999999999
                }, {
                    u'nevents': 75886,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/172/00000/4AAC3A09-634C-E711-B5C4-02163E019CCE.root',
                    u'sizeGB': 1.1899999999999999
                }, {
                    u'nevents': 188508,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/106C6FC9-604C-E711-904C-02163E019C2C.root',
                    u'sizeGB': 4.1500000000000004
                }, {
                    u'nevents': 174713,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/164397BB-5F4C-E711-869C-02163E01A3B3.root',
                    u'sizeGB': 4.1500000000000004
                }, {
                    u'nevents': 91384,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/36AD29F3-6F4C-E711-90D1-02163E01A491.root',
                    u'sizeGB': 2.1400000000000001
                }, {
                    u'nevents': 117960,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/174/00000/5A4A5050-804C-E711-BF0C-02163E01A270.root',
                    u'sizeGB': 1.8400000000000001
                }, {
                    u'nevents': 123173,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/84BB76F4-654C-E711-86E5-02163E01A676.root',
                    u'sizeGB': 2.8399999999999999
                }, {
                    u'nevents': 178903,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/987DAF51-634C-E711-A339-02163E01415B.root',
                    u'sizeGB': 4.1600000000000001
                }, {
                    u'nevents': 48567,
                    u'name':
                    u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/168/00000/98029456-6D4C-E711-911F-02163E019E8D.root',
                    u'sizeGB': 1.1299999999999999
                }]
            if typ in ["config"]:
                response = {
                    u'app_name': u'cmsRun',
                    u'output_module_label': u'Merged',
                    u'create_by': u'*****@*****.**',
                    u'pset_hash': u'GIBBERISH',
                    u'creation_date': u'2017-06-08 06:02:28',
                    u'release_version': u'CMSSW_9_2_1',
                    u'global_tag': u'92X_dataRun2_Prompt_v4',
                    u'pset_name': None
                }
        else:
            rawresponse = dis.query(ds, typ=typ, detail=True)
            response = rawresponse["response"]["payload"]
            if not len(response):
                self.logger.error("Query failed with response:" +
                                  str(rawresponse["response"]))

        return response