Example #1
def get_dataset(inputDataset = None, inputID = None):
    dbstore = DbStore()
    if inputDataset is not None:
        resultset = dbstore.find(Dataset, Dataset.name == inputDataset)
    elif inputID is not None:
        resultset = dbstore.find(Dataset, Dataset.dataset_id == inputID)
    return list(resultset.values(Dataset.name, Dataset.dataset_id, Dataset.nevents, Dataset.process))
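A minimal usage sketch, assuming the same SAMADhi setup as the snippet above; the dataset name is made up:

# Hypothetical call: look up by name (or pass inputID=... instead)
rows = get_dataset(inputDataset=u"/TTJets/Run2-v1/AOD")
for name, dataset_id, nevents, process in rows:
    print("%s (id=%d): %d events, process=%s" % (name, dataset_id, nevents, process))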
Example #2
def get_sample(inputSample = None, inputID = None):
    dbstore = DbStore()
    if inputSample is not None:
        resultset = dbstore.find(Sample, Sample.name == inputSample)
    elif inputID is not None:
        resultset = dbstore.find(Sample, Sample.sample_id == inputID)
    return list(resultset.values(Sample.name, Sample.sample_id, Sample.source_dataset_id, Sample.code_version))
Example #3
import subprocess  # needed by the ssh call below

def compute_luminosity(sample, options):
    print("Computing luminosity for %r" % str(sample.name))

    lumi = 0
    if not options.local:
        print("Running brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment")
        print('')

        cmds = ['brilcalc', 'lumi', '--normtag', '~lumipro/public/normtag_file/OfflineNormtagV2.json', '--output-style', 'csv', '-i', '"%s"' % str(sample.processed_lumi.replace('"', ''))]
        cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.0.3/bin:$PATH"; ' + ' '.join(cmds)
        ssh_cmds = ['ssh', '*****@*****.**' % options.username, cmd]  # remote address redacted in the source
        brilcalc_result = subprocess.check_output(ssh_cmds)

        lumi = parse_luminosity_csv(brilcalc_result)
    else:
        print("Running brilcalc locally...")
        # FIXME one day
        print("Error: running brilcalc locally is not supported for the moment.")
        return 0

    print("Sample luminosity: %.3f /pb" % lumi)
    print('')

    store = DbStore()
    # Update luminosity in the database
    store.find(Sample, Sample.sample_id == sample.sample_id).set(luminosity = lumi)

    store.commit()

    return lumi
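The parse_luminosity_csv helper called above is not part of the snippet. A rough sketch of what it might look like; the CSV layout (comment lines starting with '#', recorded luminosity in the last column) and the units are assumptions, not taken from the source:

def parse_luminosity_csv(output):
    # Sum the last column of every data row of brilcalc's CSV output.
    # Depending on the brilcalc options, a unit conversion (e.g. /ub to /pb)
    # may still be needed on the returned value.
    total = 0.
    for line in output.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        try:
            total += float(line.split(',')[-1])
        except ValueError:
            continue
    return total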
Example #4
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts   = optmgr.get_opt()
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that the LHCO exists and obtain the dataset id
    check = dbstore.find(Sample,Sample.sample_id==opts.lhco_id)
    if check.is_empty() or check.one().sampletype != "LHCO":
      raise IndexError("No LHCO with such index: %d"%opts.lhco_id)
    opts.dataset = findDataset(check.one())
    if opts.dataset is None:
      raise RuntimeError("Impossible to get the dataset id.")
    # check that the process exists
    check = dbstore.find(MadWeight,MadWeight.process_id==opts.process)
    if check.is_empty():
      raise IndexError("No process with such index: %d"%opts.process)
    # create the MW run object
    mw_run = MadWeightRun(opts.process,opts.lhco_id)
    mw_run.systematics = unicode(opts.syst)
    mw_run.user_comment = unicode(opts.comment)
    mw_run.version = opts.version
    if mw_run.version is None:
      check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id))
      if not check.is_empty():
        mw_run.version = check.order_by(MadWeightRun.version).last().version + 1
      else:
        mw_run.version = 1
    else:
      check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id) & (MadWeightRun.version==mw_run.version))
      if not check.is_empty():
        raise RuntimeError("There is already one such MadWeight run with the same version number:\n%s\n"%str(check.one()))
    # read the file
    inputfile = open(opts.filepath)
    count = 0
    for line in inputfile:
      data = line.rstrip('\n').split('\t')
      # get the event
      run_number = int(data[0].split('.')[0])
      event_number = int(data[0].split('.')[1])
      event_query = dbstore.find(Event, (Event.event_number==event_number) & (Event.run_number==run_number) & (Event.dataset_id==opts.dataset))
      if event_query.is_empty():
        event = Event(event_number,run_number,opts.dataset)
      else:
        event = event_query.one()
      # create the weight
      weight = Weight()
      weight.event = event
      weight.mw_run = mw_run
      weight.value = float(data[1])
      weight.uncertainty = float(data[2])
      dbstore.add(weight)
      count += 1
    # confirm and commit
    print mw_run
    print "Adding weights to %d events."%count
    if confirm(prompt="Insert into the database?", resp=True):
      dbstore.commit()
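For reference, the parsing loop above expects one weight per line, tab-separated, with the event identified as "<run>.<event>". An illustrative, made-up input line:

# format: run.event <TAB> weight <TAB> uncertainty
# 194533.1234567    1.2e-21    3.4e-23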
Example #5
def get_sample(id=None, name=None):
    store = DbStore()
    if id:
        result = store.find(Sample, Sample.sample_id == id)
    else:
        result = store.find(Sample, Sample.name == unicode(name))

    return result.one()
Example #6
def get_sample(id, name):

    dbstore = DbStore()

    if id is not None:
        result = dbstore.find(Sample, Sample.sample_id == id)
    elif name is not None:
        result = dbstore.find(Sample, Sample.name.like(unicode(name.replace('*', '%').replace('?', '_'))))

    return result.one()
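This variant maps shell-style wildcards onto SQL LIKE patterns ('*' becomes '%', '?' becomes '_'). A hypothetical call; note that result.one() raises if the pattern matches more than one sample:

# Hypothetical name pattern
sample = get_sample(None, "TTJets_*_HHAnalyzer_*")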
Example #7
    def getSample(self, iSample):
        """ Get sample from the DB, using the sample ID or name. """
   
        sample = ""
        dbstore = DbStore()
        if isinstance(iSample, int):
            sample = dbstore.find(Sample, Sample.sample_id == iSample).one()
        elif isinstance(iSample, str):
            sample = dbstore.find(Sample, Sample.name == unicode(iSample)).one()
        else:
            raise Exception("Argument should be sample ID or DB name.")

        return sample
Example #8
    def getSampleFiles(self, iSample):
        """ Get sample name/lit of sample files from the DB, using the sample ID or name. """
   
        sample = ""
        dbstore = DbStore()
        if isinstance(iSample, int):
            sample = dbstore.find(Sample, Sample.sample_id == iSample).one()
        elif isinstance(iSample, str):
            sample = dbstore.find(Sample, Sample.name == unicode(iSample)).one()
        else:
            raise Exception("Argument should be sample ID or DB name.")

        return sample.name, [ "/storage/data/cms/" + str(file.lfn) for file in sample.files ]
Example #9
def get_samples(name):
    store = DbStore()
    results = store.find(Sample,
                         Sample.name.like(unicode(name.replace('*', '%'))))

    if results.count() == 0:
        raise Exception("Could not find any sample matching {}".format(name))

    print("Found samples: ")
    for sample in results:
        print(sample.name)

    return results
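Typical use, with a made-up name pattern ('*' is translated to the SQL '%' wildcard above):

# Hypothetical: every sample from one production
for sample in get_samples("*_HHAnalyzer_v1.2*"):
    print(sample.path)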
Example #10
    def getSample(self, iSample):
        """ Get sample from the DB, using the sample ID or name. """

        sample = ""
        dbstore = DbStore()
        if isinstance(iSample, int):
            sample = dbstore.find(Sample, Sample.sample_id == iSample).one()
        elif isinstance(iSample, str):
            sample = dbstore.find(Sample,
                                  Sample.name == unicode(iSample)).one()
        else:
            raise Exception("Argument should be sample ID or DB name.")

        return sample
Example #11
    def getSampleFiles(self, iSample):
        """ Get sample name/lit of sample files from the DB, using the sample ID or name. """

        sample = ""
        dbstore = DbStore()
        if isinstance(iSample, int):
            sample = dbstore.find(Sample, Sample.sample_id == iSample).one()
        elif isinstance(iSample, str):
            sample = dbstore.find(Sample,
                                  Sample.name == unicode(iSample)).one()
        else:
            raise Exception("Argument should be sample ID or DB name.")

        return sample.name, [
            "/storage/data/cms/" + str(file.lfn) for file in sample.files
        ]
Example #12
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # build the query
    if opts.objtype == "dataset":
      objectClass = Dataset
      objectId = Dataset.dataset_id
    elif opts.objtype == "sample":
      objectClass = Sample
      objectId = Sample.sample_id
    elif opts.objtype == "madweight":
      objectClass = MadWeight
      objectId = MadWeight.process_id
    else:
      objectClass = Result
      objectId = Result.result_id

    if opts.objid is not None:
      result = dbstore.find(objectClass, objectId==opts.objid)
    elif opts.path is not None:
      result = dbstore.find(objectClass, objectClass.path.like(unicode(opts.path.replace('*', '%').replace('?', '_'))))
    elif opts.name is not None:
      result = dbstore.find(objectClass, objectClass.name.like(unicode(opts.name.replace('*', '%').replace('?', '_'))))
    else: 
      result = dbstore.find(objectClass)

    result = result.order_by(objectId)
    # loop and print
    if opts.longOutput:
      for entry in result:
        print entry
        print "--------------------------------------------------------------------------------------"
    else:
      if opts.objtype != "result":
        data = result.values(objectId, objectClass.name)
      else:
        data = result.values(objectId, objectClass.description)
      for dset in data:
        print "%i\t%s"%(dset[0], dset[1])
Example #13
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts   = optmgr.get_opt()
    # build the result from user input
    result = Result(unicode(opts.path))
    result.description = unicode(opts.desc)
    result.author = unicode(opts.author)
    result.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if opts.inputSamples is None:
      inputSamples = prompt_samples(dbstore)
    else:
      inputSamples = parse_samples(opts.inputSamples)
    # create and store the relations
    samples = dbstore.find(Sample,Sample.sample_id.is_in(inputSamples))
    if samples.is_empty():
      dbstore.add(result)
    else:
      for sample in samples:
        sample.results.add(result)
    print result
    if confirm(prompt="Insert into the database?", resp=True):
      dbstore.commit()
Example #14
def main():
    options = get_options()

    print "Merging non-resonant HH samples: %s" % ', '.join(
        str(i) for i in options.ids)
    print("")

    dbstore = DbStore()

    print("Checking that the samples already exist in the database...")
    samples = []
    for id in options.ids:
        sample = get_sample(id, dbstore)
        if not sample:
            raise AssertionError(
                "Aborting: the sample %d does not exist in the database, please insert it first"
                % id)

        samples.append(sample)
    print("All good. Continuing...")

    # Sanity check. Ensure that all parent datasets have the same cross-section
    dataset_xsec = None
    for sample in samples:
        if not dataset_xsec:
            dataset_xsec = sample.source_dataset.xsection
        else:
            if dataset_xsec != sample.source_dataset.xsection:
                raise AssertionError(
                    "Aborting: the parent datasets do not have the same cross-section. Merging would be ill-defined."
                )

    r = re.compile('(GluGluToHHTo2B2VTo2L2Nu_)node_.*(_13TeV-madgraph_.*)')

    # Ensure that all samples match the regex
    for sample in samples:
        if not r.match(sample.name):
            raise AssertionError(
                "Aborting: sample %d's name (%s) does not match expected name."
                % (sample.sample_id, sample.name))

    merged_sample_name = r.sub(r'\g<1>all_nodes\g<2>', samples[0].name)

    print "Constructing merged sample %r ..." % merged_sample_name

    comment = 'Merging of SAMADhi samples %s' % ', '.join(
        str(i) for i in options.ids)

    add_merged_sample(samples, merged_sample_name, comment, dbstore)
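A quick check of the renaming regex used above, on a made-up sample name:

import re
r = re.compile('(GluGluToHHTo2B2VTo2L2Nu_)node_.*(_13TeV-madgraph_.*)')
name = 'GluGluToHHTo2B2VTo2L2Nu_node_2_13TeV-madgraph_v1.2.0+7415_HHAnalyzer_abc1234'
print(r.sub(r'\g<1>all_nodes\g<2>', name))
# -> GluGluToHHTo2B2VTo2L2Nu_all_nodes_13TeV-madgraph_v1.2.0+7415_HHAnalyzer_abc1234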
Example #15
def add_sample(NAME, localpath, type, dataset_nevents, nselected, AnaUrl, FWUrl, dataset_id):
    # Large part of this imported from SAMADhi add_sample.py
    sample = Sample(unicode(NAME), unicode(localpath), unicode(type), dataset_nevents) 
    sample.nevents = nselected
    sample.normalization = 1.0
    sample.luminosity  = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
#    sample.user_comment =
    sample.source_dataset_id = dataset_id
#    sample.source_sample_id = None
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
#    sample.creation_time = 
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that source dataset exist
    if dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d"%sample.source_dataset_id)
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample,Sample.name==sample.name)
    if checkExisting.is_empty():
      print sample
      if confirm(prompt="Insert into the database?", resp=True):
        dbstore.add(sample)
        # compute the luminosity, if possible
        if sample.luminosity is None:
          dbstore.flush()
          sample.luminosity = sample.getLuminosity()
    else:
      existing = checkExisting.one()
      prompt  = "Replace existing "
      prompt += str(existing)
      prompt += "\nby new "
      prompt += str(sample)
      prompt += "\n?"
      if confirm(prompt, resp=False):
        existing.replaceBy(sample)
        if existing.luminosity is None:
          dbstore.flush()
          existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
Example #16
def get_sample(name):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.name == name)
    return resultset.one()
Example #17
def get_sample(inputSample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.name==inputSample)
    return list(resultset.values(Sample.path, Sample.normalization, Sample.nevents_processed))
Example #18
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that source dataset exist
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
#    sample.luminosity  = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        import json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.commit()
            return

    else:
        sample.luminosity = sample.getLuminosity()
        prompt  = "A sample with the same name already exists in the database. Replace by:\n"
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            dbstore.commit()
            return

    # rollback
    dbstore.rollback()
Example #19
def get_sample(iSample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.sample_id == iSample)
    return resultset.one()
Example #20
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts   = optmgr.get_opt()
    # build the configuration from user input
    madweightCfg = MadWeight(unicode(opts.name))
    for card in cards:
      setattr(madweightCfg, card, unicode(open(opts.path+"/Cards/"+card+".dat","r").read()))
    # get the transfer functions
    madweightCfg.transfer_fctVersion = unicode(open('%s/Source/MadWeight/transfer_function/Transfer_FctVersion.txt'%opts.path,"r").read().strip('\n'))
    theCfg = madweightCfg.transfer_fctVersion.split(':')[0]
    if not os.path.exists("%s/Source/MadWeight/transfer_function/data/TF_%s.dat"%(opts.path,theCfg)):
      raise RuntimeError("Could not find the transfert functions TF_%s.dat"%theCfg)
    madweightCfg.transfer_function = unicode(open("%s/Source/MadWeight/transfer_function/data/TF_%s.dat"%(opts.path,theCfg),"r").read())
    # find the generate line(s)
    theCfg = filter(lambda x:x.startswith("generate"),map(lambda x:x.lstrip(' \t'),madweightCfg.proc_card_mg5.splitlines()))
    if len(theCfg)!=1:
      raise RuntimeError("Could not find a unique generate statement in proc_card_mg5.dat")
    madweightCfg.diagram = theCfg[0][8:].lstrip(' \t')
    # find the ISR correction parameter
    theCfg = filter(lambda x:x.startswith("isr"),map(lambda x:x.lstrip(' \t'),madweightCfg.MadWeight_card.splitlines()))
    if len(theCfg)!=1:
      raise RuntimeError("Could not find a unique isr statement in MadWeight_card.dat")
    madweightCfg.isr=int(theCfg[0].split(None,2)[1])
    # find the NWA configuration parameter
    theCfg = filter(lambda x:x.startswith("nwa"),map(lambda x:x.lstrip(' \t'),madweightCfg.MadWeight_card.splitlines()))
    if len(theCfg)!=1:
      raise RuntimeError("Could not find a unique nwa statement in MadWeight_card.dat")
    nwa = theCfg[0].split(None,2)[1]
    if nwa=='F':
      madweightCfg.nwa=False
    elif nwa=='T':
      madweightCfg.nwa=True
    else:
      raise RuntimeError("Unrecognized value for the nwa parameter in MadWeight_card.dat: %s"%nwa)
    # find the beam energy and store cm energy in TeV
    theCfg = filter(lambda x:"ebeam1" in x,madweightCfg.run_card.splitlines())
    try:
      madweightCfg.cm_energy = float(theCfg[0].split()[0])*0.002
    except:
      print "Cannot find the beam energy in the run card"
      raise
    # find and add the Higgs weight (can be null, so no error if missing)
    theCfg = filter(lambda x:x.startswith("DECAY"),map(lambda x:x.lstrip(' \t'),madweightCfg.param_card_1.splitlines()))
    for cfg in theCfg:
      fields = cfg.split()
      if fields[1]=="25":
        madweightCfg.higgs_width = float(fields[2])
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that there is no existing entry
    checkExisting = dbstore.find(MadWeight,MadWeight.name==madweightCfg.name)
    if checkExisting.is_empty():
      print madweightCfg
      if confirm(prompt="Insert into the database?", resp=True):
        dbstore.add(madweightCfg)
    else:
      existing = checkExisting.one()
      prompt  = "Replace existing "
      prompt += str(existing)
      prompt += "\nby new "
      prompt += str(madweightCfg)
      prompt += "\n?"
      if confirm(prompt, resp=False):
        existing.replaceBy(madweightCfg)
    # commit
    dbstore.commit()
Example #21
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a list of dicts, each with three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None

    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function

    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads( tmp_processed_lumi )
            processed_lumi = processed_lumi | LumiList(compactList = tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents +=  results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

Example #22
def get_sample(iSample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.sample_id == iSample)
    return resultset.one()
Example #23
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts   = optmgr.get_opt()
    # build the sample from user input
    sample  = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed)
    sample.nevents = opts.nevents
    sample.normalization = opts.normalization
    sample.luminosity = opts.luminosity
    sample.code_version = unicode(opts.code_version)
    sample.user_comment = unicode(opts.user_comment)
    sample.source_dataset_id = opts.source_dataset_id
    sample.source_sample_id = opts.source_sample_id
    sample.author = unicode(opts.author)
    sample.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if sample.source_dataset_id is None:
      prompt_dataset(sample,dbstore)
    if sample.source_sample_id is None:
      prompt_sample(sample,dbstore)
    # check that source sample and dataset exist
    if sample.source_dataset_id is not None:
      checkExisting = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id)
      if checkExisting.is_empty():
        raise IndexError("No dataset with such index: %d"%sample.source_dataset_id)
    if sample.source_sample_id is not None:
      checkExisting = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id)
      if checkExisting.is_empty():
        raise IndexError("No sample with such index: %d"%sample.source_sample_id)
    # if opts.nevents is not set, take #events from source sample (if set) or from source dataset (if set) in that order
    if sample.nevents_processed is None and sample.source_sample_id is not None:
      sample.nevents_processed = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id).one().nevents_processed
    if sample.nevents_processed is None and sample.source_dataset_id is not None:
      sample.nevents_processed = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id).one().nevents
    if sample.nevents_processed is None:
      print "Warning: Number of processed events not given, and no way to guess it."
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample,Sample.name==sample.name)
    if checkExisting.is_empty():
      print sample
      if confirm(prompt="Insert into the database?", resp=True):
        dbstore.add(sample)
        # compute the luminosity, if possible
        if sample.luminosity is None:
          dbstore.flush()
          sample.luminosity = sample.getLuminosity()
    else:
      existing = checkExisting.one()
      prompt  = "Replace existing "
      prompt += str(existing)
      prompt += "\nby new "
      prompt += str(sample)
      prompt += "\n?"
      if confirm(prompt, resp=False):
        existing.replaceBy(sample)
        if existing.luminosity is None:
          dbstore.flush()
          existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
Example #24
def main(crabUsername, ingridUsername):
    dbstore = DbStore()

    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if r.author is None:
            continue
        for f in r.files:
            if crabUsername in f.lfn:
                p = '/storage/data/cms' + re.sub('/output.*root', '', f.lfn)
                if p not in list_allDBsamples:
                    list_allDBsamples.append(p)
        if crabUsername in r.path or ingridUsername in r.author:
            if r.path == '':
                continue
            if r.path not in list_allDBsamples:
                list_allDBsamples.append(r.path)
#            print r.path
    print ""

    storageDir = join('/storage/data/cms/store/user/', crabUsername)
    print "##### Get the list of user paths in %s" % storageDir

    list_allUserDirs = {}
    currentTime = dt.datetime.now()
    tcut = getDateMinusT(currentTime, month = 1)
    for d in listdir(storageDir):
        if not isdir(join(storageDir, d)):
            continue
        if 'CRAB_PrivateMC' in d or 'testFiles' in d :
            continue
        for subd in listdir(join(storageDir, d)):
            if not isdir(join(storageDir, d, subd)):
                continue
            for taskStamp in listdir(join(storageDir, d, subd)):
                if not isdir(join(storageDir, d, subd, taskStamp)):
                    continue
                try:
                    ttask = int(taskStamp.replace('_', ''))
                except ValueError:
                    print("Warning: could not interpret path {}, skipping it...".format(taskStamp))
                    continue
                if ttask >= tcut:
                    continue
                for taskID in listdir(join(storageDir, d, subd, taskStamp)):
                    if not isdir(join(storageDir, d, subd, taskStamp, taskID)):
                        continue
                    myPath = join(storageDir, d, subd, taskStamp, taskID)
                    if myPath in list_allDBsamples:
                        continue
#                    print isFramework(myPath), myPath
                    try:
                        mySize = subprocess.check_output(["du", '-s', myPath]).split()[0].decode('utf-8')
                    except subprocess.CalledProcessError:
                        print("Error while accessing file in path {}, skipping it!".format(myPath))
                        continue
                    list_allUserDirs[ttask] = {'path': myPath, 'size': int(mySize) * 1024, 'is CP3-llbb': isFramework(myPath)}

    print '# Tasks older than 6 months'
    print '# timestamp= ', getDateMinusT(currentTime, month = 6)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if t < getDateMinusT(currentTime, month = 6) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint

    print '# Tasks between 3 and 6 months old'
    print '# timestamp= ', getDateMinusT(currentTime, month = 3)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if getDateMinusT(currentTime, month = 6) < t < getDateMinusT(currentTime, month = 3) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint

    print '# Tasks between 1 and 3 months old'
    print '# timestamp= ', getDateMinusT(currentTime, month = 1)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if getDateMinusT(currentTime, month = 3) < t < getDateMinusT(currentTime, month = 1) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint

    print '# The following tasks could not be asserted to be cp3_llbb framework tasks or not... deal with them as you see fit:'
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if not list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\tpath= %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint
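main() relies on two helpers that are not part of the snippet, getDateMinusT and sizeof_fmt. Plausible reconstructions, assuming CRAB task stamps have the form YYMMDD_HHMMSS so that dates compare correctly as plain integers; neither is the original code:

import datetime as dt

def getDateMinusT(currentTime, month=1):
    # Go back N months (approximated as 30 days each) and encode the result
    # like a CRAB task stamp with the '_' removed: YYMMDDHHMMSS as an int.
    t = currentTime - dt.timedelta(days=30 * month)
    return int(t.strftime('%y%m%d%H%M%S'))

def sizeof_fmt(num, suffix='B'):
    # Human-readable byte count, e.g. 1536 -> '1.5KiB'
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%3.1f%s%s" % (num, 'Pi', suffix)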
Example #25
def get_dataset(inputDataset):
    dbstore = DbStore()
    resultset = dbstore.find(Dataset, Dataset.name == inputDataset)
    return list(
        resultset.values(Dataset.name, Dataset.dataset_id, Dataset.nevents))
Example #26
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that source dataset exist
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

Example #27
def get_sample(sample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.name==sample)
    return list(resultset.values(Sample.sample_id))
Example #28
def main(crabUsername, ingridUsername, DEBUG = False, evaluateSize = False):
    if DEBUG:
        print "RUNNING IN DEBUG MODE"
        print "Nothing will be deleted\n"

    dbstore = DbStore()

    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if crabUsername in r.path:
            list_allDBsamples.append([r.name, r.source_dataset_id])
    print ""

    print "##### Get the list of existing productions"
    # before anything else: get the list of tags to not touch
    whitelist = requests.get('https://raw.githubusercontent.com/cp3-llbb/GridIn/master/data/SAMADhi_doNOTdelete_whitelist.json').json()
    if DEBUG:
        print "production whitelist= ", whitelist
    list_all_productions = []
    for i, s in enumerate(list_allDBsamples):
        s_name, s_id = s
        isProdAlreadyListed = False
        isSampleProtected = False
        for FWtag, Anatag in list_all_productions:
            if FWtag in str(s_name) and Anatag in str(s_name):
#                print "This prod is already in the list, FWtag= ", FWtag, "Anatag= ", Anatag
                isProdAlreadyListed = True
                break
        if isProdAlreadyListed:
            continue
        tags = str(s_name)
        # Get the tags: 
        # First of all: check if the sample is protected or not
        for ana in whitelist:
            part = str(ana)
            for protectedtag in whitelist[ana]:
                t = str(protectedtag).split('_%s_' % part)
                if t[0] in tags and t[1] in tags:
                    if DEBUG:
                        print '\tSkipping whitelisted sample %s' % s_name
                    isSampleProtected = True
            if not isSampleProtected:
                tags = tags.replace(part, '') # remove HHAnalyzer and the like from the name of the sample
        if isSampleProtected:
            continue
        # now extract the fw and analyzer tags
        # for analyzer, this is always the last part of the sample name so we don't have to worry about naming conventions there (fortunately)
        tags = tags.split('_')
        Anatag = tags[-1]
        tags = tags[:-1]
        # for FW the following regex should work ((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})
        # it matches either:
        #   - a framework tag (possibly with a final X): v1.2.0+7415
        #   - possibly followed by a number of commits and a 'g' plus 7 to 40 characters git hash: v1.2.0+7415-79-ga5b16ff
        #   - or alternatively a 7 to 40 characters git hash: f2f0a44
        tags = [x for x in tags if re.match('((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})', x)]
        if DEBUG:
            print tags, Anatag
        if len(tags) != 1:
            print "ERROR, there are spurious things in the sample name, please figure out what is happening:"
            print "FWtags= ", tags
            return 1
        FWtag = tags[0]
        list_all_productions.append([FWtag, Anatag])

    for i, p in enumerate(list_all_productions):
        if DEBUG and i > 0:
            break
        FWtag, Anatag = p

        extrastring = ''
        if not evaluateSize:
            extrastring = '(evaluation of the disk size is OFF by default)'
        print "\n##### Now looking at prod FWtag= ", FWtag, 'Anatag= ', Anatag, 'and list the associated folders %s' % extrastring
        totalSize = 0
        totalSamples = 0
        cannotManageToDeleteThisProd = False
        for s_name, s_id in list_allDBsamples:
            if FWtag in str(s_name) and Anatag in str(s_name):
                result = dbstore.find(Sample, Sample.name == s_name)
                s = result.one()
                if evaluateSize:
                    totalSize += int(subprocess.check_output(["du", '-s', str(s.path)]).split()[0].decode('utf-8'))
                totalSamples += 1
                if s.source_sample is not None:
                    print "WARNING, the sample", s_name, "depend on another sample, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.derived_samples.count() > 0:
                    print "WARNING, the sample", s_name, "has derived samples, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.results.count() > 0:
                    print "WARNING, the sample", s_name, "has derived results, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                print s.path
        if cannotManageToDeleteThisProd:
            continue

        print '\tFWtag= ', FWtag, 'Anatag= ', Anatag, 'totalSamples= ', totalSamples, 'totalSize= ', totalSize, "(%s)" % sizeof_fmt(totalSize)
        if confirm(prompt='\tDo you REALLY want to DELETE this prod from disk and from SAMADhi?', resp=False):
            for s_name, s_id in list_allDBsamples:
                if FWtag in str(s_name) and Anatag in str(s_name):
                    result = dbstore.find(Sample, Sample.name == s_name)
                    s = result.one()
                    if DEBUG:
                        print 'rm -r %s' % s.path
                        print 'rm -r %s' % str(s.path).rsplit('/0000', 1)[0]
                        print 'dbstore.remove()'
                    else:
                        try:
                            shutil.rmtree(s.path)
                            shutil.rmtree(str(s.path).rsplit('/0000', 1)[0])
                        except OSError:
                            print "Seems we have a buggy path: %s" % s.path
                            print "deleting the DB entry then moving on..."
                        dbstore.remove(s)
                        dbstore.commit()
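The FW-tag regex quoted in the comments above can be sanity-checked in isolation; the first three strings are the examples from the comments, the last is a made-up non-tag:

import re
fw_re = re.compile(r'((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})')
for tag in ['v1.2.0+7415', 'v1.2.0+7415-79-ga5b16ff', 'f2f0a44', 'HHAnalyzer']:
    print("%s -> %s" % (tag, bool(fw_re.match(tag))))  # only 'HHAnalyzer' fails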
Example #29
def add_sample(NAME,
               localpath,
               type,
               nevents,
               nselected,
               AnaUrl,
               FWUrl,
               dataset_id,
               sumw,
               extras_sumw,
               has_job_processed_everything,
               dataset_nevents,
               files,
               processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that source dataset exist
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type),
                        nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(
        json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(
        AnaUrl + ' ' +
        FWUrl)  #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(nevents) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

Example #30
def get_sample(name):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.name == name)
    return resultset.one()
Example #31
def get_sample(iSample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.sample_id == iSample)
    return list(resultset.values(Sample.name, Sample.path))
Example #32
# (the snippet starts mid-file; the os and sys imports below are implied by the code)
import os
import sys

CMSSW_BASE = os.environ['CMSSW_BASE']
SCRAM_ARCH = os.environ['SCRAM_ARCH']
sys.path.append(os.path.join(CMSSW_BASE, 'bin', SCRAM_ARCH))
from SAMADhi import Dataset, Sample, DbStore

import inspect
scriptDir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.append(scriptDir)

from sampleList import samples_dict, number_of_bases, analysis_tags

# Connect to the database
dbstore = DbStore()


def build_sample_name(name, tag):
    return "{}*{}".format(name, tag)


def get_sample_ids_from_name(name):
    results = dbstore.find(Sample,
                           Sample.name.like(unicode(name.replace('*', '%'))))

    if results.count() == 0:
        return None

    if results.count() > 1:
        # the snippet is truncated here in the source; presumably it warns
        # about multiple matches before returning every matching id
        print("Warning: more than one sample matches {}".format(name))

    return [r.sample_id for r in results]
Example #33
    def get_sample(self, name, tag):
        dbstore = DbStore()
        resultset = dbstore.find(Sample,
                                 Sample.name.like(unicode(name + "%_" + tag)))
        return resultset.one()
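A hypothetical call, assuming an instance named mgr; the name and tag are illustrative, and the '%' in the LIKE pattern absorbs whatever sits between them in the stored sample name:

# matches e.g. 'TTJets_v1.2.0+7415_HHAnalyzer_abc1234'
s = mgr.get_sample("TTJets", "HHAnalyzer_abc1234")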
Example #34
def get_dataset(inputDataset):
    dbstore = DbStore()
    resultset = dbstore.find(Dataset, Dataset.name==inputDataset)
    return list(resultset.values(Dataset.name, Dataset.dataset_id, Dataset.nevents))
Example #35
def main():
    """Main function"""
    # get the options
    optmgr  = DASOptionParser()
    opts    = optmgr.get_opt()
    host    = opts.host
    debug   = opts.verbose
    sample  = opts.sample
    query1  = "dataset="+sample+" | grep dataset.name, dataset.nevents, dataset.size, dataset.tag, dataset.datatype, dataset.creation_time"
    query2  = "release dataset="+sample+" | grep release.name"
    idx     = opts.idx
    thr     = opts.threshold
    ckey    = opts.ckey
    cert    = opts.cert
    das_h   = opts.das_headers
    # perform the DAS queries
    jsondict1 = get_data(host, query1, idx, 1, debug, thr, ckey, cert, das_h)
    jsondict2 = get_data(host, query2, idx, 1, debug, thr, ckey, cert, das_h)
    # check the result
    if len(jsondict1)>1: print "Error: more than one element in jsondict1..."
    tmp = [{u'dataset' : [{}]},]
    for i in range(0,len(jsondict1[0]["dataset"])):
        if jsondict1[0]["dataset"][i]["name"]==sample:
            for key in jsondict1[0]["dataset"][i]:
                tmp[0]["dataset"][0][key] = jsondict1[0]["dataset"][i][key]
    if not "tag" in tmp[0]["dataset"][0]:
        print "global tag not found: looks to be always the case now, value will be 'None'"
        tmp[0]["dataset"][0][u'tag']=None 
    print "****das query:", tmp
    jsondict1 = tmp
    if not(isinstance(jsondict1, list) and
           len(jsondict1)==1 and
           isinstance(jsondict1[0], dict) and
           isinstance(jsondict1[0]["dataset"],list) and
           len(jsondict1[0]["dataset"])==1 and
           isinstance(jsondict1[0]["dataset"][0],dict) and
           isinstance(jsondict2, list) and
           len(jsondict2)==1 and
           isinstance(jsondict2[0], dict) and
           isinstance(jsondict2[0]["release"],list) and
           len(jsondict2[0]["release"])==1 and
           isinstance(jsondict2[0]["release"][0],dict)):
      raise RuntimeError("Incorrect response from DAS:\n"+str(jsondict1)+"\n"+str(jsondict2))
    # prepare the summary json object
    jsondict1[0]["dataset"][0][u"release"] = jsondict2[0]["release"][0]["name"]
    jsondict1[0]["dataset"][0].update({ u"process":unicode(opts.process), 
                                        u"xsection":opts.xsection, u"energy":opts.energy, 
                                        u"comment":unicode(opts.comment) })
    # convert the jsondict into a Dataset
    dataset = asDataset(jsondict1[0]["dataset"][0])
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that there is no existing entry
    checkExisting = dbstore.find(Dataset,Dataset.name==dataset.name)
    if checkExisting.is_empty():
      print dataset
      if confirm(prompt="Insert into the database?", resp=True):
        dbstore.add(dataset)
    else:
      existing = checkExisting.one()
      prompt  = "Replace existing entry:\n"
      prompt += str(existing)
      prompt += "\nby new entry:\n"
      prompt += str(dataset)
      prompt += "\n?"
      if confirm(prompt, resp=False):
        existing.replaceBy(dataset)
    # commit
    dbstore.commit()
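For reference, the consistency checks above expect DAS responses shaped roughly like this (values made up):

jsondict1 = [{u'dataset': [{u'name': u'/TTJets/...', u'nevents': 42730273}]}]
jsondict2 = [{u'release': [{u'name': u'CMSSW_7_6_3'}]}]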
Example #36
def get_sample(sample):
    dbstore = DbStore()
    resultset = dbstore.find(Sample, Sample.name == sample)
    return list(resultset.values(Sample.sample_id))
Example #37
def add_sample(NAME,
               localpath,
               type,
               nevents,
               nselected,
               AnaUrl,
               FWUrl,
               dataset_id,
               sumw,
               has_job_processed_everything,
               dataset_nevents,
               files,
               processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that source dataset exist
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type),
                        nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    #    sample.luminosity  = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(
        AnaUrl + ' ' +
        FWUrl)  #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(nevents) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        import json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.commit()
            return

    else:
        sample.luminosity = sample.getLuminosity()
        prompt = "A sample with the same name already exists in the database. Replace by:\n"
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            dbstore.commit()
            return

    # rollback
    dbstore.rollback()
Example #38
def main(crabUsername, ingridUsername, DEBUG=False, evaluateSize=False):
    if DEBUG:
        print "RUNNING IN DEBUG MODE"
        print "Nothing will be deleted\n"

    dbstore = DbStore()

    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if crabUsername in r.path:
            list_allDBsamples.append([r.name, r.source_dataset_id])
    print ""

    print "##### Get the list of existing productions"
    # before anything else: get the list of tags to not touch
    whitelist = requests.get(
        'https://raw.githubusercontent.com/cp3-llbb/GridIn/master/data/SAMADhi_doNOTdelete_whitelist.json'
    ).json()
    if DEBUG:
        print "production whitelist= ", whitelist
    list_all_productions = []
    for i, s in enumerate(list_allDBsamples):
        s_name, s_id = s
        isProdAlreadyListed = False
        isSampleProtected = False
        for FWtag, Anatag in list_all_productions:
            if FWtag in str(s_name) and Anatag in str(s_name):
                #                print "This prod is already in the list, FWtag= ", FWtag, "Anatag= ", Anatag
                isProdAlreadyListed = True
                break
        if isProdAlreadyListed:
            continue
        tags = str(s_name)
        # Get the tags:
        # First of all: check if the sample is protected or not
        for ana in whitelist:
            part = str(ana)
            for protectedtag in whitelist[ana]:
                t = str(protectedtag).split('_%s_' % part)
                if t[0] in tags and t[1] in tags:
                    if DEBUG:
                        print '\tSkipping whitelisted sample %s' % s_name
                    isSampleProtected = True
            if not isSampleProtected:
                tags = tags.replace(
                    part, ''
                )  # remove HHAnalyzer and the like from the name of the sample
        if isSampleProtected:
            continue
        # now extract the fw and analyzer tags
        # for analyzer, this is always the last part of the sample name so we don't have to worry about naming conventions there (fortunately)
        tags = tags.split('_')
        Anatag = tags[-1]
        tags = tags[:-1]
        # for FW the following regex should work ((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})
        # it matches either:
        #   - a framework tag (possibly with a final X): v1.2.0+7415
        #   - possibly followed by a number of commits and a 'g' plus 7 to 40 characters git hash: v1.2.0+7415-79-ga5b16ff
        #   - or alternatively a 7 to 40 characters git hash: f2f0a44
        tags = [
            x for x in tags if re.match(
                '((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})',
                x)
        ]
        if DEBUG:
            print tags, Anatag
        if len(tags) != 1:
            print "ERROR, there are spurious things in the sample name, please figure out what is happening:"
            print "FWtags= ", tags
            return 1
        FWtag = tags[0]
        list_all_productions.append([FWtag, Anatag])

    for i, p in enumerate(list_all_productions):
        if DEBUG and i > 0:
            break
        FWtag, Anatag = p

        extrastring = ''
        if not evaluateSize:
            extrastring = '(evaluation of the disk size is OFF by default)'
        print "\n##### Now looking at prod FWtag= ", FWtag, 'Anatag= ', Anatag, 'and list the associated folders %s' % extrastring
        totalSize = 0
        totalSamples = 0
        cannotManageToDeleteThisProd = False
        for s_name, s_id in list_allDBsamples:
            if FWtag in str(s_name) and Anatag in str(s_name):
                result = dbstore.find(Sample, Sample.name == s_name)
                s = result.one()
                if evaluateSize:
                    totalSize += int(
                        subprocess.check_output(["du", '-s',
                                                 str(s.path)
                                                 ]).split()[0].decode('utf-8'))
                totalSamples += 1
                if s.source_sample is not None:
                    print "WARNING, the sample", s_name, "depend on another sample, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.derived_samples.count() > 0:
                    print "WARNING, the sample", s_name, "has derived samples, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.results.count() > 0:
                    print "WARNING, the sample", s_name, "has derived results, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                print s.path
        if cannotManageToDeleteThisProd:
            continue

        print '\tFWtag= ', FWtag, 'Anatag= ', Anatag, 'totalSamples= ', totalSamples, 'totalSize= ', totalSize, "(%s)" % sizeof_fmt(
            totalSize)
        if confirm(
                prompt=
                '\tDo you REALLY want to DELETE this prod from disk and from SAMADhi?',
                resp=False):
            for s_name, s_id in list_allDBsamples:
                if FWtag in str(s_name) and Anatag in str(s_name):
                    result = dbstore.find(Sample, Sample.name == s_name)
                    s = result.one()
                    if DEBUG:
                        print 'rm -r %s' % s.path
                        print 'rm -r %s' % str(s.path).rsplit('/0000', 1)[0]
                        print 'dbstore.remove()'
                    else:
                        try:
                            shutil.rmtree(s.path)
                            shutil.rmtree(str(s.path).rsplit('/0000', 1)[0])
                        except OSError:
                            print "Seems we have a buggy path: %s" % s.path
                            print "deleting the DB entry then moving on..."
                        dbstore.remove(s)
                        dbstore.commit()
Example #39
    def get_sample(self, name, tag):
        dbstore = DbStore()
        resultset = dbstore.find(Sample, Sample.name.like(unicode(name + "%_" + tag)))
        return resultset.one()