def get_dataset(inputDataset=None, inputID=None):
    """Look up a dataset by name or by numeric id.

    :param inputDataset: dataset name to match exactly (takes precedence).
    :param inputID: dataset id to match if no name is given.
    :returns: list of (name, dataset_id, nevents, process) tuples.
    :raises ValueError: if neither selector is provided.
    """
    dbstore = DbStore()
    if inputDataset is not None:
        resultset = dbstore.find(Dataset, Dataset.name == inputDataset)
    elif inputID is not None:
        resultset = dbstore.find(Dataset, Dataset.dataset_id == inputID)
    else:
        # Fixed: previously fell through with `resultset` unbound, raising
        # an UnboundLocalError instead of a meaningful error.
        raise ValueError("Either inputDataset or inputID must be provided")
    return list(resultset.values(Dataset.name, Dataset.dataset_id, Dataset.nevents, Dataset.process))
def get_sample(inputSample=None, inputID=None):
    """Look up a sample by name or by numeric id.

    :param inputSample: sample name to match exactly (takes precedence).
    :param inputID: sample id to match if no name is given.
    :returns: list of (name, sample_id, source_dataset_id, code_version) tuples.
    :raises ValueError: if neither selector is provided.
    """
    dbstore = DbStore()
    if inputSample is not None:
        resultset = dbstore.find(Sample, Sample.name == inputSample)
    elif inputID is not None:
        resultset = dbstore.find(Sample, Sample.sample_id == inputID)
    else:
        # Fixed: previously fell through with `resultset` unbound, raising
        # an UnboundLocalError instead of a meaningful error.
        raise ValueError("Either inputSample or inputID must be provided")
    return list(resultset.values(Sample.name, Sample.sample_id, Sample.source_dataset_id, Sample.code_version))
def compute_luminosity(sample, options):
    """Compute the integrated luminosity of a sample with brilcalc and store it.

    Runs brilcalc remotely over ssh (on lxplus) using the sample's processed
    lumi-section JSON, parses the CSV output, writes the result back into the
    Sample row, and returns the luminosity in /pb (0 on unsupported paths).
    """
    # NOTE(review): in Python 2 this parses as `print (literal % name)`, so it
    # works, but the parenthesization is misleading — looks like print(...) % x.
    print("Computing luminosity for %r") % str(sample.name)
    lumi = 0
    if not options.local:
        print("Running brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment")
        print('')
        # Strip double quotes from the lumi JSON, then re-quote it as a single
        # shell argument for the remote command line.
        cmds = ['brilcalc', 'lumi', '--normtag', '~lumipro/public/normtag_file/OfflineNormtagV2.json', '--output-style', 'csv', '-i', '"%s"' % str(sample.processed_lumi.replace('"', ''))]
        # brilcalc lives in the brilconda distribution; extend PATH remotely.
        cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.0.3/bin:$PATH"; ' + ' '.join(cmds)
        # NOTE(review): the host literal appears masked/redacted; the original
        # presumably contained a %s placeholder for options.username — as
        # written, the % formatting would raise TypeError. Verify upstream.
        ssh_cmds = ['ssh', '*****@*****.**' % options.username, cmd]
        brilcalc_result = subprocess.check_output(ssh_cmds)
        lumi = parse_luminosity_csv(brilcalc_result)
    else:
        print("Running brilcalc locally...")
        # FIXME one day
        print("Error: running brilcalc locally is not supported for the moment.")
        return 0
    print("Sample luminosity: %.3f /pb" % lumi)
    print('')
    store = DbStore()
    # Update luminosity in the database
    store.find(Sample, Sample.sample_id == sample.sample_id).set(luminosity = lumi)
    store.commit()
    return lumi
def main(): """Main function""" # get the options optmgr = MyOptionParser() opts = optmgr.get_opt() # connect to the MySQL database using default credentials dbstore = DbStore() # check that the LHCO exists and obtain the dataset id check = dbstore.find(Sample,Sample.sample_id==opts.lhco_id) if check.is_empty() or check.one().sampletype != "LHCO": raise IndexError("No LHCO with such index: %d"%opts.lhco_id) opts.dataset = findDataset(check.one()) if opts.dataset is None: raise RuntimeError("Impossible to get the dataset id.") # check that the process exists check = dbstore.find(MadWeight,MadWeight.process_id==opts.process) if check.is_empty(): raise IndexError("No process with such index: %d"%opts.process) # create the MW run object mw_run = MadWeightRun(opts.process,opts.lhco_id) mw_run.systematics = unicode(opts.syst) mw_run.user_comment = unicode(opts.comment) mw_run.version = opts.version if mw_run.version is None: check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id)) if not check.is_empty(): mw_run.version = check.order_by(MadWeightRun.version).last().version + 1 else: mw_run.version = 1 else: check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id) & (MadWeightRun.version==mw_run.version)) if not check.is_empty(): raise RuntimeError("There is already one such MadWeight run with the same version number:\n%s\n"%str(check.one())) # read the file inputfile = open(opts.filepath) count = 0 for line in inputfile: data = line.rstrip('\n').split('\t') # get the event run_number = int(data[0].split('.')[0]) event_number = int(data[0].split('.')[1]) event_query = dbstore.find(Event, (Event.event_number==event_number) & (Event.run_number==run_number) & (Event.dataset_id==opts.dataset)) if event_query.is_empty(): event = Event(event_number,run_number,opts.dataset) else: event = 
event_query.one() # create the weight weight = Weight() weight.event = event weight.mw_run = mw_run weight.value = float(data[1]) weight.uncertainty = float(data[2]) dbstore.add(weight) count += 1 # confirm and commit print mw_run print "Adding weights to %d events."%count if confirm(prompt="Insert into the database?", resp=True): dbstore.commit()
def get_sample(id=None, name=None):
    """Fetch one Sample: by id when a truthy id is given, otherwise by name."""
    store = DbStore()
    if (id):
        criterion = Sample.sample_id == id
    else:
        criterion = Sample.name == unicode(name)
    matches = store.find(Sample, criterion)
    return matches.one()
def get_sample(id, name):
    """Fetch one Sample, looked up by id or by a glob-like name pattern.

    :param id: sample id; takes precedence when not None.
    :param name: name pattern where '*' and '?' act as SQL '%' and '_'.
    :returns: the single matching Sample.
    :raises ValueError: if both selectors are None.
    """
    dbstore = DbStore()
    if id is not None:
        result = dbstore.find(Sample, Sample.sample_id == id)
    elif name is not None:
        # translate shell-style wildcards into SQL LIKE wildcards
        result = dbstore.find(Sample, Sample.name.like(unicode(name.replace('*', '%').replace('?', '_'))))
    else:
        # Fixed: previously fell through with `result` unbound, raising
        # an UnboundLocalError instead of a meaningful error.
        raise ValueError("Either id or name must be provided")
    return result.one()
def getSampleFiles(self, iSample):
    """
    Look up a sample by ID (int) or name (str) and return its name together
    with the list of absolute paths of its files.
    """
    dbstore = DbStore()
    if isinstance(iSample, int):
        sample = dbstore.find(Sample, Sample.sample_id == iSample).one()
    elif isinstance(iSample, str):
        sample = dbstore.find(Sample, Sample.name == unicode(iSample)).one()
    else:
        raise Exception("Argument should be sample ID or DB name.")
    paths = []
    for file in sample.files:
        paths.append("/storage/data/cms/" + str(file.lfn))
    return sample.name, paths
def getSample(self, iSample):
    """
    Look up and return a Sample by ID (int) or by name (str).
    """
    dbstore = DbStore()
    if isinstance(iSample, int):
        criterion = Sample.sample_id == iSample
    elif isinstance(iSample, str):
        criterion = Sample.name == unicode(iSample)
    else:
        raise Exception("Argument should be sample ID or DB name.")
    return dbstore.find(Sample, criterion).one()
def main(): """Main function""" # get the options optmgr = MyOptionParser() opts = optmgr.get_opt() # build the result from user input result = Result(unicode(opts.path)) result.description = unicode(opts.desc) result.author = unicode(opts.author) result.creation_time = opts.datetime # connect to the MySQL database using default credentials dbstore = DbStore() # unless the source is set, prompt the user and present a list to make a choice if opts.inputSamples is None: inputSamples = prompt_samples(dbstore) else: inputSamples = parse_samples(opts.inputSamples) # create and store the relations samples = dbstore.find(Sample,Sample.sample_id.is_in(inputSamples)) if samples.is_empty(): dbstore.add(result) else: for sample in samples: sample.results.add(result) print result if confirm(prompt="Insert into the database?", resp=True): dbstore.commit()
def main(): """Main function""" # get the options optmgr = MyOptionParser() opts = optmgr.get_opt() # connect to the MySQL database using default credentials dbstore = DbStore() # build the query if opts.objtype == "dataset": objectClass = Dataset objectId = Dataset.dataset_id elif opts.objtype == "sample": objectClass = Sample objectId = Sample.sample_id elif opts.objtype == "madweight": objectClass = MadWeight objectId = MadWeight.process_id else: objectClass = Result objectId = Result.result_id if opts.objid is not None: result = dbstore.find(objectClass, objectId==opts.objid) elif opts.path is not None: result = dbstore.find(objectClass, objectClass.path.like(unicode(opts.path.replace('*', '%').replace('?', '_')))) elif opts.name is not None: result = dbstore.find(objectClass, objectClass.name.like(unicode(opts.name.replace('*', '%').replace('?', '_')))) else: result = dbstore.find(objectClass) result = result.order_by(objectId) # loop and print if opts.longOutput: for entry in result: print entry print "--------------------------------------------------------------------------------------" else: if opts.objtype != "result": data = result.values(objectId, objectClass.name) else: data = result.values(objectId, objectClass.description) for dset in data: print "%i\t%s"%(dset[0], dset[1])
def add_sample(NAME, localpath, type, dataset_nevents, nselected, AnaUrl, FWUrl, dataset_id):
    """Insert a Sample into SAMADhi, or interactively replace an existing one
    with the same name. Raises IndexError if the source dataset is missing."""
    # Large part of this imported from SAMADhi add_sample.py
    sample = Sample(unicode(NAME), unicode(localpath), unicode(type), dataset_nevents)
    sample.nevents = nselected
    sample.normalization = 1.0
    sample.luminosity = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    # sample.user_comment =
    sample.source_dataset_id = dataset_id
    # sample.source_sample_id = None
    # record the unix user owning the current directory as author
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    # sample.creation_time =
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that source dataset exist
    if dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d"%sample.source_dataset_id)
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample,Sample.name==sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
def get_samples(name):
    """Return every sample whose name matches *name*, where '*' is a wildcard.

    Raises an Exception when nothing matches; prints the matched names.
    """
    pattern = unicode(name.replace('*', '%'))
    store = DbStore()
    results = store.find(Sample, Sample.name.like(pattern))
    if results.count() == 0:
        raise Exception("Could not find any sample matching {}".format(name))
    print("Found samples: ")
    for match in results:
        print(match.name)
    return results
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None): dbstore = DbStore() sample = None # check that source dataset exist if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty(): raise IndexError("No dataset with such index: %d" % sample.dataset_id) # check that there is no existing entry update = False checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME)) if checkExisting.is_empty(): sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents) else: update = True sample = checkExisting.one() sample.removeFiles(dbstore) sample.nevents_processed = nevents sample.nevents = nselected sample.normalization = 1 sample.event_weight_sum = sumw # sample.luminosity = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs sample.code_version = unicode( AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good if not has_job_processed_everything: sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed") else: sample.user_comment = u"" sample.source_dataset_id = dataset_id sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name) if processed_lumi: # Convert to json import json processed_lumi = json.dumps(processed_lumi, separators=(',', ':')) sample.processed_lumi = unicode(processed_lumi) else: sample.processed_lumi = None for f in files: sample.files.add(f) if not update: dbstore.add(sample) if sample.luminosity is None: sample.luminosity = sample.getLuminosity() print sample if confirm(prompt="Insert into the database?", resp=True): dbstore.commit() return else: sample.luminosity = sample.getLuminosity() prompt = "A sample with the same name already exists in the database. Replace by:\n" prompt += str(sample) prompt += "\n?" 
if confirm(prompt, resp=False): dbstore.commit() return # rollback dbstore.rollback()
def get_dataset(inputDataset):
    """Return (name, dataset_id, nevents) tuples for datasets named *inputDataset*."""
    store = DbStore()
    matches = store.find(Dataset, Dataset.name == inputDataset)
    values = matches.values(Dataset.name, Dataset.dataset_id, Dataset.nevents)
    return list(values)
def get_sample(name):
    """Return the single Sample whose name equals *name*."""
    store = DbStore()
    return store.find(Sample, Sample.name == name).one()
def main(crabUsername, ingridUsername):
    """Scan the user's CRAB storage area for task directories that are not
    registered in SAMADhi and print cleanup (rm) suggestions grouped by age."""
    dbstore = DbStore()
    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if r.author is None:
            continue
        # collect storage paths of all files of samples owned by this user
        for f in r.files:
            if crabUsername in f.lfn:
                p = '/storage/data/cms' + re.sub('/output.*root', '', f.lfn)
                if p not in list_allDBsamples:
                    list_allDBsamples.append(p)
        if crabUsername in r.path or ingridUsername in r.author:
            if r.path == '':
                continue
            if r.path not in list_allDBsamples:
                list_allDBsamples.append(r.path)
            # print r.path
    print ""
    storageDir = join('/storage/data/cms/store/user/', crabUsername)
    print "##### Get the list of user paths in %s" % storageDir
    list_allUserDirs = {}
    currentTime = dt.datetime.now()
    # only consider tasks older than one month
    tcut = getDateMinusT(currentTime, month = 1)
    # walk <storage>/<primary dataset>/<subdir>/<task timestamp>/<task id>
    for d in listdir(storageDir):
        if not isdir(join(storageDir, d)):
            continue
        if 'CRAB_PrivateMC' in d or 'testFiles' in d :
            continue
        for subd in listdir(join(storageDir, d)):
            if not isdir(join(storageDir, d, subd)):
                continue
            for taskStamp in listdir(join(storageDir, d, subd)):
                if not isdir(join(storageDir, d, subd, taskStamp)):
                    continue
                try:
                    # timestamp dirs look like YYMMDD_HHMMSS -> sortable int
                    ttask = int(taskStamp.replace('_', ''))
                except ValueError:
                    print("Warning: could not interpret path {}, skipping it...".format(taskStamp))
                    continue
                if ttask >= tcut:
                    continue
                for taskID in listdir(join(storageDir, d, subd, taskStamp)):
                    if not isdir(join(storageDir, d, subd, taskStamp, taskID)):
                        continue
                    myPath = join(storageDir, d, subd, taskStamp, taskID)
                    # skip anything already registered in the database
                    if myPath in list_allDBsamples:
                        continue
                    # print isFramework(myPath), myPath
                    try:
                        mySize = subprocess.check_output(["du", '-s', myPath]).split()[0].decode('utf-8')
                    except subprocess.CalledProcessError:
                        print("Error while accessing file in path {}, skipping it!".format(myPath))
                        continue
                    # du -s reports KiB; convert to bytes
                    list_allUserDirs[ttask] = {'path': myPath, 'size': int(mySize) * 1024, 'is CP3-llbb': isFramework(myPath)}
    print '# Tasks older than 6 months'
    print '# timestamp= ', getDateMinusT(currentTime, month = 6)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if t < getDateMinusT(currentTime, month = 6) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint
    print '# Tasks between 3 and 6 months old'
    print '# timestamp= ', getDateMinusT(currentTime, month = 3)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if getDateMinusT(currentTime, month = 6) < t < getDateMinusT(currentTime, month = 3) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint
    print '# Tasks between 1 and 3 months old'
    print '# timestamp= ', getDateMinusT(currentTime, month = 1)
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if getDateMinusT(currentTime, month = 3) < t < getDateMinusT(currentTime, month = 1) and list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\nrm -r %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint
    print '# The following tasks could not be asserted to be cp3_llbb framework tasks or not... deal with them as you see fit:'
    totalSize = 0
    finalprint = ''
    for t in list_allUserDirs:
        if not list_allUserDirs[t]['is CP3-llbb']:
            totalSize += list_allUserDirs[t]['size']
            finalprint += "# size= %s\tpath= %s\n" % (sizeof_fmt(list_allUserDirs[t]['size']), list_allUserDirs[t]['path'])
    print '# totalSize= ', sizeof_fmt(totalSize)
    print finalprint
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None): dbstore = DbStore() sample = None # check that source dataset exist if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty(): raise IndexError("No dataset with such index: %d" % sample.dataset_id) # check that there is no existing entry update = False checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME)) if checkExisting.is_empty(): sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents) else: update = True sample = checkExisting.one() sample.removeFiles(dbstore) sample.nevents_processed = nevents sample.nevents = nselected sample.normalization = 1 sample.event_weight_sum = sumw # sample.luminosity = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good if not has_job_processed_everything: sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed") else: sample.user_comment = u"" sample.source_dataset_id = dataset_id sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name) if processed_lumi: # Convert to json import json processed_lumi = json.dumps(processed_lumi, separators=(',', ':')) sample.processed_lumi = unicode(processed_lumi) else: sample.processed_lumi = None for f in files: sample.files.add(f) if not update: dbstore.add(sample) if sample.luminosity is None: sample.luminosity = sample.getLuminosity() print sample if confirm(prompt="Insert into the database?", resp=True): dbstore.commit() return else: sample.luminosity = sample.getLuminosity() prompt = "A sample with the same name already exists in the database. Replace by:\n" prompt += str(sample) prompt += "\n?" 
if confirm(prompt, resp=False): dbstore.commit() return # rollback dbstore.rollback()
def main():
    """Main function: build a MadWeight configuration from the cards of a
    MadWeight installation directory and store it in the database."""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # build the configuration from user input: load every card file verbatim
    madweightCfg = MadWeight(unicode(opts.name))
    for card in cards:
        setattr(madweightCfg, card, unicode(open(opts.path+"/Cards/"+card+".dat","r").read()))
    # get the transfert functions
    madweightCfg.transfer_fctVersion = unicode(open('%s/Source/MadWeight/transfer_function/Transfer_FctVersion.txt'%opts.path,"r").read().strip('\n'))
    theCfg = madweightCfg.transfer_fctVersion.split(':')[0]
    if not os.path.exists("%s/Source/MadWeight/transfer_function/data/TF_%s.dat"%(opts.path,theCfg)):
        raise RuntimeError("Could not find the transfert functions TF_%s.dat"%theCfg)
    madweightCfg.transfer_function = unicode(open("%s/Source/MadWeight/transfer_function/data/TF_%s.dat"%(opts.path,theCfg),"r").read())
    # find the generate line(s); exactly one is expected
    theCfg = filter(lambda x:x.startswith("generate"),map(lambda x:x.lstrip(' \t'),madweightCfg.proc_card_mg5.splitlines()))
    if len(theCfg)!=1:
        raise RuntimeError("Could not find a unique generate statement in proc_card_mg5.dat")
    # strip the leading "generate" keyword (8 chars) to keep the diagram
    madweightCfg.diagram = theCfg[0][8:].lstrip(' \t')
    # find the ISR correction parameter
    theCfg = filter(lambda x:x.startswith("isr"),map(lambda x:x.lstrip(' \t'),madweightCfg.MadWeight_card.splitlines()))
    if len(theCfg)!=1:
        raise RuntimeError("Could not find a unique isr statement in MadWeight_card.dat")
    madweightCfg.isr=int(theCfg[0].split(None,2)[1])
    # find the NWA (narrow width approximation) configuration parameter
    theCfg = filter(lambda x:x.startswith("nwa"),map(lambda x:x.lstrip(' \t'),madweightCfg.MadWeight_card.splitlines()))
    if len(theCfg)!=1:
        raise RuntimeError("Could not find a unique nwa statement in MadWeight_card.dat")
    nwa = theCfg[0].split(None,2)[1]
    if nwa=='F':
        madweightCfg.nwa=False
    elif nwa=='T':
        madweightCfg.nwa=True
    else:
        raise RuntimeError("Unrecognized value for the nwa parameter in MadWeight_card.dat: %s"%nwa)
    # find the beam energy and store cm energy in TeV
    # (2 beams, GeV -> TeV: factor 2/1000 = 0.002)
    theCfg = filter(lambda x:"ebeam1" in x,madweightCfg.run_card.splitlines())
    try:
        madweightCfg.cm_energy = float(theCfg[0].split()[0])*0.002
    except:
        print "Cannot find the beam energy in the run card"
        raise
    # find and add the Higgs weight (can be null, so no error if missing);
    # PDG id 25 is the Higgs boson
    theCfg = filter(lambda x:x.startswith("DECAY"),map(lambda x:x.lstrip(' \t'),madweightCfg.param_card_1.splitlines()))
    for cfg in theCfg:
        fields = cfg.split()
        if fields[1]=="25":
            madweightCfg.higgs_width = float(fields[2])
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that there is no existing entry
    checkExisting = dbstore.find(MadWeight,MadWeight.name==madweightCfg.name)
    if checkExisting.is_empty():
        print madweightCfg
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(madweightCfg)
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(madweightCfg)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(madweightCfg)
    # commit
    dbstore.commit()
def get_sample(sample):
    """Return the sample_id values of samples named *sample*, as a list of tuples."""
    store = DbStore()
    matches = store.find(Sample, Sample.name == sample)
    return list(matches.values(Sample.sample_id))
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None): dbstore = DbStore() sample = None # check that source dataset exist if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty(): raise IndexError("No dataset with such index: %d" % sample.dataset_id) # check that there is no existing entry update = False checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME)) if checkExisting.is_empty(): sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents) else: update = True sample = checkExisting.one() sample.removeFiles(dbstore) sample.nevents_processed = nevents sample.nevents = nselected sample.normalization = 1 sample.event_weight_sum = sumw sample.extras_event_weight_sum = unicode(json.dumps(extras_sumw, separators=(',', ':'))) sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good if not has_job_processed_everything: sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed") else: sample.user_comment = u"" sample.source_dataset_id = dataset_id sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name) if processed_lumi: # Convert to json processed_lumi = json.dumps(processed_lumi, separators=(',', ':')) sample.processed_lumi = unicode(processed_lumi) else: sample.processed_lumi = None for f in files: sample.files.add(f) if not update: dbstore.add(sample) if sample.luminosity is None: sample.luminosity = sample.getLuminosity() print sample dbstore.commit() return else: sample.luminosity = sample.getLuminosity() print("Sample updated") print(sample) dbstore.commit() return # rollback dbstore.rollback()
def main():
    """Main function: build a Sample from command line options and insert it
    into the database (or interactively replace an existing one)."""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # build the sample from user input
    sample = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed)
    sample.nevents = opts.nevents
    sample.normalization = opts.normalization
    sample.luminosity = opts.luminosity
    sample.code_version = unicode(opts.code_version)
    sample.user_comment = unicode(opts.user_comment)
    sample.source_dataset_id = opts.source_dataset_id
    sample.source_sample_id = opts.source_sample_id
    sample.author = unicode(opts.author)
    sample.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if sample.source_dataset_id is None:
        prompt_dataset(sample,dbstore)
    if sample.source_sample_id is None:
        prompt_sample(sample,dbstore)
    # check that source sample and dataset exist
    if sample.source_dataset_id is not None:
        checkExisting = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id)
        if checkExisting.is_empty():
            raise IndexError("No dataset with such index: %d"%sample.source_dataset_id)
    if sample.source_sample_id is not None:
        checkExisting = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id)
        if checkExisting.is_empty():
            raise IndexError("No sample with such index: %d"%sample.source_sample_id)
    # if opts.nevents is not set, take #events from source sample (if set) or from source dataset (if set) in that order
    if sample.nevents_processed is None and sample.source_sample_id is not None:
        sample.nevents_processed = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id).one().nevents_processed
    if sample.nevents_processed is None and sample.source_dataset_id is not None:
        sample.nevents_processed = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id).one().nevents
    if sample.nevents_processed is None:
        print "Warning: Number of processed events not given, and no way to guess it."
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample,Sample.name==sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
def get_sample(iSample):
    """Return the single Sample with sample_id equal to *iSample*."""
    store = DbStore()
    match = store.find(Sample, Sample.sample_id == iSample)
    return match.one()
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    """Create (or update) a merged Sample that aggregates event counts, weight
    sums, lumi sections and files of several existing samples."""
    # samples is a list of dicts, each containing three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None
    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function
    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)
    # collecting contents: reset accumulators before summing over inputs
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        # the first input sample provides the source dataset/sample links
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        # merge per-key extra weight sums (stored as JSON strings)
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        # union of the processed lumi sections
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads( tmp_processed_lumi )
            processed_lumi = processed_lumi | LumiList(compactList = tmp_processed_lumi)
        # Get info from file table
        # NOTE(review): the loop variable `nevents` shadows the local defined
        # above; harmless here since the local is no longer read afterwards.
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents += results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    # record the unix user owning the current directory as author
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return
    # rollback
    # NOTE(review): unreachable — both branches above commit and return.
    dbstore.rollback()
def get_sample(self, name, tag):
    """Return the single Sample whose name matches '<name>%_<tag>' (SQL LIKE)."""
    pattern = unicode(name + "%_" + tag)
    store = DbStore()
    return store.find(Sample, Sample.name.like(pattern)).one()
def main(crabUsername, ingridUsername, DEBUG = False, evaluateSize = False):
    """Interactively delete whole productions (FW tag + analyzer tag) from
    both disk and SAMADhi, skipping whitelisted or referenced samples.

    In DEBUG mode nothing is deleted and only the first production is shown."""
    if DEBUG:
        print "RUNNING IN DEBUG MODE"
        print "Nothing will be deleted\n"
    dbstore = DbStore()
    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if crabUsername in r.path:
            list_allDBsamples.append([r.name, r.source_dataset_id])
    print ""
    print "##### Get the list of existing productions"
    # before anything else: get the list of tags to not touch
    whitelist = requests.get('https://raw.githubusercontent.com/cp3-llbb/GridIn/master/data/SAMADhi_doNOTdelete_whitelist.json').json()
    if DEBUG:
        print "production whitelist= ", whitelist
    list_all_productions = []
    for i, s in enumerate(list_allDBsamples):
        s_name, s_id = s
        isProdAlreadyListed = False
        isSampleProtected = False
        # a production is identified by the (FWtag, Anatag) pair appearing in
        # the sample name; only record each pair once
        for FWtag, Anatag in list_all_productions:
            if FWtag in str(s_name) and Anatag in str(s_name):
                # print "This prod is already in the list, FWtag= ", FWtag, "Anatag= ", Anatag
                isProdAlreadyListed = True
                break
        if isProdAlreadyListed:
            continue
        tags = str(s_name)
        # Get the tags:
        # First of all: check if the sample is protected or not
        for ana in whitelist:
            part = str(ana)
            for protectedtag in whitelist[ana]:
                t = str(protectedtag).split('_%s_' % part)
                if t[0] in tags and t[1] in tags:
                    if DEBUG:
                        print '\tSkipping whitelisted sample %s' % s_name
                    isSampleProtected = True
            if not isSampleProtected:
                tags = tags.replace(part, '') # remove HHAnalyzer and the like from the name of the sample
        if isSampleProtected:
            continue
        # now extract the fw and analyzer tags
        # for analyzer, this is always the last part of the sample name so we don't have to worry about naming conventions there (fortunately)
        tags = tags.split('_')
        Anatag = tags[-1]
        tags = tags[:-1]
        # for FW the following regex should work ((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})
        # it matches either:
        # - a framework tag (possibly with a final X): v1.2.0+7415
        # - possibly followed by a number of commits and a 'g' plus 7 to 40 characters git hash: v1.2.0+7415-79-ga5b16ff
        # - or alternatively a 7 to 40 characters git hash: f2f0a44
        tags = [x for x in tags if re.match('((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})', x)]
        if DEBUG:
            print tags, Anatag
        if len(tags) != 1:
            print "ERROR, there are spurious things in the sample name, please figure out what is happening:"
            print "FWtags= ", tags
            return 1
        FWtag = tags[0]
        list_all_productions.append([FWtag, Anatag])
    for i, p in enumerate(list_all_productions):
        # in DEBUG mode only inspect the first production
        if DEBUG and i > 0:
            break
        FWtag, Anatag = p
        extrastring = ''
        if not evaluateSize:
            extrastring = '(evaluation of the disk size is OFF by default)'
        print "\n##### Now looking at prod FWtag= ", FWtag, 'Anatag= ', Anatag, 'and list the associated folders %s' % extrastring
        totalSize = 0
        totalSamples = 0
        cannotManageToDeleteThisProd = False
        for s_name, s_id in list_allDBsamples:
            if FWtag in str(s_name) and Anatag in str(s_name):
                result = dbstore.find(Sample, Sample.name == s_name)
                s = result.one()
                if evaluateSize:
                    # du -s reports KiB
                    totalSize += int(subprocess.check_output(["du", '-s', str(s.path)]).split()[0].decode('utf-8'))
                totalSamples += 1
                # refuse to delete productions whose samples are referenced
                # by other samples or results
                if s.source_sample is not None:
                    print "WARNING, the sample", s_name, "depend on another sample, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.derived_samples.count() > 0:
                    print "WARNING, the sample", s_name, "has derived samples, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.results.count() > 0:
                    print "WARNING, the sample", s_name, "has derived results, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                print s.path
        if cannotManageToDeleteThisProd:
            continue
        print '\tFWtag= ', FWtag, 'Anatag= ', Anatag, 'totalSamples= ', totalSamples, 'totalSize= ', totalSize, "(%s)" % sizeof_fmt(totalSize)
        if confirm(prompt='\tDo you REALLY want to DELETE this prod from disk and from SAMADhi?', resp=False):
            for s_name, s_id in list_allDBsamples:
                if FWtag in str(s_name) and Anatag in str(s_name):
                    result = dbstore.find(Sample, Sample.name == s_name)
                    s = result.one()
                    if DEBUG:
                        print 'rm -r %s' % s.path
                        print 'rm -r %s' % str(s.path).rsplit('/0000', 1)[0]
                        print 'dbstore.remove()'
                    else:
                        # remove the sample directory and its parent task dir
                        try:
                            shutil.rmtree(s.path)
                            shutil.rmtree(str(s.path).rsplit('/0000', 1)[0])
                        except OSError:
                            # path already gone or broken: still drop the DB entry
                            print "Seems we have a buggy path: %s" % s.path
                            print "deleting the DB entry then moving on..."
                        dbstore.remove(s)
                        dbstore.commit()
def get_sample(inputSample):
    """Look up a sample by name; return the list of
    (path, normalization, nevents_processed) tuples for every match."""
    store = DbStore()
    matching = store.find(Sample, Sample.name == inputSample)
    return list(matching.values(Sample.path, Sample.normalization, Sample.nevents_processed))
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None): dbstore = DbStore() sample = None # check that source dataset exist if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty(): raise IndexError("No dataset with such index: %d" % sample.dataset_id) # check that there is no existing entry update = False checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME)) if checkExisting.is_empty(): sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents) else: update = True sample = checkExisting.one() sample.removeFiles(dbstore) sample.nevents_processed = nevents sample.nevents = nselected sample.normalization = 1 sample.event_weight_sum = sumw sample.extras_event_weight_sum = unicode( json.dumps(extras_sumw, separators=(',', ':'))) sample.code_version = unicode( AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good if not has_job_processed_everything: sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed") else: sample.user_comment = u"" sample.source_dataset_id = dataset_id sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name) if processed_lumi: # Convert to json processed_lumi = json.dumps(processed_lumi, separators=(',', ':')) sample.processed_lumi = unicode(processed_lumi) else: sample.processed_lumi = None for f in files: sample.files.add(f) if not update: dbstore.add(sample) if sample.luminosity is None: sample.luminosity = sample.getLuminosity() print sample dbstore.commit() return else: sample.luminosity = sample.getLuminosity() print("Sample updated") print(sample) dbstore.commit() return # rollback dbstore.rollback()
def get_dataset(inputDataset):
    """Look up a dataset by name; return the list of
    (name, dataset_id, nevents) tuples for every match."""
    store = DbStore()
    found = store.find(Dataset, Dataset.name == inputDataset)
    return list(found.values(Dataset.name, Dataset.dataset_id, Dataset.nevents))
def get_sample(iSample):
    """Look up a sample by its numeric id; return the list of
    (name, path) tuples for every match."""
    store = DbStore()
    hits = store.find(Sample, Sample.sample_id == iSample)
    return list(hits.values(Sample.name, Sample.path))
def get_sample(sample):
    """Look up a sample by name; return the list of matching sample ids."""
    store = DbStore()
    hits = store.find(Sample, Sample.name == sample)
    return list(hits.values(Sample.sample_id))
def main(crabUsername, ingridUsername, DEBUG=False, evaluateSize=False):
    """Interactively delete whole productions (groups of samples sharing a
    framework tag and an analyzer tag) from disk and from the SAMADhi DB.

    crabUsername -- substring matched against each sample path to select this user's samples
    ingridUsername -- not used in this body; kept for the caller's interface
    DEBUG -- if True, only print what would be deleted (nothing is removed)
    evaluateSize -- if True, run `du -s` on each sample path to report disk usage

    NOTE(review): indentation below was reconstructed from a whitespace-collapsed
    source; the statement grouping is best-effort and should be cross-checked
    against version control before relying on it.
    """
    if DEBUG:
        print "RUNNING IN DEBUG MODE"
        print "Nothing will be deleted\n"
    dbstore = DbStore()
    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    # Keep only the samples whose storage path belongs to this crab user.
    for r in results:
        if crabUsername in r.path:
            list_allDBsamples.append([r.name, r.source_dataset_id])
    print ""
    print "##### Get the list of existing productions"
    # before anything else: get the list of tags to not touch
    whitelist = requests.get('https://raw.githubusercontent.com/cp3-llbb/GridIn/master/data/SAMADhi_doNOTdelete_whitelist.json').json()
    if DEBUG:
        print "production whitelist= ", whitelist
    list_all_productions = []
    for i, s in enumerate(list_allDBsamples):
        s_name, s_id = s
        isProdAlreadyListed = False
        isSampleProtected = False
        # Skip samples belonging to a (FWtag, Anatag) pair already collected.
        for FWtag, Anatag in list_all_productions:
            if FWtag in str(s_name) and Anatag in str(s_name):
                # print "This prod is already in the list, FWtag= ", FWtag, "Anatag= ", Anatag
                isProdAlreadyListed = True
                break
        if isProdAlreadyListed:
            continue
        tags = str(s_name)
        # Get the tags:
        # First of all: check if the sample is protected or not
        for ana in whitelist:
            part = str(ana)
            for protectedtag in whitelist[ana]:
                t = str(protectedtag).split('_%s_' % part)
                if t[0] in tags and t[1] in tags:
                    if DEBUG:
                        print '\tSkipping whitelisted sample %s' % s_name
                    isSampleProtected = True
            if not isSampleProtected:
                tags = tags.replace(part, '')  # remove HHAnalyzer and the like from the name of the sample
        if isSampleProtected:
            continue
        # now extract the fw and analyzer tags
        # for analyzer, this is always the last part of the sample name so we don't have to worry about naming conventions there (fortunately)
        tags = tags.split('_')
        Anatag = tags[-1]
        tags = tags[:-1]
        # for FW the following regex should work ((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})
        # it matches either:
        # - a framework tag (possibly with a final X): v1.2.0+7415
        # - possibly followed by a number of commits and a 'g' plus 7 to 40 characters git hash: v1.2.0+7415-79-ga5b16ff
        # - or alternatively a 7 to 40 characters git hash: f2f0a44
        tags = [x for x in tags if re.match('((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})', x)]
        if DEBUG:
            print tags, Anatag
        if len(tags) != 1:
            print "ERROR, there are spurious things in the sample name, please figure out what is happening:"
            print "FWtags= ", tags
            return 1
        FWtag = tags[0]
        list_all_productions.append([FWtag, Anatag])
    # Second pass: for each production, list its samples and (optionally) delete them.
    for i, p in enumerate(list_all_productions):
        if DEBUG and i > 0:
            break
        FWtag, Anatag = p
        extrastring = ''
        if not evaluateSize:
            extrastring = '(evaluation of the disk size is OFF by default)'
        print "\n##### Now looking at prod FWtag= ", FWtag, 'Anatag= ', Anatag, 'and list the associated folders %s' % extrastring
        totalSize = 0
        totalSamples = 0
        cannotManageToDeleteThisProd = False
        for s_name, s_id in list_allDBsamples:
            if FWtag in str(s_name) and Anatag in str(s_name):
                result = dbstore.find(Sample, Sample.name == s_name)
                s = result.one()
                if evaluateSize:
                    # `du -s` prints "<kB>\t<path>"; take the first field.
                    totalSize += int(subprocess.check_output(["du", '-s', str(s.path)]).split()[0].decode('utf-8'))
                totalSamples += 1
                # Refuse to delete productions with DB dependencies in any direction.
                if s.source_sample is not None:
                    print "WARNING, the sample", s_name, "depend on another sample, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.derived_samples.count() > 0:
                    print "WARNING, the sample", s_name, "has derived samples, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.results.count() > 0:
                    print "WARNING, the sample", s_name, "has derived results, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                print s.path
        if cannotManageToDeleteThisProd:
            continue
        print '\tFWtag= ', FWtag, 'Anatag= ', Anatag, 'totalSamples= ', totalSamples, 'totalSize= ', totalSize, "(%s)" % sizeof_fmt(totalSize)
        if confirm(prompt='\tDo you REALLY want to DELETE this prod from disk and from SAMADhi?', resp=False):
            for s_name, s_id in list_allDBsamples:
                if FWtag in str(s_name) and Anatag in str(s_name):
                    result = dbstore.find(Sample, Sample.name == s_name)
                    s = result.one()
                    if DEBUG:
                        print 'rm -r %s' % s.path
                        print 'rm -r %s' % str(s.path).rsplit('/0000', 1)[0]
                        print 'dbstore.remove()'
                    else:
                        try:
                            # Remove the sample folder and its parent crab task folder
                            # (path conventionally ends in .../0000).
                            shutil.rmtree(s.path)
                            shutil.rmtree(str(s.path).rsplit('/0000', 1)[0])
                        except OSError:
                            print "Seems we have a buggy path: %s" % s.path
                            print "deleting the DB entry then moving on..."
                        dbstore.remove(s)
                        dbstore.commit()
def main(): """Main function""" # get the options optmgr = DASOptionParser() opts = optmgr.get_opt() host = opts.host debug = opts.verbose sample = opts.sample query1 = "dataset="+sample+" | grep dataset.name, dataset.nevents, dataset.size, dataset.tag, dataset.datatype, dataset.creation_time" query2 = "release dataset="+sample+" | grep release.name" idx = opts.idx thr = opts.threshold ckey = opts.ckey cert = opts.cert das_h = opts.das_headers # perform the DAS queries jsondict1 = get_data(host, query1, idx, 1, debug, thr, ckey, cert, das_h) jsondict2 = get_data(host, query2, idx, 1, debug, thr, ckey, cert, das_h) # check the result if len(jsondict1)>1: print "Error: more than one element in jsondict1..." tmp = [{u'dataset' : [{}]},] for i in range(0,len(jsondict1[0]["dataset"])): if jsondict1[0]["dataset"][i]["name"]==sample: for key in jsondict1[0]["dataset"][i]: tmp[0]["dataset"][0][key] = jsondict1[0]["dataset"][i][key] if not "tag" in tmp[0]["dataset"][0]: print "global tag not found: looks to be always the case now, value will be 'None'" tmp[0]["dataset"][0][u'tag']=None print "****das query:", tmp jsondict1 = tmp if not(isinstance(jsondict1, list) and len(jsondict1)==1 and isinstance(jsondict1[0], dict) and isinstance(jsondict1[0]["dataset"],list) and len(jsondict1[0]["dataset"])==1 and isinstance(jsondict1[0]["dataset"][0],dict) and isinstance(jsondict2, list) and len(jsondict2)==1 and isinstance(jsondict2[0], dict) and isinstance(jsondict2[0]["release"],list) and len(jsondict2[0]["release"])==1 and isinstance(jsondict2[0]["release"][0],dict)): raise RuntimeError("Incorrect response from DAS:\n"+str(jsondict1)+"\n"+str(jsondict2)) # prepare the summary json object jsondict1[0]["dataset"][0][u"release"] = jsondict2[0]["release"][0]["name"] jsondict1[0]["dataset"][0].update({ u"process":unicode(opts.process), u"xsection":opts.xsection, u"energy":opts.energy, u"comment":unicode(opts.comment) }) # convert the jsondict into a Dataset dataset = 
asDataset(jsondict1[0]["dataset"][0]) # connect to the MySQL database using default credentials dbstore = DbStore() # check that there is no existing entry checkExisting = dbstore.find(Dataset,Dataset.name==dataset.name) if checkExisting.is_empty(): print dataset if confirm(prompt="Insert into the database?", resp=True): dbstore.add(dataset) else: existing = checkExisting.one() prompt = "Replace existing entry:\n" prompt += str(existing) prompt += "\nby new entry:\n" prompt += str(dataset) prompt += "\n?" if confirm(prompt, resp=False): existing.replaceBy(dataset) # commit dbstore.commit()