def compute_luminosity(sample, options):
    print("Computing luminosity for %r" % str(sample.name))

    lumi = 0
    if not options.local:
        print("Running brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment")
        print('')

        cmds = ['brilcalc', 'lumi', '--normtag', options.normtag, '--output-style', 'csv',
                '-i', '"%s"' % str(sample.processed_lumi.replace('"', ''))]
        cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; ' + ' '.join(cmds)

        ssh_cmds = ['ssh', '*****@*****.**' % options.username, cmd]  # user@host redacted in this listing
        brilcalc_result = subprocess.check_output(ssh_cmds)

        lumi = parse_luminosity_csv(brilcalc_result)
    else:
        print("Running brilcalc locally...")
        # FIXME one day
        print("Error: running brilcalc locally is not supported for the moment.")
        return 0

    print("Sample luminosity: %.3f /pb" % lumi)
    print('')

    # Update luminosity in the database
    store = DbStore()
    store.find(Sample, Sample.sample_id == sample.sample_id).set(luminosity=lumi)
    store.commit()

    return lumi
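# A minimal usage sketch for compute_luminosity, assuming an options object
# with the 'local', 'normtag' and 'username' attributes accessed above. The
# sample name and normtag value are hypothetical; the ssh host inside the
# function is redacted in this listing and must be restored before running.
if __name__ == '__main__':
    from argparse import Namespace
    options = Namespace(local=False, normtag='normtag_PHYSICS.json', username='jdoe')
    sample = DbStore().find(Sample, Sample.name == u'SomeSample_v1.2.0+7415_abc1234').one()
    if sample is not None:
        compute_luminosity(sample, options)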
class StoreCleaner():
    """ handle to the db store, with basic facilities to cleanup entries """

    def __init__(self):
        self.dbstore = DbStore()

    def deleteSample(self, sample_id):
        store = self.dbstore
        # first remove the files associated with the sample
        files = store.find(SFile, SFile.sample_id == sample_id)
        for sampleFile in files:
            store.remove(sampleFile)
        # then remove the sample
        sample = store.find(Sample, Sample.sample_id == sample_id).one()
        print("deleting sample %d" % sample_id)
        store.remove(sample)

    def deleteDataset(self, dataset_id):
        store = self.dbstore
        # simply delete the dataset
        dataset = store.find(Dataset, Dataset.dataset_id == dataset_id).one()
        print("deleting dataset %d" % dataset_id)
        store.remove(dataset)

    def commit(self):
        self.dbstore.commit()

    def rollback(self):
        self.dbstore.rollback()
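# A short usage sketch for StoreCleaner: queue deletions, then commit only on
# explicit confirmation (nothing is persisted until commit). Both ids are made
# up for illustration.
cleaner = StoreCleaner()
cleaner.deleteSample(42)    # hypothetical sample_id
cleaner.deleteDataset(7)    # hypothetical dataset_id
if confirm(prompt="Commit these deletions?", resp=False):
    cleaner.commit()
else:
    cleaner.rollback()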
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # build the result from user input
    result = Result(unicode(opts.path))
    result.description = unicode(opts.desc)
    result.author = unicode(opts.author)
    result.creation_time = opts.datetime
    result.elog = unicode(opts.elog)
    result.analysis_id = opts.ana
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if opts.inputSamples is None:
        inputSamples = prompt_samples(dbstore)
    else:
        inputSamples = parse_samples(opts.inputSamples)
    # create and store the relations
    samples = dbstore.find(Sample, Sample.sample_id.is_in(inputSamples))
    if samples.is_empty():
        dbstore.add(result)
    else:
        for sample in samples:
            sample.results.add(result)
    # flush (populates the analysis if needed)
    dbstore.flush()
    # print the resulting object and ask for confirmation
    print result
    if confirm(prompt="Insert into the database?", resp=True):
        dbstore.commit()
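# The parse_samples helper used above is not shown in this listing. A plausible
# sketch, assuming opts.inputSamples is a comma-separated string of sample ids
# (e.g. "101,102,103"); the real helper may differ:
def parse_samples(input_string):
    return [int(token) for token in input_string.split(',') if token.strip()]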
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a list of dicts, each containing three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None

    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function

    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads(tmp_processed_lumi)
            processed_lumi = processed_lumi | LumiList(compactList=tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(
                results.values(File.lfn, File.pfn, File.event_weight_sum,
                               File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents += results[0].nevents

    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed)
                                      + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return

    # rollback
    dbstore.rollback()
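# A hypothetical call to add_merged_sample, merging two existing DB samples
# into a single entry; every name and id below is made up for illustration.
to_merge = [
    {'process': 'DYJetsToLL', 'dataset_id': 12, 'sample_id': 101},
    {'process': 'DYJetsToLL', 'dataset_id': 13, 'sample_id': 102},
]
add_merged_sample('DYJetsToLL_merged_v1.2.0+7415_abc1234', 'NTUPLES',
                  'https://github.com/cp3-llbb/HHAnalysis abc1234',
                  'https://github.com/cp3-llbb/Framework v1.2.0+7415',
                  to_merge, 'merged extension datasets')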
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # build the sample from user input
    sample = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed)
    sample.nevents = opts.nevents
    sample.normalization = opts.normalization
    sample.event_weight_sum = opts.weight_sum
    sample.luminosity = opts.luminosity
    sample.code_version = unicode(opts.code_version)
    sample.user_comment = unicode(opts.user_comment)
    sample.source_dataset_id = opts.source_dataset_id
    sample.source_sample_id = opts.source_sample_id
    sample.author = unicode(opts.author)
    sample.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if sample.source_dataset_id is None:
        prompt_dataset(sample, dbstore)
    if sample.source_sample_id is None:
        prompt_sample(sample, dbstore)
    # check that source sample and dataset exist
    if sample.source_dataset_id is not None:
        checkExisting = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id)
        if checkExisting.is_empty():
            raise IndexError("No dataset with such index: %d" % sample.source_dataset_id)
    if sample.source_sample_id is not None:
        checkExisting = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id)
        if checkExisting.is_empty():
            raise IndexError("No sample with such index: %d" % sample.source_sample_id)
    # if opts.nevents_processed is not set, take #events from the source sample (if set) or from the source dataset (if set), in that order
    if sample.nevents_processed is None and sample.source_sample_id is not None:
        sample.nevents_processed = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id).one().nevents_processed
    if sample.nevents_processed is None and sample.source_dataset_id is not None:
        sample.nevents_processed = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id).one().nevents
    if sample.nevents_processed is None:
        print "Warning: Number of processed events not given, and no way to guess it."
    # List input files
    files = []
    if opts.files == "":
        files = glob.glob(os.path.join(sample.path, '*.root'))
    else:
        files = unicode(opts.files).split(",")
    if len(files) == 0:
        print "Warning: no root files found in %r" % sample.path
    # Try to guess the number of events stored in each file, as well as the weight sum
    for f in files:
        (weight_sum, entries) = get_file_data_(f)
        sample.files.add(File(f, f, weight_sum, entries))
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample, Sample.name == sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
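# get_file_data_ is not defined in this listing. A minimal sketch with PyROOT,
# assuming each file stores its weight sum in a TParameter named
# 'event_weight_sum' and its events in a tree named 't'; both object names are
# assumptions, not necessarily the framework's actual layout.
import ROOT

def get_file_data_(path):
    weight_sum, entries = 0., None
    f = ROOT.TFile.Open(path)
    if f and not f.IsZombie():
        param = f.Get('event_weight_sum')
        if param:
            weight_sum = param.GetVal()
        tree = f.Get('t')
        if tree:
            entries = tree.GetEntries()
        f.Close()
    return (weight_sum, entries)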
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw,
               extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()
    sample = None

    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents)
                                      + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return

    # rollback
    dbstore.rollback()
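# A hypothetical invocation of add_sample after a processing job; every value
# below is illustrative only. The File constructor follows the 5-argument form
# used in add_merged_sample above.
job_files = [File(u'/store/user/jdoe/TT/output_1.root',
                  u'/storage/data/jdoe/TT/output_1.root',
                  12345.6, u'{}', 1000)]
add_sample('TT_v1.2.0+7415_MyAnalyzer_abc1234', '/storage/data/jdoe/TT', 'NTUPLES',
           nevents=100000, nselected=42000,
           AnaUrl='https://github.com/cp3-llbb/HHAnalysis abc1234',
           FWUrl='https://github.com/cp3-llbb/Framework v1.2.0+7415',
           dataset_id=7, sumw=98765.4, extras_sumw={'scale_up': 99000.0},
           has_job_processed_everything=True, dataset_nevents=100000,
           files=job_files)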
def main(crabUsername, ingridUsername, DEBUG=False, evaluateSize=False, dryRun=False, output=None):
    if DEBUG:
        print "RUNNING IN DEBUG MODE"
        print "Nothing will be deleted\n"
    if dryRun:
        print("Only simulating what is happening, nothing will be deleted.")

    dbstore = DbStore()

    print "##### Get the list of potential DB samples of interest"
    list_allDBsamples = []
    results = dbstore.find(Sample)
    for r in results:
        if crabUsername in r.path:
            list_allDBsamples.append([r.name, r.sample_id])
    print ""

    print "##### Get the list of existing productions"
    # before anything else: get the list of tags to not touch
    whitelist = requests.get('https://raw.githubusercontent.com/cp3-llbb/GridIn/master/data/SAMADhi_doNOTdelete_whitelist.json').json()
    if DEBUG:
        print "production whitelist= ", whitelist
    list_all_productions = []
    for i, s in enumerate(list_allDBsamples):
        s_name, s_id = s
        isProdAlreadyListed = False
        isSampleProtected = False
        for FWtag, Anatag in list_all_productions:
            if FWtag in str(s_name) and Anatag in str(s_name):
                # print "This prod is already in the list, FWtag= ", FWtag, "Anatag= ", Anatag
                isProdAlreadyListed = True
                break
        if isProdAlreadyListed:
            continue
        tags = str(s_name)
        # Get the tags:
        # First of all: check if the sample is protected or not
        for ana in whitelist:
            part = str(ana)
            for protectedtag in whitelist[ana]:
                t = str(protectedtag).split('_%s_' % part)
                if t[0] in tags and t[1] in tags:
                    if DEBUG:
                        print '\tSkipping whitelisted sample %s' % s_name
                    isSampleProtected = True
            if not isSampleProtected:
                tags = tags.replace(part, '')  # remove HHAnalyzer and the like from the name of the sample
        if isSampleProtected:
            continue
        # now extract the fw and analyzer tags
        # for analyzer, this is always the last part of the sample name so we don't have to worry about naming conventions there (fortunately)
        tags = tags.split('_')
        Anatag = tags[-1]
        tags = tags[:-1]
        # for FW the following regex should work: ((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})
        # it matches either:
        # - a framework tag (possibly with a final X): v1.2.0+7415
        # - possibly followed by a number of commits and a 'g' plus 7 to 40 characters git hash: v1.2.0+7415-79-ga5b16ff
        # - or alternatively a 7 to 40 characters git hash: f2f0a44
        tags = [x for x in tags if re.match('((v\d+.\d+.\d+\+\d+X?)(-\d+-g[0-9a-f]{7,40})?)|([0-9a-f]{7,40})', x)]
        if DEBUG:
            print tags, Anatag
        if len(tags) != 1:
            print "ERROR, there are spurious things in the sample name, please figure out what is happening:"
            print "FWtags= ", tags
            return 1
        FWtag = tags[0]
        list_all_productions.append([FWtag, Anatag])

    globalTotalSize = 0

    # If needed, open output file
    f = None
    if output:
        f = open(output, 'w')

    for i, p in enumerate(list_all_productions):
        if DEBUG and i > 0:
            break
        FWtag, Anatag = p
        extrastring = ''
        if not evaluateSize:
            extrastring = ' (evaluation of the disk size is OFF by default)'
        print("\n##### Now looking at prod FWtag: {}, Anatag: {}, listing the associated folders{}".format(FWtag, Anatag, extrastring))
        totalSize = 0
        cannotManageToDeleteThisProd = False
        sample_ids = set()
        for s_name, s_id in list_allDBsamples:
            if FWtag in str(s_name) and Anatag in str(s_name):
                result = dbstore.find(Sample, Sample.name == s_name)
                s = result.one()
                if evaluateSize:
                    totalSize += int(subprocess.check_output(["du", '-s', str(s.path)]).split()[0].decode('utf-8'))
                sample_ids.add(s.sample_id)
                if s.source_sample is not None:
                    print "WARNING, the sample", s.name, "depends on another sample, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.results.count() > 0:
                    print "WARNING, the sample", s.name, "has derived results, aborting now"
                    cannotManageToDeleteThisProd = True
                    break
                if s.derived_samples.count() > 0:
                    # Find all derived samples in the database
                    r = dbstore.find(Sample, Sample.source_sample == s.sample_id)
                    for derived_sample in r:
                        sample_ids.add(derived_sample.sample_id)
        if cannotManageToDeleteThisProd:
            continue
        totalSamples = len(sample_ids)
        globalTotalSize += totalSize
        print("\tNumber of samples: {}, total size: {}".format(totalSamples, sizeof_fmt(totalSize) if evaluateSize else 'N/A'))
        print("")
        message = "\tDo you REALLY want to DELETE this prod from SAMADhi"
        if not f:
            message += " and from disk?"
        else:
            message += "?"
        if dryRun or confirm(prompt=message, resp=False):
            for sample_id in sorted(sample_ids):
                result = dbstore.find(Sample, Sample.sample_id == sample_id)
                s = result.one()
                if s.source_sample:
                    # This sample is a merged sample, no need to remove any files, only the database entry
                    if dryRun:
                        print("\tDeleting merged sample {}.".format(s.name))
                    else:
                        dbstore.remove(s)
                else:
                    if f:
                        f.write(str(s.path).rsplit('/0000', 1)[0] + '\n')
                    if dryRun:
                        print("\tDeleting sample {}.\n\t\tPath on storage: {}".format(s.name, s.path))
                        if DEBUG:
                            print 'rm -r %s' % s.path
                            print 'rm -r %s' % str(s.path).rsplit('/0000', 1)[0]
                    else:
                        try:
                            if not f:
                                shutil.rmtree(s.path)
                                shutil.rmtree(str(s.path).rsplit('/0000', 1)[0])
                        except OSError:
                            print "Seems we have a buggy path: %s" % s.path
                            print "deleting the DB entry then moving on..."
                        dbstore.remove(s)
            dbstore.commit()

    if dryRun:
        print("")
    print("Potential freeable space: {}".format(sizeof_fmt(globalTotalSize) if evaluateSize else 'N/A'))

    if f:
        f.close()