def deepCheckWeight(file):
    """Check that the 'weight' branch of a root file yields a finite value.

    Some root files only contain the branches kept from the beginning but not
    those from the filler, e.g. the weight branch. Those files are identified
    here, as weight==nan and thus the yield is nan.

    file -- file path(s) accepted by Sample.fromFiles (dpm file paths)

    Returns 0 when the file has no 'weight' branch at all, otherwise a bool:
    True when the drawn yield is finite, False when it is nan.
    """
    from math import isnan
    from RootTools.core.Sample import Sample  # convert dpm file paths

    sample = Sample.fromFiles(name="sample", treeName="Events", files=file)

    # Bail out early if the 'weight' branch does not exist at all; a generator
    # avoids materializing the full branch-name list just for a membership test.
    branches = sample.chain.GetListOfBranches()
    if 'weight' not in (branches.At(i).GetName() for i in range(branches.GetSize())):
        return 0

    val = sample.getYieldFromDraw(weightString="weight")['val']
    del sample  # release the underlying chain before returning
    return not isnan(val)
def __init__(self, heppy_samples, dpm_directories, cache_file, multithreading=True):
    """Build (or load from cache) a map heppy_sample -> RootTools Sample.

    heppy_samples   -- iterable of heppy sample objects; each is read for
                       .name, .dataset (a '/<PD>/<era>/...' path) and .isData
    dpm_directories -- dpm paths that are walked to find cmg directories
    cache_file      -- pickle file the finished map is loaded from / stored to
    multithreading  -- passed through to walker.combine_cmg_directories

    Side effects: sets self.cmg_directories and self.sample_map, renews the
    grid proxy, and writes the pickle cache when the map is non-empty.

    NOTE(review): 'overwrite' and 'maxN' are read here but are neither
    parameters nor locals -- presumably module-level globals; confirm they
    are defined in this file. 'file(...)' is the Python-2 builtin open.
    """
    # Read cache file, if exists
    if os.path.exists(cache_file) and not overwrite:
        self.sample_map = pickle.load(file(cache_file))
        logger.info("Loaded cache file %s" % cache_file)
    else:
        logger.info("Cache file %s not found. Recreate map.", cache_file)
        logger.info("Check proxy.")
        # Proxy certificate
        from RootTools.core.helpers import renew_proxy
        # Make proxy in afs to allow batch jobs to run
        proxy_path = os.path.expandvars('$HOME/private/.proxy')
        proxy = renew_proxy(proxy_path)
        logger.info("Using proxy %s" % proxy)

        # Read dpm directories
        self.cmg_directories = {}
        for data_path in dpm_directories:
            logger.info("Walking dpm directory %s", data_path)
            walker = walk_dpm(data_path)
            self.cmg_directories[data_path] = walker.walk_dpm_cmgdirectories('.', maxN=maxN)
            # NOTE(review): 'walker' is deliberately kept alive (the del below
            # is commented out) -- the last walker from this loop is reused
            # further down in combine_cmg_directories.
            #del walker

        # For each heppy sample, collect the dpm directories whose path
        # contains both its primary dataset and its era.
        logger.info("Now mapping directories to heppy samples")
        for heppy_sample in heppy_samples:
            heppy_sample.candidate_directories = []
            # dataset looks like '/<primary dataset>/<era>/...'
            pd, era = heppy_sample.dataset.split('/')[1:3]
            for data_path in self.cmg_directories.keys():
                for dpm_directory in self.cmg_directories[data_path].keys():
                    if not ('/%s/' % pd in dpm_directory):
                        logger.debug("/%s/ not in dpm_directory %s", pd, dpm_directory)
                        continue
                    if not ('/' + era in dpm_directory):
                        logger.debug("/%s not in dpm_directory %s", era, dpm_directory)
                        continue
                    heppy_sample.candidate_directories.append([data_path, dpm_directory])
                    logger.debug("heppy sample %s in %s", heppy_sample.name, dpm_directory)
            logger.info("Found heppy sample %s in %i directories.", heppy_sample.name, len(heppy_sample.candidate_directories))

        # Merge
        from RootTools.core.Sample import Sample
        logger.info("Now making new sample map from %i directories and for %i heppy samples to be stored in %s", len(dpm_directories), len(heppy_samples), cache_file)
        self.sample_map = {}
        for heppy_sample in heppy_samples:
            if len(heppy_sample.candidate_directories) == 0:
                logger.info("No directory found for %s", heppy_sample.name)
            else:
                # Combine all candidate cmg directories into one file list
                # with a common normalization (reuses the last 'walker').
                normalization, files = walker.combine_cmg_directories(
                    cmg_directories={dpm_directory: self.cmg_directories[data_path][dpm_directory] for data_path, dpm_directory in heppy_sample.candidate_directories},
                    multithreading=multithreading,
                )
                logger.info("Sample %s: Found a total of %i files with normalization %3.2f", heppy_sample.name, len(files), normalization)
                # Drop files that cannot be opened; checkRootFile raising
                # IOError marks the file as corrupted and it is skipped.
                tmp_files = []
                for f in files:
                    isGoodFile = False
                    try:
                        isGoodFile = checkRootFile("root://hephyse.oeaw.ac.at/" + os.path.join(f))
                        logger.debug("File %s got added", f)
                    except IOError:
                        logger.info("File %s is corrupted, skipping", f)
                    if isGoodFile:
                        tmp_files.append(f)
                self.sample_map[heppy_sample] = Sample.fromFiles(
                    heppy_sample.name,
                    files=['root://hephyse.oeaw.ac.at/' + f for f in tmp_files],
                    normalization=normalization,
                    treeName='tree',
                    isData=heppy_sample.isData,
                    maxN=maxN)
                logger.info("Combined %i directories for sample %s to a total of %i files with normalization %3.2f", len(heppy_sample.candidate_directories), heppy_sample.name, len(files), normalization)

        # Store cache file (only when the map is non-empty)
        dir_name = os.path.dirname(cache_file)
        if len(self.sample_map.keys()) > 0:
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            pickle.dump(self.sample_map, file(cache_file, 'w'))
            logger.info("Created MC sample cache %s", cache_file)
        else:
            logger.info("Skipping to write %s because map is empty.", cache_file)