def join(samplelist, *searchterms, **kwargs):
    """Join the samples of a sample list that match a set of search terms
    into one merged sample.

    E.g. samplelist = join(samplelist,'DY','M-50',name='DY_highmass').

    Args:
        samplelist: a SampleSet, or a mutable list of samples. It is modified
            in place and also returned.
        *searchterms: terms passed to each sample's match(..., incl=True);
            the first term is the default name of the merged sample.
        **kwargs:
            name:  name of the new merged sample (default: searchterms[0])
            title: title of the new merged sample (default: None)
            color: color of the new merged sample (default: None)
            The verbosity level is extracted with LOG.getverbosity(kwargs).

    Returns:
        The same samplelist object. If fewer than two samples match, a
        warning is logged and the list is returned unchanged.
    """
    verbosity = LOG.getverbosity(kwargs)
    name = kwargs.get('name', searchterms[0])  # name of new merged sample
    title = kwargs.get('title', None)  # title of new merged sample
    color = kwargs.get('color', None)  # color of new merged sample
    LOG.verbose("join: merging '%s' into %r" % ("', '".join(searchterms), name), verbosity, level=1)

    # GET samples containing names and searchterm
    mergelist = [s for s in samplelist if s.match(*searchterms, incl=True)]
    if len(mergelist) <= 1:
        LOG.warning("Could not merge %r: fewer than two %r samples (%d)" % (name, name, len(mergelist)))
        return samplelist
    padding = max(len(s.name) for s in mergelist) + 2  # number of spaces

    # ADD samples with name and searchterm
    mergedsample = MergedSample(name, title, color=color)
    for sample in mergelist:
        samplestr = repr(sample.name).ljust(padding)
        LOG.verbose(" adding %s to %r (%s)" % (samplestr, name, sample.fnameshort), verbosity, level=2)
        mergedsample.add(sample)

    # REPLACE matched samples with merged sample in samplelist, preserving the order
    if mergedsample.samples and samplelist:
        if isinstance(samplelist, SampleSet):
            samplelist.replace(mergedsample)
        else:
            # Remember the smallest index at which a matched sample was found.
            # Removals only shift elements located after the removed one, so
            # this minimum is the slot of the first matched sample, which is
            # where the merged sample has to go to preserve the order.
            firstindex = len(samplelist)
            for sample in mergedsample.samples:
                firstindex = min(firstindex, samplelist.index(sample))
                samplelist.remove(sample)
            samplelist.insert(firstindex, mergedsample)
    return samplelist
def getsampleset(datasample, expsamples, sigsamples=None, **kwargs):
    """Create a sample set from a table of data and MC samples.

    Args:
        datasample: data row, one of
              (group, name)
              (group, name, title)
              (group, name, kwargs_dict)
              (group, name, title, kwargs_dict)
            or a dict mapping a channel to such a row (used when 'channel'
            is given). The name may contain glob wildcards ('?', '*'); if
            several files match, they are combined into one MergedSample.
        expsamples: iterable of MC rows, each
              (group, name, title, xsec)
              (group, name, title, xsec, kwargs_dict)
            The table itself is not modified.
        sigsamples: signal samples, passed on to SampleSet as is
            (default: a new empty list).
        **kwargs:
            channel:    channel name, fills $CHANNEL (default: "")
            era:        era name, fills $ERA (default: "")
            file:       file name pattern
                        (default: "$PICODIR/$SAMPLE_$CHANNEL$TAG.root")
            weight:     common weight for MC samples (default: "")
            dataweight: weight for data samples (default: "")
            url:        XRootD url, prepended to the file pattern
            tag:        extra tag for file name, fills $TAG (default: "")
            'weight', 'dataweight', 'url' and 'tag' are consumed here; all
            other keywords are forwarded to MC, Data and SampleSet.

    Returns:
        SampleSet built from the data sample, the MC samples and sigsamples.

    Errors are reported through LOG.throw(IOError, ...) when a row has an
    unrecognized layout or when no data file matches the pattern.
    """
    if sigsamples is None:
        sigsamples = []  # fresh list per call instead of a shared default
    channel = kwargs.get('channel', "")
    era = kwargs.get('era', "")
    fpattern = kwargs.get('file', None)  # file name pattern, e.g. $PICODIR/$SAMPLE_$CHANNEL$TAG.root
    weight = kwargs.pop('weight', "")  # common weight for MC samples
    dataweight = kwargs.pop('dataweight', "")  # weight for data samples
    url = kwargs.pop('url', "")  # XRootD url
    tag = kwargs.pop('tag', "")  # extra tag for file name

    if not fpattern:
        fpattern = "$PICODIR/$SAMPLE_$CHANNEL$TAG.root"
    if '$PICODIR' in fpattern:
        # Imported lazily: only needed when the pattern refers to $PICODIR
        import TauFW.PicoProducer.tools.config as GLOB
        CONFIG = GLOB.getconfig(verb=0)
        picodir = CONFIG['picodir']
        fpattern = repkey(fpattern, PICODIR=picodir)
    if url:
        # The url is a prefix of the file path, not a suffix
        fpattern = "%s/%s" % (url, fpattern)
    LOG.verb("getsampleset: fpattern=%r" % (fpattern), level=1)

    # MC (EXPECTED)
    # Build a new list so that the caller's table stays reusable.
    mcsamples = []
    for info in expsamples:
        expkwargs = kwargs.copy()
        expkwargs['weight'] = weight
        if len(info) == 4:
            group, name, title, xsec = info
        elif len(info) == 5 and isinstance(info[4], dict):
            group, name, title, xsec, newkwargs = info
            expkwargs.update(newkwargs)
        else:
            # (info,) so that a tuple row is formatted as a single value
            LOG.throw(IOError, "Did not recognize mc row %s" % (info,))
        fname = repkey(fpattern, ERA=era, GROUP=group, SAMPLE=name, CHANNEL=channel, TAG=tag)
        mcsamples.append(MC(name, title, fname, xsec, **expkwargs))

    # DATA (OBSERVED)
    title = 'Observed'
    datakwargs = kwargs.copy()
    datakwargs['weight'] = dataweight
    if isinstance(datasample, dict) and channel:
        datasample = datasample[channel]
    if len(datasample) == 2:
        group, name = datasample
    elif len(datasample) == 3:
        group, name = datasample[:2]
        if isinstance(datasample[2], dict):  # dictionary
            datakwargs.update(datasample[2])
        else:  # string
            title = datasample[2]
    elif len(datasample) == 4 and isinstance(datasample[3], dict):
        group, name, title, newkwargs = datasample
        datakwargs.update(newkwargs)
    else:
        # (datasample,) so that a tuple row is formatted as a single value
        LOG.throw(IOError, "Did not recognize data row %s" % (datasample,))
    fpattern = repkey(fpattern, ERA=era, GROUP=group, SAMPLE=name, CHANNEL=channel, TAG=tag)
    fnames = glob.glob(fpattern)
    if len(fnames) == 1:
        # Forward the data keywords (weight, row options), like the
        # multi-file branch below does
        datasample = Data(name, title, fnames[0], **datakwargs)
    elif len(fnames) > 1:
        # Translate the glob wildcards in the sample name into a regular
        # expression to recover the individual dataset name from each file
        namerexp = re.compile(name.replace('?', '.').replace('*', '.*'))
        name = name.replace('?', '').replace('*', '')
        datasample = MergedSample(name, 'Observed', data=True)
        for fname in fnames:
            setname = namerexp.findall(fname)[0]
            datasample.add(Data(setname, 'Observed', fname, **datakwargs))
    else:
        LOG.throw(IOError, "Did not find data file %r" % (fpattern))

    # SAMPLE SET
    sampleset = SampleSet(datasample, mcsamples, sigsamples, **kwargs)
    return sampleset