Пример #1
0
def join(samplelist, *searchterms, **kwargs):
    """Merge the samples of a list that match a set of search terms into one sample.

    E.g. samplelist = join(samplelist,'DY','M-50',name='DY_highmass').

    Args:
        samplelist: list of samples or a SampleSet; it is modified in place.
        *searchterms: terms handed to each sample's ``match(..., incl=True)``
            to decide whether the sample takes part in the merge.
        **kwargs: ``name`` (defaults to the first search term), ``title`` and
            ``color`` of the merged sample; kwargs are also passed to
            ``LOG.getverbosity`` to determine the verbosity level.

    Returns:
        The same ``samplelist`` object. If fewer than two samples match, a
        warning is logged and the list is returned unchanged. Otherwise the
        matched samples are replaced by a single MergedSample that is placed
        at the position of the first matched sample.
    """
    verbosity = LOG.getverbosity(kwargs)
    name = kwargs.get('name', searchterms[0])  # name of new merged sample
    title = kwargs.get('title', None)  # title of new merged sample
    color = kwargs.get('color', None)  # color of new merged sample
    LOG.verbose("join: merging '%s' into %r" %
                ("', '".join(searchterms), name),
                verbosity,
                level=1)

    # GET samples containing names and searchterm
    mergelist = [s for s in samplelist if s.match(*searchterms, incl=True)]
    if len(mergelist) <= 1:
        LOG.warning("Could not merge %r: fewer than two %r samples (%d)" %
                    (name, name, len(mergelist)))
        return samplelist
    # column width used to align the sample names in the verbose printout
    padding = max(len(s.name) for s in mergelist) + 2

    # ADD samples with name and searchterm
    mergedsample = MergedSample(name, title, color=color)
    for sample in mergelist:
        samplestr = repr(sample.name).ljust(padding)
        LOG.verbose("  adding %s to %r (%s)" %
                    (samplestr, name, sample.fnameshort),
                    verbosity,
                    level=2)
        mergedsample.add(sample)

    # REPLACE matched samples with merged sample in samplelist, preserving the order
    if mergedsample.samples and samplelist:
        if isinstance(samplelist, SampleSet):
            samplelist.replace(mergedsample)
        else:
            # Track the smallest index seen while removing: that is the
            # original position of the first matched sample, because nothing
            # in front of it is ever removed. The index of the sample removed
            # last is already shifted by the earlier removals, so it must not
            # be used as the insertion point.
            firstindex = len(samplelist)
            for sample in mergedsample.samples:
                index = samplelist.index(sample)
                if index < firstindex:
                    firstindex = index
                samplelist.remove(sample)
            samplelist.insert(firstindex, mergedsample)
    return samplelist
Пример #2
0
def getsampleset(datasample, expsamples, sigsamples=None, **kwargs):
    """Create sample set from a table of data and MC samples.

    Args:
        datasample: description of the observed data, one of
            ``(group, name)``, ``(group, name, title)``,
            ``(group, name, kwargsdict)`` or ``(group, name, title, kwargsdict)``.
            May also be a dict of such rows keyed by channel; the row for
            ``channel`` is then used if a channel is given.
        expsamples: rows describing the expected (MC) samples, each either
            ``(group, name, title, xsec)`` or
            ``(group, name, title, xsec, kwargsdict)``. The table is only
            read, so the same table can be reused for several calls.
        sigsamples: signal samples, passed on to SampleSet unchanged
            (default: a new empty list).
        **kwargs: recognized keys are ``channel``, ``era``, ``file`` (file name
            pattern, e.g. $PICODIR/$SAMPLE_$CHANNEL$TAG.root), ``weight``
            (common weight for MC samples), ``dataweight`` (weight for data
            samples), ``url`` (XRootD url put in front of the file pattern)
            and ``tag`` (extra tag for the file name). ``weight``,
            ``dataweight``, ``url`` and ``tag`` are consumed here; everything
            else is forwarded to the MC, Data and SampleSet constructors.

    Returns:
        The SampleSet built from the data sample, the MC samples and the
        signal samples.

    Raises:
        Through ``LOG.throw(IOError, ...)``: if a row has an unrecognized
        layout or no data file matches the file pattern.
    """
    channel = kwargs.get('channel', "")
    era = kwargs.get('era', "")
    fpattern = kwargs.get(
        'file',
        None)  # file name pattern, e.g. $PICODIR/$SAMPLE_$CHANNEL$TAG.root
    weight = kwargs.pop('weight', "")  # common weight for MC samples
    dataweight = kwargs.pop('dataweight', "")  # weight for data samples
    url = kwargs.pop('url', "")  # XRootD url
    tag = kwargs.pop('tag', "")  # extra tag for file name
    if sigsamples is None:
        # fresh list per call instead of a default list shared by all calls
        sigsamples = []

    if not fpattern:
        fpattern = "$PICODIR/$SAMPLE_$CHANNEL$TAG.root"
    if '$PICODIR' in fpattern:
        # imported lazily: only needed when the pattern refers to $PICODIR
        import TauFW.PicoProducer.tools.config as GLOB
        CONFIG = GLOB.getconfig(verb=0)
        picodir = CONFIG['picodir']
        fpattern = repkey(fpattern, PICODIR=picodir)
    if url:
        # the url is a prefix of the file path, not a suffix
        fpattern = "%s/%s" % (url, fpattern)
    LOG.verb("getsampleset: fpattern=%r" % (fpattern), level=1)

    # MC (EXPECTED)
    # Collect the MC objects in a new list so the caller's table stays intact
    # and can be passed again, e.g. for another channel.
    mcsamples = []
    for info in expsamples:
        expkwargs = kwargs.copy()
        expkwargs['weight'] = weight
        if len(info) == 4:
            group, name, title, xsec = info
        elif len(info) == 5 and isinstance(info[4], dict):
            group, name, title, xsec, newkwargs = info
            expkwargs.update(newkwargs)  # row-specific settings take precedence
        else:
            LOG.throw(IOError, "Did not recognize mc row %s" % (info))
        fname = repkey(fpattern,
                       ERA=era,
                       GROUP=group,
                       SAMPLE=name,
                       CHANNEL=channel,
                       TAG=tag)
        mcsamples.append(MC(name, title, fname, xsec, **expkwargs))

    # DATA (OBSERVED)
    title = 'Observed'
    datakwargs = kwargs.copy()
    datakwargs['weight'] = dataweight
    if isinstance(datasample, dict) and channel:
        datasample = datasample[channel]
    if len(datasample) == 2:
        group, name = datasample
    elif len(datasample) == 3:
        group, name = datasample[:2]
        if isinstance(datasample[2], dict):  # dictionary
            datakwargs.update(datasample[2])
        else:  # string
            title = datasample[2]
    elif len(datasample) == 4 and isinstance(datasample[3], dict):
        group, name, title, newkwargs = datasample
        datakwargs.update(newkwargs)
    else:
        LOG.throw(IOError, "Did not recognize data row %s" % (datasample))
    fpattern = repkey(fpattern,
                      ERA=era,
                      GROUP=group,
                      SAMPLE=name,
                      CHANNEL=channel,
                      TAG=tag)
    fnames = glob.glob(fpattern)
    if len(fnames) == 1:
        # pass the data settings (weight, row kwargs) here as well, just like
        # for the individual files of a merged data sample below
        datasample = Data(name, title, fnames[0], **datakwargs)
    elif len(fnames) > 1:
        # the sample name contains glob wildcards: turn it into a regular
        # expression to recover the name of each data set from its file name
        namerexp = re.compile(name.replace('?', '.').replace('*', '.*'))
        name = name.replace('?', '').replace('*', '')
        datasample = MergedSample(name, title, data=True)
        for fname in fnames:
            setname = namerexp.findall(fname)[0]
            datasample.add(Data(setname, title, fname, **datakwargs))
    else:
        LOG.throw(IOError, "Did not find data file %r" % (fpattern))

    # SAMPLE SET
    sampleset = SampleSet(datasample, mcsamples, sigsamples, **kwargs)
    return sampleset