示例#1
0
 def initsplit(self):
     '''
     Split the data into ``self.nsplit`` parts and build one model per part.

     The number of parts is taken from ``self.option``: whatever is left
     after removing the text ``'split'`` is parsed with ``int``; if nothing
     is left (plain ``'split'``) two parts are used.  A non-numeric suffix
     makes ``int`` raise ``ValueError``.

     Sets ``self.resampler``, ``self.subdata``, ``self.submodel``,
     ``self.nsplit``, ``self.part`` and ``self.model`` (a RooSimultaneous
     indexed by ``self.part``), and replaces ``self.data`` by a combined
     dataset carrying the ``part`` category.
     '''
     self.resampler = Resampler(self.data)
     self.subdata = []
     self.submodel = []

     # Parse the option suffix once; an empty suffix means the default of 2.
     suffix = self.option.replace('split', '')
     self.nsplit = int(suffix) if suffix else 2

     self.part = ROOT.RooCategory('part', 'part')
     self.model = ROOT.RooSimultaneous('model', 'model', self.part)
     for i in range(self.nsplit):
         cat = str(i)
         self.part.defineType(cat)

         dataname = 'subdata%d' % i
         subdata = self.resampler.prescale(self.nsplit, [i], dataname,
                                           dataname)
         self.subdata.append(subdata)

         # Sub-model i is built from sub-dataset i and registered under
         # category label i.
         modelname = 'submodel%d' % i
         submodel = ParameterizedKeysPdf(
             modelname, modelname, self.x, self.mode, self.effsigma,
             subdata, rho=self.rho, forcerange=True
         )
         self.submodel.append(submodel)
         self.model.addPdf(submodel, cat)

     # Import the sub-datasets under reversed category labels, so that
     # sub-dataset i ends up in category nsplit-1-i and is therefore matched
     # with a sub-model that was built from a different sub-dataset.
     # NOTE(review): for an odd nsplit the middle part keeps its own label
     # and is matched with the model built from it -- confirm this is wanted.
     args = [roo.Index(self.part)]
     cats = [str(i) for i in range(self.nsplit)]
     cats.reverse()
     for cat, subdata in zip(cats, self.subdata):
         args.append(roo.Import(cat, subdata))
     self.data = ROOT.RooDataSet('data', 'data', ROOT.RooArgSet(self.x),
                                 *args)
def main():
    '''
    Entry point: build the model from the raw data and fit every dataset.

    Steps, in order:
      1. Load the datasets with ``getdata()`` and pick the observable named
         by the module-level ``variable`` from the ``'raw'`` dataset.
      2. Define the ``'fit'`` / ``'plot'`` ranges on the observable when the
         module-level ``fit_range`` / ``plot_range`` are set.
      3. Build a ``ParameterizedKeysPdf`` from the raw data, or from a
         bootstrap replica of ``kde_training_max_events`` entries when the
         raw data has more entries than that.
      4. Fit the model to every entry of ``data`` (this includes the
         ``'raw_reduced'`` replica when it was created), plot and save each
         result.
      5. Derive the correction from the ``'raw'`` and ``'target'`` fit
         results and draw the comparison plot.

    Publishes ``data``, ``model``, ``x`` and ``fitresult`` as module globals.
    Reads ``variable``, ``fit_range``, ``plot_range``,
    ``kde_training_max_events``, ``mode`` and ``effsigma``, which are
    expected to be defined elsewhere in the module.
    '''
    global data, model, x, fitresult
    data = getdata()
    raw = data['raw']
    x = raw.get()[variable]
    if fit_range:
        x.setRange('fit', *fit_range)
    if plot_range:
        x.setRange('plot', *plot_range)
    initialize_fit_parameters(raw, x)

    old_precision = set_default_integrator_precision(1e-8, 1e-8)
    try:
        ## Reduce data for model building
        data_kde_training = raw
        if raw.numEntries() > kde_training_max_events:
            resampler = Resampler(raw)
            data['raw_reduced'] = resampler.bootstrap(
                name = raw.GetName() + '_boot',
                title = raw.GetTitle() + ' Bootstrap Replica',
                size = kde_training_max_events,
                seed = 12345,
                )
            data_kde_training = data['raw_reduced']

        model = ParameterizedKeysPdf('model', 'model', x, mode, effsigma,
                                     data_kde_training, rho=0.3,
                                     forcerange=True)

        fitresult = {}
        for name, dataset in data.items():
            dataset.SetName(name + 'Data')
            ## Reduce data for debugging
            # dataset = dataset.reduce(roo.EventRange(0, 10000))

            # Start every fit from the values of the model's own shape
            # variables rather than from the previous fit's result.
            mode.setVal(model.shapemodevar.getVal())
            effsigma.setVal(model.shapewidthvar.getVal())
            fitresult[name] = model.fitTo(dataset, roo.SumW2Error(True),
                                          roo.NumCPU(8), roo.Strategy(2),
                                          roo.Save(), roo.Range("fit"))
            make_plot(x, dataset, model)

            # Parenthesised form prints the same text under Python 2 and 3.
            print('\n==  Fitted parameters ==')
            mode.Print()
            effsigma.Print()
            save_result(fitresult[name])
    finally:
        # Put the previous integrator precision back even if a fit failed.
        set_default_integrator_precision(*old_precision)

    x_corr = get_correction(x, fitresult['raw'].floatParsFinal(),
                            fitresult['target'].floatParsFinal())
    make_comparison_plot(data['raw'], data['target'], x_corr)
示例#3
0
 def dofit(self):
     '''
     Estimate mode and effsigma with ModalInterval and bootstrap their errors.

     The full dataset and then ``self.nboot`` bootstrap replicas of it are
     each summarised by a ModalInterval at sigma level 1: the mode is the
     half-sample mode and effsigma is half the interval length.  Every
     pair of values is appended to ``self.bootdata`` through
     ``self.bootset`` (presumably the set holding mode and effsigma; it is
     defined outside this method).

     At the end the values of ``self.mode`` and ``self.effsigma`` are set
     to their means over ``self.bootdata`` and their errors to the
     corresponding ``rmsVar`` values.
     '''
     def record(sample):
         # Measure one sample and store the result as a row of bootdata.
         interval = ModalInterval(sample)
         interval.setSigmaLevel(1)
         self.mode.setVal(interval.halfSampleMode())
         self.effsigma.setVal(0.5 * interval.length())
         self.bootdata.add(self.bootset)

     # The first row comes from the original data, the rest from replicas.
     record(self.data)
     resampler = Resampler(self.data)
     for _ in range(self.nboot):
         record(resampler.bootstrap())

     parameters = (self.mode, self.effsigma)
     # Central values first, then the errors, in the same order as before.
     for par in parameters:
         par.setVal(self.bootdata.mean(par))
     for par in parameters:
         par.setError(self.bootdata.rmsVar(par).getVal())
示例#4
0
 def dofit(self):
     '''
     Estimate mode and effsigma with ModalInterval and bootstrap their errors.

     The full dataset and then ``self.nboot`` bootstrap replicas of it are
     each summarised by a ModalInterval at sigma level 1: the mode is the
     half-sample mode and effsigma is half the interval length.  Every
     pair of values is appended to ``self.bootdata`` through
     ``self.bootset`` (presumably the set holding mode and effsigma; it is
     defined outside this method).

     At the end the values of ``self.mode`` and ``self.effsigma`` are set
     to their means over ``self.bootdata`` and their errors to the
     corresponding ``rmsVar`` values.
     '''
     def record(sample):
         # Measure one sample and store the result as a row of bootdata.
         interval = ModalInterval(sample)
         interval.setSigmaLevel(1)
         self.mode.setVal(interval.halfSampleMode())
         self.effsigma.setVal(0.5 * interval.length())
         self.bootdata.add(self.bootset)

     # The first row comes from the original data, the rest from replicas.
     record(self.data)
     resampler = Resampler(self.data)
     for _ in range(self.nboot):
         record(resampler.bootstrap())

     parameters = (self.mode, self.effsigma)
     # Central values first, then the errors, in the same order as before.
     for par in parameters:
         par.setVal(self.bootdata.mean(par))
     for par in parameters:
         par.setError(self.bootdata.rmsVar(par).getVal())
示例#5
0
 def bootstrap(self, repeat=10):
     '''
     Bootstrap the width ratio and summarise its spread per fraction.

     ``repeat`` bootstrap replicas of ``self.data`` are drawn; for each one
     ``self.get_width_ratio(replica, self.fractions)`` is evaluated and its
     points are collected per x value.

     Returns a tuple ``(errors_rms, errors_mi1, errors_mi2)``:
       * ``errors_rms`` -- a TProfile (option ``'s'``) filled with all
         bootstrapped points;
       * ``errors_mi1`` / ``errors_mi2`` -- TGraphAsymmErrors whose points
         sit at the median of the bootstrapped values and whose vertical
         errors reach the ModalInterval bounds at sigma level 1 and 2.
         Horizontal errors are zero.
     '''
     nbinsx = len(self.fractions)
     xlow = 0.5 * self.fractions[0]
     xup = self.fractions[-1] + xlow
     errors_rms = ROOT.TProfile(self.name + '_errors_rms', self.title,
                                nbinsx, xlow, xup, 's')
     errors_mi1 = ROOT.TGraphAsymmErrors(nbinsx)
     errors_mi2 = ROOT.TGraphAsymmErrors(nbinsx)
     errors_mi1.SetTitle(self.title)
     errors_mi2.SetTitle(self.title)

     # Collect the bootstrapped y values for every x.
     resampler = Resampler(self.data)
     samples = {}
     for _ in range(repeat):
         replica = resampler.bootstrap()
         ratio = self.get_width_ratio(replica, self.fractions)
         for ipoint in range(ratio.GetN()):
             xval = ratio.GetX()[ipoint]
             yval = ratio.GetY()[ipoint]
             errors_rms.Fill(xval, yval)
             if xval not in samples:
                 samples[xval] = []
             samples[xval].append(yval)

     # One graph point per x, in increasing x order.
     graphs_by_level = ((1, errors_mi1), (2, errors_mi2))
     for ipoint, xval in enumerate(sorted(samples)):
         ydist = samples[xval]
         npoints = len(ydist)
         ybuffer = array.array('d', ydist)
         median = ROOT.TMath.Median(npoints, ybuffer)
         interval = ModalInterval(npoints, ybuffer)
         for level, graph in graphs_by_level:
             graph.SetPoint(ipoint, xval, median)
             interval.setSigmaLevel(level)
             below = median - interval.lowerBound()
             above = interval.upperBound() - median
             graph.SetPointError(ipoint, 0., 0., below, above)
     return errors_rms, errors_mi1, errors_mi2
示例#6
0
 def bootstrap(self, repeat=10):
     '''
     Bootstrap the width ratio and summarise its spread per fraction.

     ``repeat`` bootstrap replicas of ``self.data`` are drawn; for each one
     ``self.get_width_ratio(replica, self.fractions)`` is evaluated and its
     points are collected per x value.

     Returns a tuple ``(errors_rms, errors_mi1, errors_mi2)``:
       * ``errors_rms`` -- a TProfile (option ``'s'``) filled with all
         bootstrapped points;
       * ``errors_mi1`` / ``errors_mi2`` -- TGraphAsymmErrors whose points
         sit at the median of the bootstrapped values and whose vertical
         errors reach the ModalInterval bounds at sigma level 1 and 2.
         Horizontal errors are zero.
     '''
     nbinsx = len(self.fractions)
     xlow = 0.5 * self.fractions[0]
     xup = self.fractions[-1] + xlow
     errors_rms = ROOT.TProfile(self.name + '_errors_rms', self.title,
                                nbinsx, xlow, xup, 's')
     errors_mi1 = ROOT.TGraphAsymmErrors(nbinsx)
     errors_mi2 = ROOT.TGraphAsymmErrors(nbinsx)
     errors_mi1.SetTitle(self.title)
     errors_mi2.SetTitle(self.title)

     # Collect the bootstrapped y values for every x.
     resampler = Resampler(self.data)
     samples = {}
     for _ in range(repeat):
         replica = resampler.bootstrap()
         ratio = self.get_width_ratio(replica, self.fractions)
         for ipoint in range(ratio.GetN()):
             xval = ratio.GetX()[ipoint]
             yval = ratio.GetY()[ipoint]
             errors_rms.Fill(xval, yval)
             if xval not in samples:
                 samples[xval] = []
             samples[xval].append(yval)

     # One graph point per x, in increasing x order.
     graphs_by_level = ((1, errors_mi1), (2, errors_mi2))
     for ipoint, xval in enumerate(sorted(samples)):
         ydist = samples[xval]
         npoints = len(ydist)
         ybuffer = array.array('d', ydist)
         median = ROOT.TMath.Median(npoints, ybuffer)
         interval = ModalInterval(npoints, ybuffer)
         for level, graph in graphs_by_level:
             graph.SetPoint(ipoint, xval, median)
             interval.setSigmaLevel(level)
             below = median - interval.lowerBound()
             above = interval.upperBound() - median
             graph.SetPointError(ipoint, 0., 0., below, above)
     return errors_rms, errors_mi1, errors_mi2
示例#7
0
class KeysResponseFitter:
    '''
    Fits a one-dimensional dataset with a ParameterizedKeysPdf model.

    The dataset is imported into a private RooWorkspace, its single
    variable is renamed to ``x`` and two fit parameters, ``mode`` and
    ``effsigma``, are created with the same unit as ``x``.

    Two construction options are supported:
      * ``'monolithic'`` -- one ParameterizedKeysPdf built from all data;
      * any option containing ``'split'`` (e.g. ``'split'``, ``'split4'``)
        -- see ``initsplit``.
    '''
    #__________________________________________________________________________
    def __init__(self, data, rho=1.5, printlevel=-1, option='monolithic'):
        '''
        data       -- dataset with exactly one variable.
        rho        -- passed as ``rho`` to every ParameterizedKeysPdf.
        printlevel -- RooFit print level used by ``dofit``.
        option     -- ``'monolithic'`` or a string containing ``'split'``.

        Raises RuntimeError if ``data`` does not have exactly one variable
        or if ``option`` is not recognised.
        '''
        if data.get().getSize() != 1:
            # Call form of raise: valid in both Python 2 and Python 3.
            raise RuntimeError('Data must contain just one variable!')

        self.w = ROOT.RooWorkspace('KeysResponseFitter',
                                   'KeysResponseFitter Workspace')
        self.w.Import(data)
        # Work on the workspace's copies, renamed to fixed names.
        self.data = self.w.data(data.GetName())
        self.data.SetName('data')

        self.x = self.w.var(data.get().first().GetName())
        self.x.SetName('x')

        self.rho = rho
        self.printlevel = printlevel

        ## Define mode and effsigma.
        self.mode = self.w.factory('mode[0, -50, 50]')
        self.effsigma = self.w.factory('effsigma[1, 0.01, 50]')
        for par in (self.mode, self.effsigma):
            par.setUnit(self.x.getUnit())

        self.option = option

        if option == 'monolithic':
            self.model = ParameterizedKeysPdf('model', 'model', self.x,
                                              self.mode,
                                              self.effsigma, self.data, rho=rho,
                                              forcerange=True)
        elif 'split' in option:
            self.initsplit()
        else:
            raise RuntimeError('Unknown option: %s' % option)

        # The fit is not run here; call dofit() explicitly.
        # self.dofit()
    ## end of __init__

    #__________________________________________________________________________
    def initsplit(self):
        '''
        Split the data into ``self.nsplit`` parts and build one model per part.

        The number of parts is taken from ``self.option``: whatever is left
        after removing the text ``'split'`` is parsed with ``int``; if
        nothing is left (plain ``'split'``) two parts are used.  A
        non-numeric suffix makes ``int`` raise ``ValueError``.

        Sets ``self.resampler``, ``self.subdata``, ``self.submodel``,
        ``self.nsplit``, ``self.part`` and ``self.model`` (a RooSimultaneous
        indexed by ``self.part``), and replaces ``self.data`` by a combined
        dataset carrying the ``part`` category.
        '''
        self.resampler = Resampler(self.data)
        self.subdata = []
        self.submodel = []

        # Parse the option suffix once; an empty suffix means the default 2.
        suffix = self.option.replace('split', '')
        self.nsplit = int(suffix) if suffix else 2

        self.part = ROOT.RooCategory('part', 'part')
        self.model = ROOT.RooSimultaneous('model', 'model', self.part)
        for i in range(self.nsplit):
            cat = str(i)
            self.part.defineType(cat)

            dataname = 'subdata%d' % i
            subdata = self.resampler.prescale(self.nsplit, [i], dataname,
                                              dataname)
            self.subdata.append(subdata)

            # Sub-model i is built from sub-dataset i and registered under
            # category label i.
            modelname = 'submodel%d' % i
            submodel = ParameterizedKeysPdf(
                modelname, modelname, self.x, self.mode, self.effsigma,
                subdata, rho=self.rho, forcerange=True
            )
            self.submodel.append(submodel)
            self.model.addPdf(submodel, cat)

        # Import the sub-datasets under reversed category labels, so that
        # sub-dataset i ends up in category nsplit-1-i and is therefore
        # matched with a sub-model that was built from a different
        # sub-dataset.
        # NOTE(review): for an odd nsplit the middle part keeps its own
        # label and is matched with the model built from it -- confirm this
        # is wanted.
        args = [roo.Index(self.part)]
        cats = [str(i) for i in range(self.nsplit)]
        cats.reverse()
        for cat, subdata in zip(cats, self.subdata):
            args.append(roo.Import(cat, subdata))
        self.data = ROOT.RooDataSet('data', 'data', ROOT.RooArgSet(self.x),
                                    *args)
    ## end of initsplit()

    #__________________________________________________________________________
    def dofit(self):
        '''
        Fit ``self.model`` to ``self.data``.

        Uses the print level given to the constructor; previously the
        ``printlevel`` argument was stored but a fixed level of 1 was used.
        '''
        self.model.fitTo(self.data, roo.PrintLevel(self.printlevel),
                         roo.Verbose(False))
    ## end of doFit

    #__________________________________________________________________________
    def makeplot(self):
        '''
        Draw the data, the model and the model parameters on a new canvas.

        The canvas and the frame are kept as ``self.canvas`` and
        ``self.plot``.
        '''
        self.canvas = canvases.next()
        self.plot = self.x.frame()
        self.data.plotOn(self.plot)
        self.model.plotOn(self.plot)
        self.model.paramOn(self.plot)
        self.plot.Draw()
        self.canvas.Update()
示例#8
0
def test():
    '''
    Tests the RooRhoKeysPdf class.

    For each toy (currently a single one, with the random seed fixed to 2):
      1. Generate 1000 events from a Breit-Wigner in ``x``.
      2. Build a RooRhoKeysPdf from them and overlay it on the data and the
         true model for rho = 1 (red), 2 (green) and 3 (black).
      3. Split the data in two with ``Resampler.prescale`` and build one
         RooRhoKeysPdf per half.
      4. Scan rho from 0.5 in 50 steps of 0.05, summing the NLL of each
         half's pdf evaluated on the other half.
      5. Fit a parabola around the scan minimum (up to 5 points either
         side) and fill its vertex and the width at which the parabola
         rises by 0.5 into ``hrhoval`` and ``hrhoerr``.

    Finally prints the half-sample mode of the last toy's ``data0``.

    Publishes ``gnlls``, ``hrhoval``, ``hrhoerr``, ``w`` and ``mi`` as
    module globals so they stay alive and can be inspected interactively.
    '''
    import FWLite.Tools.canvases as canvases
    import FWLite.Tools.cmsstyle as cmsstyle
    global gnlls, hrhoval, hrhoerr, w, mi

    ROOT.RooRandom.randomGenerator().SetSeed(2)
    hrhoval = ROOT.TH1F('hrhoval', 'hrhoval', 100, 0, 5)
    hrhoerr = ROOT.TH1F('hrhoerr', 'hrhoerr', 100, 0, 1)
    gnlls = []
    for itoy in range(1):
        w = ROOT.RooWorkspace('w', 'w')
        # model = w.factory('Gaussian::model(x[-50, 50], mean[0], sigma[1])')
        model = w.factory('BreitWigner::model(x[-5, 5], mean[0], sigma[1])')
        x = w.var('x')
        oset = ROOT.RooArgSet(x)
        data = model.generate(oset, 1000)
        w.Import(data)
        # rho = w.factory('rho[1, 0, 100]')
        # testpdf = RooRhoKeysPdf('testpdf', 'testpdf', x, rho, data)
        # w.Import(testpdf)
        # 'modelData' is the name under which the generated data sits in w.
        testpdf = w.factory('RooRhoKeysPdf::testpdf(x, rho[1, 0, 100], modelData)')
        rho = w.var('rho')
        plot = x.frame()
        data.plotOn(plot)
        model.plotOn(plot)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kRed))

        # The dataset is reloaded after every change of rho before the pdf
        # is drawn or evaluated again.
        rho.setVal(2)
        testpdf.LoadDataSet(data)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kGreen))

        rho.setVal(3)
        testpdf.LoadDataSet(data)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kBlack))

        canvases.next('RooRhoKeysPdf_Test%d' % itoy)
        plot.Draw()
        canvases.update()

        resampler = Resampler(data)
        data0 = resampler.prescale(2, [0], 'data0')
        data1 = resampler.prescale(2, [1], 'data1')
        w.Import(data0)
        w.Import(data1)
        testpdf0 = w.factory('RooRhoKeysPdf::testpdf0(x, rho, data0)')
        testpdf1 = w.factory('RooRhoKeysPdf::testpdf1(x, rho, data1)')

        gnll = ROOT.TGraph()
        for rhoval in [0.5 + 0.05 * i for i in range(50)]:
            rho.setVal(rhoval)
            testpdf0.LoadDataSet(data0)
            testpdf1.LoadDataSet(data1)
            # Each pdf is evaluated on the half it was NOT built from.
            nll = 0
            nll += testpdf0.createNLL(data1).getVal()
            nll += testpdf1.createNLL(data0).getVal()
            # print rhoval, nll
            gnll.SetPoint(gnll.GetN(), rhoval, nll)

        # Parabola fit restricted to a window around the scan minimum.
        locmin = ROOT.TMath.LocMin(gnll.GetN(), gnll.GetY())
        xmin = gnll.GetX()[max(locmin-5, 0)]
        xmax = gnll.GetX()[min(locmin+5, gnll.GetN()-1)]
        fres = gnll.Fit('pol2', 's', '', xmin, xmax)
        p1 = fres.Get().GetParams()[1]
        p2 = fres.Get().GetParams()[2]
        # Vertex of p0 + p1*x + p2*x^2, and the distance from it at which
        # the parabola has risen by 0.5.
        rhoval =  - 0.5 * p1 / p2
        rhoerr = 1/ROOT.TMath.Sqrt(2 * p2)
        hrhoval.Fill(rhoval)
        hrhoerr.Fill(rhoerr)

        canvases.next('gnll%d' % itoy)
        gnll.Draw('ap')
        gnll.GetXaxis().SetTitle('#rho')
        gnll.GetYaxis().SetTitle('- log L')
        gnlls.append(gnll)
    canvases.next('rhoerr')
    hrhoerr.Draw()
    canvases.next('rhoval')
    hrhoval.Draw()
    canvases.update()

    from FWLite.Tools.modalinterval import ModalInterval
    mi = ModalInterval(w.data('data0'))
    # Parenthesised form prints the same text under Python 2 and 3.
    print(mi.halfSampleMode())