def initsplit(self):
    '''
    Splits the dataset into `nsplit` parts and builds a model for each part.

    The number of parts is taken from `self.option`: whatever remains after
    removing the substring 'split' is parsed as an integer; if nothing
    remains, two parts are used.

    Sets `self.resampler`, `self.subdata`, `self.submodel`, `self.nsplit`,
    `self.part` and `self.model`, and replaces `self.data` by a combined
    dataset indexed by `self.part` in which the sub-datasets are attached
    to the categories in reversed order.

    Raises ValueError if the remainder of the option is not an integer.
    '''
    self.resampler = Resampler(self.data)
    self.subdata = []
    self.submodel = []

    # Parse the option suffix only once: 'split' -> 2, 'split<N>' -> N.
    suffix = self.option.replace('split', '')
    self.nsplit = int(suffix) if suffix else 2

    self.part = ROOT.RooCategory('part', 'part')
    self.model = ROOT.RooSimultaneous('model', 'model', self.part)

    for i in range(self.nsplit):
        cat = str(i)
        self.part.defineType(cat)

        # Sub-dataset i is obtained from the resampler's prescale call.
        name = 'subdata%d' % i
        self.subdata.append(
            self.resampler.prescale(self.nsplit, [i], name, name)
        )

        # Submodel i is built from sub-dataset i and registered in the
        # simultaneous model under category i.
        name = 'submodel%d' % i
        self.submodel.append(
            ParameterizedKeysPdf(
                name, name, self.x, self.mode, self.effsigma,
                self.subdata[i], rho=self.rho, forcerange=True
            )
        )
        self.model.addPdf(self.submodel[i], cat)

    # Now shuffle the subdata and the categories: sub-dataset i is imported
    # under category nsplit-1-i, so the submodel of category i (built from
    # sub-dataset i) is paired with sub-dataset nsplit-1-i.
    # NOTE(review): for odd nsplit the middle category is paired with its
    # own sub-dataset -- confirm that this is intended.
    args = [roo.Index(self.part)]
    cats = [str(i) for i in range(self.nsplit)]
    cats.reverse()
    for cat, subdata in zip(cats, self.subdata):
        args.append(roo.Import(cat, subdata))
    self.data = ROOT.RooDataSet('data', 'data', ROOT.RooArgSet(self.x), *args)
def dofit(self):
    '''
    Estimates the mode and the effective sigma together with their
    bootstrap uncertainties.

    One estimate is made on `self.data` and one on each of `self.nboot`
    bootstrap replicas of it. After every estimate `self.bootset` is added
    to `self.bootdata` (presumably `self.bootset` holds `self.mode` and
    `self.effsigma` -- confirm against the constructor). Finally, the
    values of `self.mode` and `self.effsigma` are set to their means over
    `self.bootdata` and their errors to the value of
    `self.bootdata.rmsVar(...)`.
    '''
    def record_estimate(sample):
        # Half-sample mode and half the length of the sigma-level-1 modal
        # interval of `sample`, stored as one entry of the bootstrap record.
        interval = ModalInterval(sample)
        interval.setSigmaLevel(1)
        self.mode.setVal(interval.halfSampleMode())
        self.effsigma.setVal(0.5 * interval.length())
        self.bootdata.add(self.bootset)

    # The estimate on the original data is the first entry.
    record_estimate(self.data)

    replicator = Resampler(self.data)
    for _ in range(self.nboot):
        record_estimate(replicator.bootstrap())

    # Central values first, then the errors, in the same order for both
    # parameters.
    fitted = (self.mode, self.effsigma)
    for par in fitted:
        par.setVal(self.bootdata.mean(par))
    for par in fitted:
        par.setError(self.bootdata.rmsVar(par).getVal())
def bootstrap(self, repeat=10):
    '''
    Bootstraps the width ratio and summarizes its spread per fraction.

    `repeat` bootstrap replicas of `self.data` are drawn and
    `self.get_width_ratio(replica, self.fractions)` is evaluated on each.

    Returns a tuple of three objects:
      * a TProfile (created with the 's' option) filled with every (x, y)
        point of every replica graph,
      * a TGraphAsymmErrors whose points are the medians of the y values
        collected per x, with errors given by the distance of the median
        to the bounds of the sigma-level-1 ModalInterval,
      * the same with the sigma-level-2 ModalInterval.
    '''
    nbins = len(self.fractions)
    # The axis starts half of the first fraction below it and ends the same
    # distance above the last fraction.
    lower_edge = 0.5 * self.fractions[0]
    upper_edge = self.fractions[-1] + lower_edge
    profile = ROOT.TProfile(self.name + '_errors_rms', self.title,
                            nbins, lower_edge, upper_edge, 's')
    band1 = ROOT.TGraphAsymmErrors(nbins)
    band2 = ROOT.TGraphAsymmErrors(nbins)
    band1.SetTitle(self.title)
    band2.SetTitle(self.title)

    resampler = Resampler(self.data)
    # Maps each x value to the list of y values seen over all replicas.
    samples = {}
    for _ in range(repeat):
        resampled = resampler.bootstrap()
        boot = self.get_width_ratio(resampled, self.fractions)
        for ipoint in range(boot.GetN()):
            x = boot.GetX()[ipoint]
            y = boot.GetY()[ipoint]
            # Disabled alternative: difference to the nominal width ratio.
            #y = boot.GetY()[ipoint] - self.width_ratio.GetY()[ipoint]
            profile.Fill(x, y)
            if x not in samples:
                samples[x] = []
            samples[x].append(y)

    for ipoint, (x, values) in enumerate(sorted(samples.items())):
        count = len(values)
        buf = array.array('d', values)
        median = ROOT.TMath.Median(count, buf)
        band1.SetPoint(ipoint, x, median)
        band2.SetPoint(ipoint, x, median)

        # No uncertainty along x; asymmetric y errors from the modal
        # interval at sigma levels 1 and 2.
        interval = ModalInterval(count, buf)
        for nsigma, band in ((1, band1), (2, band2)):
            interval.setSigmaLevel(nsigma)
            err_low = median - interval.lowerBound()
            err_high = interval.upperBound() - median
            band.SetPointError(ipoint, 0., 0., err_low, err_high)

    return profile, band1, band2
def test():
    '''
    Tests the RooRhoKeysPdf class.

    For each toy (currently a single one):
      1. generates 1000 events from a Breit-Wigner model in a fresh
         workspace and overlays the RooRhoKeysPdf for rho = 1, 2 and 3 on
         the data and the model,
      2. splits the data in two with Resampler.prescale, builds one keys
         PDF per half and scans rho from 0.5 to 2.95 in steps of 0.05,
         summing the NLL of each PDF evaluated on the other half,
      3. fits a parabola around the scan minimum and fills the histograms
         `hrhoval` and `hrhoerr` with the vertex position and with
         1/sqrt(2 * p2), where p2 is the quadratic coefficient.

    The objects `gnlls`, `hrhoval`, `hrhoerr`, `w` and `mi` are stored as
    module globals so that they stay accessible after the function returns
    (e.g. in an interactive session). Finally, the half-sample mode of the
    'data0' dataset of the last workspace is printed.
    '''
    global gnlls, hrhoval, hrhoerr, w, mi

    import FWLite.Tools.canvases as canvases
    import FWLite.Tools.cmsstyle as cmsstyle

    # Fixed seed to make the toy data reproducible.
    ROOT.RooRandom.randomGenerator().SetSeed(2)

    hrhoval = ROOT.TH1F('hrhoval', 'hrhoval', 100, 0, 5)
    hrhoerr = ROOT.TH1F('hrhoerr', 'hrhoerr', 100, 0, 1)
    gnlls = []

    for itoy in range(1):
        w = ROOT.RooWorkspace('w', 'w')
        # model = w.factory('Gaussian::model(x[-50, 50], mean[0], sigma[1])')
        model = w.factory('BreitWigner::model(x[-5, 5], mean[0], sigma[1])')
        x = w.var('x')
        oset = ROOT.RooArgSet(x)
        data = model.generate(oset, 1000)
        w.Import(data)

        # rho = w.factory('rho[1, 0, 100]')
        # testpdf = RooRhoKeysPdf('testpdf', 'testpdf', x, rho, data)
        # w.Import(testpdf)
        testpdf = w.factory(
            'RooRhoKeysPdf::testpdf(x, rho[1, 0, 100], modelData)'
        )
        rho = w.var('rho')

        # Overlay the keys PDF for rho = 1 (red), 2 (green) and 3 (black);
        # the dataset is reloaded after each change of rho.
        plot = x.frame()
        data.plotOn(plot)
        model.plotOn(plot)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kRed))
        rho.setVal(2)
        testpdf.LoadDataSet(data)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kGreen))
        rho.setVal(3)
        testpdf.LoadDataSet(data)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kBlack))
        canvases.next('RooRhoKeysPdf_Test%d' % itoy)
        plot.Draw()
        canvases.update()

        # Split the toy data in two halves and build a keys PDF on each.
        resampler = Resampler(data)
        data0 = resampler.prescale(2, [0], 'data0')
        data1 = resampler.prescale(2, [1], 'data1')
        w.Import(data0)
        w.Import(data1)
        testpdf0 = w.factory('RooRhoKeysPdf::testpdf0(x, rho, data0)')
        testpdf1 = w.factory('RooRhoKeysPdf::testpdf1(x, rho, data1)')

        # Scan rho; each PDF is evaluated on the half it was NOT built from.
        gnll = ROOT.TGraph()
        for rhoval in [0.5 + 0.05 * i for i in range(50)]:
            rho.setVal(rhoval)
            testpdf0.LoadDataSet(data0)
            testpdf1.LoadDataSet(data1)
            nll = 0
            nll += testpdf0.createNLL(data1).getVal()
            nll += testpdf1.createNLL(data0).getVal()
            # print rhoval, nll
            gnll.SetPoint(gnll.GetN(), rhoval, nll)

        # Parabola fit in a window of +-5 scan points around the minimum,
        # clipped to the scanned range.
        locmin = ROOT.TMath.LocMin(gnll.GetN(), gnll.GetY())
        xmin = gnll.GetX()[max(locmin-5, 0)]
        xmax = gnll.GetX()[min(locmin+5, gnll.GetN()-1)]
        fres = gnll.Fit('pol2', 's', '', xmin, xmax)
        p1 = fres.Get().GetParams()[1]
        p2 = fres.Get().GetParams()[2]
        # Vertex of p0 + p1*x + p2*x^2 and the half-width at which the
        # parabola rises by 0.5 above its minimum.
        rhoval = - 0.5 * p1 / p2
        rhoerr = 1/ROOT.TMath.Sqrt(2 * p2)
        hrhoval.Fill(rhoval)
        hrhoerr.Fill(rhoerr)

        canvases.next('gnll%d' % itoy)
        gnll.Draw('ap')
        gnll.GetXaxis().SetTitle('#rho')
        gnll.GetYaxis().SetTitle('- log L')
        gnlls.append(gnll)

    canvases.next('rhoerr')
    hrhoerr.Draw()
    canvases.next('rhoval')
    hrhoval.Draw()
    canvases.update()

    from FWLite.Tools.modalinterval import ModalInterval
    mi = ModalInterval(w.data('data0'))
    # Call syntax works identically under Python 2 and Python 3.
    print(mi.halfSampleMode())