def initsplit(self):
    '''
    Splits the dataset in self.nsplit parts and builds a model for each part.

    The number of parts is parsed from self.option: whatever is left after
    removing the substring 'split' is converted with int(); a bare 'split'
    means 2 parts.  One ParameterizedKeysPdf is built per part and added to
    a RooSimultaneous (self.model) under a category named after the index
    of the part.  Finally self.data is replaced by a combined RooDataSet in
    which the parts are imported under rotated category labels, so that the
    pdf of a category is never paired with the part it was built from
    (for nsplit >= 2).

    Raises ValueError (from int()) if the remainder of the option string is
    not an integer.
    '''
    self.resampler = Resampler(self.data)
    self.subdata = []
    self.submodel = []

    suffix = self.option.replace('split', '')
    self.nsplit = int(suffix) if suffix else 2

    self.part = ROOT.RooCategory('part', 'part')
    self.model = ROOT.RooSimultaneous('model', 'model', self.part)

    for i in range(self.nsplit):
        cat = str(i)
        self.part.defineType(cat)
        # NOTE(review): prescale(nsplit, [i], ...) presumably selects the
        # i-th out of nsplit interleaved subsets -- confirm in Resampler.
        name = 'subdata%d' % i
        self.subdata.append(
            self.resampler.prescale(self.nsplit, [i], name, name)
        )
        name = 'submodel%d' % i
        self.submodel.append(
            ParameterizedKeysPdf(name, name, self.x, self.mode,
                                 self.effsigma, self.subdata[i],
                                 rho=self.rho, forcerange=True)
        )
        self.model.addPdf(self.submodel[i], cat)

    # Now shuffle the subdata and the categories.  The labels are rotated
    # by one rather than reversed: a reversal maps the middle label onto
    # itself when nsplit is odd, which would pair that submodel with its
    # own training subset.  For nsplit == 2 both give the same pairing.
    cats = [str(i) for i in range(self.nsplit)]
    cats = cats[1:] + cats[:1]
    args = [roo.Index(self.part)]
    for cat, subdata in zip(cats, self.subdata):
        args.append(roo.Import(cat, subdata))
    self.data = ROOT.RooDataSet('data', 'data', ROOT.RooArgSet(self.x),
                                *args)
def main():
    '''This is the entry point to execution.

    Reads the datasets with getdata(), builds a ParameterizedKeysPdf from
    the 'raw' dataset (or from a bootstrap replica of it when it has more
    than kde_training_max_events entries), fits that model to every
    dataset, plots and saves each fit, and finally derives and plots the
    correction that maps the 'raw' fit result onto the 'target' one.

    The dictionary returned by getdata() must contain the keys 'raw' and
    'target'.  The results are published through the module-level names
    data, model, x and fitresult.

    The default integrator precision is tightened for the duration of the
    model building and the fits, and is restored afterwards even if one of
    them raises.
    '''
    global data, model, x, fitresult

    data = getdata()
    x = data['raw'].get()[variable]
    if fit_range:
        x.setRange('fit', *fit_range)
    if plot_range:
        x.setRange('plot', *plot_range)

    initialize_fit_parameters(data['raw'], x)

    old_precision = set_default_integrator_precision(1e-8, 1e-8)
    try:
        ## Reduce data for model building
        data_kde_training = data['raw']
        if data['raw'].numEntries() > kde_training_max_events:
            resampler = Resampler(data['raw'])
            data['raw_reduced'] = resampler.bootstrap(
                name=data['raw'].GetName() + '_boot',
                title=data['raw'].GetTitle() + ' Bootstrap Replica',
                size=kde_training_max_events,
                seed=12345,
            )
            data_kde_training = data['raw_reduced']

        model = ParameterizedKeysPdf('model', 'model', x, mode, effsigma,
                                     data_kde_training, rho=0.3,
                                     forcerange=True)

        fitresult = {}
        for name, dataset in data.items():
            dataset.SetName(name + 'Data')
            ## Reduce data for debugging
            # dataset = dataset.reduce(roo.EventRange(0, 10000))
            # Start every fit from the shape parameters of the model.
            mode.setVal(model.shapemodevar.getVal())
            effsigma.setVal(model.shapewidthvar.getVal())
            fitresult[name] = model.fitTo(dataset, roo.SumW2Error(True),
                                          roo.NumCPU(8), roo.Strategy(2),
                                          roo.Save(), roo.Range("fit"))
            make_plot(x, dataset, model)
            # Single-argument call form: same output under Python 2,
            # valid syntax under Python 3.
            print('\n== Fitted parameters ==')
            mode.Print()
            effsigma.Print()
            save_result(fitresult[name])
    finally:
        # The precision is process-wide state; do not leak the tightened
        # setting to the caller when a fit fails.
        set_default_integrator_precision(*old_precision)

    x_corr = get_correction(x,
                            fitresult['raw'].floatParsFinal(),
                            fitresult['target'].floatParsFinal())
    make_comparison_plot(data['raw'], data['target'], x_corr)
def dofit(self):
    '''
    Estimates mode and effsigma from the data and its bootstrap replicas.

    One estimate is taken from self.data itself and one from each of
    self.nboot bootstrap replicas; every estimate is recorded by adding
    self.bootset to self.bootdata.  The final values of self.mode and
    self.effsigma are the means over self.bootdata and their errors are
    the corresponding rmsVar values.
    '''
    def record_estimate(sample):
        # mode <- half-sample mode, effsigma <- half the length of the
        # modal interval at sigma level 1; then store the current values.
        # NOTE(review): presumably self.bootset holds self.mode and
        # self.effsigma -- it is defined outside this method.
        interval = ModalInterval(sample)
        interval.setSigmaLevel(1)
        self.mode.setVal(interval.halfSampleMode())
        self.effsigma.setVal(0.5 * interval.length())
        self.bootdata.add(self.bootset)

    record_estimate(self.data)

    replicator = Resampler(self.data)
    for _ in range(self.nboot):
        record_estimate(replicator.bootstrap())

    parameters = (self.mode, self.effsigma)
    for par in parameters:
        par.setVal(self.bootdata.mean(par))
    for par in parameters:
        par.setError(self.bootdata.rmsVar(par).getVal())
def bootstrap(self, repeat=10):
    '''
    Estimates the spread of the width ratio from bootstrap replicas.

    For each of `repeat` bootstrap replicas of self.data the width ratio
    graph is recomputed with self.get_width_ratio(replica, self.fractions)
    and its points are collected per x value.

    Returns the tuple (errors_rms, errors_mi1, errors_mi2):
      errors_rms -- TProfile (created with option 's') filled with all
                    replica points,
      errors_mi1 -- TGraphAsymmErrors with the median of the replicas as y
                    and the distance to the bounds of the modal interval
                    at sigma level 1 as asymmetric y errors,
      errors_mi2 -- same as errors_mi1 for sigma level 2.
    '''
    npoints = len(self.fractions)
    # Bin edges as in the original: from half of the first fraction up to
    # the last fraction plus that same half.
    half_first = 0.5 * self.fractions[0]
    upper_edge = self.fractions[-1] + half_first

    errors_rms = ROOT.TProfile(self.name + '_errors_rms', self.title,
                               npoints, half_first, upper_edge, 's')
    errors_mi1 = ROOT.TGraphAsymmErrors(npoints)
    errors_mi2 = ROOT.TGraphAsymmErrors(npoints)
    errors_mi1.SetTitle(self.title)
    errors_mi2.SetTitle(self.title)

    resampler = Resampler(self.data)

    # x value -> list of the y values found for it in the replicas
    samples = {}
    for _ in range(repeat):
        replica = resampler.bootstrap()
        ratio = self.get_width_ratio(replica, self.fractions)
        xvalues = ratio.GetX()
        yvalues = ratio.GetY()
        for ipoint in range(ratio.GetN()):
            xval = xvalues[ipoint]
            yval = yvalues[ipoint]
            errors_rms.Fill(xval, yval)
            samples.setdefault(xval, []).append(yval)

    levels = ((errors_mi1, 1), (errors_mi2, 2))
    for ipoint, (xval, ylist) in enumerate(sorted(samples.items())):
        count = len(ylist)
        ybuffer = array.array('d', ylist)
        median = ROOT.TMath.Median(count, ybuffer)
        errors_mi1.SetPoint(ipoint, xval, median)
        errors_mi2.SetPoint(ipoint, xval, median)

        interval = ModalInterval(count, ybuffer)
        for graph, level in levels:
            interval.setSigmaLevel(level)
            below = median - interval.lowerBound()
            above = interval.upperBound() - median
            # No uncertainty is assigned along x.
            graph.SetPointError(ipoint, 0., 0., below, above)

    return errors_rms, errors_mi1, errors_mi2
class KeysResponseFitter:
    '''
    Fits the mode and effective sigma of a one-variable dataset with
    ParameterizedKeysPdf models.

    The dataset is imported in a private RooWorkspace (self.w) where the
    dataset is renamed 'data' and its variable 'x'.  The fit parameters
    are created as mode[0, -50, 50] and effsigma[1, 0.01, 50] and inherit
    the unit of x.

    Options:
      'monolithic' -- a single ParameterizedKeysPdf built from all data,
      '...split...' -- see initsplit().
    '''
    #__________________________________________________________________________
    def __init__(self, data, rho=1.5, printlevel=-1, option='monolithic'):
        '''
        data       -- dataset with exactly one variable,
        rho        -- forwarded as rho to every ParameterizedKeysPdf,
        printlevel -- stored as self.printlevel,
        option     -- 'monolithic' or a string containing 'split'.

        Raises RuntimeError if data has more than one variable or if the
        option is not recognized.
        '''
        if data.get().getSize() != 1:
            raise RuntimeError('Data must contain just one variable!')

        self.w = ROOT.RooWorkspace('KeysResponseFitter',
                                   'KeysResponseFitter Workspace')
        self.w.Import(data)
        # Work on the workspace copies from here on.
        self.data = self.w.data(data.GetName())
        self.data.SetName('data')
        self.x = self.w.var(data.get().first().GetName())
        self.x.SetName('x')
        self.rho = rho
        self.printlevel = printlevel

        ## Define mode and effsigma.
        self.mode = self.w.factory('mode[0, -50, 50]')
        self.effsigma = self.w.factory('effsigma[1, 0.01, 50]')
        for parameter in (self.mode, self.effsigma):
            parameter.setUnit(self.x.getUnit())

        self.option = option
        if option == 'monolithic':
            self.model = ParameterizedKeysPdf('model', 'model', self.x,
                                              self.mode, self.effsigma,
                                              self.data, rho=rho,
                                              forcerange=True)
        elif 'split' in option:
            self.initsplit()
        else:
            raise RuntimeError('Unknown option: %s' % option)
        # self.dofit()
    ## end of __init__

    #__________________________________________________________________________
    def initsplit(self):
        '''
        Splits the dataset in self.nsplit parts and builds a model for each
        part.

        The number of parts is parsed from self.option: whatever is left
        after removing the substring 'split' is converted with int(); a
        bare 'split' means 2 parts.  One ParameterizedKeysPdf is built per
        part and added to a RooSimultaneous (self.model) under a category
        named after the index of the part.  Finally self.data is replaced
        by a combined RooDataSet in which the parts are imported under
        rotated category labels, so that the pdf of a category is never
        paired with the part it was built from (for nsplit >= 2).

        Raises ValueError (from int()) if the remainder of the option
        string is not an integer.
        '''
        self.resampler = Resampler(self.data)
        self.subdata = []
        self.submodel = []

        suffix = self.option.replace('split', '')
        self.nsplit = int(suffix) if suffix else 2

        self.part = ROOT.RooCategory('part', 'part')
        self.model = ROOT.RooSimultaneous('model', 'model', self.part)

        for i in range(self.nsplit):
            cat = str(i)
            self.part.defineType(cat)
            # NOTE(review): prescale(nsplit, [i], ...) presumably selects
            # the i-th out of nsplit interleaved subsets -- confirm in
            # Resampler.
            name = 'subdata%d' % i
            self.subdata.append(
                self.resampler.prescale(self.nsplit, [i], name, name)
            )
            name = 'submodel%d' % i
            self.submodel.append(
                ParameterizedKeysPdf(name, name, self.x, self.mode,
                                     self.effsigma, self.subdata[i],
                                     rho=self.rho, forcerange=True)
            )
            self.model.addPdf(self.submodel[i], cat)

        # Now shuffle the subdata and the categories.  The labels are
        # rotated by one rather than reversed: a reversal maps the middle
        # label onto itself when nsplit is odd, which would pair that
        # submodel with its own training subset.  For nsplit == 2 both
        # give the same pairing.
        cats = [str(i) for i in range(self.nsplit)]
        cats = cats[1:] + cats[:1]
        args = [roo.Index(self.part)]
        for cat, subdata in zip(cats, self.subdata):
            args.append(roo.Import(cat, subdata))
        self.data = ROOT.RooDataSet('data', 'data', ROOT.RooArgSet(self.x),
                                    *args)
    ## end of initsplit()

    #__________________________________________________________________________
    def dofit(self):
        '''Fits self.model to self.data.'''
        # NOTE(review): self.printlevel is stored by __init__ but the print
        # level is hard-coded to 1 here -- confirm which one is intended.
        self.model.fitTo(self.data, roo.PrintLevel(1), roo.Verbose(False))
    ## end of doFit

    #__________________________________________________________________________
    def makeplot(self):
        '''
        Draws the data, the model and the model parameters on a frame of x
        in the next canvas; keeps them as self.canvas and self.plot.
        '''
        self.canvas = canvases.next()
        self.plot = self.x.frame()
        self.data.plotOn(self.plot)
        self.model.plotOn(self.plot)
        self.model.paramOn(self.plot)
        self.plot.Draw()
        self.canvas.Update()
def test():
    '''
    Tests the RooRhoKeysPdf class.

    A Breit-Wigner toy sample of 1000 events is generated and a
    RooRhoKeysPdf built from it is drawn for rho = 1, 2 and 3.  The sample
    is then split in two halves, a pdf is built from each half, and the sum
    of the negative log likelihoods of each pdf evaluated on the *other*
    half is scanned over rho in [0.5, 2.95] in steps of 0.05.  A parabola
    is fitted within 5 scan points around the minimum; its vertex and
    curvature give the rho estimate and its error, which are histogrammed.

    The objects gnlls, hrhoval, hrhoerr, w and mi are left in the module
    namespace for interactive inspection.
    '''
    global gnlls, hrhoval, hrhoerr, w, mi

    import FWLite.Tools.canvases as canvases
    import FWLite.Tools.cmsstyle as cmsstyle

    ROOT.RooRandom.randomGenerator().SetSeed(2)

    hrhoval = ROOT.TH1F('hrhoval', 'hrhoval', 100, 0, 5)
    hrhoerr = ROOT.TH1F('hrhoerr', 'hrhoerr', 100, 0, 1)
    gnlls = []

    for itoy in range(1):
        w = ROOT.RooWorkspace('w', 'w')
        # model = w.factory('Gaussian::model(x[-50, 50], mean[0], sigma[1])')
        model = w.factory('BreitWigner::model(x[-5, 5], mean[0], sigma[1])')
        x = w.var('x')
        data = model.generate(ROOT.RooArgSet(x), 1000)
        w.Import(data)

        # The generated dataset is referred to as 'modelData' in the
        # workspace.
        testpdf = w.factory(
            'RooRhoKeysPdf::testpdf(x, rho[1, 0, 100], modelData)'
        )
        rho = w.var('rho')

        ## Overlay the data, the true model and the pdf for several rho.
        plot = x.frame()
        data.plotOn(plot)
        model.plotOn(plot)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kRed))
        for value, color in ((2, ROOT.kGreen), (3, ROOT.kBlack)):
            rho.setVal(value)
            # The pdf has to reload the data to pick up the new rho.
            testpdf.LoadDataSet(data)
            testpdf.plotOn(plot, roo.LineColor(color))

        canvases.next('RooRhoKeysPdf_Test%d' % itoy)
        plot.Draw()
        canvases.update()

        ## Split the sample in two halves and build a pdf from each.
        resampler = Resampler(data)
        data0 = resampler.prescale(2, [0], 'data0')
        data1 = resampler.prescale(2, [1], 'data1')
        w.Import(data0)
        w.Import(data1)
        testpdf0 = w.factory('RooRhoKeysPdf::testpdf0(x, rho, data0)')
        testpdf1 = w.factory('RooRhoKeysPdf::testpdf1(x, rho, data1)')

        ## Scan the cross-evaluated negative log likelihood over rho.
        gnll = ROOT.TGraph()
        for istep in range(50):
            rhoscan = 0.5 + 0.05 * istep
            rho.setVal(rhoscan)
            testpdf0.LoadDataSet(data0)
            testpdf1.LoadDataSet(data1)
            nll = 0
            nll += testpdf0.createNLL(data1).getVal()
            nll += testpdf1.createNLL(data0).getVal()
            gnll.SetPoint(gnll.GetN(), rhoscan, nll)

        ## Fit a parabola around the minimum of the scan.
        locmin = ROOT.TMath.LocMin(gnll.GetN(), gnll.GetY())
        ifirst = max(locmin-5, 0)
        ilast = min(locmin+5, gnll.GetN()-1)
        xmin = gnll.GetX()[ifirst]
        xmax = gnll.GetX()[ilast]
        fres = gnll.Fit('pol2', 's', '', xmin, xmax)
        p1 = fres.Get().GetParams()[1]
        p2 = fres.Get().GetParams()[2]
        # Vertex of p0 + p1*rho + p2*rho^2 and the rho shift that raises
        # it by 1/2.
        rhoval = - 0.5 * p1 / p2
        rhoerr = 1/ROOT.TMath.Sqrt(2 * p2)
        hrhoval.Fill(rhoval)
        hrhoerr.Fill(rhoerr)

        canvases.next('gnll%d' % itoy)
        gnll.Draw('ap')
        gnll.GetXaxis().SetTitle('#rho')
        gnll.GetYaxis().SetTitle('- log L')
        gnlls.append(gnll)

    for title, histogram in (('rhoerr', hrhoerr), ('rhoval', hrhoval)):
        canvases.next(title)
        histogram.Draw()
    canvases.update()

    from FWLite.Tools.modalinterval import ModalInterval
    mi = ModalInterval(w.data('data0'))
    print(mi.halfSampleMode())