Exemplo n.º 1
0
def test_toys2():
    """Toy study with distinct generation and fit PDFs.

    - generate `nToys` pseudoexperiments with the PDF `gen_pdf`
    - fit each pseudoexperiment with the PDF `fit_pdf`
    - collect the fit results and their summary statistics
    - fill and display distributions of the fitted mean and sigma
    """

    logger = getLogger('test_toys2')

    ## configuration of the generation and fit steps
    generation = {'nEvents': 200, 'sample': True}
    fitting = {'silent': True}

    ## starting values of the parameters for generation and for the fit
    start_gen = {'mean_GG': 0.4, 'sigma_GG': 0.1}
    start_fit = {'mean_GF': 0.4, 'sigma_GF': 0.1}

    results, stats = Toys.make_toys2(
        gen_pdf=gen_gauss,
        fit_pdf=fit_gauss,
        nToys=1000,
        data=[mass],
        gen_config=generation,
        fit_config=fitting,
        gen_pars=start_gen,
        fit_pars=start_fit,
        silent=True,
        progress=True,
    )

    ## report the per-parameter statistics
    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## book the histograms, then fill each one from its column of results
    ## NOTE(review): the results are read under '*_FG' while `fit_pars` uses
    ## '*_GF' -- confirm which suffix `fit_gauss` actually carries.
    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 50, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 50, 0.05, 0.15)
    booked = (('mean_FG', h_mean), ('sigma_FG', h_sigma))

    for key, histo in booked:
        for value in results[key]:
            histo.Fill(value)

    ## show every histogram and dump its textual representation to the log
    for _, histo in booked:
        histo.draw()
        logger.info("%s  :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
Exemplo n.º 2
0
def test_significance_toys():
    """Toy study of the signal yield obtained from background-only samples.

    - generate `nToys` pseudoexperiments with the background-only PDF
    - fit each pseudoexperiment with the signal+background PDF
    - collect the fit results and their summary statistics
    - fill and display the distribution of the fitted `S`
    """

    logger = getLogger('test_significance_toys')

    ## generation hypothesis: background only
    null_pdf = Models.Bkg_pdf("BKG", xvar=mass, power=0, tau=0)

    ## signal component; both shape parameters are fixed
    peak = Models.Gauss_pdf('S', xvar=mass, mean=0.5, sigma=0.1)
    peak.mean.fix(0.4)
    peak.sigma.fix(0.1)

    ## fit hypothesis: signal + background, background slope fixed
    full_pdf = Models.Fit1D(signal=peak, background=1)
    full_pdf.background.tau.fix(0)

    generation = {'nEvents': 100, 'sample': True}
    fitting = {'silent': True}
    start_gen = {'tau_BKG': 0.}  ## initial values for generation
    start_fit = {'B': 100, 'S': 10, 'phi0_Bkg_S': 0.0}  ## initial values for the fit

    results, stats = Toys.make_toys2(
        gen_pdf=null_pdf,
        fit_pdf=full_pdf,
        nToys=1000,
        data=[mass],
        gen_config=generation,
        fit_config=fitting,
        gen_pars=start_gen,
        fit_pars=start_fit,
        silent=True,
        progress=True,
    )

    ## report the per-parameter statistics
    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## distribution of the fitted signal yield
    yield_histo = ROOT.TH1F(hID(), '#S', 60, 0, 60)
    for value in results['S']:
        yield_histo.Fill(value)

    yield_histo.draw()
    logger.info("%s  :\n%s" % (yield_histo.GetTitle(), yield_histo.dump(30, 10)))
    time.sleep(1)
Exemplo n.º 3
0
def test_parallel_toys():
    """Toy study run through `Toys.parallel_toys`.

    - generate `nToys` pseudoexperiments with the PDF `pdf`, split in `nSplit` parts
    - fit each pseudoexperiment with the same PDF
    - collect the fit results and their summary statistics
    - fill and display distributions of the fitted mean and sigma
    """

    generation = {'nEvents': 200}
    fitting = {'silent': True}
    start_values = {'mean_GG': 0.4, 'sigma_GG': 0.1}

    results, stats = Toys.parallel_toys(
        pdf=gen_gauss,
        nToys=1000,
        nSplit=20,
        data=[mass],
        gen_config=generation,
        fit_config=fitting,
        init_pars=start_values,
        silent=True,
        progress=False,
    )

    ## report the per-parameter statistics
    ## NOTE(review): no local logger is created here, unlike the sibling tests;
    ## `logger` presumably resolves to a module-level object -- confirm.
    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## book the histograms, then fill each one from its column of results
    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 100, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 100, 0.05, 0.15)
    booked = (('mean_GG', h_mean), ('sigma_GG', h_sigma))

    for key, histo in booked:
        for value in results[key]:
            histo.Fill(value)

    ## show every histogram and dump its textual representation to the log
    for _, histo in booked:
        histo.draw()
        logger.info("%s  :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
Exemplo n.º 4
0
def test_frame1 ( ) :
    """Project the same variable via the tree interface and via `frame_project`.

    The test is skipped (with a warning) when `root_info` is below (6,16).
    Two histograms are filled, one with `tree.project` and one with
    `frame_project`, and drawn on top of each other.

    NOTE(review): everything from the `## reload data` comment onwards looks
    like a truncated copy of the tail of `test_gbreweight` that got fused into
    this function -- confirm whether it belongs here.
    """

    logger = getLogger ( 'test_frame1' ) 
    ## the test needs a recent enough ROOT, otherwise just warn and leave
    if root_info < ( 6 , 16 ) : 
        logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) )
        return 
    
    ## the same input opened as a frame and as a chain
    frame = DataFrame ( tname        , fname        )
    tree  = Tree      ( name = tname , file = fname ).chain
    
    ## exercise `draw` on both objects; the returned values are rebound just below
    h1 = tree .draw ( 'b1' , '1/b1' )
    h2 = frame.draw ( 'b1' , '1/b1' )

    ## explicit histograms to be filled by the projections
    h1 = ROOT.TH1D( hID() , '' , 100 , 0 , 1000 )
    h2 = ROOT.TH1D( hID() , '' , 100 , 0 , 1000 )

    tree.project  ( h1    , 'b1' , '1.0/b1' )
    ## NOTE(review): `tree` (not `frame`) is passed here and the last argument is
    ## '2.0/b1' while the tree projection uses '1.0/b1' -- confirm both are intended
    frame_project ( tree  , h2 , 'b1' , '2.0/b1' )
    
    ## overlay the two projections on a dedicated canvas
    with wait ( 3 ), use_canvas ( 'test_frame1' ) : 
        h1.red  ()
        h2.blue ()    
        h1.draw ()
        h2.draw ( 'same hist' )
    ## reload data
    data = Data('DATA_tree', testdata)
    mc = Data(tag_mc, testdata)

    ## expression that scales the 'w' branch by (#data entries)/(sum of w in MC)
    wsum = mc.chain.sumVar('w')
    wvar = '%d*w/%s' % (len(data.chain), wsum.value())

    ## constant factor: ratio of data to MC chain lengths
    nn = '%s' % (len(data.chain) * 1.0 / len(mc.chain))

    for phi in vrange(0, 2 * math.pi, 10):

        ## linear combination of x and y defined by the angle phi
        dvar = '%.5f*x+%.5f*y' % (math.cos(phi), math.sin(phi))

        ## histogram range is taken from the data
        mn, mx = data.chain.statVar(dvar).minmax()
        h1 = ROOT.TH1D(hID(), '', 100, *axis_range(mn, mx, delta=0.05))
        h2 = h1.clone()
        h3 = h1.clone()

        data.chain.project(h1, dvar)  ## data
        mc.chain.project(h2, dvar, nn)  ## original (non-weighted) MC
        mc.chain.project(h3, dvar, wvar)  ## weighted  MC

        ## set the vertical maximum of the data histogram from its own content
        mn, mx = h1.minmax()
        mn, mx = axis_range(0, mx, delta=0.7)
        h1.SetMaximum(mx)
        h1.blue()
        h2.green()
        h3.red()

        ## NOTE(review): only h1 is drawn in this copy; h2 and h3 are filled but never shown
        h1.draw('')
Exemplo n.º 6
0
def test_gbreweight():
    """Reweight the MC sample to the data sample with `Reweighter` and plot the outcome.

    - leave early (with an error message) if `Reweighter` cannot be imported or created
    - prepare the input file when it does not exist yet
    - train on the 'x y' slices of MC and data, store the new weights as branch 'w'
    - for a set of directions in the (x,y) plane compare data,
      non-weighted MC and weighted MC
    """

    logger = getLogger("test_gbreweight")

    ## the reweighter is optional: both the import and the construction are guarded
    try:
        from ostap.tools.reweighter import Reweighter
        reweighter = Reweighter()
    except ImportError:
        logger.error('GBReweighter is not available!')
        return

    ## create the input file on demand
    if not os.path.exists(testdata):
        with timing("Prepare input data", logger=logger):
            prepare_data()

    # =========================================================================
    ## Input data/mc samples
    # =========================================================================
    data = Data('DATA_tree', testdata)
    mc = Data(tag_mc, testdata)

    target_xy, target_w = data.chain.slice('x y', transpose=True)
    original_xy, original_w = mc.chain.slice('x y', transpose=True)

    ## train BDT
    reweighter.reweight(original=original_xy, target=target_xy)

    ## new weights, stored in the MC chain as branch 'w'
    new_weights = reweighter.weight(original=original_xy)
    mc.chain.add_new_branch('w', new_weights)

    ## reload data
    data = Data('DATA_tree', testdata)
    mc = Data(tag_mc, testdata)

    ## weight expression: 'w' scaled by (#data entries)/(sum of w in MC)
    weight_sum = mc.chain.sumVar('w')
    weighted = '%d*w/%s' % (len(data.chain), weight_sum.value())

    ## constant factor for the non-weighted MC: ratio of chain lengths
    plain = str(len(data.chain) * 1.0 / len(mc.chain))

    for angle in vrange(0, 2 * math.pi, 10):

        ## linear combination of x and y defined by the angle
        cos_phi = math.cos(angle)
        sin_phi = math.sin(angle)
        expression = '%.5f*x+%.5f*y' % (cos_phi, sin_phi)

        ## histogram range is taken from the data
        low, high = data.chain.statVar(expression).minmax()
        h_data = ROOT.TH1D(hID(), '', 100, *axis_range(low, high, delta=0.05))
        h_plain = h_data.clone()
        h_weighted = h_data.clone()

        data.chain.project(h_data, expression)  ## data
        mc.chain.project(h_plain, expression, plain)  ## original (non-weighted) MC
        mc.chain.project(h_weighted, expression, weighted)  ## weighted  MC

        ## vertical maximum of the data histogram follows from its own content
        _, top = h_data.minmax()
        _, top = axis_range(0, top, delta=0.7)
        h_data.SetMaximum(top)

        h_data.blue()
        h_plain.green()
        h_weighted.red()

        for histo, option in ((h_data, ''),
                              (h_plain, 'same hist'),
                              (h_weighted, 'same')):
            histo.draw(option)

        time.sleep(2)
Exemplo n.º 7
0
def ex_func(particle, dataset, plots=None, verbose=False):
    """The example of the actual function that builds histograms.

    In this example it builds two histograms:
    - accepted events
    - rejected events

    For dataset structure and variable names see:
    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage

    The efficiency histogram can be later built in Ostap as:

    >>> h_acc = ...
    >>> h_rej = ...
    >>> eff = 1/(1 + h_rej/h_acc)

    Args:
        particle: not used by this example.
        dataset: object providing the `Pi_P` and `Pi_Eta` variables and a
            `fillHistogram(histo, variables, cuts)` method.
        plots: optional two-element sequence `[accepted, rejected]` of
            previously accumulated histograms; when given (and non-empty)
            the new histograms are added to it in place.
        verbose: when true, log the statistics of both resulting histograms.

    Returns:
        The list `[accepted, rejected]`: a fresh one when `plots` was empty
        or `None`, otherwise the updated `plots` object itself.
    """

    ## we need ROOT and Ostap machinery!
    import ROOT
    from ostap.core.pyrouts import hID

    # 1) define PID-cut and its negation
    #    For dataset structure and variable names see:
    #    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage
    accepted = 'Pi_ProbNNpi>0.5'  ## note variable names
    rejected = 'Pi_ProbNNpi<0.5'  ## note variable names

    # 2) prepare the histograms
    #    (Sumw2 is called on the histograms created here; the previous code
    #    called it on the undefined names `h1`/`h2`, which raised NameError)
    hA = ROOT.TH2D(hID(), 'Accepted(%s)' % accepted, 15, 0, 150000, 10, 2, 5)
    hA.Sumw2()
    hR = ROOT.TH2D(hID(), 'Rejected(%s)' % rejected, 15, 0, 150000, 10, 2, 5)
    hR.Sumw2()

    # 3) fill the histograms with 'accepted' and 'rejected' events
    #    For dataset structure and variable names see:
    #    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage

    vlst = ROOT.RooArgList()
    vlst.add(dataset.Pi_P)  ## note variable names
    vlst.add(dataset.Pi_Eta)  ## note variable name

    ## fill from the dataset that was passed in (the previous code referred
    ## to an undefined name `DataSet`)
    hA = dataset.fillHistogram(hA, vlst, accepted)  ## fill histo
    hR = dataset.fillHistogram(hR, vlst, rejected)  ## fill histo

    #
    ## and now update the output
    #

    if not plots:

        ## first call: the histograms are renamed after their titles
        hA.SetName(hA.GetTitle())
        hR.SetName(hR.GetTitle())

        plots = [hA, hR]  ## "Accepted" & "Rejected" histograms

    else:

        ## subsequent calls: accumulate into the histograms supplied by the caller
        plots[0] += hA  ## "Accepted" histogram
        plots[1] += hR  ## "Rejected" histogram

        ## the temporaries are not needed anymore
        hA.Delete()
        hR.Delete()
        if hA: del hA
        if hR: del hR

    if verbose:

        logger.info('Accepted histo: %s' % plots[0].stat())
        logger.info('Rejected histo: %s' % plots[1].stat())

    return plots
Exemplo n.º 8
0
    def __init__(
        self,
        category,  ## accessor to category 
        N,  ## number of categories 
        methods,  ## list of TMVA methods
        variables,  ## list of variables 
        signal,  ## signal tree
        background,  ## background tree
        signal_cuts='',  ## signal cuts 
        background_cuts='',  ## background cuts 
        spectators=(),  ## immutable default: it is only converted to a tuple
        bookingoptions="Transformations=I;D;P;G,D",
        configuration="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V",
        signal_weight=None,
        background_weight=None,
        name='TMVAChopper',  ## the name 
        verbose=False,  ## verbose ? 
        chop_signal=False,  ## chop the signal     ?
        chop_background=True):  ## chop the background ?
        """Create TMVA ``chopping'' trainer
        
        >>> N = 11 
        >>> trainer = Trainer (
        ... category = '137*evt+813*run' ,
        ... N        = N                 , 
        ... methods =  [ # type                   name   configuration
        ...      ( ROOT.TMVA.Types.kMLP        , 'MLP'        , 'H:!V:EstimatorType=CE:VarTransform=N:NCycles=200:HiddenLayers=N+3:TestRate=5:!UseRegulator' ) ,
        ...      ( ROOT.TMVA.Types.kBDT        , 'BDTG'       , 'H:!V:NTrees=100:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2' ) , 
        ...      ( ROOT.TMVA.Types.kCuts       , 'Cuts'       , 'H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart' ) ,
        ...      ( ROOT.TMVA.Types.kFisher     , 'Fisher'     , 'H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10' ),
        ...      ( ROOT.TMVA.Types.kLikelihood , 'Likelihood' , 'H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50' ) ] ,
        ... variables  = [ 'var1' , 'var2' ,  'var3' ] ,  ## Variables to use in the training
        ... signal     = signal_tree      , ## TTree/TChain with ``signal'' sample   
        ... background = background_tree  , ## TTree/TChain with ``background'' sample   
        ... name       = 'TMVAChopper'    ,
        ... verbose    = False )

        The constructor stores the configuration, and for each sample that
        is chopped it fills two histograms of the category
        `(category) % N` (a fine-binned one and one with a bin per category),
        warns if some category is empty and logs the mean/rms of the
        category population.

        Raises:
            AssertionError: if `N` is not an integer larger than 1, or if
                neither `chop_signal` nor `chop_background` is true.
        """
        ## `long` exists only in Python 2; fall back to plain `int` otherwise
        try:
            integer_types = (int, long)
        except NameError:
            integer_types = (int,)

        assert isinstance(
            N, integer_types) and 1 < N, "Invalid number of categories"

        self.__chop_signal = bool(chop_signal)
        self.__chop_background = bool(chop_background)

        assert self.__chop_signal or self.__chop_background, "Neither signal nor background chopping"

        self.__category = category
        self.__N = N

        self.__signal = signal
        self.__background = background

        self.__methods = tuple(methods)
        self.__signal_weight = signal_weight
        self.__signal_cuts = ROOT.TCut(signal_cuts)

        self.__background_weight = background_weight
        self.__background_cuts = ROOT.TCut(background_cuts)

        self.__variables = tuple(variables)
        self.__spectators = tuple(spectators)

        self.__bookingoptions = bookingoptions
        self.__configuration = configuration

        self.__name = name
        self.__verbose = bool(verbose)

        self.__sig_histos = ()
        self.__bkg_histos = ()

        ## expression for the category index
        cat = '(%s)%%%d' % (self.category, self.N)

        def _category_histos(tree, cuts, title, empty_message, stat_format):
            """Fill and check the two category-population histograms of `tree`."""
            ## fine binning: 5 bins per category over [-0.5, N-0.5); with the
            ## former upper edge N-1 the last category fell into the overflow
            fine = ROOT.TH1F(hID(), title, self.N * 5, -0.5, self.N - 0.5)
            ## coarse binning: exactly one bin per category
            coarse = h1_axis([-0.5 + i for i in range(self.N + 1)],
                             title=fine.GetTitle())
            tree.project(fine, cat, cuts)
            tree.project(coarse, cat, cuts)
            st = coarse.stat()
            if 0 >= st.min():
                logger.warning(empty_message)
            logger.info(stat_format % (st.mean(), st.rms()))
            return fine, coarse

        if self.chop_signal:
            self.__sig_histos = _category_histos(
                self.signal, self.signal_cuts, 'Signal categories',
                "Some signal categories are empty!",
                'Signal     category population mean/rms: %s/%s')

        if self.chop_background:
            self.__bkg_histos = _category_histos(
                self.background, self.background_cuts,
                'Background categories',
                "Some background categories are empty!",
                'Background category population mean/rms: %s/%s')

        ##  trick to please Kisa
        ##  NOTE(review): the inputs are re-wrapped as `Chain` objects only after
        ##  the projections above -- presumably for parallel processing; confirm
        from ostap.trees.trees import Chain
        self.__signal = Chain(signal)
        self.__background = Chain(background)

        ## book the trainers
        self.__trainers = ()
        self.__weights_files = []
        self.__class_files = []
        self.__output_files = []
        self.__tar_file = None
        self.__log_file = None