Пример #1
0
    def process(self, jobid, nToys):
        """Run one sub-job of the toy study.

        Calls `Toys.make_toys2` for `nToys` pseudoexperiments using the
        settings stored on this task, keeps the `(results, stats)` pair in
        `self.the_output` and returns `self.results()`.

        - jobid : sub-job identifier, used here only in the assertion message
        - nToys : number of pseudoexperiments for this sub-job (positive integer)
        """

        import ROOT
        from ostap.logger.logger import logWarning

        ## none of these modules is referenced by name below;
        ## presumably they are imported for their side effects - TODO confirm
        with logWarning():
            import ostap.core.pyrouts
            import ostap.fitting.roofit
            import ostap.fitting.dataset
            import ostap.fitting.roofitresult
            import ostap.fitting.variables

        from ostap.core.ostap_types import integer_types
        assert isinstance(nToys, integer_types) and 0 < nToys, \
            'Jobid %s: Invalid "nToys" argument %s/%s' % (jobid, nToys, type(nToys))

        import ostap.fitting.toys as Toys

        ## everything except `nToys` comes from the task configuration
        toy_args = dict(
            gen_pdf=self.gen_pdf,
            fit_pdf=self.fit_pdf,
            nToys=nToys,
            data=self.data,
            gen_config=self.gen_config,
            fit_config=self.fit_config,
            gen_pars=self.gen_pars,
            fit_pars=self.fit_pars,
            more_vars=self.more_vars,
            gen_fun=self.gen_fun,
            fit_fun=self.fit_fun,
            accept_fun=self.accept_fun,
            silent=self.silent,
            progress=self.progress,
        )
        toy_results, toy_stats = Toys.make_toys2(**toy_args)

        ## keep the outcome of this sub-job on the task
        self.the_output = (toy_results, toy_stats)

        return self.results()
Пример #2
0
def test_significance_toys():
    """Toy study for the significance of the signal

    - generate pseudoexperiments using the background-only hypothesis
    - fit each pseudoexperiment with the signal+background hypothesis
    - collect the fit results
    - fill and show the distribution of the fitted `S`
    """

    logger = getLogger('test_significance_toys')

    ## background-only hypothesis, used for generation
    bkg_only = Models.Bkg_pdf("BKG", xvar=mass, power=0, tau=0)

    ## signal component with fixed mean and sigma
    signal = Models.Gauss_pdf('S', xvar=mass, mean=0.5, sigma=0.1)
    signal.mean.fix(0.4)
    signal.sigma.fix(0.1)

    ## signal + background hypothesis, used for fitting
    model = Models.Fit1D(signal=signal, background=1)
    model.background.tau.fix(0)

    generation = {'nEvents': 100, 'sample': True}
    fitting = {'silent': True}
    gen_start = {'tau_BKG': 0.}  ## initial values for generation
    fit_start = {'B': 100, 'S': 10, 'phi0_Bkg_S': 0.0}  ## initial fit values

    results, stats = Toys.make_toys2(gen_pdf=bkg_only,
                                     fit_pdf=model,
                                     nToys=1000,
                                     data=[mass],
                                     gen_config=generation,
                                     fit_config=fitting,
                                     gen_pars=gen_start,
                                     fit_pars=fit_start,
                                     silent=True,
                                     progress=True)

    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## distribution of the fitted `S`
    histo = ROOT.TH1F(hID(), '#S', 60, 0, 60)
    for value in results['S']:
        histo.Fill(value)

    ## show and dump the (single) histogram
    histo.draw()
    logger.info("%s  :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
    time.sleep(1)
Пример #3
0
def test_toys2():
    """Perform a toy study for possible fit bias and correct uncertainty evaluation
    - generate `nToys` pseudoexperiments with some PDF `gen_pdf`
    - fit each experiment with the PDF `fit_pdf`
    - store fit results
    - fill distributions of fit results
    """

    logger = getLogger('test_toys2')

    ## The fit results are read back below as 'mean_FG'/'sigma_FG', so the
    ## initial fit values must be addressed by the same parameter names
    ## (previously 'mean_GF'/'sigma_GF', which do not match).
    ## NOTE(review): `gen_gauss`/`fit_gauss` are defined outside this block -
    ## confirm their parameter names are '*_GG' and '*_FG' respectively.
    results, stats = Toys.make_toys2(gen_pdf=gen_gauss,
                                     fit_pdf=fit_gauss,
                                     nToys=1000,
                                     data=[mass],
                                     gen_config={
                                         'nEvents': 200,
                                         'sample': True
                                     },
                                     fit_config={'silent': True},
                                     gen_pars={  ## initial values for generation
                                         'mean_GG': 0.4,
                                         'sigma_GG': 0.1
                                     },
                                     fit_pars={  ## initial values for fit
                                         'mean_FG': 0.4,
                                         'sigma_FG': 0.1
                                     },
                                     silent=True,
                                     progress=True)

    for p in stats:
        logger.info("Toys: %-20s : %s" % (p, stats[p]))

    ## make histos

    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 50, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 50, 0.05, 0.15)

    for r in results['mean_FG']:
        h_mean.Fill(r)
    for r in results['sigma_FG']:
        h_sigma.Fill(r)

    ## show and dump each histogram, pausing for one second after each
    for h in (h_mean, h_sigma):

        h.draw()
        logger.info("%s  :\n%s" % (h.GetTitle(), h.dump(30, 10)))
        time.sleep(1)
Пример #4
0
def parallel_toys2(
        gen_pdf,  ## PDF to generate toys
        fit_pdf,  ## PDF to fit toys
        nToys,  ## total number of toys
        nSplit,  ## split into `nSplit` subjobs
        data,  ## template for dataset/variables
        gen_config,  ## parameters for `pdf.generate`
        fit_config={},  ## parameters for `pdf.fitTo`
        gen_pars={},
        fit_pars={},
        more_vars={},
        gen_fun=None,  ## generator function ( pdf , varset  , **gen_config )
        fit_fun=None,  ## fit       function ( pdf , dataset , **fit_config )
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset     )
        silent=True,
        progress=False,
        **kwargs):
    """Make `nToys` pseudoexperiments, splitting them into `nSplit` subjobs
    to be executed in parallel

    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( gen_pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( fit_pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result  , fit_pdf , dataset ) : continue
    >>> .... < collect statistics here >

    For each experiment:

    1. generate a dataset using `gen_pdf` with the variables specified
    in `data` and the configuration specified via `gen_config`;
    for each generation the parameters of the PDF are reset
    to their initial values and to the values from `gen_pars`

    2. fit the generated dataset with `fit_pdf` using the configuration
    specified via `fit_config`

    - gen_pdf    PDF to be used for generation
    - fit_pdf    PDF to be used for fitting
    - nToys      total number of pseudoexperiments to generate (positive integer)
    - nSplit     split total number of pseudoexperiments into `nSplit` subjobs
                 (positive integer); for `nSplit == 1` the toys are run directly
                 via `Toys.make_toys2`, without the work manager
    - data       dataset, or list of variables/variable names, to be used
                 for dataset generation
    - gen_config configuration of `pdf.generate`, must contain `nEvents`
    - fit_config configuration of `pdf.fitTo`
    - gen_pars   redefine these parameters for generation of each pseudoexperiment
    - fit_pars   redefine these parameters for fitting of each pseudoexperiment
    - more_vars  dictionary of functions to define the additional results
    - gen_fun    generator function ( pdf , varset  , **gen_config )
    - fit_fun    fit       function ( pdf , dataset , **fit_config )
    - accept_fun accept    function ( fit-result, pdf, dataset     )
    - silent     silent toys?
    - progress   show progress bar?
    - kwargs     forwarded to `WorkManager`

    It returns a dictionary with fit results for the toys and a dictionary of statistics

    >>> gen_pdf = ...
    >>> fit_pdf = ...
    >>> results, stats = parallel_toys2 (
    ...    gen_pdf    = gen_pdf     , ## PDF  to generate toys
    ...    fit_pdf    = fit_pdf     , ## PDF  to fit toys
    ...    nToys      = 100000      , ## total number of toys
    ...    nSplit     = 100         , ## split them into `nSplit` subjobs
    ...    data       = [ 'mass' ]  , ## variables in dataset
    ...    gen_config = { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...    fit_config = { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...    gen_pars   = { 'mean'  : 0.0 , 'sigma'  : 1.0 } , ## parameters to use for generation
    ...    fit_pars   = { 'meanG' : 0.0 , 'sigmaG' : 1.0 }   ## parameters to use for fitting
    ...   )

    Derived parameters can also be retrieved via the `more_vars` argument:
    >>> ratio    = lambda res,pdf : res.ratio('x','y')
    >>> more_vars = { 'Ratio' : ratio }
    >>> r,  s = parallel_toys2 ( .... , more_vars = more_vars , ... )

    Parallelization is controlled by two arguments
    - `ncpus` :  number of local cpus to use, default is `'autodetect'`,
    that means all local processors
    - `ppservers`:  list of servers to be used (for parallel python)

    Raises `AssertionError` for missing `nEvents` in `gen_config` or for
    invalid `nToys`/`nSplit`, and `TypeError` for an unsupported entry in `data`.
    """
    ## NOTE: the `{}` defaults are kept for interface compatibility;
    ## within this function they are only forwarded, never modified.

    from ostap.core.ostap_types import integer_types

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## there is no job identifier in this scope: the messages must not refer
    ## to `jobid`, otherwise a failed check ends up as NameError
    assert isinstance ( nToys  , integer_types ) and 0 < nToys  ,\
           'Invalid "nToys"  argument %s/%s' % ( nToys  , type ( nToys  ) )

    assert isinstance ( nSplit , integer_types ) and 0 < nSplit ,\
           'Invalid "nSplit" argument %s/%s' % ( nSplit , type ( nSplit ) )

    import ostap.fitting.toys as Toys

    ## nothing to split: run all toys directly in this process
    if 1 == nSplit:
        return Toys.make_toys2(gen_pdf=gen_pdf,
                               fit_pdf=fit_pdf,
                               nToys=nToys,
                               data=data,
                               gen_config=gen_config,
                               fit_config=fit_config,
                               gen_pars=gen_pars,
                               fit_pars=fit_pars,
                               more_vars=more_vars,
                               gen_fun=gen_fun,
                               fit_fun=fit_fun,
                               accept_fun=accept_fun,
                               silent=silent,
                               progress=progress)

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult

    ## the task receives the variables as a list of names
    params = gen_pdf.params()
    if isinstance(data, ROOT.RooAbsData):
        toy_data = [v.GetName() for v in data.varset()]
    else:
        toy_data = []
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                toy_data.append(v.GetName())
            elif isinstance(v, string_types) and v in params:
                toy_data.append(v)
            else:
                raise TypeError("Invalid type of variable %s/%s" %
                                (v, type(v)))

    gen_init_pars = Toys.vars_transform(gen_pars)
    fit_init_pars = Toys.vars_transform(fit_pars)

    # ========================================================================

    ## number of toys per subjob, and the remainder for one extra subjob
    if nToys <= nSplit:
        ## not more toys than subjobs: one toy per subjob
        nToy = 1
        nSplit = nToys
        nRest = 0
    else:
        nToy, nRest = divmod(nToys, nSplit)

    task = ToysTask2(gen_pdf=gen_pdf,
                     fit_pdf=fit_pdf,
                     data=toy_data,
                     gen_config=gen_config,
                     fit_config=fit_config,
                     gen_pars=gen_init_pars,
                     fit_pars=fit_init_pars,
                     more_vars=more_vars,
                     gen_fun=gen_fun,
                     fit_fun=fit_fun,
                     accept_fun=accept_fun,
                     silent=silent,
                     progress=progress)

    wmgr = WorkManager(silent=False, **kwargs)

    ## one entry per subjob: the number of toys it has to run
    ## (a separate name, so the `data` argument is not rebound)
    jobs = nSplit * [nToy]
    if nRest:
        jobs.append(nRest)

    wmgr.process(task, jobs)

    results, stats = task.results()
    Toys.print_stats(stats, nToys)

    return results, stats