Example #1
 def test_const_results(self):
     pdf = PDF('NNPDF30_nlo_as_0118')
     obs = make_observable('data/higgs/ggh_13tev.root', order=1)
     res = produce_results(pdf, obs)[0]._data.as_matrix()
     previous = [ 31.22150023,  28.9888971 ,  31.3305855 ,  31.35700134,
     30.66074282,  28.2795592 ,  31.50932389,  31.43558106,
     31.28601552,  31.36406676,  30.69818304,  30.86453794,
     31.62708945,  31.6582997 ,  31.11008626,  31.59319744,
     31.52866286,  31.28639456,  31.07427256,  31.41568155,
     31.60044526,  31.36966168,  30.93032191,  33.05392731,
     30.96536474,  31.4317893 ,  30.72436163,  31.22605137,
     31.24243901,  31.21880536,  31.68798426,  31.07646885,
     30.59035729,  31.34800315,  31.21256586,  32.19365014,
     31.58647388,  31.74649083,  31.04692704,  30.2313781 ,
     31.4132261 ,  31.01493873,  31.36481456,  31.26872855,
     31.09018502,  31.51168931,  31.28105268,  30.91102961,
     30.88952926,  31.31389055,  31.22196011,  31.19094082,
     31.03705732,  31.54684624,  31.25938782,  31.10403155,
     30.87467055,  31.83239232,  31.05799506,  31.68930494,
     31.0387522 ,  31.27327964,  31.30919955,  31.51614659,
     31.06029472,  31.43211917,  31.35963882,  32.68355944,
     30.96018759,  30.60001451,  31.25165147,  31.37222175,
     31.19241142,  29.4805518 ,  31.66853784,  30.96975209,
     30.61955437,  31.34032813,  32.0724701 ,  30.9346566 ,
     31.27130234,  31.38000397,  30.83068422,  31.50593514,
     31.22489125,  30.74042979,  31.21191289,  31.4358684 ,
     31.27231758,  31.81871636,  31.10093208,  30.64649533,
     31.40680264,  31.12985161,  31.8378625 ,  31.18650963,
     31.51129514,  31.18025208,  31.87366192,  31.78494394,  31.45846653]
     self.assertTrue(np.allclose(res, previous))
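
The method above is a fragment of a unittest test case, which is why it takes self. A minimal harness for running it might look like the sketch below; the smpdflib.core import path and the class name are assumptions, not part of the original test.

import unittest

import numpy as np  # needed by the pasted test body

# Assumed import path: the project exposes these names from smpdflib.core.
from smpdflib.core import PDF, make_observable, produce_results


class TestProduceResults(unittest.TestCase):
    """Hypothetical container class: paste test_const_results from above here."""


if __name__ == '__main__':
    unittest.main(verbosity=2)
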
Example #2
def execute_config(conf, output_dir, db):

    import pandas as pd
    import smpdflib.core as lib
    import logging

    resultset = []
    for group in conf.actiongroups:
        pdfsets, observables = group['pdfsets'], group['observables']
        resources = group.copy()
        resources.pop('actions')
        # perform convolution
        #TODO Do this better
        if any(requires_result(act) for act in group['actions']):
            results = lib.produce_results(pdfsets, observables, db)
            resultset.append(results)
            data_table = lib.results_table(results)
            summed_table = lib.summed_results_table(results)

            total = pd.concat((data_table, summed_table),
                              ignore_index=True)
            if logging.getLogger().isEnabledFor(logging.DEBUG):
                ...
                #print_results(results)
            resources.update({'results': results, 'data_table': data_table,
                              'total': total, 'summed_table': summed_table})

        if any(requires_correlations(act) for act in group['actions']):
            # Assumes any action needing correlations also required results above,
            # so data_table is already defined at this point.
            pdfcorrlist = lib.correlations(data_table, db=db)
            resources.update({'pdfcorrlist': pdfcorrlist})


        prefix = group['prefix']
        resources.update({'output_dir': output_dir,
                          'prefix': prefix,
                          'pdfsets': pdfsets,
                          'db': db})
        for action, res in do_actions(group['actions'], resources):
            logging.info("Finalized action '%s'." % action)
    return resultset
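
execute_config walks over conf.actiongroups, where each group is a dict providing at least 'pdfsets', 'observables', 'actions' and 'prefix'. A minimal sketch of a compatible configuration object follows; the SimpleNamespace wrapper and every concrete value in it are illustrative assumptions, since real configurations come from the project's own config loader.

from types import SimpleNamespace

# Hedged sketch of the structure execute_config expects.
conf = SimpleNamespace(actiongroups=[
    {
        'pdfsets': [PDF('NNPDF30_nlo_as_0118')],  # assumed PDF spec, as in the other examples
        'observables': [make_observable('data/higgs/ggh_13tev.root', order=1)],  # assumed observable
        'actions': ['exportcsv'],  # assumed action name
        'prefix': 'run1_',
    },
])

# resultset = execute_config(conf, output_dir='output', db=db)
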
Example #3
pdf = PDF("MC900_nlo")
obs = [make_observable(path, order='NLO') for path in itertools.chain(
       sorted(glob.glob("data/z/*.root")),
       sorted(glob.glob("data/w/*.root")),
       sorted(glob.glob("data/higgs/*.root")),
       sorted(glob.glob("data/ttbar/*.root")),
       )]

tolerance = 0.05

thresholds = [0.0, 0.25, 0.5, 0.75, 0.9, 0.99]

if __name__ == '__main__':
    neig = []
    logging.basicConfig(level=logging.INFO)
    db = shelve.open('db/db')

    results = produce_results([pdf], obs, db=db)
    for t in thresholds:
        V, _, desc = get_smpdf_params(pdf, results, smpdf_tolerance=tolerance,
                                      correlation_threshold=t, db=db)
        neig.append(V.shape[1])
        print("For threshold %.2f we get:" % t)
        print(desc)
    with open("thresholdsladder.json", 'w') as f:
        json.dump([thresholds, neig], f)

    db.close()
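
thresholdsladder.json ends up holding the pair [thresholds, neig]. A short follow-up sketch reads it back and plots the eigenvector count against the correlation threshold; the matplotlib part is an assumption, not in the original script.

import json

import matplotlib.pyplot as plt

# Read back the [thresholds, neig] pair written by the script above.
with open("thresholdsladder.json") as f:
    thresholds, neig = json.load(f)

# Number of eigenvectors needed as a function of the correlation threshold.
plt.plot(thresholds, neig, marker='o')
plt.xlabel("correlation threshold")
plt.ylabel("number of eigenvectors")
plt.savefig("thresholdsladder.pdf")
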
Example #4
def get_smpdf_params(
    pdf,
    pdf_results,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):

    first_res = get_smpdf_lincomb(
        pdf, pdf_results, full_grid=full_grid, target_error=smpdf_tolerance, correlation_threshold=correlation_threshold
    )
    norm = first_res.norm
    lincomb = first_res.lincomb
    description = first_res.desc
    vec = first_res.lincomb / norm

    if nonlinear_correction:
        logging.info("Estimating nonlinear correction")
        with tempfile.TemporaryDirectory() as td:
            applwrap.setlhapdfpath(td)
            tempname = str(uuid.uuid1())
            logging.info("Creating temporary PDF %s" % tempname)
            hessian_from_lincomb(pdf, vec, folder=td, set_name=tempname, db=db)
            observables = [r.obs for r in pdf_results]
            temppdf = PDF(tempname)
            # Touch infopath (presumably) to resolve the temporary set's info file
            # while the LHAPDF search path still points at the temp directory.
            temppdf.infopath
            real_results = produce_results(temppdf, observables)
            logging.info("Real results obtained")

        results_to_refine = []
        newtols = []
        for smpdf_res, prior_res in zip(real_results, pdf_results):
            real_error = 1 - smpdf_res.std_error() / prior_res.std_error()
            # Convert to a plain numpy array to sidestep pandas indexing quirks.
            bad_bins = np.array(real_error > smpdf_tolerance, copy=False)
            if bad_bins.any():
                lin_errors = list(first_res.errors[str(smpdf_res.obs)].values())

                newtol = smpdf_tolerance - (real_error[bad_bins] - np.array(lin_errors)[bad_bins])

                impossible = np.argwhere(newtol < 0)
                if len(impossible):
                    raise TooMuchPrecision(smpdf_res.obs, impossible[0] + 1)
                newtols += list(newtol)
                logging.debug("New tolerances for observable %s: %s" % (prior_res.obs, newtol))
                # Create result with the same type as prior, and only
                # bad_bins.
                newres = type(prior_res)(prior_res.obs, prior_res.pdf, prior_res._data.ix[bad_bins])

                results_to_refine.append(newres)
        if results_to_refine:
            logging.info("Calculating eigenvectors to refine")
            ref_res = get_smpdf_lincomb(
                pdf,
                results_to_refine,
                full_grid=full_grid,
                target_error=newtols,
                correlation_threshold=correlation_threshold,
                Rold=first_res.Rold,
            )

            lincomb, description = merge_lincombs(first_res.lincomb, ref_res.lincomb, description, ref_res.desc)

        else:
            logging.info("All results are within tolerance")

    return lincomb, norm, description
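
The function returns the linear combination, its normalisation and a textual description. A hedged usage sketch follows, mirroring the internal call to hessian_from_lincomb; the output folder and set name are placeholders, and pdf, pdf_results and db are assumed to exist as in the other examples.

# Hedged usage sketch for get_smpdf_params.
lincomb, norm, description = get_smpdf_params(pdf, pdf_results,
                                              smpdf_tolerance=0.05, db=db)
# As inside the function itself, the combination is rescaled by its norm
# before being written out as a Hessian PDF set.
hessian_from_lincomb(pdf, lincomb / norm, folder='output',
                     set_name='my_smpdf', db=db)
print(description)
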
Example #5
from smpdflib.plots import plot_bindist

# The db folder must already exist for shelve.open to succeed.
db = shelve.open('db/db')


to_plot = {
    make_observable('data/applgrid/CMSWCHARM-WpCb-eta4.root', order=1): 4,
    make_observable('data/applgrid/APPLgrid-LHCb-Z0-ee_arXiv1212.4260-eta34.root', order=1): 8,
}

pdfs = [PDF('MC900_nnlo', label="legendtext"),
        PDF('CMC100_nnlo', label="other"),
        PDF('MCH_nnlo_100', label="another")
       ]

if __name__ == '__main__':
    for obs, bin in to_plot.items():
        results = produce_results(pdfs, [obs], db=db)

        obs_table = results_table(results)
        for (obs, b), fig in plot_bindist(obs_table, bin, base_pdf=pdfs[0]):
            ax = fig.axes[0]
            ax.set_xlabel("My X label")
            ax.set_ylabel("My Y label")
            ax.set_title("My title")
            path = "%s_bin_%s.pdf" % (obs, b + 1)

            fig.savefig(path)
            plt.close(fig)
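
The to_plot mapping does not have to be written by hand. A small sketch builds one from a glob, with a single (hypothetical) bin index per observable:

import glob

# Hedged sketch: one entry per APPLgrid file found, all pointing at bin 1.
to_plot = {make_observable(path, order=1): 1
           for path in sorted(glob.glob('data/applgrid/*.root'))}
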
Example #6
prior_name = "MC900_nnlo"

prefixes = ['z2tr' + x for x in '0 10 25 50 75 90 99'.split()]

smpdf_names = [prefix + 'smpdf_' + prior_name for prefix in prefixes]

lincoef_paths = ['output/%s_lincomb.csv' % name for name in smpdf_names]

if __name__ == '__main__':
    prior = PDF(prior_name)
    smpdfs = [PDF(name) for name in smpdf_names]
    pdfs = [prior] + smpdfs
    obs = make_observable(obs_name, order='NLO')
    with shelve.open('db/db') as db:
        res_prior, *res_smpdfs = produce_results(pdfs, [obs], db=db)
    coefs = [pd.DataFrame.from_csv(path, sep='\t') for path in lincoef_paths]

    prior_std = res_prior.std_error()

    real_tols = [1 - res.std_error()/prior_std for res in res_smpdfs]

    prior_diffs = (res_prior._all_vals.T - res_prior.central_value).as_matrix().ravel()

    neig = [len(pdf) - 1 for pdf in smpdfs]

    rotated_tols = []
    for path in lincoef_paths:
        coefs = pd.DataFrame.from_csv(path, sep='\t')
        rotated_diffs = np.dot(prior_diffs, coefs)
        rotated_std = la.norm(rotated_diffs)
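
Both here and earlier in the script pd.DataFrame.from_csv is used; that method has been removed from recent pandas releases. The documented replacement keeps the first column as the index (adding parse_dates=True would complete the literal equivalence, but it is irrelevant for a numeric coefficient table):

# Modern-pandas equivalent of pd.DataFrame.from_csv(path, sep='\t').
coefs = pd.read_csv(path, sep='\t', index_col=0)
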