Exemplo n.º 1
0
def create_mc2hessian(pdf, Q, Neig, output_dir, name=None, db=None):
    """Build a Hessian set from ``pdf`` through a compressed linear combination.

    The matrix returned by ``get_X(pdf, Q, reshape=True)`` is reduced with
    ``compress_X`` using ``Neig``; the resulting combination is stored with
    ``save_lincomb`` (together with an ``"input_hash"`` description entry)
    and then, divided by the PDF normalization, handed to
    ``hessian_from_lincomb``.

    Parameters
    ----------
    pdf
        Input PDF object, forwarded to every helper.
    Q
        Forwarded to ``get_X`` and ``mc2h_input_hash`` (presumably the
        energy scale at which the PDF is sampled -- confirm against
        ``get_X``).
    Neig
        Forwarded to ``compress_X`` and ``mc2h_input_hash`` (presumably the
        number of eigenvectors to keep -- confirm against ``compress_X``).
    output_dir
        Folder passed to both ``save_lincomb`` and ``hessian_from_lincomb``.
    name
        Optional set name passed to both writers.
    db
        Optional object forwarded untouched to ``hessian_from_lincomb``.

    Returns
    -------
    Whatever ``hessian_from_lincomb`` returns.
    """
    lincomb = compress_X(get_X(pdf, Q, reshape=True), Neig)
    normalization = _pdf_normalization(pdf)
    input_hash = mc2h_input_hash(pdf, Q, Neig)

    # The un-normalized combination is what gets saved; the normalization is
    # stored alongside it and only applied for the Hessian set below.
    save_lincomb(
        lincomb,
        normalization,
        description={"input_hash": input_hash},
        output_dir=output_dir,
        name=name,
    )

    normalized_lincomb = lincomb / normalization
    return hessian_from_lincomb(
        pdf,
        normalized_lincomb,
        folder=output_dir,
        set_name=name,
        db=db,
    )
Exemplo n.º 2
0
def create_smpdf(
    pdf,
    pdf_results,
    output_dir,
    name,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):
    """Compute an SMPDF linear combination and write it out as a Hessian set.

    The combination, its normalization and a description are obtained from
    ``get_smpdf_params``. The description is completed with
    ``complete_smpdf_description``, the combination is stored with
    ``save_lincomb``, the description is dumped to
    ``<output_dir>/<name>_description.yaml`` and finally the normalized
    combination is passed to ``hessian_from_lincomb``.

    Parameters
    ----------
    pdf
        Prior PDF object, forwarded to the helpers.
    pdf_results
        Results for the prior PDF, forwarded to ``get_smpdf_params`` and
        ``complete_smpdf_description``.
    output_dir
        Folder receiving the saved combination, the YAML description and
        the Hessian set.
    name
        Set name; also the prefix of the description file (must support
        ``name + "_description.yaml"``).
    smpdf_tolerance
        Tolerance forwarded to ``get_smpdf_params`` and recorded as
        ``target_error`` in the description.
    full_grid, correlation_threshold, nonlinear_correction
        Forwarded to ``get_smpdf_params`` (``full_grid`` also to
        ``complete_smpdf_description``).
    db
        Optional object forwarded to ``get_smpdf_params`` and
        ``hessian_from_lincomb``.

    Returns
    -------
    Whatever ``hessian_from_lincomb`` returns.
    """
    smpdf_params = get_smpdf_params(
        pdf,
        pdf_results,
        smpdf_tolerance,
        full_grid=full_grid,
        db=db,
        correlation_threshold=correlation_threshold,
        nonlinear_correction=nonlinear_correction,
    )
    lincomb, norm, raw_description = smpdf_params

    normalized_lincomb = lincomb / norm

    description = complete_smpdf_description(
        raw_description,
        pdf,
        pdf_results,
        full_grid=full_grid,
        target_error=smpdf_tolerance,
    )

    # The description is embedded in the set as one YAML string because
    # LHAPDF does not seem to parse nested structures.
    description_yaml = yaml.dump(description, default_flow_style=False)
    extra_fields = {"smpdf_description": description_yaml}

    save_lincomb(lincomb, norm, description, output_dir, name)

    description_path = osp.join(output_dir, name + "_description.yaml")
    with open(description_path, "w") as stream:
        yaml.dump(description, stream, default_flow_style=False)

    n_eigenvectors = lincomb.shape[1]
    logging.info("Final linear combination has %d eigenvectors" % n_eigenvectors)

    return hessian_from_lincomb(
        pdf,
        normalized_lincomb,
        folder=output_dir,
        set_name=name,
        db=db,
        extra_fields=extra_fields,
    )
Exemplo n.º 3
0
def get_smpdf_params(
    pdf,
    pdf_results,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):
    """Compute the linear combination, normalization and description of an SMPDF.

    A first combination is obtained from ``get_smpdf_lincomb``. When
    ``nonlinear_correction`` is true, that combination is written as a
    temporary Hessian set, the observables are recomputed with it, and for
    every observable the quantity ``1 - std_error(smpdf) / std_error(prior)``
    is compared bin by bin with ``smpdf_tolerance``. Bins above the tolerance
    are refined through a second ``get_smpdf_lincomb`` call with tightened
    tolerances, and both combinations are merged with ``merge_lincombs``.

    Parameters
    ----------
    pdf
        Prior PDF object, forwarded to ``get_smpdf_lincomb`` and
        ``hessian_from_lincomb``.
    pdf_results
        Results computed with the prior PDF. Iterated more than once, so it
        must be a re-iterable collection; each item must provide ``obs``,
        ``pdf``, ``_data`` (a pandas object) and ``std_error()``.
    smpdf_tolerance
        Target tolerance for the first pass and threshold for deciding
        which bins need refinement.
    full_grid
        Forwarded to ``get_smpdf_lincomb``.
    db
        Optional object forwarded to ``hessian_from_lincomb``.
    correlation_threshold
        Forwarded to ``get_smpdf_lincomb``.
    nonlinear_correction
        When false, the first-pass result is returned unchanged.

    Returns
    -------
    tuple
        ``(lincomb, norm, description)``. ``norm`` always comes from the
        first pass; ``lincomb`` and ``description`` are the merged ones when
        a refinement took place.

    Raises
    ------
    TooMuchPrecision
        If a tightened tolerance for some bin becomes negative.
    """
    first_res = get_smpdf_lincomb(
        pdf,
        pdf_results,
        full_grid=full_grid,
        target_error=smpdf_tolerance,
        correlation_threshold=correlation_threshold,
    )
    norm = first_res.norm
    lincomb = first_res.lincomb
    description = first_res.desc

    if not nonlinear_correction:
        return lincomb, norm, description

    logging.info("Estimating nonlinear correction")
    with tempfile.TemporaryDirectory() as td:
        # NOTE(review): the LHAPDF path is left pointing at ``td`` after the
        # directory is removed; confirm callers reset it if they need to.
        applwrap.setlhapdfpath(td)
        tempname = str(uuid.uuid1())
        logging.info("Creating temporary PDF %s", tempname)
        hessian_from_lincomb(pdf, lincomb / norm, folder=td, set_name=tempname, db=db)
        observables = [r.obs for r in pdf_results]
        temppdf = PDF(tempname)
        # Bare attribute access kept on purpose: presumably it resolves (and
        # caches) the set location while the temporary directory still
        # exists -- verify against the PDF class before removing.
        temppdf.infopath
        real_results = produce_results(temppdf, observables)
        logging.info("Real results obtained")

    results_to_refine = []
    newtols = []
    for smpdf_res, prior_res in zip(real_results, pdf_results):
        real_error = 1 - smpdf_res.std_error() / prior_res.std_error()
        # Plain boolean ndarray mask; np.asarray copies only when required
        # (np.array(..., copy=False) raises in NumPy 2 if a copy is needed).
        bad_bins = np.asarray(real_error > smpdf_tolerance)
        if not bad_bins.any():
            continue

        lin_errors = np.array(list(first_res.errors[str(smpdf_res.obs)].values()))

        # Tighten the tolerance by the amount the real error exceeded the
        # linear estimate of the first pass.
        newtol = smpdf_tolerance - (real_error[bad_bins] - lin_errors[bad_bins])

        impossible = np.argwhere(newtol < 0)
        if len(impossible):
            # NOTE(review): the reported position is relative to the
            # bad-bin subset, not to the full list of bins.
            raise TooMuchPrecision(smpdf_res.obs, impossible[0] + 1)
        newtols += list(newtol)
        logging.debug("New tolerances for observable %s: %s", prior_res.obs, newtol)

        # Create a result with the same type as the prior, restricted to the
        # bad bins. ``.loc`` with a boolean mask replaces the ``.ix``
        # indexer, which no longer exists in pandas >= 1.0.
        newres = type(prior_res)(prior_res.obs, prior_res.pdf, prior_res._data.loc[bad_bins])
        results_to_refine.append(newres)

    if not results_to_refine:
        logging.info("All results are within tolerance")
        return lincomb, norm, description

    logging.info("Calculating eigenvectors to refine")
    ref_res = get_smpdf_lincomb(
        pdf,
        results_to_refine,
        full_grid=full_grid,
        target_error=newtols,
        correlation_threshold=correlation_threshold,
        Rold=first_res.Rold,
    )

    lincomb, description = merge_lincombs(first_res.lincomb, ref_res.lincomb, description, ref_res.desc)

    return lincomb, norm, description