Пример #1
0
def marginal(accepted,
             C_limits,
             num_bin_kde,
             num_bin_raw,
             folder,
             mirror=False):
    """Generate marginal and 2D pdfs from accepted samples and save them.

    The raw (histogram) step is optional; the smooth (KDE) step always runs.
    All results are written into ``folder``.

    :param accepted: accepted parameters
    :param C_limits: parameter bounds; column 0 and column 1 are passed to
        the KDE helpers as the two ends of the range
    :param num_bin_kde: number of points in Nd KDE
    :param num_bin_raw: number of bins in Nd histogram (per dimension);
        a falsy value skips the raw joint/marginal pdf step
    :param folder: path to output files (created if it does not exist)
    :param mirror: (boolean) if True, mirror the data and use the scipy
        Gaussian KDE; otherwise use the FFT-based KDE on the data as is
    """
    # exist_ok=True replaces the isdir()/makedirs() pair, which could fail
    # if another process created the folder between the two calls.
    os.makedirs(folder, exist_ok=True)
    ##############################################################################
    if num_bin_raw:
        logging.info(
            '2D raw marginals with {} bins per dimension'.format(num_bin_raw))
        joint_hist, C_final_joint = pp.calc_raw_joint_pdf(
            accepted, num_bin_raw, C_limits)
        # The joint histogram itself is not used here; release it right away.
        del joint_hist
        np.savetxt(os.path.join(folder, 'C_final_joint{}'.format(num_bin_raw)),
                   C_final_joint)
        pp.calc_marginal_pdf_raw(accepted, num_bin_raw, C_limits, folder)
    ##############################################################################
    logging.info(
        '2D smooth marginals with {} bins per dimension'.format(num_bin_kde))
    left, right = C_limits[:, 0], C_limits[:, 1]
    if mirror:
        mirrored_data, _ = pp.mirror_data_for_kde(accepted, left, right)
        print(
            f"{len(mirrored_data) - len(accepted)} points were added to {len(accepted)} points"
        )
        Z = gaussian_kde_scipy(mirrored_data, left, right, num_bin_kde)
    else:
        Z = kdepy_fftkde(accepted, left, right, num_bin_kde)
    C_final_smooth = find_MAP_kde(Z, left, right)
    np.savetxt(os.path.join(folder, 'C_final_smooth' + str(num_bin_kde)),
               C_final_smooth)
    logging.info(
        'Estimated parameters from joint pdf: {}'.format(C_final_smooth))
    ##############################################################################
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
    pp.calc_marginal_pdf_smooth(Z, num_bin_kde, C_limits, folder)
    pp.calc_conditional_pdf_smooth(Z, folder)
    make_pdfs.make_1d_pdf(accepted, num_bin_raw, C_limits, num_bin_kde, folder)
Пример #2
0
    def test_kde2d_symmetric(self):
        """Check that the scipy and KDEpy estimators agree on a 2D sample.

        The sample is drawn from an isotropic Gaussian centred at (3, 3);
        both the normalized densities and the MAP estimates are compared.
        """
        n_bins = 100
        lower, upper = [0, 0], [6, 6]
        covariance = [[0.8, 0], [0, 0.8]]
        sample = np.random.multivariate_normal((3, 3), covariance, 100)
        # The grid is not inspected below; the call is kept as in the
        # original test so that grid construction is still exercised.
        _grid, _ = kde.grid_for_kde(lower, upper, n_bins)

        pdf_scipy = kde.gaussian_kde_scipy(sample, lower, upper, n_bins)
        pdf_kdepy = kde.kdepy_fftkde(sample, lower, upper, n_bins)

        # Rescale both densities so that their entries sum up to 1
        pdf_scipy = pdf_scipy / np.sum(pdf_scipy)
        pdf_kdepy = pdf_kdepy / np.sum(pdf_kdepy)

        # Locate the MAP estimate of each density
        map_scipy = kde.find_MAP_kde(pdf_scipy, lower, upper)
        map_kdepy = kde.find_MAP_kde(pdf_kdepy, lower, upper)

        np.testing.assert_array_almost_equal(pdf_scipy, pdf_kdepy, decimal=4)
        np.testing.assert_array_almost_equal(map_scipy, map_kdepy)
Пример #3
0
def update_prior(S_init, C_limits, num_bin_update):
    """Update the prior from the parameters accepted during calibration.

    A KDE of the accepted parameters is computed within ``C_limits``, its
    MAP estimate is logged, both are written to ``g.path['calibration']``
    ('prior.npz' and 'C_final_smooth'), and an interpolator over the KDE is
    stored in ``g.prior_interpolator``.

    :param S_init: accepted samples from calibration; only the first
        ``len(C_limits)`` columns are used as parameters
    :param C_limits: parameter bounds, one row per parameter; column 0 and
        column 1 are the two ends of the range passed to the KDE
    :param num_bin_update: number of bins per dimension for the KDE; the
        interpolation grid has ``num_bin_update + 1`` points per dimension
    :return: None
    """
    N_params = len(C_limits)
    left, right = C_limits[:, 0], C_limits[:, 1]
    prior = kde.kdepy_fftkde(data=np.array(S_init)[:, :N_params],
                             a=left,
                             b=right,
                             num_bin_joint=num_bin_update)
    map_calibration = kde.find_MAP_kde(prior, left, right)
    logging.info('Estimated parameter after calibration step is {}'.format(
        map_calibration))
    np.savez(os.path.join(g.path['calibration'], 'prior.npz'), Z=prior)
    np.savetxt(os.path.join(g.path['calibration'], 'C_final_smooth'),
               map_calibration)
    # Build the interpolation grid from the same bounds the KDE was evaluated
    # on. The previous code iterated over the global g.C_limits here, which
    # silently misplaces the prior (or fails on a size mismatch) whenever the
    # global differs from the C_limits argument.
    prior_grid = tuple(
        np.linspace(lo, hi, num_bin_update + 1)
        for lo, hi in zip(left, right))
    g.prior_interpolator = RegularGridInterpolator(prior_grid,
                                                   prior,
                                                   bounds_error=False)
Пример #4
0
    def test_kde1d(self):
        """Check both KDE estimators on a 1D two-component Gaussian mixture.

        The sample mixes 1600 draws from N(-1, 1) with 400 draws from
        N(1, 0.3). The two estimated densities must agree with each other,
        and both MAP estimates must match the mode of the true mixture pdf
        on the evaluation grid.
        """
        n_bins = 100
        wide, narrow = norm(-1, 1.), norm(1, 0.3)
        sample = np.concatenate([wide.rvs(1600), narrow.rvs(400)])
        sample = sample.reshape((-1, 1))
        lower, upper = [-4.5], [3.5]
        x_grid = np.linspace(lower[0], upper[0], n_bins + 1)
        pdf_true = 0.8 * wide.pdf(x_grid) + 0.2 * narrow.pdf(x_grid)
        true_mode = x_grid[np.argmax(pdf_true)]

        pdf_scipy = kde.gaussian_kde_scipy(sample, lower, upper, n_bins)
        pdf_kdepy = kde.kdepy_fftkde(sample, lower, upper, n_bins)

        # Rescale so that each density integrates to 1 over the grid
        pdf_scipy = pdf_scipy / np.trapz(pdf_scipy, x=x_grid)
        pdf_kdepy = pdf_kdepy / np.trapz(pdf_kdepy, x=x_grid)

        # Locate the MAP estimate of each density
        map_scipy = kde.find_MAP_kde(pdf_scipy, lower, upper)[0]
        map_kdepy = kde.find_MAP_kde(pdf_kdepy, lower, upper)[0]

        np.testing.assert_array_almost_equal(pdf_scipy, pdf_kdepy, decimal=3)
        np.testing.assert_array_almost_equal(map_scipy, true_mode)
        np.testing.assert_array_almost_equal(map_kdepy, true_mode)
        np.testing.assert_array_almost_equal(map_scipy, map_kdepy)
Пример #5
0
def _save_regression_kde(new_samples, N_params, folder, num_bin_kde_reg=20):
    """Save limits, KDE and MAP estimate of regression-adjusted samples.

    Writes 'reg_limits', 'C_final_smooth' and 'Z.npz' into ``folder``.

    :param new_samples: adjusted samples, one column per parameter
    :param N_params: number of parameter columns to compute limits for
    :param folder: existing output folder
    :param num_bin_kde_reg: number of KDE bins per dimension
    """
    limits = np.empty((N_params, 2))
    for i in range(N_params):
        limits[i, 0] = np.min(new_samples[:, i])
        limits[i, 1] = np.max(new_samples[:, i])
        # Widen a degenerate range so the KDE grid is not collapsed.
        if limits[i, 1] - limits[i, 0] < 1e-8:
            logging.warning('too small new range')
            limits[i, 0] -= 0.001
            limits[i, 1] += 0.001
    print('new limits = ', limits)
    np.savetxt(os.path.join(folder, 'reg_limits'), limits)
    logging.info('2D smooth marginals with {} bins per dimension'.format(
        num_bin_kde_reg))
    Z = kdepy_fftkde(new_samples, [limits[:, 0]], [limits[:, 1]],
                     num_bin_kde_reg)
    C_final_smooth = find_MAP_kde(Z, [limits[:, 0]], [limits[:, 1]])
    grid = np.linspace(limits[0, 0] - 1e-10, limits[0, 1] + 1e-10,
                       num_bin_kde_reg + 1)
    np.savetxt(os.path.join(folder, 'C_final_smooth'), C_final_smooth[0])
    logging.info('Estimated parameters from joint pdf: {}'.format(
        C_final_smooth[0]))
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z, grid=grid)


def main(args):
    """Postprocess classic ABC output for a single parameter (N_params = 1).

    Loads every 'classic_abc*.npz' file from the output folder, then for each
    fraction in ``x_list``:

    * classic ABC: computes the tolerance with ``define_eps``, keeps the
      samples whose distance is below it, and saves the raw histogram, the
      KDE and the MAP estimate;
    * regression: takes that fraction of the samples closest to the data and
      runs ``regression`` twice (against the distance, and against the full
      summary statistics minus the truth), saving the KDE of each result.

    :param args: argument list in ``sys.argv`` style; ``args[1]``, if
        present, is the path to the YAML parameter file
    :raises FileNotFoundError: if no 'classic_abc*.npz' file is found
    """
    # Initialization
    if len(args) > 1:
        input_path = args[1]
    else:
        input_path = os.path.join('../rans_ode', 'params.yml')

    # NOTE(review): yaml.load() without a Loader is deprecated and is rejected
    # by recent PyYAML releases; safe_load only builds plain Python objects.
    # If the parameter file relies on Python-specific tags, pass an explicit
    # Loader to yaml.load instead.
    with open(input_path, 'r') as params_file:
        params = yaml.safe_load(params_file)

    ### Paths
    path = {
        'output': os.path.join('../', 'output/'),
        'valid_data': '../rans_ode/valid_data/'
    }

    print(path)
    logging.basicConfig(format="%(levelname)s: %(name)s:  %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess0005')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)

    logging.info('\n############# POSTPROCESSING ############')
    x_list = [0.005, 0.01, 0.03, 0.05, 0.1, 0.3]
    num_bin_kde = 100
    num_bin_raw = 20
    C_limits = np.loadtxt(os.path.join(path['output'], 'C_limits_init'))
    N_params = 1
    # glob.glob is the public equivalent of the private glob.glob1 helper and
    # already returns the paths joined with the folder.
    files = glob.glob(os.path.join(path['output'], "classic_abc*.npz"))
    if not files:
        raise FileNotFoundError(
            'no classic_abc*.npz files found in {}'.format(path['output']))

    accepted = np.empty((0, N_params))
    dist = np.empty((0, 1))
    with np.load(files[0]) as first_npz:
        sum_stat = np.empty((0, len(first_npz['sumstat'][0])))
    logging.info('Loading data')
    for file_path in files:
        logging.debug('loading {}'.format(file_path))
        # Open each archive once and close it when done, instead of calling
        # np.load() separately for every array.
        with np.load(file_path) as npz:
            accepted = np.vstack((accepted, npz['C'][:, :N_params]))
            sum_stat = np.vstack((sum_stat, npz['sumstat']))
            dist = np.vstack((dist, npz['dist'].reshape((-1, 1))))
    data = np.hstack((accepted, dist)).tolist()
    np.savetxt(os.path.join(path['output'], '1d_dist_scatter'), data)

    logging.info('\n############# Classic ABC ############')
    for x in x_list:
        logging.info('\n')
        folder = os.path.join(path['output'], 'x_{}'.format(x * 100))
        os.makedirs(folder, exist_ok=True)
        print(folder)
        print('min dist = ', np.min(dist))
        eps = define_eps(data, x)
        np.savetxt(os.path.join(folder, 'eps'), [eps])
        # Row indices of the samples whose distance is below the tolerance.
        accepted_rows = np.where(dist < eps)[0]
        abc_accepted = accepted[accepted_rows]
        logging.info('x = {}, eps = {}, N accepted = {} (total {})'.format(
            x, eps, len(abc_accepted), len(dist)))
        print(sum_stat.shape, sum_stat[accepted_rows, :].shape)
        np.savez(os.path.join(path['output'], '1d_dist_scatter_{}'.format(x)),
                 C=abc_accepted,
                 dist=dist[accepted_rows].reshape((-1, 1)),
                 sumstat=sum_stat[accepted_rows, :])
        ##############################################################################
        logging.info(
            '1D raw histogram with {} bins per dimension'.format(num_bin_raw))
        # Distinct names keep the loop variable x (the fraction) intact.
        hist_x, hist_y = utils.pdf_from_array_with_x(abc_accepted,
                                                     bins=num_bin_raw,
                                                     range=C_limits)
        np.savetxt(os.path.join(folder, 'histogram'), [hist_x, hist_y])
        np.savetxt(os.path.join(folder, 'C_final_raw{}'.format(num_bin_raw)),
                   [hist_x[np.argmax(hist_y)]])
        ##############################################################################
        logging.info('2D smooth marginals with {} bins per dimension'.format(
            num_bin_kde))
        Z = kdepy_fftkde(abc_accepted, [C_limits[0]], [C_limits[1]],
                         num_bin_kde)
        C_final_smooth = find_MAP_kde(Z, C_limits[0], C_limits[1])
        grid = np.linspace(C_limits[0] - 1e-10, C_limits[1] + 1e-10,
                           num_bin_kde + 1)
        np.savetxt(os.path.join(folder, 'C_final_smooth'), C_final_smooth)
        logging.info(
            'Estimated parameters from joint pdf: {}'.format(C_final_smooth))
        np.savez(os.path.join(folder, 'Z.npz'), Z=Z, grid=grid)

    ####################################################################################################################
    logging.info('\n############# Regression ############')
    path['regression_dist'] = os.path.join(path['output'], 'regression_dist')
    path['regression_full'] = os.path.join(path['output'], 'regression_full')
    os.makedirs(path['regression_dist'], exist_ok=True)
    os.makedirs(path['regression_full'], exist_ok=True)
    Truth = sumstat.TruthData(valid_folder=path['valid_data'],
                              case=params['case'])
    # Sort everything by distance so that a leading slice holds the samples
    # closest to the data.
    ind = np.argsort(dist[:, 0])
    accepted = accepted[ind]
    sum_stat = sum_stat[ind]
    dist = dist[ind]
    for x in x_list:
        logging.info('\n')
        n = int(x * len(accepted))
        print('{} samples are taken for regression ({}% of {})'.format(
            n, x * 100, len(accepted)))
        samples = accepted[:n, :N_params]
        dist_reg = dist[:n, -1].reshape((-1, 1))
        #########################################################################
        logging.info('Regression with distance')
        folder = os.path.join(path['regression_dist'], 'x_{}'.format(x * 100))
        os.makedirs(folder, exist_ok=True)
        new_samples, _ = regression(samples, dist_reg, dist_reg, 1, folder)
        _save_regression_kde(new_samples, N_params, folder)
        ##########################################################################
        logging.info('Regression with full summary statistics')
        folder = os.path.join(path['regression_full'], 'x_{}'.format(x * 100))
        os.makedirs(folder, exist_ok=True)
        new_samples, _ = regression(samples, sum_stat[:n] - Truth.sumstat_true,
                                    dist_reg, 1, folder)
        _save_regression_kde(new_samples, N_params, folder)
    logging.info('\n#############Done############')
Пример #6
0
def main():
    """Postprocess ABC output with distance-based regression.

    Loads distances and parameters from every 'classic_abc*.npz' file in the
    output folder, sorts them by distance, and for each fraction in
    ``x_list`` runs ``regression`` on that fraction of the closest samples.
    For each fraction the new limits, the KDE, its MAP estimate and the
    marginal/conditional pdfs are written to
    '<output>/regression_dist/x_<percent>'.

    :raises NotImplementedError: if ``regression_type`` lists a type other
        than 'regression_dist'
    """
    path = {
        'output': os.path.join('../', 'rans_output/'),
        'valid_data': '../rans_ode/valid_data/'
    }
    print(path)
    logging.basicConfig(format="%(levelname)s: %(name)s:  %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess0005')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)

    logging.info('\n############# POSTPROCESSING ############')
    x_list = [0.3, 0.1, 0.05, 0.03, 0.01]
    num_bin_kde = 20
    C_limits = np.loadtxt(os.path.join(path['output'], 'C_limits_init'))
    # A single parameter is stored as a 1D pair of limits.
    N_params = 1 if np.ndim(C_limits) < 2 else len(C_limits)
    # glob.glob is the public equivalent of the private glob.glob1 helper and
    # already returns the paths joined with the folder.
    files = glob.glob(os.path.join(path['output'], "classic_abc*.npz"))

    logging.info('Loading data')

    dist_unsorted = load_dist(files)
    # Sort by distance so that a leading slice holds the closest samples.
    ind = np.argsort(dist_unsorted[:, 0])
    samples = load_c(files, N_params)[ind]
    N_total, _ = samples.shape
    dist = dist_unsorted[ind]
    ####################################################################################################################
    logging.info('\n############# Regression ############')
    regression_type = ['regression_dist']
    for reg_type in regression_type:
        path[reg_type] = os.path.join(path['output'], reg_type)
        os.makedirs(path[reg_type], exist_ok=True)
        for x in x_list:
            n = int(x * N_total)
            folder = os.path.join(path[reg_type], 'x_{}'.format(x * 100))
            os.makedirs(folder, exist_ok=True)
            logging.info('\n')
            logging.info('Regression {}'.format(reg_type))
            logging.info(
                '{} samples are taken for regression ({}% of {})'.format(
                    n, x * 100, N_total))
            # Slice into fresh names: overwriting samples/dist with their own
            # truncation only worked because x_list is in descending order.
            samples_x = samples[:n, :N_params]
            dist_x = dist[:n]
            ##########################################################################
            if reg_type == 'regression_dist':
                new_samples, solution = regression(samples_x,
                                                   dist_x,
                                                   dist_x,
                                                   x=1,
                                                   folder=folder)
            else:
                raise NotImplementedError(
                    'unsupported regression type: {}'.format(reg_type))
            limits = new_limits(new_samples, N_params)
            np.savetxt(os.path.join(folder, 'reg_limits'), limits)
            Z = kdepy_fftkde(new_samples, limits[:, 0], limits[:, 1],
                             num_bin_kde)
            # The MAP must be located on the grid Z was evaluated on, i.e. the
            # regression limits, not the initial C_limits (which are also 1D,
            # and so not indexable by column, for a single parameter).
            C_final_smooth = find_MAP_kde(Z, limits[:, 0], limits[:, 1])
            logging.info('Estimated parameters from joint pdf: {}'.format(
                C_final_smooth))
            np.savetxt(
                os.path.join(folder, 'C_final_smooth' + str(num_bin_kde)),
                C_final_smooth)
            np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
            pp.calc_marginal_pdf_smooth(Z, num_bin_kde, limits, folder)
            pp.calc_conditional_pdf_smooth(Z, folder)
            # Release the KDE before the next iteration allocates a new one.
            del Z
    logging.info('\n#############Done############')
Пример #7
0
def main(args):
    """Postprocess the output of MCMC-ABC chains.

    Loads every 'chain<i>_*.npz' file of each chain from the output folder,
    stacks the accepted parameters, summary statistics and distances, and
    writes the raw joint/marginal pdfs, the KDE, its MAP estimate and the
    smooth marginal/conditional pdfs into '<output>/chains'.

    :param args: argument list in ``sys.argv`` style; ``args[1]``, if
        present, is the path to the YAML parameter file
    :raises FileNotFoundError: if no chain file is found
    """
    # Initialization
    if len(args) > 1:
        input_path = args[1]
    else:
        input_path = os.path.join('../rans_output/', 'params.yml')

    # NOTE(review): yaml.load() without a Loader is deprecated and is rejected
    # by recent PyYAML releases; safe_load only builds plain Python objects.
    # If the parameter file relies on Python-specific tags, pass an explicit
    # Loader to yaml.load instead.
    with open(input_path, 'r') as params_file:
        input = yaml.safe_load(params_file)

    ### Paths
    basefolder = '../'
    path = {
        'output': os.path.join(basefolder, 'rans_output/'),
        'plots': os.path.join(basefolder, 'rans_plots/'),
        'valid_data': os.path.join(basefolder, 'rans_ode', 'valid_data')
    }
    path['calibration'] = os.path.join(path['output'], 'calibration/')
    N_chains = input['parallel_threads']
    logging.basicConfig(format="%(levelname)s: %(name)s:  %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)

    logging.info('\n############# POSTPROCESSING CHAINS ############')
    C_limits = np.loadtxt(os.path.join(path['calibration'], 'C_limits'))
    N_params = len(C_limits)
    # Collect the paths in a plain list: stacking them onto a float
    # np.empty(0) array mixed numeric and string dtypes. glob.glob is the
    # public equivalent of the private glob.glob1 helper and already returns
    # the paths joined with the folder.
    files = []
    for chain in range(N_chains):
        files.extend(
            glob.glob(
                os.path.join(path['output'], "chain{}_*.npz".format(chain))))
    if not files:
        raise FileNotFoundError('no chain*_*.npz files found in {}'.format(
            path['output']))
    accepted = np.empty((0, N_params))
    dist = np.empty((0, 1))
    with np.load(files[0]) as first_npz:
        sum_stat = np.empty((0, len(first_npz['sumstat'][0])))
    logging.info('Loading data')
    for file in files:
        logging.debug('loading {}'.format(file))
        # Open each archive once and close it when done, instead of calling
        # np.load() separately for every array.
        with np.load(file) as npz:
            accepted = np.vstack((accepted, npz['C']))
            sum_stat = np.vstack((sum_stat, npz['sumstat']))
            dist = np.vstack((dist, npz['dist'].reshape((-1, 1))))
    print(accepted.shape, sum_stat.shape, dist.shape)
    logging.info('\n')
    logging.info(
        '\n############# MCMC-ABC ({} chains) ############'.format(N_chains))
    folder = os.path.join(path['output'], 'chains')
    os.makedirs(folder, exist_ok=True)
    num_bin_kde = 50
    num_bin_raw = 20
    ##############################################################################
    logging.info(
        '2D raw marginals with {} bins per dimension'.format(num_bin_raw))
    H, C_final_joint = pp.calc_raw_joint_pdf(accepted, num_bin_raw, C_limits)
    np.savetxt(os.path.join(folder, 'C_final_joint{}'.format(num_bin_raw)),
               C_final_joint)
    pp.calc_marginal_pdf_raw(accepted, num_bin_raw, C_limits, folder)
    ##############################################################################
    logging.info(
        '2D smooth marginals with {} bins per dimension'.format(num_bin_kde))
    Z = kdepy_fftkde(accepted, C_limits[:, 0], C_limits[:, 1], num_bin_kde)
    C_final_smooth = find_MAP_kde(Z, C_limits[:, 0], C_limits[:, 1])
    np.savetxt(os.path.join(folder, 'C_final_smooth' + str(num_bin_kde)),
               C_final_smooth)
    logging.info(
        'Estimated parameters from joint pdf: {}'.format(C_final_smooth))
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
    pp.calc_marginal_pdf_smooth(Z, num_bin_kde, C_limits, folder)
    pp.calc_conditional_pdf_smooth(Z, folder)