Example #1
def run_inference(options, data_path):

    exp, W_test, y_test = set_up_radar_experiment(options, data_path)

    t0 = utils.tic()
    mu_grid = exp.model.predict(X=None, grid=True, verbose=True, tol=1e-2)[0]
    t0f = utils.toc(t0)
    inference_time = utils.toc_report(t0f, tag="InfGP", return_val=True)
    y_predict = (W_test * mu_grid).squeeze()

    # MAE of the mean-only predictor, used to normalize the SMAE below
    mae_mp = compute_mae(np.ones_like(y_test) * np.mean(y_test), y_test)
    mae = compute_mae(y_predict, y_test)
    mse = np.mean((y_predict - y_test) ** 2)

    print("Mae: ", mae)
    print("Smae: ", mae / mae_mp)
    print("Mse: ", mse)
    print("Rmse: ", np.sqrt(mse))
    return inference_time
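# compute_mae is not shown in this snippet; a minimal stand-in consistent
# with how it is called above. The SMAE divides by the MAE of the
# mean-only predictor, so values below 1 beat predicting the mean.
import numpy as np

def compute_mae(y_pred, y_true):
    """Mean absolute error between predictions and targets."""
    return np.mean(np.abs(y_pred - y_true))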
Example #2
def run_logdet_computation(options, data_path, W_path=None):

    ntrials = options.ntrials
    nrank = options.maxiter

    exp, __, __ = set_up_radar_experiment(options, data_path, W_path=W_path)
    sigma = exp.model.noise_covar.noise

    if options.method == utils.MethodName.GSGP:

        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W,
                                   K,
                                   sigma,
                                   dtype=exp.model.WT_times_Y.dtype)

        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A_hat,
                WT=exp.WT,
                trials=ntrials,
                rank=nrank,
                verbose=True,
                dump=options.dump,
                dump_path=options.log_dir)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A_hat,
                WT=exp.WT,
                trials=ntrials,
                tol=options.tol,
                rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError

        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    elif options.method == utils.MethodName.KISSGP:

        t1 = utils.tic()
        W_train = exp.model.W
        K = exp.model.covar_module._inducing_forward()
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)

        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A,
                trials=ntrials,
                rank=nrank,
                verbose=True,
            )[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A, trials=ntrials, tol=options.tol, rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError

        print("Estimated log-det: ", estimate_logdet)

        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    else:
        raise NotImplementedError

    return inference_time
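# logdet_estimate_using_lz_variants is not shown here; below is a minimal
# sketch of the stochastic Lanczos quadrature idea such estimators are
# built on (Hutchinson probe vectors plus Gauss quadrature weights taken
# from the Lanczos tridiagonal matrix). All names are illustrative.
import numpy as np

def slq_logdet(matvec, n, trials=10, rank=30, seed=0):
    """Estimate log det(A) for SPD A given only matrix-vector products."""
    rng = np.random.default_rng(seed)
    total = 0.0
    for _ in range(trials):
        v = rng.choice([-1.0, 1.0], size=n)  # Rademacher probe, norm sqrt(n)
        v /= np.sqrt(n)
        alphas, betas = [], []
        q_prev, q, beta = np.zeros(n), v, 0.0
        for _ in range(rank):  # Lanczos tridiagonalization
            w = matvec(q) - beta * q_prev
            alpha = q @ w
            w -= alpha * q
            alphas.append(alpha)
            beta = np.linalg.norm(w)
            if beta < 1e-12:
                break
            betas.append(beta)
            q_prev, q = q, w / beta
        k = len(alphas)
        T = (np.diag(alphas) + np.diag(betas[:k - 1], 1) +
             np.diag(betas[:k - 1], -1))
        evals, evecs = np.linalg.eigh(T)
        # Gauss quadrature: nodes are the Ritz values, weights the squared
        # first components of the Ritz vectors; clamp guards round-off.
        total += n * np.sum(evecs[0, :] ** 2 * np.log(np.maximum(evals, 1e-300)))
    return total / trials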
Example #3
def run_ski_cov_inf_experiment(exp, options):

    sigma = exp.model.noise_covar.noise

    if options.method == configs.MethodName.KISSGP:

        W_test = exp.model.covar_module(exp.test_x)[0]

        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        probes = W_train * A.kmm_matmat(
            W_test.T.todense())  # Computing over test data points

        cov = bcg(A,
                  probes,
                  tol=options.tol,
                  maxiter=options.maxiter,
                  verbose=True)
        covf = np.dot(probes.T, cov)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    elif options.method == configs.MethodName.GSGP:

        W_test = exp.model.covar_module(exp.test_x)[0]

        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W,
                                   K,
                                   sigma,
                                   dtype=exp.model.WT_times_Y.dtype)
        r0_hat = A_hat.kmm_matmat(W_test.T.todense())

        x_diff = bfcg(A_hat,
                      r0_hat,
                      yty=np.linalg.norm(r0_hat, axis=0)**2,
                      maxiter=options.maxiter,
                      verbose=True,
                      tol=options.tol)
        covf = np.dot(r0_hat.T, x_diff)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    else:
        raise NotImplementedError

    if options.store_ref:
        os.makedirs(options.log_dir, exist_ok=True)
        ref_path = (options.log_dir + "/" +
                    options.data_type.name.lower() + "_ski_dump.pkl")
        with open(ref_path, "wb") as f:
            pickle.dump(covf, f)
        return 0.0, 0.0

    # Computing l2norm
    cov_ref_path = (os.environ['PRJ'] + '/data/refs/' +
                    options.data_type.name.lower() + '_ski_dump.pkl')
    assert os.path.exists(cov_ref_path), \
        cov_ref_path + " doesn't exist. Follow the readme to generate refs."

    with open(cov_ref_path, "rb") as f:
        COV_REF = pickle.load(f)
    l2_norm = np.linalg.norm(covf - COV_REF)
    print("L2norm: ", l2_norm)

    return inference_time, l2_norm
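# bcg is not shown in this snippet; a minimal single-right-hand-side
# conjugate gradients sketch of the solve it performs per probe column
# (names and signature are illustrative):
import numpy as np

def cg_solve(matvec, b, tol=1e-4, maxiter=100):
    """Solve an SPD system A x = b using only matrix-vector products."""
    x = np.zeros_like(b, dtype=float)
    r = b - matvec(x)
    p = r.copy()
    rs = r @ r
    b_norm = np.linalg.norm(b)
    for _ in range(maxiter):
        Ap = matvec(p)
        alpha = rs / (p @ Ap)
        x += alpha * p
        r -= alpha * Ap
        rs_new = r @ r
        if np.sqrt(rs_new) <= tol * b_norm:
            break
        p = r + (rs_new / rs) * p
        rs = rs_new
    return x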
Example #4
def run_approx_cov_inf_experiment(exp, options):

    if exp.config.data_type not in [configs.DatasetType.SOUND, configs.DatasetType.PRECIPITATION]:
        nrank = min(configs.DEFAULT_LANCZOS_RANK, options.maxiter)
        if nrank < options.maxiter:
            print("\nLanczos rank reduced from", options.maxiter, "to", nrank, "\n")
    else:
        nrank = options.maxiter

    sigma = exp.model.noise_covar.noise
    if exp.config.data_type == configs.DatasetType.SOUND:
        max_num_test_vectors = 1000
    else:
        max_num_test_vectors = 50

    if exp.test_x.shape[0] > max_num_test_vectors:
        W_test = exp.model.covar_module(exp.test_x[:max_num_test_vectors, :])[0]
    else:
        W_test = exp.model.covar_module(exp.test_x)[0]

    if options.method == configs.MethodName.KISSGP:

        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)

        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        probes = A.kmm_matmat(W_train.T.todense()).T
        v = np.mean(probes, axis=1)
        Q, T = bsla(A, v, k=nrank)
        T_diag = T.diagonal()
        T_subdiag = T.diagonal(1)
        L_diag, L_subdiag = chol_trid(T_diag, T_subdiag)
        R = A.kmm_matmat(A.WT * Q)
        Rprime = chol_trid_solve(L_diag, L_subdiag, R.T).T

        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    elif options.method == configs.MethodName.GSGP:

        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        probes = fm.Kron(*K).getArray().real if len(K) > 1 else K[0].getArray().real
        v = np.mean(probes, axis=1)

        A_hat = GsGpLinearOperator(exp.model.WT_times_W, K, sigma, dtype=exp.model.WT_times_Y.dtype)
        Q, T = bfsla1(A_hat, v, k=nrank)
        T_diag = T.diagonal()
        T_subdiag = T.diagonal(1)
        L_diag, L_subdiag = chol_trid(T_diag, T_subdiag)
        R = A_hat.kmm_matmat(A_hat.WTW * Q)
        Rprime = chol_trid_solve(L_diag, L_subdiag, R.T).T
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    else:
        raise NotImplementedError

    # Computing l2norm
    cov_ref_path = os.environ['PRJ'] + '/data/refs/' + options.data_type.name.lower() + '_ski_dump.pkl'
    assert os.path.exists(cov_ref_path), \
        cov_ref_path + " doesn't exist. Follow the readme to generate refs."

    with open(cov_ref_path, "rb") as f:
        COV_REF = pickle.load(f)[-1]
    predicted_cov = np.dot(W_test * R, (W_test * Rprime).T)
    l2_norm = np.linalg.norm(predicted_cov - COV_REF)
    print("L2norm: ", l2_norm)

    return inference_time, l2_norm
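# chol_trid / chol_trid_solve are not shown in this snippet; a minimal
# sketch of what a Cholesky factorization of the SPD tridiagonal Lanczos
# matrix T and the matching two-sweep solve presumably look like:
import numpy as np

def chol_trid(diag, subdiag):
    """Lower-bidiagonal Cholesky factor L of an SPD tridiagonal matrix."""
    n = len(diag)
    L_diag, L_sub = np.empty(n), np.empty(n - 1)
    L_diag[0] = np.sqrt(diag[0])
    for i in range(1, n):
        L_sub[i - 1] = subdiag[i - 1] / L_diag[i - 1]
        L_diag[i] = np.sqrt(diag[i] - L_sub[i - 1] ** 2)
    return L_diag, L_sub

def chol_trid_solve(L_diag, L_sub, B):
    """Solve (L L^T) X = B by forward then backward substitution."""
    B = np.asarray(B, dtype=float)
    n = len(L_diag)
    Y = np.empty_like(B)
    Y[0] = B[0] / L_diag[0]
    for i in range(1, n):  # forward sweep: L Y = B
        Y[i] = (B[i] - L_sub[i - 1] * Y[i - 1]) / L_diag[i]
    X = np.empty_like(Y)
    X[n - 1] = Y[n - 1] / L_diag[n - 1]
    for i in range(n - 2, -1, -1):  # backward sweep: L^T X = Y
        X[i] = (Y[i] - L_sub[i] * X[i + 1]) / L_diag[i]
    return X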
Example #5
def run_llk_experiment(exp, options):

    sigma = exp.model.noise_covar.noise
    ntrials = options.ntrials
    nrank = options.maxiter

    if isinstance(exp, KissGpExp):

        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A,
                trials=ntrials,
                rank=nrank,
                verbose=True,
            )[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A, trials=ntrials, tol=options.tol, rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError

        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    elif isinstance(exp, GsGpExp):

        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W,
                                   K,
                                   sigma,
                                   dtype=exp.model.WT_times_Y.dtype)

        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A_hat,
                WT=exp.WT,
                trials=ntrials,
                rank=nrank,
                verbose=True,
                dump=options.dump,
                dump_path=options.log_dir)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A_hat,
                WT=exp.WT,
                trials=ntrials,
                tol=options.tol,
                rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError

        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    else:
        raise NotImplementedError

    return inference_time, estimate_logdet
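# The log-det estimate above is one term of the GP log marginal
# likelihood; a sketch of how it would be assembled (solve_Ay stands for
# A^{-1} y, e.g. obtained with CG; the helper name is illustrative):
import numpy as np

def gp_log_marginal_likelihood(y, solve_Ay, logdet_A):
    """log p(y) = -0.5 * (y^T A^{-1} y + log det A + n log 2*pi),
    with A = K + sigma^2 I the train covariance."""
    n = len(y)
    return -0.5 * (y @ solve_Ay + logdet_A + n * np.log(2 * np.pi))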
Example #6
def pre_process(method, grid_idx, entire_us=False):

    zmax = DEFAULT_Z_MAX
    grid = configs.get_radar_grid(idx=grid_idx)
    num_grid_points = np.prod([item[-1] for item in grid])

    # Read all of the radar scan files from the data directory
    if entire_us:
        data_dirpath = RADAR_DATASET_PATH + "/entire_us"
    else:
        data_dirpath = RADAR_DATASET_PATH + "/ne"
    files = os.listdir(data_dirpath)

    # Creating directory for processed files
    output_data_path = (data_dirpath + "_processed/" +
                        method.name.lower() + "_grid_" + str(grid_idx))
    os.makedirs(output_data_path, exist_ok=True)

    if method == utils.MethodName.GSGP:

        t0 = utils.tic()
        WTW_train, WTy_train, yty_train, n_train, total_nnz = 0, 0, 0, 0, 0
        W_test, y_test, n_test = [], [], 0

        print("\n\nProcessing data ...\n\n")

        print("Reading data ...\n\n")

        for filename in files:
            print('Reading %s' % filename)
            filepath = data_dirpath + "/" + filename.split("/")[-1]
            try:
                print("File name path: ", filepath)
                radar = pyart.io.read_nexrad_archive(filepath)
            except IOError:
                continue  # skip unreadable scans; `radar` would otherwise be stale or undefined

            print('Processing %s' % (radar.metadata['instrument_name']))

            X, y = get_data_poionts(radar, zmax=zmax)

            perm = np.random.permutation(len(X))
            X = X[perm]
            y = y[perm]

            ntrain = int(TRAIN_FRAC * len(X))
            W_train = get_basis(X[:ntrain], grid)
            y_train = y[:ntrain]
            WT_train = W_train.T.tocsr()
            total_nnz += len(W_train.nonzero()[0])

            WTW_train += WT_train * W_train
            WTy_train += WT_train * y_train
            yty_train += y_train.T @ y_train
            n_train += ntrain

            W_test.append(get_basis(X[ntrain:], grid))
            y_test.append(y[ntrain:])
            n_test += len(X) - ntrain

        t0f = utils.toc(t0)
        pre_time = utils.toc_report(t0f, tag="DataGP", return_val=True)

        m_logm = num_grid_points * np.log2(num_grid_points)
        print("NumPoints:", n_train)
        print("NumTestPoints:", n_test)
        print("Expected speed up over SKI: ", (2 * total_nnz + m_logm) /
              (len(WTW_train.nonzero()[0]) + m_logm))

        W_test = scipy.sparse.vstack(W_test)
        y_test = np.hstack(y_test)
        scipy.sparse.save_npz(output_data_path + '/WTW_train.npz', WTW_train)
        np.savez(output_data_path + '/WTy_train.npz', WTy_train=WTy_train)
        scipy.sparse.save_npz(output_data_path + '/W_test.npz', W_test)
        np.savez(output_data_path + '/y_test.npz', y_test=y_test)
        with open(output_data_path + "/norms.pkl", "wb") as f:
            pickle.dump((yty_train, n_train, n_test), f)

        # Report results in a yaml file
        results = {
            'n_train': n_train,
            'n_test': n_test,
            'method': method.value,
            'pre_time': float(pre_time),
            'grid_size': num_grid_points
        }
        with open(output_data_path + "/stats.yaml", 'w') as outfile:
            yaml.dump(results, outfile, default_flow_style=False)

    elif method == utils.MethodName.KISSGP:

        print("Reading data ...\n\n")
        radars = []
        for filename in files:
            print('Reading %s' % filename)
            filepath = data_dirpath + "/" + filename.split("/")[-1]
            try:
                print("File name path: ", filepath)
                radars.append(pyart.io.read_nexrad_archive(filepath))
            except IOError:
                pass  # a failed read simply skips this scan

        t0 = utils.tic()
        W_train, y_train, n_train, n_test = [], [], 0, 0
        W_test, y_test = [], []

        print("\n\nProcessing data ...\n\n")

        for radar in radars:

            print('Processing %s' % (radar.metadata['instrument_name']))
            X, y = get_data_poionts(radar, zmax=zmax)
            perm = np.random.permutation(len(X))
            X = X[perm]
            y = y[perm]

            ntrain = int(TRAIN_FRAC * len(X))

            W_train.append(get_basis(X[:ntrain], grid))
            y_train.append(y[:ntrain])
            n_train += ntrain

            W_test.append(get_basis(X[ntrain:], grid))
            y_test.append(y[ntrain:])
            n_test += len(X) - ntrain

        W_train = scipy.sparse.vstack(W_train)
        W_test = scipy.sparse.vstack(W_test)
        y_train = np.hstack(y_train)
        y_test = np.hstack(y_test)
        t0f = utils.toc(t0)
        pre_time = utils.toc_report(t0f, tag="DataGP", return_val=True)
        print("NumPoints:", n_train)
        print("NumTestPoints:", n_test)

        scipy.sparse.save_npz(output_data_path + '/W_train.npz', W_train)
        scipy.sparse.save_npz(output_data_path + '/W_test.npz', W_test)
        np.savez(output_data_path + '/y_train.npz', y_train=y_train)
        np.savez(output_data_path + '/y_test.npz', y_test=y_test)

        # Report results in a yaml file
        results = {
            'n_train': n_train,
            'n_test': n_test,
            'method': method.value,
            'pre_time': float(pre_time),
            'grid_size': num_grid_points
        }
        with open(output_data_path + "/stats.yaml", 'w') as outfile:
            yaml.dump(results, outfile, default_flow_style=False)

    else:
        raise NotImplementedError

    print("Pre-processing time: ", pre_time)

    return
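# The GSGP branch above never stores the stacked W; it accumulates
# W^T W, W^T y and y^T y scan by scan. A small self-contained check of
# that identity (shapes and densities below are arbitrary):
import numpy as np
import scipy.sparse

rng = np.random.default_rng(0)
W1 = scipy.sparse.random(50, 20, density=0.1, format="csr", random_state=0)
W2 = scipy.sparse.random(40, 20, density=0.1, format="csr", random_state=1)
y1, y2 = rng.standard_normal(50), rng.standard_normal(40)

WTW = W1.T @ W1 + W2.T @ W2        # accumulated per scan
WTy = W1.T @ y1 + W2.T @ y2
W = scipy.sparse.vstack([W1, W2])  # stacked once, as in the KISSGP branch
assert np.allclose(WTW.toarray(), (W.T @ W).toarray())
assert np.allclose(WTy, W.T @ np.concatenate([y1, y2]))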
Example #7
def main(options=None):

    # Handling experiment configuration
    logging.info('Running with args %s', str(sys.argv[1:]))
    wandb.init(project="skigp")
    options = utils.get_options() if options is None else options
    wandb.config.update(options)

    # Handling log directory
    sweep_name = os.environ.get(wandb.env.SWEEP_ID, 'solo')
    output_dir = options.log_dir + '/' + sweep_name
    grid_size = -1

    if options.grid_size_f != utils.GridSizeFunc.NOT_SUPPLIED:
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
            + "_method_" + str(options.method.value) + "_ns_" + str(options.num_samples) + "_gsf_" \
            + str(options.grid_size_f.value)
    elif options.data_type == utils.DatasetType.PRECIPITATION:
        grid_size = np.prod(configs.get_precip_grid(options.grid_idx))
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
            + "_method_" + str(options.method.value) + "_ns_" + str(options.num_samples) + "_gs_" \
            + str(grid_size)
    else:
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
                           + "_method_" + str(options.method.value) + "_gs_" + str(options.grid_size)

    logging.info("Logging directory: " + options.log_dir)
    os.makedirs(options.log_dir, exist_ok=True)

    # Setting up experiment
    experiment, data_time = set_up_experiment(options)
    if grid_size < 0:
        grid_size = options.grid_size

    # Performing inference
    dump = []
    t1 = utils.tic()

    if options.data_type == configs.DatasetType.SINE:
        error = experiment.compute_mae(maxiter=options.maxiter,
                                       verbose=True,
                                       dump=dump)
        t1f = utils.toc(t1)

    elif options.data_type == configs.DatasetType.SOUND:
        error = experiment.compute_smae(maxiter=options.maxiter,
                                        verbose=True,
                                        dump=dump)
        t1f = utils.toc(t1)

    elif options.data_type == configs.DatasetType.PRECIPITATION:
        predict_y = experiment.model.predict(experiment.test_x,
                                             verbose=True,
                                             tol=options.tol,
                                             maxiter=800,
                                             dump=dump)
        t1f = utils.toc(t1)
        error = np.mean(
            np.abs(predict_y[0].squeeze() -
                   experiment.test_y.squeeze()))  # computing MAE
    else:
        raise NotImplementedError

    iter_count = dump[0]
    inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    # Report results in a yaml file
    results = {
        'data_type': options.data_type.name.lower(),
        'seed': options.seed,
        'method': options.method.value,
        'num_samples': options.num_samples,
        'inf_time': float(inference_time),
        'pre_time': float(data_time),
        "error": float(error),
        "num_iters": iter_count
    }

    if options.grid_size_f != utils.GridSizeFunc.NOT_SUPPLIED:
        results.update({'gsf': options.grid_size_f.value})
    else:
        results.update({'grid_size': int(grid_size)})

    with open(options.log_dir + "/results.yaml", 'w') as outfile:
        yaml.dump(results, outfile, default_flow_style=False)

    logging.info("Done with experimentation!")
Example #8
def set_up_experiment(options):

    # Setup random seed
    random.seed(options.seed)
    np.random.seed(options.seed)

    # Setting up the experiment skeleton
    if options.data_type == configs.DatasetType.PRECIPITATION:
        grid_size = configs.get_precip_grid(idx=options.grid_idx)
        config = configs.Structdict()
        config['data_type'] = utils.DatasetType.PRECIPITATION
        config['num_dims'] = 3
        config['grid_size'] = copy.copy(grid_size)
        data_reader = DataLoader(config=config)  # use the precip-specific config built above
    else:
        data_reader = DataLoader(config=options)

    if options.method == configs.MethodName.KISSGP:
        experiment = KissGpExp(config=options, data_loader=data_reader)

    elif options.method == configs.MethodName.GSGP:
        experiment = GsGpExp(config=options, data_loader=data_reader)

    else:
        raise NotImplementedError

    # Supply test scenario -- required only for per iteration results
    experiment.data_loader.config["one_dim_num_points"] = options.num_samples
    if options.sigma > 0:
        experiment.data_loader.config['one_dim_noise_level'] = options.sigma

    # Setting grid size
    num_points = options.num_samples
    if options.grid_idx < 0 and experiment.config['grid_size'] < 0:
        grid_size = configs.get_grid_size(
            num_points=num_points,
            grid_size_f=options.grid_size_f,
            data_type=experiment.config.data_type)
        experiment.config['grid_size'] = grid_size

    # Sampling or reading data
    # Sample up front so synthetic data creation is excluded from the timing below
    experiment.sample_data()

    # Processing data
    t0 = utils.tic()
    experiment.load_data()
    t0f = utils.toc(t0)
    data_time = utils.toc_report(t0f, tag="DataGP", return_val=True)

    # Build experiment
    experiment.build()

    # Dealing with hyper-parameters
    hypers = configs.get_hypers(data_type=options.data_type, options=options)
    hypers = {
        'noise_covar.noise': hypers['noise'],
        'covar_module.base_kernel.outputscale': hypers['outputscale'],
        'covar_module.base_kernel.base_kernel.lengthscale':
            np.array(hypers['lengthscale'])
            if isinstance(hypers['lengthscale'], list)
            else hypers['lengthscale'],
    }
    experiment.init_params(hypers=hypers)
    experiment.print_model_params(raw=False)

    return experiment, data_time
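# Usage sketch, mirroring main() above; every name comes from this file,
# but the standalone invocation itself is illustrative:
options = utils.get_options()
experiment, data_time = set_up_experiment(options)
error = experiment.compute_mae(maxiter=options.maxiter, verbose=True, dump=[])
print("MAE:", error, " pre-processing time:", data_time)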