def run_inference(options, data_path):
    exp, W_test, y_test = set_up_radar_experiment(options, data_path)

    t0 = utils.tic()
    mu_grid = exp.model.predict(X=None, grid=True, verbose=True, tol=1e-2)[0]
    t0f = utils.toc(t0)
    inference_time = utils.toc_report(t0f, tag="InfGP", return_val=True)

    y_predict = (W_test * mu_grid).squeeze()
    # MAE of the constant mean predictor, used to normalize MAE into SMAE.
    mae_mp = compute_mae(np.ones_like(y_test) * np.mean(y_test), y_test)
    print("Mae: ", compute_mae(y_predict, y_test))
    print("Smae: ", compute_mae(y_predict, y_test) / mae_mp)
    print("Mse: ", np.mean((y_predict - y_test) ** 2))
    print("Rmse: ", np.sqrt(np.mean((y_predict - y_test) ** 2)))
    return inference_time
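
# A minimal, self-contained sketch of the error metrics printed above; the
# arrays are illustrative stand-ins, not project data. SMAE is the MAE
# normalized by the MAE of the constant mean predictor, so values below 1.0
# indicate the model beats predicting the training mean.
def _metrics_sketch(y_predict, y_test):
    mae = np.mean(np.abs(y_predict - y_test))
    mae_mean_pred = np.mean(np.abs(np.mean(y_test) - y_test))
    mse = np.mean((y_predict - y_test) ** 2)
    return {"mae": mae, "smae": mae / mae_mean_pred,
            "mse": mse, "rmse": np.sqrt(mse)}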
def run_logdet_computation(options, data_path, W_path=None):
    ntrials = options.ntrials
    nrank = options.maxiter
    exp, __, __ = set_up_radar_experiment(options, data_path, W_path=W_path)
    sigma = exp.model.noise_covar.noise
    if options.method == utils.MethodName.GSGP:
        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W, K, sigma,
                                   dtype=exp.model.WT_times_Y.dtype)
        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A_hat, WT=exp.WT, trials=ntrials, rank=nrank, verbose=True,
                dump=options.dump, dump_path=options.log_dir)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A_hat, WT=exp.WT, trials=ntrials, tol=options.tol,
                rank=nrank, verbose=True)[0]
        else:
            raise NotImplementedError
        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    elif options.method == utils.MethodName.KISSGP:
        t1 = utils.tic()
        W_train = exp.model.W
        K = exp.model.covar_module._inducing_forward()
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A, trials=ntrials, rank=nrank, verbose=True)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A, trials=ntrials, tol=options.tol, rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError
        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    else:
        raise NotImplementedError
    return inference_time
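
# Illustration (not project code) of the stochastic log-det idea behind the
# logdet_estimate_using_*_variants calls above: for SPD A and Rademacher
# probes z, E[z^T log(A) z] = tr(log A) = log det A. A dense eigensolve
# stands in here for the Lanczos (variant 1) / CG-based (variant 2) machinery.
def _logdet_hutchinson_sketch(A, trials=64, seed=0):
    rng = np.random.default_rng(seed)
    evals, evecs = np.linalg.eigh(A)              # dense stand-in for Lanczos
    logA = (evecs * np.log(evals)) @ evecs.T      # matrix logarithm of SPD A
    Z = rng.choice([-1.0, 1.0], size=(A.shape[0], trials))  # Rademacher probes
    return np.mean(np.einsum('it,it->t', Z, logA @ Z))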
def run_ski_cov_inf_experiment(exp, options):
    sigma = exp.model.noise_covar.noise
    if options.method == configs.MethodName.KISSGP:
        W_test = exp.model.covar_module(exp.test_x)[0]
        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        # Computing probes over the test data points.
        probes = W_train * A.kmm_matmat(W_test.T.todense())
        cov = bcg(A, probes, tol=options.tol, maxiter=options.maxiter,
                  verbose=True)
        covf = np.dot(probes.T, cov)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    elif options.method == configs.MethodName.GSGP:
        W_test = exp.model.covar_module(exp.test_x)[0]
        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W, K, sigma,
                                   dtype=exp.model.WT_times_Y.dtype)
        r0_hat = A_hat.kmm_matmat(W_test.T.todense())
        x_diff = bfcg(A_hat, r0_hat, yty=np.linalg.norm(r0_hat, axis=0)**2,
                      maxiter=options.maxiter, verbose=True, tol=options.tol)
        covf = np.dot(r0_hat.T, x_diff)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    else:
        raise NotImplementedError

    if options.store_ref:
        os.makedirs(options.log_dir, exist_ok=True)
        with open(options.log_dir + "/" + options.data_type.name.lower()
                  + "_ski_dump.pkl", "wb") as f:
            pickle.dump(covf, f)
        return 0.0, 0.0

    # Computing the l2 norm against the stored reference.
    cov_ref_path = (os.environ['PRJ'] + '/data/refs/'
                    + options.data_type.name.lower() + '_ski_dump.pkl')
    assert os.path.exists(cov_ref_path), \
        cov_ref_path + " doesn't exist. Follow the README to generate refs."
    with open(cov_ref_path, "rb") as f:
        COV_REF = pickle.load(f)
    l2_norm = np.linalg.norm(covf - COV_REF)
    print("L2norm: ", l2_norm)
    return inference_time, l2_norm
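
# Dense-math illustration (assumption: A.kmm_matmat multiplies by the
# inducing kernel K) of the quantity the solves above approximate. The
# KISS-GP branch forms P^T A^{-1} P with A = W K W^T + sigma^2 I and probes
# P = W K W_*^T, i.e. the data-explained part of the posterior covariance at
# the test points; the GSGP branch computes the same quantity in inducing
# space.
def _ski_cov_dense_sketch(W, K, W_star, sigma):
    A = W @ K @ W.T + sigma ** 2 * np.eye(W.shape[0])
    P = W @ K @ W_star.T
    return P.T @ np.linalg.solve(A, P)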
def run_approx_cov_inf_experiment(exp, options):
    if exp.config.data_type not in [configs.DatasetType.SOUND,
                                    configs.DatasetType.PRECIPITATION]:
        nrank = min(configs.DEFAULT_LANCZOS_RANK, options.maxiter)
        print("Lanczos rank is reduced to", nrank, "from", options.maxiter)
    else:
        nrank = options.maxiter
    sigma = exp.model.noise_covar.noise

    # Cap the number of test vectors to keep the covariance comparison cheap.
    if exp.config.data_type == configs.DatasetType.SOUND:
        max_num_test_vectors = 1000
    else:
        max_num_test_vectors = 50
    if exp.test_x.shape[0] > max_num_test_vectors:
        W_test = exp.model.covar_module(exp.test_x[:max_num_test_vectors, :])[0]
    else:
        W_test = exp.model.covar_module(exp.test_x)[0]

    if options.method == configs.MethodName.KISSGP:
        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        probes = A.kmm_matmat(W_train.T.todense()).T
        v = np.mean(probes, axis=1)
        Q, T = bsla(A, v, k=nrank)
        T_diag = T.diagonal()
        T_subdiag = T.diagonal(1)
        L_diag, L_subdiag = chol_trid(T_diag, T_subdiag)
        R = A.kmm_matmat(A.WT * Q)
        Rprime = chol_trid_solve(L_diag, L_subdiag, R.T).T
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    elif options.method == configs.MethodName.GSGP:
        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        probes = fm.Kron(*K).getArray().real if len(K) > 1 else K[0].getArray().real
        v = np.mean(probes, axis=1)
        A_hat = GsGpLinearOperator(exp.model.WT_times_W, K, sigma,
                                   dtype=exp.model.WT_times_Y.dtype)
        Q, T = bfsla1(A_hat, v, k=nrank)
        T_diag = T.diagonal()
        T_subdiag = T.diagonal(1)
        L_diag, L_subdiag = chol_trid(T_diag, T_subdiag)
        R = A_hat.kmm_matmat(A_hat.WTW * Q)
        Rprime = chol_trid_solve(L_diag, L_subdiag, R.T).T
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    else:
        raise NotImplementedError

    # Computing the l2 norm against the stored reference.
    cov_ref_path = (os.environ['PRJ'] + '/data/refs/'
                    + options.data_type.name.lower() + '_ski_dump.pkl')
    assert os.path.exists(cov_ref_path), \
        cov_ref_path + " doesn't exist. Follow the README to generate refs."
    with open(cov_ref_path, "rb") as f:
        COV_REF = pickle.load(f)[-1]
    predicted_cov = np.dot(W_test * R, (W_test * Rprime).T)
    l2_norm = np.linalg.norm(predicted_cov - COV_REF)
    print("L2norm: ", l2_norm)
    return inference_time, l2_norm
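
# Hedged reimplementation, for illustration only, of what chol_trid is
# assumed to do above: factor an SPD tridiagonal matrix T (given by its main
# diagonal and first off-diagonal) as L L^T with L lower-bidiagonal, so
# chol_trid_solve can back-substitute in O(n) per right-hand side. This
# mirrors the project helper's call signature but is an independent sketch.
def _chol_trid_sketch(T_diag, T_subdiag):
    n = len(T_diag)
    L_diag = np.empty(n)
    L_sub = np.empty(max(n - 1, 0))
    L_diag[0] = np.sqrt(T_diag[0])
    for i in range(1, n):
        L_sub[i - 1] = T_subdiag[i - 1] / L_diag[i - 1]
        L_diag[i] = np.sqrt(T_diag[i] - L_sub[i - 1] ** 2)
    return L_diag, L_sub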
def run_llk_experiment(exp, options):
    sigma = exp.model.noise_covar.noise
    ntrials = options.ntrials
    nrank = options.maxiter
    if isinstance(exp, KissGpExp):
        t1 = utils.tic()
        W_train, K, __ = exp.model.covar_module(exp.train_x, is_kmm=True)
        A = KissGpLinearOperator(W_train, K, sigma, dtype=W_train.dtype)
        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A, trials=ntrials, rank=nrank, verbose=True)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A, trials=ntrials, tol=options.tol, rank=nrank,
                verbose=True)[0]
        else:
            raise NotImplementedError
        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    elif isinstance(exp, GsGpExp):
        t1 = utils.tic()
        K = exp.model.covar_module._inducing_forward()
        A_hat = GsGpLinearOperator(exp.model.WT_times_W, K, sigma,
                                   dtype=exp.model.WT_times_Y.dtype)
        if options.variant == 1:
            estimate_logdet = logdet_estimate_using_lz_variants(
                A_hat, WT=exp.WT, trials=ntrials, rank=nrank, verbose=True,
                dump=options.dump, dump_path=options.log_dir)[0]
        elif options.variant == 2:
            estimate_logdet = logdet_estimate_using_cg_variants(
                A_hat, WT=exp.WT, trials=ntrials, tol=options.tol,
                rank=nrank, verbose=True)[0]
        else:
            raise NotImplementedError
        print("Estimated log-det: ", estimate_logdet)
        t1f = utils.toc(t1)
        inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)
    else:
        raise NotImplementedError
    return inference_time, estimate_logdet
def pre_process(method, grid_idx, entire_us=False):
    zmax = DEFAULT_Z_MAX
    grid = configs.get_radar_grid(idx=grid_idx)
    num_grid_points = np.prod([item[-1] for item in grid])

    # Read all of the radar scans from disk.
    if entire_us:
        data_dirpath = RADAR_DATASET_PATH + "/entire_us"
    else:
        data_dirpath = RADAR_DATASET_PATH + "/ne"
    files = os.listdir(data_dirpath)

    # Creating a directory for the processed files.
    output_data_path = (data_dirpath + "_processed/" + method.name.lower()
                        + "_grid_" + str(grid_idx))
    os.makedirs(output_data_path, exist_ok=True)

    if method == utils.MethodName.GSGP:
        t0 = utils.tic()
        WTW_train, WTy_train, yty_train, n_train, total_nnz = 0, 0, 0, 0, 0
        W_test, y_test, n_test = [], [], 0
        print("\n\nReading and processing data ...\n\n")
        for scan in files:
            filename = '%s' % scan
            print('Reading %s' % filename)
            try:
                print("File name path: ",
                      data_dirpath + "/" + filename.split("/")[-1])
                radar = pyart.io.read_nexrad_archive(
                    data_dirpath + "/" + filename.split("/")[-1])
            except IOError:
                continue  # skip unreadable scans rather than reusing a stale radar
            print('Processing %s' % radar.metadata['instrument_name'])
            X, y = get_data_poionts(radar, zmax=zmax)
            perm = np.random.permutation(len(X))
            X = X[perm]
            y = y[perm]
            ntrain = int(TRAIN_FRAC * len(X))
            W_train = get_basis(X[:ntrain], grid)
            y_train = y[:ntrain]
            WT_train = W_train.T.tocsr()
            total_nnz += len(W_train.nonzero()[0])
            # Accumulate the sufficient statistics W^T W, W^T y, and y^T y so
            # the full training W never needs to be stored.
            WTW_train += WT_train * W_train
            WTy_train += WT_train * y_train
            yty_train += y_train.T @ y_train
            n_train += ntrain
            W_test.append(get_basis(X[ntrain:], grid))
            y_test.append(y[ntrain:])
            n_test += len(X) - ntrain
        t0f = utils.toc(t0)
        pre_time = utils.toc_report(t0f, tag="DataGP", return_val=True)

        m_logm = num_grid_points * np.log2(num_grid_points)
        print("NumPoints:", n_train)
        print("NumTestPoints:", n_test)
        print("Expected speed up over SKI: ",
              (2 * total_nnz + m_logm) / (len(WTW_train.nonzero()[0]) + m_logm))
        W_test = scipy.sparse.vstack(W_test)
        y_test = np.hstack(y_test)
        scipy.sparse.save_npz(output_data_path + '/WTW_train.npz', WTW_train)
        np.savez(output_data_path + '/WTy_train.npz', WTy_train=WTy_train)
        scipy.sparse.save_npz(output_data_path + '/W_test.npz', W_test)
        np.savez(output_data_path + '/y_test.npz', y_test=y_test)
        pickle.dump((yty_train, n_train, n_test),
                    open(output_data_path + "/norms.pkl", "wb"))

        # Report results in a yaml file.
        results = {
            'n_train': n_train,
            'n_test': n_test,
            'method': method.value,
            'pre_time': float(pre_time),
            'grid_size': int(num_grid_points)
        }
        with open(output_data_path + "/stats.yaml", 'w') as outfile:
            yaml.dump(results, outfile, default_flow_style=False)
    elif method == utils.MethodName.KISSGP:
        print("Reading data ...\n\n")
        radars = []
        for scan in files:
            filename = '%s' % scan
            print('Reading %s' % filename)
            try:
                print("File name path: ",
                      data_dirpath + "/" + filename.split("/")[-1])
                radars.append(pyart.io.read_nexrad_archive(
                    data_dirpath + "/" + filename.split("/")[-1]))
            except IOError:
                continue  # skip unreadable scans
        t0 = utils.tic()
        W_train, y_train, n_train, n_test = [], [], 0, 0
        W_test, y_test = [], []
        print("\n\nProcessing data ...\n\n")
        for radar in radars:
            print('Processing %s' % radar.metadata['instrument_name'])
            X, y = get_data_poionts(radar, zmax=zmax)
            perm = np.random.permutation(len(X))
            X = X[perm]
            y = y[perm]
            ntrain = int(TRAIN_FRAC * len(X))
            W_train.append(get_basis(X[:ntrain], grid))
            y_train.append(y[:ntrain])
            n_train += ntrain
            W_test.append(get_basis(X[ntrain:], grid))
            y_test.append(y[ntrain:])
            n_test += len(X) - ntrain
        W_train = scipy.sparse.vstack(W_train)
        W_test = scipy.sparse.vstack(W_test)
        y_train = np.hstack(y_train)
        y_test = np.hstack(y_test)
        t0f = utils.toc(t0)
        pre_time = utils.toc_report(t0f, tag="DataGP", return_val=True)
        print("NumPoints:", n_train)
        print("NumTestPoints:", n_test)
        scipy.sparse.save_npz(output_data_path + '/W_train.npz', W_train)
        scipy.sparse.save_npz(output_data_path + '/W_test.npz', W_test)
        np.savez(output_data_path + '/y_train.npz', y_train=y_train)
        np.savez(output_data_path + '/y_test.npz', y_test=y_test)

        # Report results in a yaml file.
        results = {
            'n_train': n_train,
            'n_test': n_test,
            'method': method.value,
            'pre_time': float(pre_time),
            'grid_size': int(num_grid_points)
        }
        with open(output_data_path + "/stats.yaml", 'w') as outfile:
            yaml.dump(results, outfile, default_flow_style=False)
    else:
        raise NotImplementedError
    print("Pre-processing time: ", pre_time)
    return
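
# Intuition for get_basis (assumption: it returns a sparse SKI-style
# interpolation matrix W whose rows are convex weights over neighboring grid
# points). A 1-D linear-interpolation analogue over an equispaced grid; the
# project's actual basis may use a different (e.g. cubic) stencil.
def _linear_basis_sketch(x, grid_pts):
    h = grid_pts[1] - grid_pts[0]
    idx = np.clip(((x - grid_pts[0]) / h).astype(int), 0, len(grid_pts) - 2)
    frac = (x - grid_pts[idx]) / h
    rows = np.repeat(np.arange(len(x)), 2)
    cols = np.stack([idx, idx + 1], axis=1).ravel()
    vals = np.stack([1 - frac, frac], axis=1).ravel()
    return scipy.sparse.csr_matrix((vals, (rows, cols)),
                                   shape=(len(x), len(grid_pts)))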
def main(options=None):
    # Handling the experiment configuration.
    logging.info('Running with args %s', str(sys.argv[1:]))
    wandb.init(project="skigp")
    options = utils.get_options() if options is None else options
    wandb.config.update(options)

    # Handling the log directory.
    sweep_name = os.environ.get(wandb.env.SWEEP_ID, 'solo')
    output_dir = options.log_dir + '/' + sweep_name
    grid_size = -1
    if options.grid_size_f != utils.GridSizeFunc.NOT_SUPPLIED:
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
            + "_method_" + str(options.method.value) \
            + "_ns_" + str(options.num_samples) \
            + "_gsf_" + str(options.grid_size_f.value)
    elif options.data_type == utils.DatasetType.PRECIPITATION:
        grid_size = np.prod(configs.get_precip_grid(options.grid_idx))
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
            + "_method_" + str(options.method.value) \
            + "_ns_" + str(options.num_samples) \
            + "_gs_" + str(grid_size)
    else:
        options.log_dir = output_dir + "/rid_" + str(options.seed) \
            + "_method_" + str(options.method.value) \
            + "_gs_" + str(options.grid_size)
    logging.info("Logging directory: " + options.log_dir)
    os.makedirs(options.log_dir, exist_ok=True)

    # Setting up the experiment.
    experiment, data_time = set_up_experiment(options)
    if grid_size < 0:
        grid_size = options.grid_size

    # Performing inference.
    dump = []
    t1 = utils.tic()
    if options.data_type == configs.DatasetType.SINE:
        error = experiment.compute_mae(maxiter=options.maxiter, verbose=True,
                                       dump=dump)
        t1f = utils.toc(t1)
    elif options.data_type == configs.DatasetType.SOUND:
        error = experiment.compute_smae(maxiter=options.maxiter, verbose=True,
                                        dump=dump)
        t1f = utils.toc(t1)
    elif options.data_type == configs.DatasetType.PRECIPITATION:
        predict_y = experiment.model.predict(experiment.test_x, verbose=True,
                                             tol=options.tol, maxiter=800,
                                             dump=dump)
        t1f = utils.toc(t1)
        # Computing the MAE.
        error = np.mean(np.abs(predict_y[0].squeeze()
                               - experiment.test_y.squeeze()))
    else:
        raise NotImplementedError
    iter_count = dump[0]
    inference_time = utils.toc_report(t1f, tag="InfGP", return_val=True)

    # Report results in a yaml file.
    results = {
        'data_type': options.data_type.name.lower(),
        'seed': options.seed,
        'method': options.method.value,
        'num_samples': options.num_samples,
        'inf_time': float(inference_time),
        'pre_time': float(data_time),
        'error': float(error),
        'num_iters': iter_count
    }
    if options.grid_size_f != utils.GridSizeFunc.NOT_SUPPLIED:
        results.update({'gsf': options.grid_size_f.value})
    else:
        results.update({'grid_size': int(grid_size)})
    with open(options.log_dir + "/results.yaml", 'w') as outfile:
        yaml.dump(results, outfile, default_flow_style=False)
    logging.info("Done with experimentation!")
def set_up_experiment(options):
    # Set up the random seeds.
    random.seed(options.seed)
    np.random.seed(options.seed)

    # Setting up the experiment skeleton.
    if options.data_type == configs.DatasetType.PRECIPITATION:
        grid_size = configs.get_precip_grid(idx=options.grid_idx)
        config = configs.Structdict()
        config['data_type'] = utils.DatasetType.PRECIPITATION
        config['num_dims'] = 3
        config['grid_size'] = copy.copy(grid_size)
        data_reader = DataLoader(config=options)
    else:
        data_reader = DataLoader(config=options)
    if options.method == configs.MethodName.KISSGP:
        experiment = KissGpExp(config=options, data_loader=data_reader)
    elif options.method == configs.MethodName.GSGP:
        experiment = GsGpExp(config=options, data_loader=data_reader)
    else:
        raise NotImplementedError

    # Supply the test scenario -- required only for per-iteration results.
    experiment.data_loader.config["one_dim_num_points"] = options.num_samples
    if options.sigma > 0:
        experiment.data_loader.config['one_dim_noise_level'] = options.sigma

    # Setting the grid size.
    num_points = options.num_samples
    if options.grid_idx < 0 and experiment.config['grid_size'] < 0:
        grid_size = configs.get_grid_size(
            num_points=num_points,
            grid_size_f=options.grid_size_f,
            data_type=experiment.config.data_type)
        experiment.config['grid_size'] = grid_size

    # Sampling or reading data; done here to exclude synthetic data creation
    # from the timed data-processing step below.
    experiment.sample_data()

    # Processing data.
    t0 = utils.tic()
    experiment.load_data()
    t0f = utils.toc(t0)
    data_time = utils.toc_report(t0f, tag="DataGP", return_val=True)

    # Build the experiment.
    experiment.build()

    # Dealing with hyper-parameters.
    hypers = configs.get_hypers(data_type=options.data_type, options=options)
    hypers = {
        'noise_covar.noise': hypers['noise'],
        'covar_module.base_kernel.outputscale': hypers['outputscale'],
        'covar_module.base_kernel.base_kernel.lengthscale':
            hypers['lengthscale'] if not isinstance(hypers['lengthscale'], list)
            else np.array(hypers['lengthscale'])
    }
    experiment.init_params(hypers=hypers)
    experiment.print_model_params(raw=False)
    return experiment, data_time
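
# Hedged usage sketch mirroring main() above: build the experiment once and
# reuse it across the runners defined in this module.
# options = utils.get_options()
# experiment, data_time = set_up_experiment(options)
# inf_time, logdet = run_llk_experiment(experiment, options)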