def test_dating():
    test_ratio = 0.1
    data, label = dataset.dating_set(norm=True)
    utils.plot_scatter(data[:, 2], data[:, 1], 10 * label, 15 * label)
    test_size = int(data.shape[0] * test_ratio)
    right_count = 0
    for i in range(test_size):
        # classify each held-out row against the remaining rows
        predict_label = classify(data[i, :], data[test_size:, :],
                                 label[test_size:])
        right_count += 1 if label[i] == predict_label else 0
    # cast before dividing so the ratio is never truncated by integer division
    print("accuracy: %f" % (float(right_count) / test_size))
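# `classify` is defined elsewhere; below is a minimal k-nearest-neighbor
# sketch that matches the call above (query vector, training rows, training
# labels). The default k=3 is an assumption, not the original implementation.
import numpy as np

def classify(in_x, data_set, labels, k=3):
    # Euclidean distance from the query to every training row
    dists = np.sqrt(((data_set - in_x) ** 2).sum(axis=1))
    # labels of the k nearest neighbors
    nearest = np.asarray(labels)[np.argsort(dists)[:k]]
    # majority vote among the neighbors
    values, counts = np.unique(nearest, return_counts=True)
    return values[np.argmax(counts)]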
def PCA_analysis(generator, pca, eng, params, numImgs=100):
    # sample device patterns from the generator in eval mode
    generator.eval()
    imgs = sample_images(generator, numImgs, params)
    generator.train()
    # keep a NumPy copy for PCA; hand a matlab.double copy to the solver
    img_np = torch.squeeze(imgs[:, 0, :]).data.cpu().numpy()
    img = matlab.double(img_np.tolist())
    wavelength = matlab.double([params.w] * numImgs)
    desired_angle = matlab.double([params.a] * numImgs)
    abseffs = eng.Eval_Eff_1D_parallel(img, wavelength, desired_angle)
    Efficiency = torch.Tensor([abseffs]).data.cpu().numpy().reshape(-1)
    # img_np = img_np[np.where(Efficiency.reshape(-1) > 0), :]
    # Efficiency = Efficiency[Efficiency > 0]
    # project the sampled devices onto the fitted principal components
    img_2 = pca.transform(img_np)
    fig_path = params.output_dir + '/figures/scatter/Iter{}.png'.format(params.iter)
    utils.plot_scatter(img_2, Efficiency, params.iter, fig_path)
    fig_path = params.output_dir + '/figures/histogram/Iter{}.png'.format(params.iter)
    utils.plot_histogram(Efficiency, params.iter, fig_path)
    imgs = imgs[:8, :, :].unsqueeze(2).repeat(1, 1, 64, 1)
    fig_path = params.output_dir + '/figures/deviceSamples/Iter{}.png'.format(params.iter)
    save_image(imgs, fig_path, 2)
    '''
    grads = eng.GradientFromSolver_1D_parallel(img, wavelength, desired_angle)
    grad_2 = pca.transform(grads)
    if params.iter % 2 == 0:
        utils.plot_envolution(params.img_2_prev, params.eff_prev,
                              params.grad_2_prev, img_2, Efficiency,
                              params.iter, params.output_dir)
    else:
        utils.plot_arrow(img_2, Efficiency, grad_2, params.iter,
                         params.output_dir)
    params.img_2_prev = img_2
    params.eff_prev = Efficiency
    params.grad_2_prev = grad_2
    '''
    return img_2, Efficiency
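# PCA_analysis expects `pca` to be an already-fitted projector with a
# `transform` method. A plausible way to prepare it with scikit-learn,
# assuming a hypothetical reference set `ref_imgs` of flattened device
# patterns with shape (N, num_pixels):
from sklearn.decomposition import PCA

pca = PCA(n_components=2)  # two components, matching the 2-D scatter plot
pca.fit(ref_imgs)          # ref_imgs is an assumed reference array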
def measurement_scatter(measurement_list_all_files,
                        scatter_plot_range=[0, 0],
                        plot_name="",
                        index=[1, 3],
                        x_label="full predicates",
                        y_label="predicted predicates"):
    measurement_list_all_files_full_label = [
        float(v[index[0]]) for v in measurement_list_all_files
    ]
    measurement_list_all_files_predicted_label = [
        float(v[index[1]]) for v in measurement_list_all_files
    ]
    plot_scatter(measurement_list_all_files_full_label,
                 measurement_list_all_files_predicted_label,
                 name=plot_name,
                 range=scatter_plot_range,
                 x_label=x_label,
                 y_label=y_label)
def scatter_view(self):
    data = self.filter_data_rows()
    array_x = data[self.X].to_numpy()
    array_y = data[self.Y].to_numpy()
    fig = utils.plot_scatter(x=array_x,
                             y=array_y,
                             add_unit_line=True,
                             add_R2=True,
                             layout_kwargs=dict(title='',
                                                xaxis_title=self.X,
                                                yaxis_title=self.Y))
    return fig
def scatter_view(self):
    '''
    Create a scatter plot from the filtered dataframe (the output of
    self.filter_data_rows), where the x and y axes are dataframe columns
    chosen by the user via self.X and self.Y.

    :return: a Plotly scatter plot wrapped by the Panel package
    '''
    data = self.filter_data_rows()
    array_x = data[self.X].to_numpy()
    array_y = data[self.Y].to_numpy()
    fig = utils.plot_scatter(x=array_x,
                             y=array_y,
                             add_unit_line=True,
                             add_R2=True,
                             layout_kwargs=dict(title='',
                                                xaxis_title=self.X,
                                                yaxis_title=self.Y))
    # pn.pane.Plotly is the Panel wrapper for Plotly figures
    return pn.pane.Plotly(fig)
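# Both scatter_view variants assume utils.plot_scatter returns a Plotly
# figure; that helper is not shown here. A minimal sketch consistent with the
# keyword arguments used above. The trace layout and the R-squared via
# np.corrcoef are assumptions, not the original implementation.
import numpy as np
import plotly.graph_objects as go

def plot_scatter(x, y, add_unit_line=False, add_R2=False, layout_kwargs=None):
    fig = go.Figure(go.Scatter(x=x, y=y, mode='markers', name='data'))
    if add_unit_line:
        # y = x reference line spanning the data range
        lo = min(np.min(x), np.min(y))
        hi = max(np.max(x), np.max(y))
        fig.add_trace(go.Scatter(x=[lo, hi], y=[lo, hi],
                                 mode='lines', name='y = x'))
    if add_R2:
        r2 = np.corrcoef(x, y)[0, 1] ** 2
        fig.add_annotation(text='R2 = {:.3f}'.format(r2),
                           xref='paper', yref='paper', x=0.05, y=0.95,
                           showarrow=False)
    fig.update_layout(**(layout_kwargs or {}))
    return fig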
def test_gip_sigma_scale_tfd():
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    data_file = "data/tfd_data_48x48.pkl"
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="unlabeled", fold="all")
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="train", fold="all")
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="test", fold="all")
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100

    # Symbolic inputs
    Xd = T.matrix(name="Xd")
    Xc = T.matrix(name="Xc")
    Xm = T.matrix(name="Xm")
    Xt = T.matrix(name="Xt")

    # Load inferencer and generator from saved parameters
    gn_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_GN.pkl"
    in_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim

    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params["x_type"] = "gaussian"
    osm_params["xt_transform"] = "sigmoid"
    osm_params["logvar_bound"] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm,
                        p_x_given_z=GN, q_z_given_x=IN,
                        x_dim=x_dim, z_dim=z_dim, params=osm_params)

    # compute variational likelihood bound and its sub-components
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    # file_name = "A_TFD_POST_KLDS.png"
    # post_klds = OSM.compute_post_klds(Xb)
    # post_dim_klds = np.mean(post_klds, axis=0)
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds,
    #                 file_name)

    # compute information about free-energy on validation set
    file_name = "A_TFD_KLD_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name,
                       x_label="Posterior KLd",
                       y_label="Negative Log-likelihood")
    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel())  # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1])  # summed post KLds for each obs
    # post_dim_klds = post_stats[2]  # average post KLds for each post dim
    # post_dim_vars = post_stats[3]  # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10, :],
                                                     loop_iters=100,
                                                     sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists["data samples"]))
        Xs, block_im_dim = block_video(Xs, (48, 48), (3, 3))
        to_video(Xs, block_im_dim,
                 "A_TFD_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        # sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)),
        #                                      loop_iters=300, sigma_scale=1.0)
        # Xs = np.vstack(sample_lists["data samples"])
        # file_name = "TFD_TEST_{0:d}.png".format(i)
        # utils.visualize_samples(Xs, file_name, num_rows=15)
    file_name = "A_TFD_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)

    # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #     cross_validate_sigma(Xs, Xva, [0.09, 0.095, 0.1, 0.105, 0.11], 10)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_TFD_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_TFD_BAD_FACES_1.png", num_rows=20)
    return
# BENCHMARK 1 predictions
y_pred_1 = np.ones(len(y_test)) * y_train.mean()
# calculate results
results = utils.get_results(y_test, y_pred_1)
# save results
with open("results/results_benchmark_1.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test, y_pred_1, filename="benchmark_1", window_plot=200, fontsize=14, fig_size=(15, 5)
)
utils.plot_scatter(y_test, y_pred_1, filename="benchmark_1")

# BENCHMARK 2 predictions
y_pred_2 = X.loc[:, "prev_sp_offers"][-len(y_test):]
# calculate results
results = utils.get_results(y_test, y_pred_2)
# save results
with open("results/results_benchmark_2.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test, y_pred_2, filename="benchmark_2", window_plot=200, fontsize=14, fig_size=(15, 5)
)
regressor = RandomForestRegressor(n_estimators=60)
# create pipeline with regressor and scaler
pipeline = Pipeline([("scaler", RobustScaler()), ("regressor", regressor)])
# nested cross validation
tscv = TimeSeriesSplit(n_splits=6, max_train_size=365 * 48, test_size=48 * 30)
# perform nested cross validation and get results
y_test, y_pred = utils.my_cross_val_predict(pipeline, X, y, tscv)
# calculate results
results = utils.get_results(y_test, y_pred)
# save results
with open("results/results_random_forest.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test,
    y_pred,
    filename="random_forest",
    window_plot=200,
    fontsize=14,
    fig_size=(15, 5),
)
utils.plot_scatter(y_test, y_pred, filename="random_forest")
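# utils.my_cross_val_predict is repo-specific and not shown here. A plausible
# sketch, assuming it refits a clone of the pipeline on each TimeSeriesSplit
# training window and concatenates the out-of-sample targets and predictions;
# the pandas indexing and the return order are inferred from the calls above.
import numpy as np
from sklearn.base import clone

def my_cross_val_predict(pipeline, X, y, tscv):
    y_tests, y_preds = [], []
    for train_idx, test_idx in tscv.split(X):
        model = clone(pipeline)  # fresh, unfitted estimator for each fold
        model.fit(X.iloc[train_idx], y.iloc[train_idx])
        y_tests.append(np.asarray(y.iloc[test_idx]))
        y_preds.append(model.predict(X.iloc[test_idx]))
    return np.concatenate(y_tests), np.concatenate(y_preds)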
    build_fn=get_ann,
    epochs=ann_params["epochs"],
    batch_size=ann_params["batch_size"],
    validation_split=ann_params["validation_split"],
    callbacks=EarlyStopping(patience=25),
    shuffle=False,
    verbose=2,
)
# create pipeline
pipeline = Pipeline([("scaler", RobustScaler()), ("regressor", regressor)])
# nested cross validation
tscv = TimeSeriesSplit(n_splits=6, max_train_size=365 * 48, test_size=48 * 30)
# perform nested cross validation and get results
y_test, y_pred = utils.my_cross_val_predict(pipeline, X, y, tscv)
# calculate results
results = utils.get_results(y_test, y_pred)
# save results
with open("results/results_ann_callbacks.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test, y_pred, filename="ann_callbacks", window_plot=200, fontsize=14, fig_size=(15, 5)
)
utils.plot_scatter(y_test, y_pred, filename="ann_callbacks")
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    Xtr_mean = np.mean(Xtr, axis=0)
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1000, 1000]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1000, 1000]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma,
                params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma,
                params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm,
                        p_x_given_z=GN, q_z_given_x=IN,
                        x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    # open in text mode, since formatted strings are written below
    out_file = open(RESULT_PATH + "pt_osm_results.txt", 'w')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size,))
    costs = [0. for i in range(10)]
    learn_rate = 0.0005
    for i in range(150000):
        scale = min(1.0, float(i) / 10000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.9
        # do a minibatch update of the model, and compute some costs
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        OSM.set_sgd_params(lr_1=(scale * learn_rate), mom_1=0.5, mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=(1.0 + (scale * (lam_kld - 1.0))), lam_kld_2=0.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH + "pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH + "pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str + "\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name,
                               x_label='Posterior KLd',
                               y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH + "pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds,
                            file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_GN.pkl")
    return
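# The utils.plot_scatter used by these VAE scripts saves a figure to disk
# rather than returning one. A minimal matplotlib sketch matching the
# (x, y, file_name, x_label, y_label) call signature seen above:
import matplotlib
matplotlib.use('Agg')  # render off-screen; these scripts only write files
import matplotlib.pyplot as plt

def plot_scatter(x, y, f_name, x_label=None, y_label=None):
    fig, ax = plt.subplots()
    ax.scatter(x, y, s=2, alpha=0.5)
    if x_label is not None:
        ax.set_xlabel(x_label)
    if y_label is not None:
        ax.set_ylabel(y_label)
    fig.savefig(f_name)
    plt.close(fig)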
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 400
    batch_reps = 6
    carry_frac = 0.25
    carry_size = int(batch_size * carry_frac)
    reset_prob = 0.04

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0)

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1500, 1500]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1500, 1500]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma,
                params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma,
                params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # gn_fname = RESULT_PATH+"pt_osm_params_b110000_GN.pkl"
    # in_fname = RESULT_PATH+"pt_osm_params_b110000_IN.pkl"
    # IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd,
    #                            new_params=None)
    # GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd,
    #                            new_params=None)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm,
                        p_x_given_z=GN, q_z_given_x=IN,
                        x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    # open in text mode, since formatted strings are written below
    out_file = open(RESULT_PATH + "pt_osm_results.txt", 'w')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size,))
    costs = [0. for i in range(10)]
    learn_rate = 0.002
    for i in range(200000):
        scale = min(1.0, float(i) / 5000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.8
        # ramp momentum up over the course of training
        if (i < 10000):
            momentum = 0.5
        elif (i < 50000):
            momentum = 0.7
        else:
            momentum = 0.9
        if ((i == 0) or (npr.rand() < reset_prob)):
            # sample a fully random batch
            batch_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        else:
            # sample a partially random batch, which retains some portion of
            # the worst scoring examples from the previous batch
            fresh_idx = npr.randint(low=0, high=tr_samples,
                                    size=(batch_size - carry_size,))
            batch_idx = np.concatenate((fresh_idx.ravel(), carry_idx.ravel()))
        # do a minibatch update of the model, and compute some costs
        Xd_batch = Xtr.take(batch_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        OSM.set_sgd_params(lr_1=(scale * learn_rate),
                           mom_1=(scale * momentum), mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=scale * lam_kld, lam_kld_2=0.0, lam_kld_c=50.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
        batch_costs = result[4] + result[5]
        obs_costs = collect_obs_costs(batch_costs, batch_reps)
        # carry the worst scoring examples over to the next batch
        carry_idx = batch_idx[np.argsort(-obs_costs)[0:carry_size]]
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH + "pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            file_name = RESULT_PATH + "pt_osm_inf_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.inf_weights.get_value(borrow=False).T,
                                    file_name, num_rows=30)
            file_name = RESULT_PATH + "pt_osm_gen_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.gen_weights.get_value(borrow=False),
                                    file_name, num_rows=30)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH + "pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str + "\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name,
                               x_label='Posterior KLd',
                               y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH + "pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds,
                            file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_GN.pkl")
    return
# set callbacks
callbacks = [
    EarlyStopping(patience=50),
    ModelCheckpoint(filepath="model_checkpoint",
                    save_weights_only=True,
                    save_best_only=True),
]
# perform nested cross validation and get results
y_test, y_pred = utils.my_cross_val_predict_for_lstm(
    get_lstm(), scaler, data, tscv, lstm_params, callbacks)
# calculate results
results = utils.get_results(y_test, y_pred)
# save results
with open("results/results_lstm_robust_scaler.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test,
    y_pred,
    filename="lstm_robust_scaler",
    window_plot=200,
    fontsize=14,
    fig_size=(15, 5),
)
utils.plot_scatter(y_test, y_pred, filename="lstm_robust_scaler")
                              verbose=True)
    positions = forceatlas2.forceatlas2_networkx_layout(nx_graph, pos=None,
                                                        iterations=iters)
    if (lbl):
        nx.draw_networkx_labels(nx_graph, positions,
                                labels=dict([(n, n) for n in nx_graph.nodes()]))
    nx.draw_networkx_nodes(nx_graph, positions, node_size=5, with_labels=False,
                           node_color="blue", alpha=0.4)
    nx.draw_networkx_edges(nx_graph, positions, edge_color="green", alpha=0.05)
    plt.axis('off')
    plt.savefig(f"{fname.split('.')[-2]}_graph.png")
    if (show):
        plt.show()
    plt.clf()


if (__name__ == '__main__'):
    utils.plot_scatter()
    # for x in range(35, 95, 5):
    #     plot_graph(f"graph/nelson_1_{x}.graph", iters=100, show=False)
    # for x in range(25, 35):
    #     plot_graph(f"graph/nelson_1_{x}.graph", iters=100, show=False)
    # for fname in ["pathlengths_undirected_kennetetal",
    #               "pathlengths_undirected_step_distance",
    #               "pathlengths_undirected_step_distance_pmfg"]:
    #     plot_graph_unweighted(f"graph/{fname}.graph", iters=100, show=False)
# create pipeline with regressor and scaler
pipeline = Pipeline([("scaler", RobustScaler()), ("regressor", LinearRegression())])
# nested cross validation
tscv = TimeSeriesSplit(n_splits=6, max_train_size=365 * 48, test_size=48 * 30)
# perform nested cross validation and get results
y_test, y_pred = utils.my_cross_val_predict(pipeline, X, y, tscv)
# calculate results
results = utils.get_results(y_test, y_pred)
# save results
with open("results/results_polynomial_regression.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test,
    y_pred,
    filename="polynomial_regression",
    window_plot=200,
    fontsize=14,
    fig_size=(15, 5),
)
utils.plot_scatter(y_test, y_pred, filename="polynomial_regression")
feed_dict = {
    x_: np.vstack([train_data, val_data]),
    y_: np.vstack([train_lab, val_lab])
}
top_layer_values = np.squeeze(sess.run([top_layer], feed_dict))
# reduce the embedding with PCA first, then run t-SNE on the projection
pca = PCA(n_components=2)
reduced_top_layer_values = pca.fit_transform(top_layer_values)
print('TSNE')
cls = np.argmax(np.vstack([train_lab, val_lab]), axis=1)
tsne = TSNE(n_components=2)
transfer_values_reduced = tsne.fit_transform(reduced_top_layer_values)
tsne_savepath = os.path.join('./logs', str(i), '{}.png'.format(step))
plot_scatter(transfer_values_reduced, cls, 2, savepath=tsne_savepath)
writer.writerow([
    i, train_acc, train_cost, val_acc, val_cost, val_sens, val_spec,
    val_cohen, val_b_acc, val_auc, test_acc, test_cost, test_sens,
    test_spec, test_cohen, test_b_acc, test_auc
])
result.flush()

# Training
# Get random batch
batch_xs, batch_ys = next_batch(train_data, train_lab, 30)
feed_dict = {x_: batch_xs, y_: batch_ys, lr_: 0.01}
_, train_cost, train_acc, train_preds = sess.run(
def run_elastic_effect_synthetic():
    # parse 'key=value' command line options
    data_option = sys.argv[1].split('=')[1]
    method_option = sys.argv[2].split('=')[1]
    model_option = sys.argv[3].split('=')[1]
    loss_option = sys.argv[4].split('=')[1]
    theta_option = sys.argv[5].split('=')[1]
    set_random_seed(666)
    dir_path = '/path/to/experiments/dir'
    model_path = dir_path + 'models/' + data_option + '_' + model_option + '.pt'
    figure_path = dir_path + 'figures/' + data_option + '.png'
    result_figure_path = dir_path + 'figures/' + data_option + '_' + \
        model_option + '_' + method_option + '_' + theta_option + '.png'
    config = {
        'batch_size': 1,
        'epoch_num': 20000,
        'lr': 1e-5,
        'test_batch_size': 1,
        'sample_size': 100,
        'simple_train_batch_size': 100,
        'simple_test_batch_size': 10,
        'prob_lr': 1e-6,
        'input_size': 3,
        'loss_function': loss_option,
        'method_option': method_option,
        'model': model_option,
        'optimizer': 'SGD'
    }
    if data_option == 'two_cosine':
        config['input_size'] = 2
        # config['lr'] = 1e-4
        positions, labels, original_positions = two_cosine(config['sample_size'])
        positions_large, labels_large, original_positions_large = two_cosine(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path)
    elif data_option == 'two_cycles':
        config['input_size'] = 2
        # config['lr'] = 1e-4
        positions, labels, original_positions = two_cycles(config['sample_size'])
        positions_large, labels_large, original_positions_large = two_cycles(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path)
    elif data_option == 'two_fold_surface':
        config['input_size'] = 3
        # config['lr'] = 1e-6
        positions, labels, original_positions = two_fold_surface(config['sample_size'])
        positions_large, labels_large, original_positions_large = two_fold_surface(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path, '3D')
    elif data_option == 'double_helix':
        config['input_size'] = 3
        positions, labels, original_positions = double_helix(config['sample_size'])
        positions_large, labels_large, original_positions_large = double_helix(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path, '3D')
    elif data_option == 'two_sphere':
        config['input_size'] = 3
        if config['model'] == 'MLPProb':
            config['lr'] = 3e-3
        elif config['model'] == 'MLPLinearProb':
            config['lr'] = 3e-4
        positions, labels, original_positions = two_sphere(config['sample_size'])
        positions_large, labels_large, original_positions_large = two_sphere(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path, '3D')
    else:
        config['input_size'] = 3
        positions, labels, original_positions = donut(config['sample_size'])
        positions_large, labels_large, original_positions_large = donut(10000)
        dataloader = get_data(positions, labels)
        dataloader_large = get_data(positions_large, labels_large)
        print('plot data functions')
        plot_scatter(positions_large, labels_large, figure_path, '3D')
    print(config['lr'])
    print('build model')
    model, loss_function, optimizer = build_model(config)
    if theta_option == 'optimal':
        print('train model')
        simple_train_batch(dataloader, model, loss_function, optimizer, config)
        print('save model')
        torch.save(model.state_dict(), model_path)
    print('load model')
    model.load_state_dict(torch.load(model_path))
    data_res, data_np = simple_test_batch(dataloader, model, config)
    print('data accuracy', data_res)
    # collect the indices of the positive-class examples
    index_list = []
    for index in range(len(dataloader)):
        if dataloader[index][1] == 1:
            index_list.append(index)
    config['epoch_num'] = 1
    total_delta = train_elastic_effect(dataloader, model, loss_function,
                                       config, index_list)
    if config['method_option'] == 'kernel':
        similarity_matrix = get_similarity_matrix(total_delta, index_list)
        total_delta = get_kernel_distance(similarity_matrix, index_list)
    data_res, data_np = simple_test_batch(dataloader, model, config)
    print('data accuracy', data_res)
    total_distance = get_distance(original_positions, index_list, option=data_option)
    delta, distance = get_delta_distance(total_delta, total_distance, index_list)
    print('average delta', np.mean(np.array(delta)))
    print('plot result figures')
    plot_delta_versus_distance(delta, distance, result_figure_path)
    print('delta', delta)
    print('distance', distance)
    print('Pearson Correlation')
    print(pearsonr(np.array(delta).reshape(-1),
                   np.array(distance).reshape(-1))[0])
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr_shared = datasets[0][0]
    Xva_shared = datasets[1][0]
    Xtr = Xtr_shared.get_value(borrow=False).astype(theano.config.floatX)
    Xva = Xva_shared.get_value(borrow=False).astype(theano.config.floatX)
    tr_samples = Xtr.shape[0]
    batch_size = 200
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 20
    h_dim = 100
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    X_sym = T.matrix('X_sym')

    ########################
    # p_s0_obs_given_z_obs #
    ########################
    params = {}
    shared_config = [z_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 1e-3
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_obs_given_z_obs = InfNet(rng=rng, Xd=X_sym,
                                  params=params, shared_param_dicts=None)
    p_s0_obs_given_z_obs.init_biases(0.2)
    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=X_sym,
                           params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [h_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = InfNet(rng=rng, Xd=X_sym,
                                params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=X_sym,
                         params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=X_sym,
                             params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)

    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModel(rng=rng, x_in=X_sym,
                          p_s0_obs_given_z_obs=p_s0_obs_given_z_obs,
                          p_hi_given_si=p_hi_given_si,
                          p_sip1_given_si_hi=p_sip1_given_si_hi,
                          q_z_given_x=q_z_given_x,
                          q_hi_given_x_si=q_hi_given_x_si,
                          obs_dim=obs_dim, z_dim=z_dim, h_dim=h_dim,
                          model_init_obs=True, ir_steps=2,
                          params=msm_params)
    obs_mean = (0.9 * np.mean(Xtr, axis=0)) + 0.05
    obs_mean_logit = np.log(obs_mean / (1.0 - obs_mean))
    MSM.set_input_bias(-obs_mean)
    MSM.set_obs_bias(0.1 * obs_mean_logit)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    costs = [0. for i in range(10)]
    learn_rate = 0.0003
    momentum = 0.8
    for i in range(300000):
        scale = min(1.0, ((i + 1) / 10000.0))
        extra_kl = max(0.0, ((50000.0 - i) / 50000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # randomly sample a minibatch
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        Xb = binarize_data(Xtr.take(tr_idx, axis=0))
        Xb = Xb.astype(theano.config.floatX)
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale * learn_rate, lr_2=scale * learn_rate,
                           mom_1=(scale * momentum), mom_2=0.98)
        MSM.set_train_switch(1.0)
        MSM.set_l1l2_weight(1.0)
        MSM.set_lam_nll(lam_nll=1.0)
        MSM.set_lam_kld(lam_kld_1=(1.0 + extra_kl), lam_kld_2=(1.0 + extra_kl))
        MSM.set_lam_l2w(1e-6)
        MSM.set_kzg_weight(0.01)
        # perform a minibatch update and record the cost for this batch
        result = MSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            print("-- batch {0:d} --".format(i))
            print("    joint_cost: {0:.4f}".format(costs[0]))
            print("    nll_cost  : {0:.4f}".format(costs[1]))
            print("    kld_cost  : {0:.4f}".format(costs[2]))
            print("    reg_cost  : {0:.4f}".format(costs[3]))
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            Xva = row_shuffle(Xva)
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len * samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MX_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # visualize some important weights in the model
            file_name = "MX_INF_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_1_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MX_INF_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_2_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_1_weights.get_value(borrow=False)
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_2_weights.get_value(borrow=False)
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_INF_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            post_klds = MSM.compute_post_klds(Xva[0:5000])
            file_name = "MX_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[0].shape[1]),
                            np.mean(post_klds[0], axis=0), file_name)
            file_name = "MX_HI_COND_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[1].shape[1]),
                            np.mean(post_klds[1], axis=0), file_name)
            file_name = "MX_HI_GLOB_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[2].shape[1]),
                            np.mean(post_klds[2], axis=0), file_name)
            # compute information about free-energy on validation set
            file_name = "MX_FREE_ENERGY_b{0:d}.png".format(i)
            fe_terms = MSM.compute_fe_terms(binarize_data(Xva[0:5000]), 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            print("    nll_bound : {0:.4f}".format(fe_mean))
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name,
                               x_label='Posterior KLd',
                               y_label='Negative Log-likelihood')
    return
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr_shared = datasets[0][0]
    Xva_shared = datasets[1][0]
    Xtr = Xtr_shared.get_value(borrow=False).astype(theano.config.floatX)
    Xva = Xva_shared.get_value(borrow=False).astype(theano.config.floatX)
    tr_samples = Xtr.shape[0]
    batch_size = 500
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_rnn_dim = 25
    z_obs_dim = 5
    jnt_dim = obs_dim + z_rnn_dim
    h_dim = 100
    x_type = 'bernoulli'
    prior_sigma = 1.0
    # some InfNet instances to build the MultiStageModel from
    X_sym = T.matrix('X_sym')

    ########################
    # p_s0_obs_given_z_obs #
    ########################
    params = {}
    shared_config = [z_obs_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 1e-3
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_obs_given_z_obs = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_s0_obs_given_z_obs.init_biases(0.2)
    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [jnt_dim, 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [(h_dim + z_rnn_dim), 500, 500]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], (z_rnn_dim + z_obs_dim)]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + jnt_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)

    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModel(rng=rng, x_in=X_sym, \
            p_s0_obs_given_z_obs=p_s0_obs_given_z_obs, \
            p_hi_given_si=p_hi_given_si, \
            p_sip1_given_si_hi=p_sip1_given_si_hi, \
            q_z_given_x=q_z_given_x, \
            q_hi_given_x_si=q_hi_given_x_si, \
            obs_dim=obs_dim, z_rnn_dim=z_rnn_dim, z_obs_dim=z_obs_dim, \
            h_dim=h_dim, model_init_obs=False, model_init_rnn=True, \
            ir_steps=3, params=msm_params)
    obs_mean = (0.9 * np.mean(Xtr, axis=0)) + 0.05
    obs_mean_logit = np.log(obs_mean / (1.0 - obs_mean))
    MSM.set_input_bias(-obs_mean)
    MSM.set_obs_bias(0.1 * obs_mean_logit)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    costs = [0. for i in range(10)]
    learn_rate = 0.003
    momentum = 0.5
    for i in range(300000):
        scale = min(1.0, ((i + 1) / 5000.0))
        l1l2_weight = 1.0  # min(1.0, ((i+1) / 2500.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.92
        # momentum schedule; use elif so the 0.80 setting is not
        # immediately overwritten by the 0.65 branch
        if (i > 100000):
            momentum = 0.80
        elif (i > 50000):
            momentum = 0.65
        else:
            momentum = 0.50
        # randomly sample a minibatch
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        Xb = binarize_data(Xtr.take(tr_idx, axis=0))
        Xb = Xb.astype(theano.config.floatX)
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                mom_1=(scale*momentum), mom_2=0.99)
        MSM.set_train_switch(1.0)
        MSM.set_l1l2_weight(l1l2_weight)
        MSM.set_lam_nll(lam_nll=1.0)
        MSM.set_lam_kld(lam_kld_1=1.0, lam_kld_2=1.0)
        MSM.set_lam_l2w(1e-5)
        MSM.set_kzg_weight(0.01)
        # perform a minibatch update and record the cost for this batch
        result = MSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            print("-- batch {0:d} --".format(i))
            print("    joint_cost: {0:.4f}".format(costs[0]))
            print("    nll_cost  : {0:.4f}".format(costs[1]))
            print("    kld_cost  : {0:.4f}".format(costs[2]))
            print("    reg_cost  : {0:.4f}".format(costs[3]))
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            Xva = row_shuffle(Xva)
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len * samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MZ_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # visualize some important weights in the model
            file_name = "MZ_INF_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_1_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MZ_INF_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_2_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_1_weights.get_value(borrow=False)
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_2_weights.get_value(borrow=False)
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_INF_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:, :obs_dim], file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            post_klds = MSM.compute_post_klds(Xva[0:5000])
            file_name = "MZ_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[0].shape[1]), \
                    np.mean(post_klds[0], axis=0), file_name)
            file_name = "MZ_HI_COND_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[1].shape[1]), \
                    np.mean(post_klds[1], axis=0), file_name)
            file_name = "MZ_HI_GLOB_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[2].shape[1]), \
                    np.mean(post_klds[2], axis=0), file_name)
            # compute information about free-energy on validation set
            file_name = "MZ_FREE_ENERGY_b{0:d}.png".format(i)
            fe_terms = MSM.compute_fe_terms(binarize_data(Xva[0:5000]), 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            print("    nll_bound : {0:.4f}".format(fe_mean))
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
    return
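# The training loop above relies on a binarize_data() helper that is not
# shown in this file. A minimal sketch, assuming it performs the usual
# stochastic binarization of [0, 1]-valued MNIST intensities (each pixel is
# drawn as a Bernoulli with probability equal to its intensity):

import numpy as np

def binarize_data(X, rng=np.random):
    # sample a 0/1 value for every entry of X, treating each entry
    # (assumed to lie in [0, 1]) as the mean of a Bernoulli variable
    return (rng.uniform(size=X.shape) < X).astype('float32')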
# create a pipeline with a scaler and a regressor
pipeline = Pipeline([("scaler", RobustScaler()),
                     ("regressor", LinearRegression())])
# nested cross-validation over time-series splits
tscv = TimeSeriesSplit(n_splits=6, max_train_size=365 * 48, test_size=48 * 30)
# perform nested cross-validation and collect the out-of-sample predictions
y_test, y_pred = utils.my_cross_val_predict(pipeline, X, y, tscv)
# calculate results
results = utils.get_results(y_test, y_pred)
# save results
with open("results/results_linear_regression.json", "w") as f:
    json.dump(results, f)
utils.plot_results(
    y_test,
    y_pred,
    filename="linear_regression",
    window_plot=200,
    fontsize=14,
    fig_size=(15, 5),
)
utils.plot_scatter(y_test, y_pred, filename="linear_regression")
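# utils.my_cross_val_predict is project-specific and not shown here. A
# minimal sketch of what such a helper could look like, assuming it fits a
# fresh copy of the pipeline on each TimeSeriesSplit training fold and
# concatenates the held-out targets and predictions (the name and signature
# are assumptions, not a documented API):

import numpy as np
from sklearn.base import clone

def my_cross_val_predict(estimator, X, y, cv):
    # walk-forward prediction: train on each fold's past, predict its future
    y_tests, y_preds = [], []
    for train_idx, test_idx in cv.split(X):
        est = clone(estimator)  # unfitted copy, so folds stay independent
        est.fit(X[train_idx], y[train_idx])
        y_tests.append(y[test_idx])
        y_preds.append(est.predict(X[test_idx]))
    return np.concatenate(y_tests), np.concatenate(y_preds)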
def test_gip_sigma_scale_mnist():
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    batch_size = 100
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(Xtr)
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0).astype(theano.config.floatX)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    # Load inferencer and generator from saved parameters
    gn_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_GN.pkl"
    in_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim
    # construct a OneStageModel from the loaded InfNet and GenNet
    osm_params = {}
    osm_params['x_type'] = 'gaussian'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=x_dim, z_dim=z_dim, params=osm_params)

    # compute variational likelihood bound and its sub-components
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    file_name = "A_MNIST_POST_KLDS.png"
    post_klds = OSM.compute_post_klds(Xb)
    post_dim_klds = np.mean(post_klds, axis=0)
    utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
            file_name)
    # compute information about free-energy on validation set
    file_name = "A_MNIST_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
            x_label='Posterior KLd', y_label='Negative Log-likelihood')
    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))

    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel())  # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1])  # summed post KLds for each obs
    # post_dim_klds = post_stats[2]  # average post KLds for each post dim
    # post_dim_vars = post_stats[3]  # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10, :], loop_iters=100, \
                        sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists['data samples']))
        Xs, block_im_dim = block_video(Xs, (28, 28), (3, 3))
        to_video(Xs, block_im_dim, "A_MNIST_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        # sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #         sigma_scale=1.0)
        # Xs = np.vstack(sample_lists["data samples"])
        # file_name = "TFD_TEST_{0:d}.png".format(i)
        # utils.visualize_samples(Xs, file_name, num_rows=15)
    file_name = "A_MNIST_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)

    # # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_1.png", num_rows=20)
    # ##########
    # # AGAIN! #
    # ##########
    # Xs = OSM.sample_from_prior(10000)
    # tr_idx = npr.randint(low=0, high=tr_samples, size=(5000,))
    # Xva = Xtr.take(tr_idx, axis=0)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_2.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_2.png", num_rows=20)
    return
def make_predictions(x_batches, y_batches, model, epochs_to_evaluate, runtype, save_results=False):
    all_errors = {}
    neg_predictions = {}
    for ep in epochs_to_evaluate:
        sub_path = model.path + '/' + str(ep)
        maybe_create_path(path=sub_path)
        check_point = "checkpoints.ckpt-" + str(ep)
        x_data, _y_pred, _y_true = model.run_check_point(
            check_point=check_point,
            x_batches=x_batches,
            y_batches=y_batches,
            scalers=model.scalers[runtype])
        # create a separate folder for each target and save its relevant data in that folder
        for idx, out in enumerate(model.data_config['out_features']):
            out_path = sub_path + '/' + out + '_' + runtype
            maybe_create_path(path=out_path)
            y_pred = _y_pred[:, idx]
            y_true = _y_true[:, idx]
            negative_predictions = np.sum(np.array(y_pred) < 0, axis=0)
            if negative_predictions > 0:
                print("Warning, {} negative bacteria predictions found".format(
                    negative_predictions))
            neg_predictions[str(ep) + '_' + out] = int(negative_predictions)
            if negative_predictions > 0:
                y_true = y_true.copy()
            else:
                y_true = np.where(y_true > 0.0, y_true, np.nan)
            y_true_avail, y_pred_avail = get_pred_where_obs_available(y_true, y_pred)
            errors = get_errors(y_true_avail, y_pred_avail, model.data_config['monitor'])
            all_errors[str(ep) + '_' + out] = errors
            print('shapes of predicted arrays: ', y_pred.shape, y_true.shape, x_data.shape)
            if model.verbosity > 2:
                for i, j in zip(y_pred, y_true):
                    print(i, j)
            plot_scatter(y_true_avail, y_pred_avail, out_path + "/scatter")
            ndf = pd.DataFrame()
            # fill ndf with input data
            for i, inp in enumerate(model.data_config['in_features']):
                ndf[inp] = x_data[:, i]
            ndf['true'] = y_true
            ndf[out] = y_pred
            # ndf['true_avail'] = test_y_true_avail
            # ndf['pred_avail'] = test_y_pred_avail
            ndf.index = get_index(model.batches[runtype + '_index'])
            # removing duplicated values
            # TODO why duplicated values exist
            ndf = ndf[~ndf.index.duplicated(keep='first')]
            plots_on_last_axis = ['true', out]
            if runtype == 'all':
                if model.data_config['batch_making_mode'] == 'event_based':
                    train_idx = get_index(model.batches['train' + '_index'])
                    train_idx = train_idx[~train_idx.duplicated()]
                else:
                    train_tk = model.batches['train_tk_index']
                    train_tk_nz = train_tk[np.where(train_tk > 0.0)]
                    train_idx = get_index(train_tk_nz)
                # test_idx = get_index(model.batches['test' + '_index'])
                out_df = ndf[out]
                out_df = out_df[~out_df.index.duplicated()]
                ndf['train'] = ndf[out][train_idx]  # out_df[train_idx]
                # ndf['test'] = ndf[out][test_idx]
                plots_on_last_axis.append('train')
            do_plot(ndf, list(ndf.columns), save_name=out_path + '/' + str(out),
                    obs_logy=True, single_ax_plots=plots_on_last_axis)
            ndf['Prediction'] = ndf[out]
            plot_single_output(ndf, out_path + '/' + str(out) + '_single', runtype)
            plot_bact_points(ndf, out_path + "/bact_points", runtype)
            if save_results:
                fpath = os.path.join(out_path + '_' + runtype + '_results.xlsx')
                ndf.to_excel(fpath)
    return all_errors, neg_predictions
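# get_pred_where_obs_available() is imported from elsewhere; given that the
# loop above first maps missing observations to np.nan, a plausible sketch
# (an assumption, not the project's actual helper) keeps only the positions
# where an observation exists:

import numpy as np

def get_pred_where_obs_available(y_true, y_pred):
    # drop positions with missing (NaN) observations from both arrays
    mask = ~np.isnan(y_true)
    return y_true[mask], y_pred[mask]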
import sys

import numpy as np
import torch

from utils import cal_acc
from model_baseline import AE
from dataset_baseline import Image_Dataset
from clustering_baseline import predict
from clustering_baseline import inference
# the *_strong variants below would shadow the baseline imports above,
# so they stay commented out for the baseline evaluation
# from model_strong import AE
# from dataset_strong import Image_Dataset
# from clustering_strong import predict
# from clustering_strong import inference

same_seeds(0)

model_filename = sys.argv[1]   # ~/checkpoints/baseline.pth
input_filename2 = sys.argv[2]  # ~/Downloads/dataset/valX.npy
input_filename3 = sys.argv[3]  # ~/Downloads/dataset/valY.npy
valX = np.load(input_filename2)
valY = np.load(input_filename3)

model = AE().cuda()
model.load_state_dict(torch.load(model_filename))
model.eval()
latents = inference(valX, model)
pred_from_latent, emb_from_latent = predict(latents)
acc_latent = cal_acc(valY, pred_from_latent)
print('The clustering accuracy is:', acc_latent)
print('The clustering result:')
plot_scatter(emb_from_latent, valY, savefig='p1_baseline.png')
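# cal_acc() compares ground-truth labels with cluster assignments. For the
# two-cluster case above the cluster ids are arbitrary, so a common trick is
# to score both the assignment and its flip and keep the better one. A sketch
# under that assumption (not necessarily the real utils.cal_acc):

import numpy as np

def cal_acc(y_true, y_pred):
    # cluster labels are arbitrary: 0/1 may be swapped relative to y_true,
    # so report the better of the assignment and its complement
    acc = np.mean(y_true == y_pred)
    return max(acc, 1.0 - acc)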
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 400
    batch_reps = 6
    carry_frac = 0.25
    carry_size = int(batch_size * carry_frac)
    reset_prob = 0.04

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0)

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1500, 1500]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1500, 1500]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # gn_fname = RESULT_PATH+"pt_osm_params_b110000_GN.pkl"
    # in_fname = RESULT_PATH+"pt_osm_params_b110000_IN.pkl"
    # IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    # open in text mode ('w'), since the file receives str rather than bytes
    out_file = open(RESULT_PATH + "pt_osm_results.txt", 'w')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size,))
    costs = [0. for i in range(10)]
    learn_rate = 0.002
    for i in range(200000):
        scale = min(1.0, float(i) / 5000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.8
        # momentum ramps up over training; the thresholds are ordered so
        # every branch is reachable (i < 50000 before i < 10000 would make
        # the second branch dead code)
        if (i < 10000):
            momentum = 0.5
        elif (i < 50000):
            momentum = 0.7
        else:
            momentum = 0.9
        if ((i == 0) or (npr.rand() < reset_prob)):
            # sample a fully random batch
            batch_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        else:
            # sample a partially random batch, which retains some portion of
            # the worst scoring examples from the previous batch
            fresh_idx = npr.randint(low=0, high=tr_samples, size=(batch_size - carry_size,))
            batch_idx = np.concatenate((fresh_idx.ravel(), carry_idx.ravel()))
        # do a minibatch update of the model, and compute some costs;
        # use batch_idx here, since resampling a fresh index set would
        # silently discard the carry logic above
        Xd_batch = Xtr.take(batch_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        OSM.set_sgd_params(lr_1=(scale * learn_rate), \
                mom_1=(scale * momentum), mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=scale * lam_kld, lam_kld_2=0.0, lam_kld_c=50.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
        batch_costs = result[4] + result[5]
        obs_costs = collect_obs_costs(batch_costs, batch_reps)
        carry_idx = batch_idx[np.argsort(-obs_costs)[0:carry_size]]
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH + "pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            file_name = RESULT_PATH + "pt_osm_inf_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.inf_weights.get_value(borrow=False).T, \
                    file_name, num_rows=30)
            file_name = RESULT_PATH + "pt_osm_gen_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.gen_weights.get_value(borrow=False), \
                    file_name, num_rows=30)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH + "pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str + "\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH + "pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
                    file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_GN.pkl")
    return
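# collect_obs_costs() reduces per-example costs from a replicated batch back
# to one cost per unique example. A sketch, assuming batch_costs lays out the
# batch_reps replicates end to end (the layout is an assumption):

import numpy as np

def collect_obs_costs(batch_costs, batch_reps):
    # average the cost of each example over its batch_reps replicates
    obs_count = batch_costs.shape[0] // batch_reps
    return batch_costs.reshape(batch_reps, obs_count).mean(axis=0)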
config = load_config(args.config)
exp_dir = os.path.join(
    './experiments',
    "{}_{}".format(args.config, strftime("%Y-%m-%d_%H:%M:%S", gmtime())))
os.makedirs(exp_dir, exist_ok=True)

dset = gaussian_data_generator(config.seed)
dset.random_distribution()
utils.plot_lines(points=dset.p, title='Weight of each gaussian',
                 path='{}/gaussian_weight.png'.format(exp_dir))
sample_points = dset.sample(100)
utils.plot_scatter(points=sample_points, centers=dset.centers,
                   title='Sampled data points',
                   path='{}/samples.png'.format(exp_dir))

prefix = "unrolled_steps-{}-prior_std-{:.2f}".format(config.unrolled_steps,
                                                     np.std(dset.p))
print("Save file with prefix", prefix)

G = Generator(input_size=config.g_inp, hidden_size=config.g_hid,
              output_size=config.g_out).cuda()
D = Discriminator(input_size=config.d_inp, hidden_size=config.d_hid,
                  output_size=config.d_out).cuda()
# torch tensors have no .checkpoint() method, so the original
# `_apply(lambda t: t.detach().checkpoint())` calls on G and D could never
# run and are omitted
criterion = nn.BCELoss()
def run():
    logging.basicConfig(format='%(message)s', level=logging.INFO,
                        filename='results.log', filemode='w')
    # load the pre-processed file
    df = read_csv('pre-processed-in-24-hours.csv', index_col=0, parse_dates=True)
    for cell in [108 * 2 + 1]:
        for epoch in [1000, ]:  # [1000, 2000, 3000, 4000, 5000]
            for batch_size in [500, ]:  # [500, 1000, 1500]
                for n_input in [1, 2, 4, 8, 12, 16]:
                    for n_out in range(1, 9):
                        logging.info(
                            "Starting... cell {0}, epoch {1}, batch_size {2}, "
                            "input {3} and output {4}".format(
                                cell, epoch, batch_size, n_input, n_out))
                        try:
                            logging.info("Training {} {}".format(n_input, n_out))
                            # transform data
                            scaler, data_scaled = scale(df.values)
                            train, test = split_dataset(df.values, n_out)
                            train_scaled, test_scaled = split_dataset(data_scaled, n_out)
                            # restructure into window size
                            train_scaled, test_scaled = restructure_data_by_window(
                                train_scaled, test_scaled, n_out)
                            train, test = restructure_data_by_window(train, test, n_out)
                            # fit model
                            model = build_model(train_scaled, n_input, n_out,
                                                cell, epoch, batch_size)
                            # history is a list by window size
                            history_scaled = [x for x in train_scaled[:n_input, :, :]]
                            history = [x for x in train[:n_input, :, :]]
                            train_walk_foward_validation(history, history_scaled, model,
                                                         n_input, scaler, train, train_scaled)
                            predictions_inverted = test_walk_foward_validation(
                                model, n_input, scaler, test, test_scaled, train, train_scaled)
                            logging.info("predictions_inverted: {}".format(
                                predictions_inverted.shape))
                            logging.info("test {}".format(test.shape))
                            data = {
                                'predict': predictions_inverted.reshape(
                                    predictions_inverted.shape[0] * predictions_inverted.shape[1]),
                                'real': test[:, :, 0].reshape(
                                    test[:, :, 0].shape[0] * test[:, :, 0].shape[1])
                            }
                            data['time'] = df.index[-data["predict"].shape[0]:]
                            df_plot = pandas.DataFrame.from_dict(data)
                            df_plot.to_csv('plot_results_{0}_{1}.csv'.format(n_input, n_out))
                            plot_results(df_plot)
                            plot_scatter(df_plot)
                        except Exception as e:
                            logging.info(e)
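# split_dataset() and restructure_data_by_window() come from a separate
# module. A sketch of the windowing step, assuming it groups consecutive rows
# of a (samples, features) array into fixed-size windows and drops the
# remainder (names and layout are assumptions):

import numpy as np

def restructure_data_by_window(train, test, window):
    # reshape each split into (num_windows, window, num_features)
    def to_windows(data):
        usable = (data.shape[0] // window) * window
        return data[:usable].reshape(-1, window, data.shape[1])
    return to_windows(train), to_windows(test)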
from preprocess import preprocess

valX = np.load('data/valX.npy')
valY = np.load('data/valY.npy')

# model = Base_AE().cuda()
model = Improved_AE().cuda()
model.load_state_dict(torch.load('./improve-2.pth'))
model.eval()
latents = inference(valX, model)
pred_from_latent, emb_from_latent = predict(latents)
acc_latent = cal_acc(valY, pred_from_latent)
print('The clustering accuracy is:', acc_latent)
print('The clustering result:')
plot_scatter(emb_from_latent, valY, savefig='p1_improved.png')

'''
import matplotlib.pyplot as plt
import numpy as np

# plot the original images
trainX = np.load('data/trainX_new.npy')
trainX_preprocessed = preprocess(trainX)
model = Improved_AE().cuda()
model.load_state_dict(torch.load('./improve-2.pth'))

plt.figure(figsize=(10, 4))
indexes = [1, 2, 3, 6, 7, 9]
imgs = trainX[indexes, ]
for i, img in enumerate(imgs):
if __name__ == '__main__':
    '''
    Example of batch gradient descent.
    Assumes a mean squared error (MSE) cost function.
    '''
    num_samples = 1000
    num_features = 2
    X, y = create_data_for_linear_model(num_samples, num_features)
    num_epochs = 1000
    # initial guess at the unknown parameters
    # +1 for the constant/intercept term
    theta = np.random.randn(num_features + 1, 1)
    # run solver
    theta, theta_path = run_BGD(num_epochs, num_samples, theta, X, y)
    print('Long-hand linear algebra calculation')
    print('Intercept {0}, coefficient {1}'.format(theta[0], theta[1:]))
    # there is no SKLearn module for batch gradient descent; other methods
    # are more commonly used in practice instead
    # plot the evolution of theta in the 2D parameter space
    # (use new names here so the data arrays X and y are not clobbered)
    theta_0 = [t[0][0] for t in theta_path]
    theta_1 = [t[1][0] for t in theta_path]
    plot_scatter(theta_0, theta_1)
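# run_BGD() is defined elsewhere. A sketch of what a batch gradient descent
# solver for MSE could look like, assuming X arrives without a bias column
# (SGDRegressor below fits its intercept separately) and y is a column
# vector; the learning rate eta is an assumed default:

import numpy as np

def run_BGD(num_epochs, num_samples, theta, X, y, eta=0.1):
    # full-batch MSE gradient: grad = (2/m) * Xb^T (Xb theta - y)
    Xb = np.c_[np.ones((num_samples, 1)), X]  # prepend intercept column
    theta_path = [theta]
    for _ in range(num_epochs):
        gradients = (2.0 / num_samples) * Xb.T.dot(Xb.dot(theta) - y)
        theta = theta - eta * gradients
        theta_path.append(theta)
    return theta, theta_path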
'''
Example of stochastic gradient descent.
Assumes a mean squared error (MSE) cost function.
'''
num_samples = 1000
num_features = 2
X, y = create_data_for_linear_model(num_samples, num_features)
num_epochs = 50
# initial guess at the unknown parameters
# +1 for the constant/intercept term
theta = np.random.randn(num_features + 1, 1)
# run solver
theta, theta_path = run_SGD(num_epochs, num_samples, theta, X, y)
print('Long-hand linear algebra calculation')
print('Intercept {0}, coefficient {1}'.format(theta[0], theta[1:]))
# plot the evolution of theta in the 2D parameter space
theta_0 = [t[0][0] for t in theta_path]
theta_1 = [t[1][0] for t in theta_path]
plot_scatter(theta_0, theta_1)
# use SKLearn functions to solve the same problem
sgd_reg = SGDRegressor(max_iter=num_epochs, penalty=None, eta0=0.1)
sgd_reg.fit(X, y.ravel())
print('SKLearn solver')
print('Intercept {0}, coefficient {1}'.format(sgd_reg.intercept_, sgd_reg.coef_))
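# run_SGD() is likewise defined elsewhere. A sketch under the same
# assumptions as the run_BGD sketch above, updating theta one randomly drawn
# sample at a time with a simple 1/t learning-rate schedule (t0 and t1 are
# assumed defaults):

import numpy as np

def run_SGD(num_epochs, num_samples, theta, X, y, t0=5.0, t1=50.0):
    Xb = np.c_[np.ones((num_samples, 1)), X]  # prepend intercept column
    theta_path = [theta]
    for epoch in range(num_epochs):
        for step in range(num_samples):
            idx = np.random.randint(num_samples)
            xi, yi = Xb[idx:idx + 1], y[idx:idx + 1]
            # single-sample MSE gradient: 2 * xi^T (xi theta - yi)
            gradients = 2.0 * xi.T.dot(xi.dot(theta) - yi)
            eta = t0 / (epoch * num_samples + step + t1)  # decaying step size
            theta = theta - eta * gradients
            theta_path.append(theta)
    return theta, theta_path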