def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    n_trials = 50
    n_samples = 23
    kron_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    fname = "kron_conv_rates.npz"
    trial = -1
    while trial < n_trials:
        trial += 1
        _, y1, _, y2 = data_gen(full_x)
        full_y1 = y1[0]
        full_y = torch.stack([y1[0], y1[0]], -1)
        obs_inds = random.sample(range(n_pts), n_start)
        try:
            kron_maxes[trial, :] = bayes_opt_kron(
                full_x, full_y, obs_inds,
                end_sample_count=n_samples)[n_start:]
            print("trial ", trial, " done")
            if trial % 5 == 0:
                np.savez(fname, kron_maxes=kron_maxes)
        except Exception:
            print("error hit")
            trial -= 1  # retry this trial with freshly generated data
    np.savez(fname, kron_maxes=kron_maxes)
    return 1
def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    n_trials = 50
    kron_iters = np.zeros(n_trials + 1)
    fname = "conv_iter_kron.npz"
    trial = -1
    while trial < n_trials:
        trial += 1
        _, y1, _, y2 = data_gen(full_x)
        full_y1 = y1[0]
        full_y = torch.stack([y1[0], y1[0]], -1)
        obs_inds = random.sample(range(n_pts), n_start)
        obs_inds2 = deepcopy(obs_inds)
        try:
            # record how many samples the kron model needs to converge
            kron_iters[trial] = len(
                bayes_opt_kron(full_x, full_y, obs_inds,
                               ei_tol=0.001, max_iters=30))
            print("trial ", trial, " done")
            if trial % 5 == 0:
                np.savez(fname, kron_iters=kron_iters)
        except Exception:
            print("error hit")
            trial -= 1
    np.savez(fname, kron_iters=kron_iters)
    return 1
def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    n_trials = 50
    n_samples = 23
    multi_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    kron_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    single_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    trial = -1
    while trial < n_trials:
        trial += 1
        _, y1, _, y2 = data_gen(full_x)
        full_y1 = y1[0]
        full_y = torch.stack([y1[0], y1[0]], -1)
        # each optimizer gets its own copy of the same starting indices
        obs_inds = random.sample(range(n_pts), n_start)
        obs_inds2 = deepcopy(obs_inds)
        obs_inds3 = deepcopy(obs_inds)
        try:
            multi_maxes[trial, :] = bayes_opt_multi(
                full_x, full_y, obs_inds,
                end_sample_count=n_samples)[n_start:]
            single_maxes[trial, :] = bayes_opt_single(
                full_x, full_y1, obs_inds2,
                end_sample_count=n_samples)[n_start:]
            kron_maxes[trial, :] = bayes_opt_kron(
                full_x, full_y, obs_inds3,
                end_sample_count=n_samples)[n_start:]
            print("trial ", trial, " done")
            if trial % 5 == 0:
                np.savez("conv_rates_data.npz", multi_maxes=multi_maxes,
                         single_maxes=single_maxes, kron_maxes=kron_maxes)
        except Exception:
            print("error hit")
            trial -= 1
    np.savez("conv_rates_data.npz", multi_maxes=multi_maxes,
             single_maxes=single_maxes, kron_maxes=kron_maxes)
    return 1
def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    n_trials = 3
    for trial in range(n_trials):
        _, y1, _, y2 = data_gen(full_x)
        full_y1 = y1[0]
        full_y = torch.stack([y1[0], y1[0]], -1)
        obs_inds = random.sample(range(n_pts), n_start)
        obs_inds2 = deepcopy(obs_inds)
        multi_maxes = bayes_opt_multi(full_x, full_y, obs_inds,
                                      end_sample_count=15)
        single_maxes = bayes_opt_single(full_x, full_y1, obs_inds2,
                                        end_sample_count=15)
        # plt.plot(single_maxes[n_start:], marker='*')
        # plt.plot(multi_maxes[n_start:], marker='o')
        # plt.show()
    return 1
def work(Coordinator, report_lock, loss_function, loss_para, data,
         base_path, repeat=1):
    os.makedirs(base_path)
    Model = regession_model(p=data['p'], K=data['K'],
                            loss_function=loss_function,
                            loss_para=loss_para)
    for r in range(repeat):
        if data['Env'] == 'Sim':
            X, Y = data_gen(Nk=data['Nk'], K=data['K'], p=data['p'],
                            seed=hash("Train" + str(r)))
            TX, TY = data_gen(Nk=data['Nk'], K=data['K'], p=data['p'],
                              seed=hash("Test" + str(r)))
        elif data['Env'] == 'MNIST':
            X, Y = data_gen_MNIST(data['Nk'] * data['K'], False,
                                  seed=hash("Train" + str(r)))
            TX, TY = data_gen_MNIST(data['Nk'] * data['K'], True,
                                    seed=hash("Test" + str(r)))
        elif data['Env'] == 'REAL':
            X, Y, TX, TY = data_gen_REAL(r)
        res = Model.full_auto(X, Y, TX, TY)
        local_path = os.path.join(base_path, str(r))
        os.makedirs(local_path)
        matrix_to_save = {'Cfm_train', 'Cfm_test', 'A', 'B'}
        for m in matrix_to_save:
            savetxt(os.path.join(local_path, str(m) + ".csv"),
                    res[m], delimiter=",")
        # pad the loss parameters to three columns for the report
        if loss_function in ('DWD', 'DWDSM', 'DWDnc'):
            loss_para_pad_to_3 = (loss_para['l'], loss_para['alpha'],
                                  loss_para['q'])
        elif loss_function == 'logistic':
            loss_para_pad_to_3 = (loss_para['l'], "-", "-")
        else:
            raise ValueError("unknown loss function: " + str(loss_function))
        settings = (data['K'], data['p'], data['Nk'], loss_function)
        rps = (str(r), res['Loss_train'], res['Loss_test'], res['Ac_train'],
               res['Ac_test'], res['large_parameter'], res['i'])
        to_report = list(map(str, settings + loss_para_pad_to_3 + rps))
        report_lock.acquire()
        report_path = os.path.join(os.getcwd(), "results",
                                   "env=" + data['Env'], "sum.csv")
        with open(report_path, 'a') as report_file:
            print(",".join(to_report), file=report_file)
        report_lock.release()
        Model.reset()
    Coordinator.release()
    return None
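# A hypothetical driver for `work` above (none is shown in this file):
# `Coordinator` caps the number of concurrent trials and `report_lock`
# serializes appends to the shared sum.csv. All parameter values below are
# illustrative assumptions, not taken from the original.

import os
import threading

coordinator = threading.Semaphore(4)  # at most 4 workers at once; released inside work()
report_lock = threading.Lock()

for i, l in enumerate([0.1, 1.0, 10.0]):
    coordinator.acquire()  # block until a worker slot frees up
    threading.Thread(
        target=work,
        args=(coordinator, report_lock, 'logistic', {'l': l},
              {'Env': 'Sim', 'Nk': 50, 'K': 3, 'p': 10},
              os.path.join("results", "run_" + str(i))),
    ).start()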
def main():
    num_pts = 100
    test_x = torch.linspace(0, 10, num_pts)
    dat1, mean1, dat2, mean2 = data_gen(test_x)
    test_y = torch.stack([dat1, dat2], -1)[0]
    num_train = 20
    indices = random.sample(range(num_pts), num_train)
    train_x = test_x[indices]
    train_y = test_y[indices, :]

    class KronMultitaskModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(KronMultitaskModel, self).__init__(train_x, train_y,
                                                     likelihood)
            self.mean_module = gpytorch.means.MultitaskMean(
                gpytorch.means.ConstantMean(), num_tasks=2)
            self.covar_module = gpytorch.kernels.MultitaskKernel(
                gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1)

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultitaskMultivariateNormal(
                mean_x, covar_x)

    kronlikelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(
        num_tasks=2)
    kronmodel = KronMultitaskModel(train_x, train_y, kronlikelihood)

    kronmodel.train()
    kronlikelihood.train()
    optimizer = torch.optim.Adam([
        {'params': kronmodel.parameters()},
    ], lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(kronlikelihood, kronmodel)
    n_iter = 50
    for i in range(n_iter):
        optimizer.zero_grad()
        output = kronmodel(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()  # apply the gradient update each iteration

    kronmodel.eval()
    kronlikelihood.eval()
    with torch.no_grad(), gpytorch.fast_pred_var():
        eval_x = torch.linspace(0, 10, 1000)
        kronpredictions = kronlikelihood(kronmodel(eval_x))
        kronmean = kronpredictions.mean
        kronlower, kronupper = kronpredictions.confidence_region()
    print("lower = ", kronlower)
    return 1
def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    n_trials = 50
    n_samples = 23
    one_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    two_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    three_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    four_maxes = np.zeros((n_trials + 1, n_samples - n_start - 1))
    trial = -1
    fname = "num_task_data.npz"
    while trial < n_trials:
        trial += 1
        full_y = data_gen(full_x, n_tasks=4).detach()
        # each run gets its own copy of the same starting indices
        obs_inds = random.sample(range(n_pts), n_start)
        obs_inds2 = deepcopy(obs_inds)
        obs_inds3 = deepcopy(obs_inds)
        obs_inds4 = deepcopy(obs_inds)
        try:
            one_maxes[trial, :] = bayes_opt_single(
                full_x, full_y[:, 0], obs_inds,
                end_sample_count=n_samples)[n_start:]
            two_maxes[trial, :] = bayes_opt_multi(
                full_x, full_y[:, 0:2], obs_inds2,
                end_sample_count=n_samples)[n_start:]
            three_maxes[trial, :] = bayes_opt_multi(
                full_x, full_y[:, 0:3], obs_inds3,
                end_sample_count=n_samples)[n_start:]
            four_maxes[trial, :] = bayes_opt_multi(
                full_x, full_y, obs_inds4,
                end_sample_count=n_samples)[n_start:]
        except Exception:
            trial -= 1
            print("error hit")
        if trial % 5 == 0:
            np.savez(fname, one_maxes=one_maxes, two_maxes=two_maxes,
                     three_maxes=three_maxes, four_maxes=four_maxes)
        print("trial ", trial, "done")
    np.savez(fname, one_maxes=one_maxes, two_maxes=two_maxes,
             three_maxes=three_maxes, four_maxes=four_maxes)
    return 1
    # # Plot training data as black stars
    # y2_ax.plot(test_data.detach().numpy(), dat[:, 1].detach().numpy(), 'k*')
    # # Predictive mean as blue line
    # y2_ax.plot(test_data.numpy(), mean[:, 1].numpy(), 'b')
    # # Shade in confidence
    # y2_ax.fill_between(test_data.numpy(), lower[:, 1].numpy(),
    #                    upper[:, 1].numpy(), alpha=0.5)
    # # y2_ax.set_ylim([-3, 3])
    # y2_ax.legend(['Observed Data', 'Mean', 'Confidence'])
    # y2_ax.set_title('Observed Values (Likelihood)')
    # plt.show()
    return mean


if __name__ == '__main__':
    test_data = torch.linspace(0, 10, 100)
    dat1, mean1, dat2, mean2 = data_gen(test_data)
    dat = torch.stack([dat1, dat2], -1)[0]
    mean = multitask(test_data, dat)
    f, (y1_ax, y2_ax) = plt.subplots(1, 2, figsize=(8, 3))
    y1_ax.plot(test_data.detach().numpy(), dat[:, 0].detach().numpy(), 'k*')
    # Predictive mean as blue line
    y1_ax.plot(test_data.numpy(), mean[:, 0].numpy(), 'b')
    # Shade in confidence
    # y1_ax.fill_between(test_data.numpy(), lower[:, 0].numpy(),
    #                    upper[:, 0].numpy(), alpha=0.5)
    # y1_ax.set_ylim([-3, 3])
    y1_ax.legend(['Observed Data', 'Mean', 'Confidence'])
    y1_ax.set_title('Observed Values (Likelihood)')
    # Plot training data as black stars
def main():
    num_pts = 40
    num_train = 15
    all_x = torch.linspace(0, 50, num_pts)
    num_trial = 100
    all_mk_error1 = [None for _ in range(num_trial)]
    all_mk_error2 = [None for _ in range(num_trial)]
    all_rbf_error1 = [None for _ in range(num_trial)]
    all_rbf_error2 = [None for _ in range(num_trial)]
    all_mt_error1 = [None for _ in range(num_trial)]
    all_mt_error2 = [None for _ in range(num_trial)]
    for trial in range(num_trial):
        y1, y1_mean, y2, y2_mean = data_gen(all_x)
        stack_y = torch.stack([y1, y2], -1)[0]

        ## subset data into training and heldout points ##
        indices = random.sample(range(num_pts), num_train)
        inds = sorted(indices)
        holdout_inds = [i for i in range(num_pts) if i not in inds]
        train_x = all_x[inds]
        train_y = stack_y[inds, :]
        holdout_x = all_x[holdout_inds]
        holdout_y = stack_y[holdout_inds, :]

        ## set the testing points ##
        test_x = all_x
        test_y1 = y1
        test_y2 = y2

        ## get out mean predictions ##
        mk_mean = mk_tester(train_x, train_y, test_x)
        rbf_mean = indep_rbf(train_x, train_y, test_x)
        mt_mean = multitask(train_x, train_y, test_x)

        ## compute per-task squared errors ##
        mk_mean1 = mk_mean[:, 0]
        mk_mean2 = mk_mean[:, 1]
        rbf_mean1 = rbf_mean[:, 0]
        rbf_mean2 = rbf_mean[:, 1]
        mt_mean1 = mt_mean[:, 0]
        mt_mean2 = mt_mean[:, 1]
        all_mk_error1[trial] = (mk_mean1 - test_y1).pow(2).mean()
        all_mk_error2[trial] = (mk_mean2 - test_y2).pow(2).mean()
        all_rbf_error1[trial] = (rbf_mean1 - test_y1).pow(2).mean()
        all_rbf_error2[trial] = (rbf_mean2 - test_y2).pow(2).mean()
        all_mt_error1[trial] = (mt_mean1 - test_y1).pow(2).mean()
        all_mt_error2[trial] = (mt_mean2 - test_y2).pow(2).mean()
        print("trial ", trial, " done")

    ## plotting ##
    boxplot_list = [np.array(all_mk_error2), np.array(all_rbf_error2),
                    np.array(all_mt_error2)]
    fig = plt.figure(1, figsize=(9, 6))
    ax = fig.add_subplot(111)
    bpl = ax.boxplot(boxplot_list)
    ax.set_xticklabels(["MK Method", "Indep. RBF", "MT Method"])
    ax.set_ylabel("MSE")

    ## just graphical stuff ##
    box_col = sns.xkcd_palette(["windows blue"])[0]
    med_col = sns.xkcd_palette(["amber"])[0]
    for box in bpl["boxes"]:
        box.set(color=box_col, linewidth=2)
    for flier in bpl["fliers"]:
        flier.set(marker='o', color=box_col, alpha=0.5)
    for median in bpl["medians"]:
        median.set(color=med_col, linewidth=1.5)
    for whisker in bpl["whiskers"]:
        whisker.set(color=box_col, linewidth=2)
    for cap in bpl["caps"]:
        cap.set(color=box_col, linewidth=2)
    plt.show()

    print("mk1 mse:", sum(all_mk_error1) / len(all_mk_error1))
    print("mk2 mse:", sum(all_mk_error2) / len(all_mk_error2))
    print("rbf1 mse:", sum(all_rbf_error1) / len(all_rbf_error1))
    print("rbf2 mse:", sum(all_rbf_error2) / len(all_rbf_error2))
    print("mt1 mse:", sum(all_mt_error1) / len(all_mt_error1))
    print("mt2 mse:", sum(all_mt_error2) / len(all_mt_error2))

    ## saving: long-format table of (method, task, mse) ##
    mse = np.stack(
        [np.concatenate([np.repeat("Multi", 2 * num_trial),
                         np.repeat("Kron", 2 * num_trial),
                         np.repeat("Simple", 2 * num_trial)]),
         np.tile(np.concatenate([np.repeat("1", num_trial),
                                 np.repeat("2", num_trial)]), 3),
         np.concatenate([all_mk_error1, all_mk_error2,
                         all_mt_error1, all_mt_error2,
                         all_rbf_error1, all_rbf_error2])],
        axis=1)
    np.savetxt("all_mse.csv", mse, delimiter=",", fmt="%s")
def main():
    num_pts = 100
    # num_train = 15
    all_x = torch.linspace(0, 10, num_pts)
    y1, y1_mean, y2, y2_mean = data_gen(all_x)
    stack_y = torch.stack([y1, y2], -1)[0]

    ## subset data into training and heldout points ##
    all_inds = [i for i in range(num_pts)]
    holdout_inds = all_inds[25:75]  # hold out a contiguous middle chunk
    train_inds = [i for i in range(num_pts) if i not in holdout_inds]
    train_x = all_x[train_inds]
    train_y = stack_y[train_inds, :]
    holdout_x = all_x[holdout_inds]
    holdout_y = stack_y[holdout_inds, :]

    ## set the testing points ##
    test_x = all_x
    test_y1 = y1
    test_y2 = y2

    ## get out mean predictions ##
    mk_mean = mk_tester(train_x, train_y, test_x)
    print("multi-kernel done")
    rbf_mean = indep_rbf(train_x, train_y, test_x)
    print("rbf done")
    mt_mean = multitask(train_x, train_y, test_x)
    print("multitask done")

    ## calculate errors ##
    mk_mean1 = mk_mean[:, 0]
    mk_mean2 = mk_mean[:, 1]
    rbf_mean1 = rbf_mean[:, 0]
    rbf_mean2 = rbf_mean[:, 1]
    mt_mean1 = mt_mean[:, 0]
    mt_mean2 = mt_mean[:, 1]
    mk_error = (mk_mean1 - test_y1).pow(2).mean()
    mk_error += (mk_mean2 - test_y2).pow(2).mean()
    rbf_error = (rbf_mean1 - test_y1).pow(2).mean()
    rbf_error += (rbf_mean2 - test_y2).pow(2).mean()
    mt_error = (mt_mean1 - test_y1).pow(2).mean()
    mt_error += (mt_mean2 - test_y2).pow(2).mean()
    print("MK ERROR: ", mk_error)
    print("RBF ERROR: ", rbf_error)
    print("MT ERROR: ", mt_error)

    ## PLOTTING ##
    true_col = sns.xkcd_palette(["windows blue"])[0]
    mod_col = sns.xkcd_palette(["amber"])[0]
    train_y1 = train_y[:, 0]
    train_y2 = train_y[:, 1]
    test_y1 = holdout_y[:, 0]
    test_y2 = holdout_y[:, 1]
    col_titles = ["Task 1", "Task 2"]
    row_titles = ["MK Method", "Indep. RBF", "MT Method"]
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))
    for ind, ax in enumerate(axes[0]):
        ax.set_title(col_titles[ind])
    for ind, ax in enumerate(axes[:, 0]):
        ax.set_ylabel(row_titles[ind])
    ## each row: training stars, heldout circles, predicted mean ##
    axes[0, 0].plot(train_x.numpy(), train_y1.numpy(), marker='*', c=true_col, ls='None')
    axes[0, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    axes[0, 0].plot(test_x.numpy(), mk_mean1.detach().numpy(), ls='-', c=mod_col)
    axes[0, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    axes[0, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[0, 1].plot(test_x.numpy(), mk_mean2.detach().numpy(), ls='-', c=mod_col)
    axes[1, 0].plot(train_x.numpy(), train_y1.numpy(), marker='*', c=true_col, ls='None')
    axes[1, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    axes[1, 0].plot(test_x.numpy(), rbf_mean1.detach().numpy(), ls='-', c=mod_col)
    axes[1, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    axes[1, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[1, 1].plot(test_x.numpy(), rbf_mean2.detach().numpy(), ls='-', c=mod_col)
    axes[2, 0].plot(train_x.numpy(), train_y1.numpy(), marker='*', c=true_col, ls='None')
    axes[2, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    axes[2, 0].plot(test_x.numpy(), mt_mean1.detach().numpy(), ls='-', c=mod_col)
    axes[2, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    axes[2, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[2, 1].plot(test_x.numpy(), mt_mean2.detach().numpy(), ls='-', c=mod_col)
    plt.show()
def main():
    n_pts = 1000
    full_x = torch.linspace(0, 10, n_pts)
    n_start = 2
    _, y1, _, y2 = data_gen(full_x)
    full_y1 = y1[0]
    full_y = torch.stack([y1[0], y2[0]], -1)
    obs_inds = random.sample(range(n_pts), n_start)
    obs_inds2 = deepcopy(obs_inds)

    ## init plot ##
    plt_ind = 1
    cols = sns.color_palette("muted", 4)
    iters_per_plot = 3  ## ONLY CHANGE THIS ##
    n_plots = 3
    for iter_count in range(iters_per_plot * n_plots):
        pred_model, next_pt = bayes_opt_multi(full_x, full_y, obs_inds)
        means = pred_model.mean
        lower, upper = pred_model.confidence_region()
        if iter_count % iters_per_plot == 0:
            # plot the multitask model's current fit #
            plt.subplot(n_plots, 2, plt_ind)
            plt.plot(full_x.numpy(), means[:, 0].detach().numpy(), c=cols[0])
            plt.scatter(full_x[obs_inds].numpy(),
                        full_y[obs_inds, 0].numpy(), c=cols[0], marker='o')
            plt.plot(full_x.numpy(), full_y[:, 0].numpy(), c=cols[0], ls=':')
            plt.plot(full_x.numpy(), full_y[:, 1].numpy(), c=cols[1], ls=':')
            plt.plot(full_x.numpy(), means[:, 1].detach().numpy(), c=cols[1])
            plt.scatter(full_x[obs_inds].numpy(),
                        full_y[obs_inds, 1].numpy(), c=cols[1], marker='o')
            plt.fill_between(full_x.numpy(),
                             lower[:, 0].detach().numpy(),
                             upper[:, 0].detach().numpy(),
                             color=cols[0], alpha=0.2)
            plt.scatter(full_x[next_pt].numpy(),
                        full_y[next_pt, 0].numpy(), c='r', marker='*')
            ymin, ymax = plt.ylim()
            plt_ind += 1
        obs_inds.append(next_pt)

        pred_model, next_pt = bayes_opt_single(full_x, full_y[:, 0],
                                               obs_inds2)
        mean = pred_model.mean
        lower, upper = pred_model.confidence_region()
        if iter_count % iters_per_plot == 0:
            # plot the single-task model's current fit #
            plt.subplot(n_plots, 2, plt_ind)
            plt.plot(full_x.numpy(), mean.detach().numpy(), c=cols[0])
            plt.plot(full_x.numpy(), full_y[:, 0].numpy(), c=cols[0], ls=':')
            plt.scatter(full_x[obs_inds2].numpy(),
                        full_y[obs_inds2, 0].numpy(), c=cols[0], marker='o')
            plt.scatter(full_x[next_pt].numpy(),
                        full_y[next_pt, 0].numpy(), c='r', marker='*')
            plt.fill_between(full_x.numpy(), lower.detach().numpy(),
                             upper.detach().numpy(), color=cols[0],
                             alpha=0.2)
            plt.ylim(ymin, ymax)
            plt_ind += 1
        obs_inds2.append(next_pt)
    plt.show()
    return 1
def main():
    ## set ups and inits ##
    low_x = 0
    high_x = 10
    num_pts = 1000
    full_x = torch.linspace(low_x, high_x, num_pts)
    full_y = data_gen(full_x)
    end_sample_count = 10
    n_start = 2
    obs_inds = random.sample(range(num_pts), n_start)
    current_max = full_y[obs_inds].max()

    class ExactGPModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.RBFKernel())

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    entered = 0
    expec_improve = (1, )  # trivial start
    while max(expec_improve) > 0.001:
        lh = gpytorch.likelihoods.GaussianLikelihood()
        lh.log_noise.data[0, 0] = -8
        model = ExactGPModel(full_x[obs_inds], full_y[obs_inds], lh)
        if entered:
            # warm-start the kernel hyperparameters from the previous round
            model.covar_module.base_kernel.log_lengthscale.data[0, 0, 0] = stored_length
            model.covar_module.outputscale[0] = stored_out

        ## standard training stuff ##
        model.train()
        lh.train()
        optimizer = torch.optim.Adam([
            {'params': model.covar_module.parameters()},
        ], lr=0.1)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(lh, model)
        n_iter = 2
        for i in range(n_iter):
            optimizer.zero_grad()
            output = model(full_x[obs_inds])
            loss = -mll(output, full_y[obs_inds])
            loss.backward()
            optimizer.step()
        entered = 1
        stored_length = model.covar_module.base_kernel.log_lengthscale.data[0, 0, 0]
        stored_out = model.covar_module.outputscale[0]

        ## do predictions ##
        model.eval()
        lh.eval()
        pred = model(full_x)
        means = pred.mean
        sd = pred.stddev
        lower, upper = pred.confidence_region()

        ## pick the unseen point with the highest expected improvement ##
        found = 0
        expec_improve = list(
            expected_improvement(means, sd, current_max).detach().numpy())
        while not found:
            max_ind = expec_improve.index(max(expec_improve))
            if max_ind not in obs_inds:
                obs_inds.append(int(max_ind))
                found = 1
            else:
                expec_improve[max_ind] = min(expec_improve)
        current_max = full_y[obs_inds].max()

        full_col = sns.xkcd_palette(["windows blue"])[0]
        gp_col = sns.xkcd_palette(["amber"])[0]
        if len(obs_inds) % 1 == -1:  # always False: in-loop progress plot disabled
            plt.figure()
            plt.plot(full_x.numpy(), full_y.numpy(), c=full_col, ls='-')
            plt.plot(full_x[obs_inds].numpy(), full_y[obs_inds].numpy(),
                     c=full_col, marker='.', ls="None")
            plt.plot(full_x[int(max_ind)].numpy(),
                     full_y[int(max_ind)].numpy(), marker="*", c='r')
            plt.plot(full_x.numpy(), means.detach().numpy(), ls='-', c=gp_col)
            plt.fill_between(full_x.numpy(), lower.detach().numpy(),
                             upper.detach().numpy(), alpha=0.5, color=gp_col)
            plt.show()

    ## final plot of the fitted surrogate and sampled points ##
    plt.figure()
    plt.plot(full_x.numpy(), full_y.numpy(), c=full_col, ls='-')
    plt.plot(full_x[obs_inds].numpy(), full_y[obs_inds].numpy(),
             c=full_col, marker='.', ls="None")
    plt.plot(full_x[int(max_ind)].numpy(), full_y[int(max_ind)].numpy(),
             marker="*", c='r')
    plt.plot(full_x.numpy(), means.detach().numpy(), ls='-', c=gp_col)
    plt.fill_between(full_x.numpy(), lower.detach().numpy(),
                     upper.detach().numpy(), alpha=0.5, color=gp_col)
    plt.show()
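# Several of these scripts call an `expected_improvement` helper that is not
# shown in this section. A minimal sketch of the standard closed-form EI for
# maximization, matching the three-argument call above (`xi`, an optional
# exploration margin, is an addition here, not taken from the original):

import torch
from torch.distributions import Normal

def expected_improvement(means, sd, current_max, xi=0.0):
    # EI(x) = (mu - f* - xi) * Phi(z) + sigma * phi(z),
    # with z = (mu - f* - xi) / sigma, Phi/phi the standard normal cdf/pdf
    std_normal = Normal(0.0, 1.0)
    improve = means - current_max - xi
    z = improve / sd
    ei = improve * std_normal.cdf(z) + sd * torch.exp(std_normal.log_prob(z))
    return ei.clamp(min=0.0)  # EI is non-negative by construction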
def main():
    num_pts = 100
    num_train = 25
    all_x = torch.linspace(0, 10, num_pts)
    y1, y1_mean, y2, y2_mean = data_gen(all_x)
    stack_y = torch.stack([y1, y2], -1)[0]

    ## subset data into training and heldout points ##
    indices = random.sample(range(num_pts), num_train)
    inds = sorted(indices)
    holdout_inds = [i for i in range(num_pts) if i not in inds]
    train_x = all_x[inds]
    train_y = stack_y[inds, :]
    holdout_x = all_x[holdout_inds]
    holdout_y = stack_y[holdout_inds, :]

    ## set the testing points ##
    test_x = all_x
    test_y1 = y1
    test_y2 = y2

    ## get out mean predictions ##
    model_out = mk_tester(train_x, train_y, test_x)
    mk_mean = model_out.mean
    lower, upper = model_out.confidence_region()
    print("multi-kernel done")
    rbf_mean = indep_rbf(train_x, train_y, test_x)
    print("rbf done")
    mt_mean = multitask(train_x, train_y, test_x)
    print("multitask done")

    ## calculate errors ##
    mk_mean1 = mk_mean[:, 0]
    mk_mean2 = mk_mean[:, 1]
    rbf_mean1 = rbf_mean[:, 0]
    rbf_mean2 = rbf_mean[:, 1]
    mt_mean1 = mt_mean[:, 0]
    mt_mean2 = mt_mean[:, 1]
    mk_error = (mk_mean1 - test_y1).pow(2).mean()
    mk_error += (mk_mean2 - test_y2).pow(2).mean()
    rbf_error = (rbf_mean1 - test_y1).pow(2).mean()
    rbf_error += (rbf_mean2 - test_y2).pow(2).mean()
    mt_error = (mt_mean1 - test_y1).pow(2).mean()
    mt_error += (mt_mean2 - test_y2).pow(2).mean()
    print("MK ERROR: ", mk_error)
    print("RBF ERROR: ", rbf_error)
    print("MT ERROR: ", mt_error)

    ## PLOTTING ##
    true_col = sns.xkcd_palette(["windows blue"])[0]
    mod_col = sns.xkcd_palette(["amber"])[0]
    train_y1 = train_y[:, 0]
    train_y2 = train_y[:, 1]
    test_y1 = holdout_y[:, 0]
    test_y2 = holdout_y[:, 1]
    col_titles = ["Task 1", "Task 2"]
    row_titles = ["M.K.", "Indep. RBF", "Kron"]
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))
    for ind, ax in enumerate(axes[0]):
        ax.set_title(col_titles[ind])
    for ind, ax in enumerate(axes[:, 0]):
        ax.set_ylabel(row_titles[ind])
    train_dat, = axes[0, 0].plot(train_x.numpy(), train_y1.numpy(),
                                 marker='*', c=true_col, ls='None')
    # axes[0, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    pred_mean, = axes[0, 0].plot(test_x.numpy(), mk_mean1.detach().numpy(),
                                 ls='-', c=mod_col)
    true_mean, = axes[0, 0].plot(test_x.numpy(), y1_mean[0].numpy(),
                                 ls='-', c=true_col)
    axes[0, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    # axes[0, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[0, 1].plot(test_x.numpy(), mk_mean2.detach().numpy(), ls='-', c=mod_col)
    axes[0, 1].plot(test_x.numpy(), y2_mean[0].numpy(), ls='-', c=true_col)
    axes[1, 0].plot(train_x.numpy(), train_y1.numpy(), marker='*', c=true_col, ls='None')
    # axes[1, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    axes[1, 0].plot(test_x.numpy(), rbf_mean1.detach().numpy(), ls='-', c=mod_col)
    axes[1, 0].plot(test_x.numpy(), y1_mean[0].numpy(), ls='-', c=true_col)
    axes[1, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    # axes[1, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[1, 1].plot(test_x.numpy(), rbf_mean2.detach().numpy(), ls='-', c=mod_col)
    axes[1, 1].plot(test_x.numpy(), y2_mean[0].numpy(), ls='-', c=true_col)
    axes[2, 0].plot(train_x.numpy(), train_y1.numpy(), marker='*', c=true_col, ls='None')
    # axes[2, 0].plot(holdout_x.numpy(), test_y1.numpy(), marker='o', c=true_col, ls='None')
    axes[2, 0].plot(test_x.numpy(), mt_mean1.detach().numpy(), ls='-', c=mod_col)
    axes[2, 0].plot(test_x.numpy(), y1_mean[0].numpy(), ls='-', c=true_col)
    axes[2, 1].plot(train_x.numpy(), train_y2.numpy(), marker='*', c=true_col, ls='None')
    # axes[2, 1].plot(holdout_x.numpy(), test_y2.numpy(), marker='o', c=true_col, ls='None')
    axes[2, 1].plot(test_x.numpy(), mt_mean2.detach().numpy(), ls='-', c=mod_col)
    axes[2, 1].plot(test_x.numpy(), y2_mean[0].numpy(), ls='-', c=true_col)
    plt.legend([train_dat, pred_mean, true_mean],
               ["Training Points", "Predicted Mean", "Underlying Process"])
    plt.show()
batch_size = 50
chords_on_either_side = 3

model = tf.keras.Sequential()
model.add(layers.Dense(200, input_shape=(2 * chords_on_either_side,
                                         chord_array_len)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(300))
model.add(layers.Dropout(0.4))
model.add(layers.Flatten())
model.add(layers.Dense(192))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])
model.summary()

# currently training and validating with random data
model.fit(
    data_gen(chords_on_either_side=chords_on_either_side,
             batch_size=batch_size),
    steps_per_epoch=1024,
    epochs=100,
)
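# The `data_gen` passed to model.fit above is not shown; per the comment, it
# currently feeds random data. A hypothetical stand-in with matching shapes
# (the 192-way output is taken from the model's last Dense layer; the
# `chord_array_len` default here is a placeholder assumption):

import numpy as np

def data_gen(chords_on_either_side, batch_size, chord_array_len=24,
             n_classes=192):
    # endless generator of random context windows and random one-hot labels
    while True:
        x = np.random.rand(batch_size, 2 * chords_on_either_side,
                           chord_array_len)
        y = np.eye(n_classes)[np.random.randint(0, n_classes,
                                                size=batch_size)]
        yield x, y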
def main():
    test_data = torch.linspace(0, 10, 100)
    test_y1, y1_mean, test_y2, y2_mean = data_gen(test_data)
    stack_y = torch.stack([test_y1, test_y2], -1)[0]

    ## get out mean predictions ##
    mk_mean = mk_tester(test_data, stack_y)
    print("multi-kernel done")
    rbf_mean = indep_rbf(test_data, stack_y)
    print("rbf done")
    mt_mean = multitask(test_data, stack_y)
    print("multitask done")

    ## calculate errors ##
    mk_mean1 = mk_mean[:, 0]
    mk_mean2 = mk_mean[:, 1]
    mk_error = (mk_mean1 - test_y1).pow(2).mean()
    mk_error += (mk_mean2 - test_y2).pow(2).mean()
    rbf_mean1 = rbf_mean[:, 0]
    rbf_mean2 = rbf_mean[:, 1]
    rbf_error = (rbf_mean1 - test_y1).pow(2).mean()
    rbf_error += (rbf_mean2 - test_y2).pow(2).mean()
    mt_mean1 = mt_mean[:, 0]
    mt_mean2 = mt_mean[:, 1]
    mt_error = (mt_mean1 - test_y1).pow(2).mean()
    mt_error += (mt_mean2 - test_y2).pow(2).mean()
    # print("MK ERROR: ", mk_error)
    # print("RBF ERROR: ", rbf_error)
    # print("MT ERROR: ", mt_error)

    ## PLOTTING ##
    true_col = sns.xkcd_palette(["windows blue"])[0]
    mod_col = sns.xkcd_palette(["amber"])[0]
    plt.subplot(3, 2, 1)
    plt.plot(test_y1[0].numpy(), marker='o', c=true_col)
    plt.plot(mk_mean1.detach().numpy(), ls='-', c=mod_col)
    plt.subplot(3, 2, 2)
    plt.plot(test_y2[0].numpy(), marker='o', c=true_col)
    plt.plot(mk_mean2.detach().numpy(), ls='-', c=mod_col)
    plt.subplot(3, 2, 3)
    plt.plot(test_y1[0].numpy(), marker='o', c=true_col)
    plt.plot(rbf_mean1.detach().numpy(), ls='-', c=mod_col)
    plt.subplot(3, 2, 4)
    plt.plot(test_y2[0].numpy(), marker='o', c=true_col)
    plt.plot(rbf_mean2.detach().numpy(), ls='-', c=mod_col)
    plt.subplot(3, 2, 5)
    plt.plot(test_y1[0].numpy(), marker='o', c=true_col)
    plt.plot(mt_mean1.detach().numpy(), ls='-', c=mod_col)
    plt.subplot(3, 2, 6)
    plt.plot(test_y2[0].numpy(), marker='o', c=true_col)
    plt.plot(mt_mean2.detach().numpy(), ls='-', c=mod_col)
    plt.show()
from train_net import NoamOpt, LabelSmoothing, SimpleLossCompute, run_epoch
import config as cfg
import mxnet as mx
from mxnet.gluon import Trainer
# assumed local modules, mirroring the PyTorch version of this script
from make_model import make_model
from data_gen import data_gen

V = cfg.VOCAB_SIZE
criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
model = make_model()
model.collect_params().reset_ctx(cfg.ctx)
model_opt = NoamOpt(cfg.D_MODEL, 1, 400)
trainer = Trainer(model.collect_params(), "Adam", {
    "beta1": 0.9,
    "beta2": 0.98
})
for epoch in range(10):
    run_epoch(data_gen(V, 30, 20),
              model,
              SimpleLossCompute(model.generator, criterion),
              trainer=trainer,
              lr_sch=model_opt)
    test_loss = run_epoch(data_gen(V, 30, 5),
                          model,
                          SimpleLossCompute(model.generator, criterion),
                          trainer=None,
                          lr_sch=model_opt)
    print("test loss: ", test_loss.asnumpy()[0])


def greedy_decode(model, src, src_mask, max_len, start_symbol):
    memory = model.encode(src, src_mask)
    ys = mx.nd.zeros((1, 1), dtype=src.dtype, ctx=cfg.ctx) + start_symbol
from LabelSmoothing import LabelSmoothing
from make_model import make_model
from NoamOpt import NoamOpt
import torch
from run_epoch import run_epoch
from data_gen import data_gen
from SimpleLossCompute import SimpleLossCompute

# Train the simple copy task.
V = 11
criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
model = make_model(V, V, N=2)
model_opt = NoamOpt(
    model.src_embed[0].d_model, 1, 400,
    torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

for epoch in range(10):
    model.train()
    run_epoch(data_gen(V, batch=30, nbatches=20), model,
              SimpleLossCompute(model.generator, criterion, model_opt))
    model.eval()
    print(
        run_epoch(data_gen(V, batch=30, nbatches=5), model,
                  SimpleLossCompute(model.generator, criterion, None)))
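# For reference, the `data_gen` imported above is the copy-task generator
# from the Annotated Transformer. A minimal sketch, assuming the codebase's
# `Batch` wrapper with signature Batch(src, tgt, pad):

import torch
from Batch import Batch  # assumed local module, following the import style above

def data_gen(V, batch, nbatches, seq_len=10):
    # Yield `nbatches` batches of random token sequences drawn from [1, V);
    # the target is an exact copy of the source, so the model learns to copy.
    for _ in range(nbatches):
        data = torch.randint(1, V, (batch, seq_len))
        data[:, 0] = 1  # fixed start-of-sequence token
        yield Batch(data, data, 0)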
# class header assumed from the `old_kernel` usage and the __main__ block below
class OldModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(OldModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=2)
        # self.mean_module = gpytorch.means.ConstantMean()
        # self.covar_module = mk_kernel.MultitaskRBFKernel(
        #     num_tasks=2, log_task_lengthscales=torch.Tensor(
        #         [math.log(2.5), math.log(0.3)]))
        self.covar_module = old_kernel.MultitaskRBFKernel(num_tasks=2)
        # self.covar_module = gpytorch.kernels.ScaleKernel(mk_kernel.multi_kernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(
            mean_x, covar_x)


if __name__ == '__main__':
    train_x = torch.linspace(0, 10, 3)
    dat1, mean1, dat2, mean2 = data_gen(train_x)
    train_y = torch.stack([dat1, dat2], -1)[0]
    like_1 = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
    new_mod = MultitaskModel(train_x, train_y, like_1)
    like_2 = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
    old_mod = OldModel(train_x, train_y, like_2)
    new_mod.eval()
    old_mod.eval()
    # share the output-scale kernel so the two covariances are comparable
    new_mod.covar_module.output_scale_kernel = old_mod.covar_module.output_scale_kernel
    new_mat = new_mod.covar_module(train_x).evaluate()
    print("new mat = ", new_mat)
    old_mat = old_mod.covar_module(train_x).evaluate()
    print("old mat = ", old_mat)  # assumed to mirror the new_mat print above
def learn(input_dir, output_dir, epochs, save_nmt=False):
    (dataset, vocab_inp_size, vocab_tar_size, embedding_dim, units,
     batch_size, example_input_batch, steps_per_epoch, targ_lang,
     max_length_targ, max_length_inp, inp_lang,
     targ_lang) = data_gen.data_gen(input_dir, output_dir)
    config = NeuralMTConfig(vocab_inp_size, vocab_tar_size, embedding_dim,
                            units, batch_size, example_input_batch,
                            max_length_targ, max_length_inp, inp_lang,
                            targ_lang)
    neural_mt = NeuralMT(config)
    encoder, decoder, checkpoint = (neural_mt.encoder, neural_mt.decoder,
                                    neural_mt.checkpoint)
    given_dir = input_dir + '/training_log.txt'
    checkpoint_prefix = os.path.join(input_dir + '/training_checkpoints',
                                     "ckpt")
    train_l = []
    for epoch in range(epochs):
        empty_s = " "
        start = time.time()
        enc_hidden = encoder.initialize_hidden_state()
        total_loss = 0
        for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = train_step(inp, targ, enc_hidden, targ_lang,
                                    batch_size, neural_mt)
            total_loss += batch_loss
            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    epoch + 1, batch, batch_loss.numpy()))
        # saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 2 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)
        print('Epoch {} Loss {:.4f} \n'.format(
            epoch + 1, total_loss / steps_per_epoch))
        empty_s = 'Epoch {} Loss {:.4f} \n'.format(
            epoch + 1, total_loss / steps_per_epoch)
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
        empty_s += 'Time taken for 1 epoch {} sec\n'.format(time.time() -
                                                            start)
        train_l.append(empty_s)
    with open(given_dir, 'w', encoding="utf8") as filelog:
        filelog.writelines(train_l)
    if save_nmt:
        neural_mt.save(output_dir)
    return neural_mt
from tensorflow import keras as k
from model import model
from data_gen import data_gen
import os
import jdatetime

train_dir = 'data\\dataset\\train.csv'
test_dir = 'data\\dataset\\test.csv'
BATCH_SIZE = 64
EPOCHS = 20

train_gen = data_gen(train_dir, BATCH_SIZE)
val_gen = data_gen(test_dir, BATCH_SIZE)
train_gen.build_data()
val_gen.build_data()

now = jdatetime.datetime.today()
model_folder_name = '%s-%s[%s-%s]__Model' % (now.month, now.day,
                                             now.hour, now.minute)
os.mkdir('data\\models\\%s' % model_folder_name)
tensorboard_callback = k.callbacks.TensorBoard(
    log_dir='data\\models\\%s' % model_folder_name, histogram_freq=1)

adam = k.optimizers.Adam()
model.compile(loss='mean_squared_error', optimizer=adam)
history = model.fit_generator(
    generator=train_gen.next_batch(),
def main():
    data_gen()
    run_experiment()
    post_process()
def main():
    ## set up and inits ##
    low_x = 0
    high_x = 10
    num_pts = 1000
    end_sample_count = 30  # this seems like a bad criteria...
    full_x = torch.linspace(low_x, high_x, num_pts)
    n_tasks = 2
    # full_y = gen_correlated_rbfs(full_x, _num_tasks=n_tasks)
    full_y = trash_genner(full_x)  # a holdover until I can do something better
    _, y1, _, y2 = data_gen(full_x)
    full_y = torch.stack([y1[0], y2[0]], -1)
    # plt.plot(full_x.numpy(), full_y[:, 0].numpy())
    # plt.plot(full_x.numpy(), full_y[:, 1].numpy())
    # plt.show()

    # get out starting points #
    n_start = 2
    obs_inds = random.sample(range(num_pts), n_start)
    obs_x = full_x[obs_inds]
    obs_y = full_y[obs_inds, :]
    current_max = obs_y[:, 0].max()

    ## set up the model ##
    class MultitaskModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(MultitaskModel, self).__init__(train_x, train_y,
                                                 likelihood)
            self.mean_module = gpytorch.means.MultitaskMean(
                gpytorch.means.ConstantMean(), num_tasks=n_tasks)
            self.covar_module = mk_kernel.MultiKernel(
                [gpytorch.kernels.RBFKernel() for _ in range(n_tasks)])
            # self.covar_module = mk_kernel.MultitaskRBFKernel(num_tasks=2, rank=2)

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultitaskMultivariateNormal(
                mean_x, covar_x)

    # lh = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=n_tasks)

    ## set up parameter storage ##
    stored_lengths = [None for _ in range(n_tasks)]
    # stored_covar_factor = None
    # stored_var = None
    entered = 0
    ei_tol = 0.001
    expec_improve = (1, )  # trivial start
    while max(expec_improve) > ei_tol:
        lh = gpytorch.likelihoods.MultitaskGaussianLikelihood(
            num_tasks=n_tasks)
        model = MultitaskModel(full_x[obs_inds], full_y[obs_inds, :], lh)
        model.likelihood.log_noise.data[0, 0] = -8
        if entered:
            # # overwrite parameters with those from the previous round # #
            for tt in range(n_tasks):
                model.covar_module.in_task_covar[tt].log_lengthscale.data[0, 0, 0] = stored_lengths[tt]
            model.covar_module.output_scale_kernel.covar_factor = stored_covar_factor
            # model.covar_module.output_scale_kernel.var = stored_var

        model.train()
        lh.train()
        ## need to train a little more each time ##
        # Use the adam optimizer
        optimizer = torch.optim.Adam([
            {'params': model.parameters()},
        ], lr=0.1)
        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(lh, model)
        n_iter = 50
        for i in range(n_iter):
            optimizer.zero_grad()
            output = model(full_x[obs_inds])
            loss = -mll(output, full_y[obs_inds, :])
            loss.backward()
            optimizer.step()

        ## store covar parameters ##
        entered = 1
        stored_lengths = [
            model.covar_module.in_task_covar[tt].log_lengthscale.data[0, 0, 0]
            for tt in range(n_tasks)
        ]
        stored_covar_factor = model.covar_module.output_scale_kernel.covar_factor
        # stored_var = model.covar_module.output_scale_kernel.var

        ## predict full domain ##
        lh.eval()
        model.eval()
        pred = model(full_x)
        dump = pred.covariance_matrix  ## just to build the cache
        means = pred.mean[:, 0]
        sd = pred.stddev[:, 0]
        lower, upper = pred.confidence_region()

        ## observe function at max of expected improvement ##
        found = 0
        expec_improve = list(
            expected_improvement(means, sd, current_max).detach().numpy())
        while not found:
            max_ind = expec_improve.index(max(expec_improve))
            if max_ind not in obs_inds:
                obs_inds.append(int(max_ind))
                found = 1
            else:
                expec_improve[max_ind] = min(expec_improve)
        current_max = full_y[obs_inds, 0].max()
        # max, max_ind = torch.max(expec_improve, 0)
        # obs_x = full_x[obs_inds]
        # obs_y = full_y[obs_inds, :]

        ## plotting to track progress ##
        full_col = sns.xkcd_palette(["windows blue"])[0]
        gp_col = sns.xkcd_palette(["amber"])[0]
        if len(obs_inds) % 5 == 0:
            plt.figure()
            plt.plot(full_x.numpy(), full_y[:, 0].numpy(), c=full_col,
                     ls='-')
            plt.plot(full_x[obs_inds].numpy(), full_y[obs_inds, 0].numpy(),
                     c=full_col, marker='.', ls="None")
            plt.plot(full_x[int(max_ind)].numpy(),
                     full_y[int(max_ind), 0].numpy(), marker="*", c='r')
            plt.plot(full_x.numpy(), means.detach().numpy(), ls='-',
                     c=gp_col)
            plt.fill_between(full_x.numpy(), lower[:, 0].detach().numpy(),
                             upper[:, 0].detach().numpy(), alpha=0.5,
                             color=gp_col)
            plt.show()
        print("seen ", len(obs_inds), " observations")

    print("observed ", obs_inds)
    print("(", len(obs_inds), " points)")
    return 1