def compareLight(learn, epochs, itrs, alpha, batchSize, o=10):
    np.random.seed(2)
    N = 1000
    x = np.sort(np.random.uniform(0, 1, N))
    y = np.sort(np.random.uniform(0, 1, N))
    z = FrankeFunction(x, y)
    MSE1, MSE2, MSE3, MSE4 = [], [], [], []
    for i in range(o):
        X = X_Mat(x, y, i)  # design matrix for polynomial order i (was fixed at o)
        # For some reason, OLS and RIDGE act weird if SGD is run after them,
        # so the custom SGD fit comes first. Why, I do not know.
        B3 = SGD(X, z, learn, epochs, itrs, alpha, batchSize)
        zpred3 = np.dot(X, B3)
        B = OLS(X, z)
        zpred = np.dot(X, B)
        B2 = RIDGE(X, z)
        zpred2 = np.dot(X, B2)
        sgdreg = SGDRegressor(max_iter=3000,
                              penalty='l2',
                              eta0=0.05,
                              learning_rate='adaptive',
                              alpha=0.00001,
                              loss='epsilon_insensitive',
                              fit_intercept=False)
        sgdreg.fit(X, z)
        B4 = sgdreg.coef_
        zpred4 = np.dot(X, B4)
        MSE1.append(MSE(z, zpred))
        MSE2.append(MSE(z, zpred2))
        MSE3.append(MSE(z, zpred3))
        MSE4.append(MSE(z, zpred4))
    plt.plot(range(o), MSE1, label='SciKit OLS')
    plt.plot(range(o), MSE2, label='SciKit Ridge')
    plt.plot(range(o), MSE3, label='SGD')
    plt.plot(range(o), MSE4, label='SciKit SGD')
    plt.legend()
    plt.title('Mean-Squared Errors given polynomial order for different methods')
    plt.xlabel('Polynomial Order')
    plt.ylabel('MSE')
    plt.show()
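# Example invocation (a minimal sketch; the argument values below are
# illustrative assumptions, not tuned settings):
# compareLight(learn=0.01, epochs=100, itrs=200, alpha=0.9, batchSize=32, o=10)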
n = len(x_)
i = np.random.randint(n - 1, size=int(n * 0.2))
x_learn = np.delete(x_, i)
y_learn = np.delete(y_, i)
x_test = np.take(x_, i)
y_test = np.take(y_, i)

# 5th-order design matrix, written out column by column
X = np.c_[np.ones((n, 1)), x_, y_, x_ * x_, y_ * x_, y_ * y_,
          x_**3, x_**2 * y_, x_ * y_**2, y_**3,
          x_**4, x_**3 * y_, x_**2 * y_**2, x_ * y_**3, y_**4,
          x_**5, x_**4 * y_, x_**3 * y_**2, x_**2 * y_**3, x_ * y_**4, y_**5]

beta, zpredict = linear(X, z)
MSE_ = MSE(z, zpredict)
print(R2_Score(z, zpredict))
print(MSE_)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') is deprecated
ax.plot_surface(x, y, zpredict)  # ,cmap=cm.coolwarm)
# Plot the surface.
surf = ax.plot_surface(x, y, z, cmap=cm.coolwarm, linewidth=0, antialiased=False)
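# The explicit columns above enumerate all monomials x^(k-j) * y^j up to
# order 5. A general construction equivalent to that block (a sketch; the
# name X_poly is hypothetical, numpy assumed imported as np):
def X_poly(x_, y_, order):
    cols = [np.ones(len(x_))]
    for k in range(1, order + 1):
        for j in range(k + 1):
            cols.append(x_**(k - j) * y_**j)  # x^k, x^(k-1)y, ..., y^k
    return np.column_stack(cols)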
import numpy as np
from sklearn.linear_model import Lasso

# Make a meshgrid of datapoints and compute Franke's function
N = 1000
x = np.sort(np.random.uniform(0, 1, N))
y = np.sort(np.random.uniform(0, 1, N))
x_mesh_, y_mesh_ = np.meshgrid(x, y)
z = FrankeFunction(x_mesh_, y_mesh_)

# Add noise
z_noise = z + np.random.normal(scale=1, size=(N, N))

# Perform regression
X = create_X(x_mesh_, y_mesh_)
model = Lasso(alpha=1e-10, fit_intercept=False)
model.fit(X, np.ravel(z_noise))

# Create best-fit matrix for plotting
x_r = np.linspace(0, 1, N)
y_r = np.linspace(0, 1, N)
x_mesh, y_mesh = np.meshgrid(x_r, y_r)  # was meshgrid(x, y), which left x_r/y_r unused

X_r = create_X(x_mesh, y_mesh)

# Predict
z_reg = (model.predict(X_r)).reshape((N, N))
plot_surface(x_mesh, y_mesh, z_reg, "Lasso regression", show=True)

print("MSE: %.5f" % MSE(z, z_reg))
print("R2_Score: %.5f" % R2_Score(z, z_reg))
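# Optional sanity check (a sketch reusing X, z_noise, z, N and MSE from
# above): sweep the Lasso penalty to see how strongly alpha regularizes.
for a in (1e-10, 1e-6, 1e-3):
    m = Lasso(alpha=a, fit_intercept=False, max_iter=5000)
    m.fit(X, np.ravel(z_noise))
    print("alpha=%g  MSE=%.5f" % (a, MSE(z, m.predict(X).reshape((N, N)))))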
def run(net_str):
    # Execute only if run as the entry point into the program.
    # Define the source domain and the current target domain.
    net_str = os.path.join(
        'D:\\study\\graduation_project\\grdaution_project\\instru_identify\\dataset18dataset2',
        net_str)
    source_image_root = os.path.join('D:\\', 'study', 'graduation_project',
                                     'grdaution_project', 'instru_identify',
                                     'dataset', 'dataset1')
    target_image_root = os.path.join('D:\\', 'study', 'graduation_project',
                                     'grdaution_project', 'instru_identify',
                                     'dataset', 'dataset2')
    target = 'dataset2'
    # Proportion of historical data selected
    p = str(8)
    # Model save path
    model_root = 'dataset1' + p + 'dataset2'
    if not os.path.exists(model_root):
        os.makedirs(model_root)  # the original checked and created this twice

    # Training log
    log_path = os.path.join(model_root, 'train.txt')
    sys.stdout = Logger(log_path)
    # print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    # Training hyperparameters
    cuda = False
    cudnn.benchmark = True
    lr = 1e-2
    batch_size = 16
    image_size = 28
    n_epoch = 1
    step_decay_weight = 0.95
    lr_decay_step = 20000
    active_domain_loss_step = 10000
    weight_decay = 1e-6
    alpha_weight = 0.01
    beta_weight = 0.075
    gamma_weight = 0.25
    momentum = 0.9

    manual_seed = random.randint(1, 10000)
    random.seed(manual_seed)
    torch.manual_seed(manual_seed)

    #######################
    #     load data       #
    #######################
    # Note: img_transform_source is defined but both loaders below use
    # img_transform_target (3-channel normalization), as in the original.
    img_transform_source = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307, ), std=(0.3081, ))
    ])
    img_transform_target = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    # Source-domain data loading
    source_list = os.path.join(source_image_root, 'dataset1_train_labels.txt')
    dataset_source = GetLoader(
        data_root=os.path.join(source_image_root, 'dataset1_train'),
        data_list=source_list,
        transform=img_transform_target,
    )
    dataloader_source = torch.utils.data.DataLoader(
        dataset=dataset_source,
        batch_size=batch_size,
        shuffle=True,   # reshuffle every epoch
        num_workers=0   # number of worker processes
    )

    # Target-domain data loading
    target_list = os.path.join(target_image_root, 'dataset2_train_labels.txt')
    dataset_target = GetLoader(
        data_root=os.path.join(target_image_root, 'dataset2_train'),
        data_list=target_list,
        transform=img_transform_target,
    )
    dataloader_target = torch.utils.data.DataLoader(
        dataset=dataset_target,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,  # single-process loading
    )

    #####################
    #    load model     #
    #####################
    my_net = DSN()
    my_net.load_state_dict(torch.load(net_str))

    #####################
    #  setup optimizer  #
    #####################
    def exp_lr_scheduler(optimizer,
                         step,
                         init_lr=lr,
                         lr_decay_step=lr_decay_step,
                         step_decay_weight=step_decay_weight):
        # Decay learning rate by a factor of step_decay_weight every lr_decay_step
        current_lr = init_lr * (step_decay_weight**(step / lr_decay_step))
        if step % lr_decay_step == 0:
            print('learning rate is set to %f' % current_lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        return optimizer

    optimizer = optim.SGD(my_net.parameters(),
                          lr=lr,
                          momentum=momentum,
                          weight_decay=weight_decay)

    # Loss functions ("loss_classfication" typo fixed throughout)
    loss_classification = torch.nn.CrossEntropyLoss()
    loss_recon1 = MSE()
    loss_recon2 = SIMSE()
    loss_diff = DiffLoss_tfTrans()
    loss_similarity = torch.nn.CrossEntropyLoss()

    if cuda:
        my_net = my_net.cuda()
        loss_classification = loss_classification.cuda()
        loss_recon1 = loss_recon1.cuda()
        loss_recon2 = loss_recon2.cuda()
        loss_diff = loss_diff.cuda()
        loss_similarity = loss_similarity.cuda()

    for p in my_net.parameters():
        p.requires_grad = True

    #############################
    #     training network      #
    #############################
    # Use the shorter of the two loaders
    len_dataloader = min(len(dataloader_source), len(dataloader_target))
    # Epoch at which the domain-adversarial (DANN) loss becomes active
    dann_epoch = np.floor(active_domain_loss_step / len_dataloader * 1.0)

    current_step = 0
    # Start training
    accu_total1 = 0  # running sum of accuracies on dataset1
    accu_total2 = 0  # running sum of accuracies on dataset2
    time_total1 = 0  # total evaluation time on dataset1
    time_total2 = 0  # total evaluation time on dataset2
    for epoch in range(n_epoch):
        # 1. Load the data
        data_source_iter = iter(dataloader_source)
        data_target_iter = iter(dataloader_target)

        i = 0
        # Stop at the shortest loader length, otherwise one of the iterators
        # runs out of data and raises an error.
        while i < len_dataloader:

            ########################
            # target data training #
            ########################
            # Fetch a target batch (next(it) replaces the Python 2 it.next())
            data_target = next(data_target_iter)
            t_img, t_label = data_target

            # 1. Zero the gradients
            my_net.zero_grad()
            loss = 0
            batch_size = len(t_label)

            # 2. Set up input buffers
            input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
            class_label = torch.LongTensor(batch_size)
            domain_label = torch.ones(batch_size)
            domain_label = domain_label.long()

            # Move data to the GPU if available
            if cuda:
                t_img = t_img.cuda()
                t_label = t_label.cuda()
                input_img = input_img.cuda()
                class_label = class_label.cuda()
                domain_label = domain_label.cuda()

            # Resize the buffers and copy the batch into them
            input_img.resize_as_(t_img).copy_(t_img)
            class_label.resize_as_(t_label).copy_(t_label)
            target_inputv_img = Variable(input_img)
            target_classv_label = Variable(class_label)
            target_domainv_label = Variable(domain_label)

            # Adaptation-factor schedule from the DANN/DSN papers
            if current_step > active_domain_loss_step:
                # Parentheses fixed: the whole numerator is normalized by the
                # remaining epochs and batches, keeping p in [0, 1]
                p = float(i + (epoch - dann_epoch) * len_dataloader) / (
                    n_epoch - dann_epoch) / len_dataloader
                p = 2. / (1. + np.exp(-10 * p)) - 1

                # Active domain loss: forward pass through the model
                result = my_net(input_data=target_inputv_img,
                                mode='target',
                                rec_scheme='all',
                                p=p)
                # Unpack the model outputs ("coda" typo fixed to "code")
                target_private_code, target_share_code, target_domain_label, target_rec_code = result
                target_dann = gamma_weight * loss_similarity(
                    target_domain_label, target_domainv_label)
                # 4. Accumulate the loss
                loss += target_dann
            else:
                if cuda:
                    target_dann = Variable(torch.zeros(1).float().cuda())
                else:
                    target_dann = Variable(torch.zeros(1).float())
                # Forward pass without the adversarial p during warm-up
                result = my_net(input_data=target_inputv_img,
                                mode='target',
                                rec_scheme='all')
                target_private_code, target_share_code, _, target_rec_code = result

            # Difference, reconstruction-MSE and SIMSE losses for the target batch
            target_diff = beta_weight * loss_diff(
                target_private_code, target_share_code, weight=0.05)
            loss += target_diff
            target_mse = alpha_weight * loss_recon1(target_rec_code,
                                                    target_inputv_img)
            loss += target_mse
            target_simse = alpha_weight * loss_recon2(target_rec_code,
                                                      target_inputv_img)
            loss += target_simse  # was "loss += target_mse", double-counting the MSE term

            # 5. Compute gradients
            loss.backward()
            # 6. Update the weights, biases and other network parameters
            # optimizer = exp_lr_scheduler(optimizer=optimizer, step=current_step)
            optimizer.step()

            #######################
            # source data training#
            #######################
            data_source = next(data_source_iter)
            s_img, s_label = data_source

            my_net.zero_grad()
            batch_size = len(s_label)

            input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
            class_label = torch.LongTensor(batch_size)
            domain_label = torch.zeros(batch_size)
            domain_label = domain_label.long()  # was "damain_label" (typo)

            loss = 0

            if cuda:
                s_img = s_img.cuda()
                s_label = s_label.cuda()
                input_img = input_img.cuda()
                class_label = class_label.cuda()
                domain_label = domain_label.cuda()

            input_img.resize_as_(s_img).copy_(s_img)  # was resize_as_(input_img)
            class_label.resize_as_(s_label).copy_(s_label)
            source_inputv_img = Variable(input_img)
            source_classv_label = Variable(class_label)
            source_domainv_label = Variable(domain_label)

            if current_step > active_domain_loss_step:
                # Active domain loss: forward pass with the adaptation factor p
                result = my_net(input_data=source_inputv_img,
                                mode='source',
                                rec_scheme='all',
                                p=p)
                # The 4th output was unpacked into source_classv_label,
                # clobbering the true labels; renamed to source_class_label.
                # The domain loss compares the domain prediction with the
                # domain label, not with the class output.
                source_private_code, source_share_code, source_domain_label, source_class_label, source_rec_code = result
                source_dann = gamma_weight * loss_similarity(
                    source_domain_label, source_domainv_label)
                loss += source_dann
            else:
                # The duplicated nested if/else here collapsed to one branch
                if cuda:
                    source_dann = Variable(torch.zeros(1).float().cuda())
                else:
                    source_dann = Variable(torch.zeros(1).float())
                result = my_net(input_data=source_inputv_img,
                                mode='source',
                                rec_scheme='all')
                source_private_code, source_share_code, _, source_class_label, source_rec_code = result

            source_classification = loss_classification(source_class_label,
                                                        source_classv_label)
            loss += source_classification
            source_diff = beta_weight * loss_diff(
                source_private_code, source_share_code, weight=0.05)
            loss += source_diff
            source_mse = alpha_weight * loss_recon1(source_rec_code,
                                                    source_inputv_img)
            loss += source_mse
            source_simse = alpha_weight * loss_recon2(  # was gamma_weight; matches target_simse
                source_rec_code, source_inputv_img)
            loss += source_simse

            loss.backward()
            # optimizer = exp_lr_scheduler(optimizer=optimizer, step=current_step)
            optimizer.step()

            ##############
            # test, save #
            ##############
            i += 1
            current_step += 1
            # print('source_classification: %f, source_dann: %f, source_diff: %f, '
            #       'source_mse: %f, source_simse: %f, target_dann: %f, target_diff: %f, '
            #       'target_mse: %f, target_simse: %f'
            #       % (source_classification.data.cpu().numpy(), source_dann.data.cpu().numpy(),
            #          source_diff.data.cpu().numpy(),
            #          source_mse.data.cpu().numpy(), source_simse.data.cpu().numpy(),
            #          target_dann.data.cpu().numpy(), target_diff.data.cpu().numpy(),
            #          target_mse.data.cpu().numpy(), target_simse.data.cpu().numpy()))

            # Evaluate on dataset1, accumulating time and accuracy
            start1 = time.time()
            accu1 = test(epoch=epoch, name='dataset1')
            end1 = time.time()
            curr1 = end1 - start1
            time_total1 += curr1
            accu_total1 += accu1

            # Evaluate on dataset2, accumulating time and accuracy
            start2 = time.time()
            accu2 = test(epoch=epoch, name='dataset2')
            end2 = time.time()
            curr2 = end2 - start2
            time_total2 += curr2
            accu_total2 += accu2
            # print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    # Use the average accuracy as the evaluation metric for training
    model_index = epoch
    # Find an unused model save path (backslashes escaped; the original used
    # invalid escape sequences such as '\s' and '\d')
    save_root = 'D:\\study\\graduation_project\\grdaution_project\\instru_identify\\dataset18dataset2'
    model_path = save_root + '\\dsn_epoch_' + str(model_index) + '.pth'
    while os.path.exists(model_path):
        model_index = model_index + 1
        model_path = save_root + '\\dsn_epoch_' + str(model_index) + '.pth'
    torch.save(my_net.state_dict(), model_path)  # save the model

    average_accu1 = accu_total1 / (len_dataloader * n_epoch)
    average_accu2 = accu_total2 / (len_dataloader * n_epoch)
    # Restored: without this line, "return result" returned the last forward pass
    result = [float(average_accu1), float(average_accu2)]
    # Everything is printed rounded to three decimal places
    print(round(float(average_accu1), 3))
    print(round(float(average_accu2), 3))
    print(round(float(time_total1), 3))
    print(round(float(time_total2), 3))
    # print('result:', result)
    return result
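# Example entry point (a sketch; 'dsn_epoch_0.pth' is a hypothetical
# checkpoint name that must exist under the directory joined in run()):
# if __name__ == '__main__':
#     accu = run('dsn_epoch_0.pth')
#     print(accu)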
    if step % lr_decay_step == 0:
        print('learning rate is set to %f' % current_lr)  # Python 3 print()

    for param_group in optimizer.param_groups:
        param_group['lr'] = current_lr

    return optimizer


optimizer = optim.SGD(my_net.parameters(),
                      lr=lr,
                      momentum=momentum,
                      weight_decay=weight_decay)

loss_classification = torch.nn.CrossEntropyLoss()
loss_recon1 = MSE()
loss_recon2 = SIMSE()
loss_diff = DiffLoss()
loss_similarity = torch.nn.CrossEntropyLoss()

if cuda:
    my_net = my_net.cuda()
    loss_classification = loss_classification.cuda()
    loss_recon1 = loss_recon1.cuda()
    loss_recon2 = loss_recon2.cuda()
    loss_diff = loss_diff.cuda()
    loss_similarity = loss_similarity.cuda()

for p in my_net.parameters():
    p.requires_grad = True
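# Quick sanity check of the exponential decay schedule used above,
# lr(step) = init_lr * step_decay_weight ** (step / lr_decay_step)
# (standalone sketch with this script's values lr=1e-2,
# step_decay_weight=0.95, lr_decay_step=20000):
# for s in (0, 20000, 40000):
#     print(s, 1e-2 * 0.95 ** (s / 20000))  # 0.01, 0.0095, 0.009025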
def soloSGD(learn, epochs, itrs, batchSize, o=10):
    np.random.seed(2)
    N = 1000
    x = np.sort(np.random.uniform(0, 1, N))
    y = np.sort(np.random.uniform(0, 1, N))
    z = FrankeFunction(x, y)

    k = 0
    l = 0
    m = 0
    alpha = 0.9
    lst = [learn, epochs, itrs, batchSize]
    # Find which (if any) argument is array-like
    for i in lst:
        if hasattr(i, "__len__"):
            l = k
            array = i
            m += 1
        k += 1
    if m > 1:
        raise ValueError('Only one input can be an array-like')

    if m != 0:
        error = np.zeros((o, len(array)))
        for i in range(o):
            X = X_Mat(x, y, i)  # design matrix for order i (was fixed at o)
            j = 0
            for k in array:
                if l == 0:
                    variable = 'Learning Rate'
                    B = SGD(X, z, k, epochs, itrs, alpha, batchSize)
                elif l == 1:
                    variable = 'Epochs'
                    B = SGD(X, z, learn, k, itrs, alpha, batchSize)
                elif l == 2:
                    variable = 'Iterations'
                    B = SGD(X, z, learn, epochs, k, alpha, batchSize)
                elif l == 3:
                    variable = 'Batch Size'
                    B = SGD(X, z, learn, epochs, itrs, alpha, k)
                zpred = np.dot(X, B)
                error[i][j] = MSE(z, zpred)
                j += 1
        plt.title(
            'Mean-Squared-Error as a function of Polynomial Order\n with %s from %g to %g'
            % (variable, array[0], array[-1]))
        for i in range(len(array)):
            plt.plot(range(o), error[:, i],
                     label='%s = %g' % (variable, array[i]))
        plt.xlabel('Polynomial Order')
        plt.ylabel('MSE')
        plt.legend(loc='upper right')
        plt.show()
        for i in range(len(array)):
            # was np.mean(error[:][i]), which averages over rows, not columns
            print('MSE:', array[i], np.mean(error[:, i]))
    else:
        error = []
        for i in range(o):
            X = X_Mat(x, y, i)
            B = SGD(X, z, learn, epochs, itrs, alpha, batchSize)
            zpred = np.dot(X, B)
            error.append(MSE(z, zpred))
        plt.plot(range(o), error)
        plt.xlabel('Polynomial Order')
        plt.ylabel('MSE')
        plt.title('MSE of our Custom Stochastic Gradient Descent model on its own')
        plt.show()
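# Example usage (a minimal sketch; the values are illustrative assumptions).
# Exactly one argument may be array-like; here the learning rate is swept:
# soloSGD(learn=[0.001, 0.01, 0.1], epochs=100, itrs=200, batchSize=32, o=10)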
def LogisticRegression(Type,
                       aFunc='sm',
                       penalty='l2',
                       designOrder=5,
                       iters=600,
                       epochs=200,
                       alpha=0.9,
                       kappa=1,
                       batchSize=50,
                       learn=0.001,
                       plotting=False,
                       bestFound=False):
    np.random.seed(2)
    k = 0
    l = 0
    m = 0
    vari = False
    lst = [designOrder, iters, batchSize, learn, epochs]
    for i in lst:
        if hasattr(i, "__len__"):
            vari = True
            l = k
            array = i
            m += 1
        k += 1
    if m > 1:
        raise ValueError('Only one input can be an array-like')

    if bestFound:
        # Use the best parameters found while coding and testing. These can
        # probably be optimized further; they are just examples of good results.
        print('Fitting with best possible parameters\n')
        if Type.lower() == 'gd':  # gives an MSE of about ~0.02
            designOrder = 5
            aFunc = 'relu'
            penalty = 'l2'
            iters = 800
            epochs = 500
            alpha = 0.6
            learn = 1
            bestArray = [iters, epochs, alpha, learn]
            bestFunc = 'Rectified Linear Unit'
        elif Type.lower() == 'sgdmb':  # gives an MSE of about ~0.07
            designOrder = 5
            aFunc = 'elu'
            penalty = 'l2'
            iters = 400
            alpha = 0.8
            epochs = 200
            batchSize = 50
            learn = 0.0005
            bestArray = [iters, epochs, alpha, batchSize, learn]
            bestFunc = 'Exponential Linear Unit'
        else:  # assumes normal SGD w/o mini batches, MSE of about ~0.11
            designOrder = 5
            aFunc = 'sp'
            penalty = 'l2'
            iters = 600
            epochs = 200
            alpha = 0.7
            learn = 0.006
            kappa = 2.3
            bestArray = [iters, epochs, alpha, kappa, learn]
            bestFunc = 'Softplus'

    N = 1000
    x = np.sort(np.random.uniform(0, 1, N))
    y = np.sort(np.random.uniform(0, 1, N))
    z = FrankeFunction(x, y)
    ploter = plt.plot

    if vari:
        Blst = []
        Xlst = []  # keep each design matrix so predictions use the matching X
        for i in array:
            if l == 0:
                variable = 'Polynomial Order'
                X = X_Mat(x, y, i)
                X_train, X_test, z_train, z_test = train_test_split(
                    X, z, test_size=0.2)
                # The original passed the loop counter k where kappa belongs
                model = LR(X, z, Type, aFunc, iters, epochs, penalty, alpha,
                           kappa, batchSize)
                model.fitter(X_train, z_train, learn)
            elif l == 1:
                variable = 'Iterations'
                X = X_Mat(x, y, designOrder)
                X_train, X_test, z_train, z_test = train_test_split(
                    X, z, test_size=0.2)
                model = LR(X, z, Type, aFunc, i, epochs, penalty, alpha,
                           kappa, batchSize)
                model.fitter(X_train, z_train, learn)
            elif l == 2:
                variable = 'Batch Size'
                X = X_Mat(x, y, designOrder)
                X_train, X_test, z_train, z_test = train_test_split(
                    X, z, test_size=0.2)
                # Argument order fixed (was penalty, k, alpha, i)
                model = LR(X, z, Type, aFunc, iters, epochs, penalty, alpha,
                           kappa, i)
                model.fitter(X_train, z_train, learn)
            elif l == 3:
                variable = 'Learning Rate'
                X = X_Mat(x, y, designOrder)
                X_train, X_test, z_train, z_test = train_test_split(
                    X, z, test_size=0.2)
                model = LR(X, z, Type, aFunc, iters, epochs, penalty, alpha,
                           kappa, batchSize)
                model.fitter(X_train, z_train, i)
                ploter = plt.semilogx
            elif l == 4:
                variable = 'Epochs'
                X = X_Mat(x, y, designOrder)
                X_train, X_test, z_train, z_test = train_test_split(
                    X, z, test_size=0.2)
                model = LR(X, z, Type, aFunc, iters, i, penalty, alpha,
                           kappa, batchSize)
                model.fitter(X_train, z_train, learn)
            Blst.append(model.B)
            Xlst.append(X)

        msestore = []
        for i in range(len(array)):
            # was np.dot(X, Blst[i]): the last X was reused for every fit,
            # which is wrong when the polynomial order varies
            zpred4 = np.dot(Xlst[i], Blst[i])
            print('\n%s: %g\nMSE: %g' % (variable, array[i], MSE(z, zpred4)))
            msestore.append(MSE(z, zpred4))
        if plotting:
            plt.title('Mean-Squared-Error as a function of %s\nfrom %g to %g' %
                      (variable, array[0], array[-1]))
            ploter(array, msestore)
            plt.xlabel('%s' % (variable))
            plt.ylabel('MSE')
            plt.show()
    else:
        X = X_Mat(x, y, designOrder)
        X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)
        model = LR(X, z, Type, aFunc, iters, epochs, penalty, alpha, kappa,
                   batchSize)
        model.fitter(X_train, z_train, learn)
        B4 = model.B
        zpred4 = np.dot(X, B4)
        print('MSE: %g || R2: %g' % (MSE(z, zpred4), R2(z, zpred4)))
        if bestFound:
            if Type.lower() == 'sgdmb':
                print(
                    '\nActivation Function: %s\n%s\nUsing the variables:\nIterations: %g\nEpochs: %g\nAlpha: %g\nBatch Size: %g\nLearning Rate: %g'
                    % (bestFunc, '=' * 45, bestArray[0], bestArray[1],
                       bestArray[2], bestArray[3], bestArray[4]))
            elif Type.lower() == 'sgd':
                print(
                    'Activation Function: %s\n%s\nUsing the variables:\nIterations: %g\nEpochs: %g\nAlpha Parameter: %g\nSharpness Parameter: %g\nLearning Rate: %g'
                    % (bestFunc, '=' * 30, bestArray[0], bestArray[1],
                       bestArray[2], bestArray[3], bestArray[4]))
            else:
                print(
                    'Activation Function: %s\n%s\nUsing the variables:\nIterations: %g\nEpochs: %g\nAlpha: %g\nLearning Rate: %g'
                    % (bestFunc, '=' * 45, bestArray[0], bestArray[1],
                       bestArray[2], bestArray[3]))
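# Example usage (a minimal sketch; the argument values are illustrative):
# LogisticRegression('sgdmb', bestFound=True)            # tuned mini-batch SGD
# LogisticRegression('gd', learn=np.logspace(-4, 0, 5),  # sweep learning rate
#                    plotting=True)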
def y_t(Xr, B):
    return Xr @ B


X = X_Mat(xm, ym, o)
Xr = X_Mat(xm, ym, o)
B, ztilde, zpred = OLS(X, z)

surf = ax.plot_surface(xm, ym, (y_t(Xr, B)).reshape((N, N)),
                       cmap=cm.coolwarm, linewidth=0, antialiased=False)
ax.set_zlim(-0.10, 1.40)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
fig.colorbar(surf, shrink=0.5, aspect=5)
for angle in range(0, 360):
    ax.view_init(30, angle)
    plt.draw()
    plt.pause(.001)

# Franke3D(x, y)
z = np.ravel(z)
X_train, X_test, z_Train, z_Test = train_test_split(X, z, test_size=0.2)
X_train, X_test = scaler(X_train, X_test)

MSE_train = MSE(z_Train, ztilde)
R2_train = R2(z_Train, ztilde)
MSE_test = MSE(z_Test, zpred)
R2_test = R2(z_Test, zpred)

# 95% confidence intervals for the beta coefficients:
# conf = 1.96 * sqrt(diag((X^T X)^{-1}))
vB = np.diag(np.linalg.inv(X_train.T @ X_train))
conf = 1.96 * np.sqrt(vB)

print('Beta:', B)
print('Confidence:', conf)
print('Training MSE:', MSE_train)
print('Training R2:', R2_train)
print('Testing MSE:', MSE_test)
print('Testing R2:', R2_test)
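# For reference, scaler() is assumed to standardize using the training
# statistics only, e.g. (hypothetical sketch, not the project's definition):
# from sklearn.preprocessing import StandardScaler
# def scaler(X_train, X_test):
#     sc = StandardScaler().fit(X_train)
#     return sc.transform(X_train), sc.transform(X_test)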
def CV(max_deg, fold_num, noise, method, dp=100, lmda=1e-4):
    """
    Applies k-fold cross validation to data generated from Franke's function.
    Initially splits into train and test data, then loops over the given
    number of folds and finds the MSE for each degree. The lowest MSE found
    can then be validated on the initial test data from the train/test split.

    args:
        max_deg (int): highest degree (goes from 0 to this value)
        fold_num (int): number of k-folds performed
        noise (float): scale of the Gaussian noise added to the data
        method: regression function (OLS, Ridge_func) or the string 'lasso'
        dp (int): number of datapoints (added here; undefined in the original)
        lmda (float): Lasso penalty (added here; undefined in the original)

    returns:
        MSE averaged over fold_num iterations, for the train and test folds
    """
    np.random.seed(130)
    scaler = StandardScaler()
    x = np.sort(np.random.uniform(0, 1, dp))
    y = np.sort(np.random.uniform(0, 1, dp))
    # x, y = np.meshgrid(x, y)
    X_tr_, X_te_, Y_tr_, Y_te_ = train_test_split(x, y, test_size=0.2)
    X_tr, Y_tr = np.meshgrid(X_tr_, Y_tr_)
    X_te, Y_te = np.meshgrid(X_te_, Y_te_)
    z_tr = np.ravel(
        FrankeFunction(X_tr, Y_tr) +
        noise * np.random.randn(X_tr.shape[0], X_tr.shape[0]))
    z_te = np.ravel(
        FrankeFunction(X_te, Y_te) +
        noise * np.random.randn(X_te.shape[0], X_te.shape[0]))

    MSE_train_values = np.zeros(max_deg)
    MSE_test_values = np.zeros(max_deg)

    # Degrees loop; each degree contains a full k-fold pass
    for k, deg in enumerate(range(0, max_deg)):
        X_design = X_make(X_tr, Y_tr, deg)
        X_design[:, 0] = 1
        # The number of rows has to be divisible by fold_num for the split
        X_master = np.array(np.array_split(X_design, fold_num))
        z_tr_spl = np.array(np.array_split(z_tr, fold_num))
        MSEtrain = np.zeros(fold_num)
        MSEtest = np.zeros(fold_num)
        for i in range(fold_num):
            X_test_fold = X_master[i]
            z_test_fold = z_tr_spl[i]
            # Use all remaining folds as training data (the original loop
            # overwrote X_train_fold and kept only the last fold)
            X_train_fold = np.concatenate(
                [X_master[ex] for ex in range(fold_num) if ex != i])
            z_train_fold = np.concatenate(
                [z_tr_spl[ex] for ex in range(fold_num) if ex != i])
            # Scaling: fit on the training folds only, then transform both
            # the training folds and the held-out fold with those statistics
            X_train_fold_s = scaler.fit(X_train_fold).transform(X_train_fold)
            X_test_fold_s = scaler.transform(X_test_fold)
            if method == OLS or method == Ridge_func:
                beta = method(X_train_fold_s, z_train_fold)
                z_test_fold_hat = X_test_fold_s.dot(beta)
                z_train_fold_hat = X_train_fold_s.dot(beta)
            elif method == 'lasso':
                # fit() was missing its targets in the original
                clf_lasso = skl.Lasso(alpha=lmda).fit(X_train_fold_s,
                                                      z_train_fold)
                z_test_fold_hat = clf_lasso.predict(X_test_fold_s)
                z_train_fold_hat = clf_lasso.predict(X_train_fold_s)
            # These two assignments were swapped in the original
            MSEtrain[i] = MSE(z_train_fold, z_train_fold_hat)
            MSEtest[i] = MSE(z_test_fold, z_test_fold_hat)
        MSE_train_values[k] = np.mean(MSEtrain)
        MSE_test_values[k] = np.mean(MSEtest)
    return MSE_train_values, MSE_test_values
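# Example usage (a minimal sketch; the dp and lmda defaults are assumptions
# introduced above):
# mse_tr, mse_te = CV(max_deg=10, fold_num=5, noise=0.1, method=OLS, dp=100)
# plt.plot(range(10), mse_tr, label='train')
# plt.plot(range(10), mse_te, label='test')
# plt.legend(); plt.show()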
discri_2 = []
encoder, decoder_0, decoder_1, decoder_2, discri_0, discri_1, discri_2 = get_param(
    my_net, encoder, decoder_0, discri_0, decoder_1, discri_1, decoder_2,
    discri_2)

# criterion = nn.MSELoss(size_average=False)
# criterion.cuda()
# target_optimizer = optim.SGD(my_target_net.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

# One optimizer per sub-module: shared encoder, three decoders, three discriminators
optimizer_encoder = optim.SGD(encoder, lr=lr, momentum=momentum,
                              weight_decay=weight_decay)
optimizer_decoder_0 = optim.SGD(decoder_0, lr=args.decoder_lr, momentum=momentum,
                                weight_decay=weight_decay)
optimizer_decoder_1 = optim.SGD(decoder_1, lr=args.decoder_lr, momentum=momentum,
                                weight_decay=weight_decay)
optimizer_decoder_2 = optim.SGD(decoder_2, lr=args.decoder_lr, momentum=momentum,
                                weight_decay=weight_decay)
optimizer_discri_0 = optim.SGD(discri_0, lr=0.001, momentum=momentum,
                               weight_decay=weight_decay)
optimizer_discri_1 = optim.SGD(discri_1, lr=0.001, momentum=momentum,
                               weight_decay=weight_decay)
optimizer_discri_2 = optim.SGD(discri_2, lr=0.001, momentum=momentum,
                               weight_decay=weight_decay)

loss_classification = torch.nn.CrossEntropyLoss()
loss_l2 = MSE()
loss_bce = torch.nn.BCELoss()
loss_l1 = L1_Loss()

if cuda:
    # my_target_net = my_target_net.cuda()
    my_net = my_net.cuda()
    loss_classification = loss_classification.cuda()
    loss_l2 = loss_l2.cuda()
    loss_bce = loss_bce.cuda()
    loss_l1 = loss_l1.cuda()

for p in my_net.parameters():
    p.requires_grad = True
    # q.requires_grad = True

#############################
#     training network      #
#############################
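# A typical update pattern for the per-module optimizers set up above
# (a minimal sketch, not this project's exact training loop):
# all_opts = (optimizer_encoder, optimizer_decoder_0, optimizer_decoder_1,
#             optimizer_decoder_2, optimizer_discri_0, optimizer_discri_1,
#             optimizer_discri_2)
# for opt in all_opts:
#     opt.zero_grad()
# loss.backward()
# for opt in all_opts:
#     opt.step()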