def testINN(i_epoch):
    test_loss = []
    t = time.perf_counter()
    print("FN_model_train_mode before test call", nets.model.training)
    print("INN_model_train_mode before test call", model.training)
    model.eval()
    print("FN_model_train_mode during test call", nets.model.training)
    print("INN_model_train_mode during test call", model.training)
    print("\n\n\n")
    for x_test, y_test, ana in c.test_ana_loader:
        x_test, y_test = x_test.to(c.device), y_test.to(c.device)
        with torch.no_grad():
            output, jac = model.forward(x_test, y_test)
            zz = torch.sum(output**2, dim=1)
            neg_log_likeli = 0.5 * zz - jac
            test_loss.append(torch.mean(neg_log_likeli).item())
    test_loss = np.mean(np.array(test_loss), axis=0)
    # visualize posterior samples for the last test batch
    ht.sample_posterior(y_test, x_test, name="test")
    model.train()
    print(f"Test Loss {i_epoch}: {test_loss} ({time.perf_counter() - t:.2f}s)")
    viz.show_loss(test_loss, "test")
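# Why `0.5 * zz - jac` above is the test loss: for an INN z = f(x; y) with a
# standard-normal latent prior, the change-of-variables formula gives
#     -log p(x | y) = 0.5 * ||z||^2 - log|det J_f(x)| + const,
# so averaging `0.5 * zz - jac` is the (negative) log-likelihood up to a
# constant. A minimal self-contained sketch with toy stand-in values (purely
# illustrative, not part of the pipeline):
def _nll_sketch():
    z = torch.randn(8, 4)     # stand-in latent codes for a batch of 8
    log_jac = torch.zeros(8)  # stand-in log|det J| per sample
    neg_log_likeli = 0.5 * torch.sum(z**2, dim=1) - log_jac
    return torch.mean(neg_log_likeli).item()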
def train_fn():
    t = time.process_time()
    print(f"starts to train: {c.feature_net_file}")
    year_error = show_year_error(network="FN")
    for epoch in range(c.fn_pretrain_percentage):
        epoch_t = time.perf_counter()
        print(f"\nTraining {epoch}\n")
        train_loss = train_solver(epoch, c.fn_pretrain_percentage)
        test_t = time.perf_counter()
        test_loss = test_solver()
        delta_t = time.perf_counter() - test_t
        delta_t_epoch = time.perf_counter() - epoch_t
        print(f"Time for Epoch {epoch}: {delta_t_epoch:.3f}s and testing {delta_t:.3f}s")
        scheduler_step()
        viz.show_loss(train_loss, "train")
        viz.show_loss(test_loss, "test")
        print("Model_mode", model.training)
        year_error.next(model)
        viz.make_step()
    print(f"Time to train solver in {c.fn_pretrain_percentage} iterations: "
          f"{time.process_time() - t:.2f}s")
    save(c.feature_net_file)
    save(f"{c.feature_net_file}2", model=model2)
    # run the matching evaluation script after training
    if c.train_uncert:
        import uncert_eval
    else:
        import evaluate
def train_solver(epoch, N_epochs, model=model, vizual=False):
    model.train()
    model2.train()
    start_t = time.perf_counter()
    errors = ht.show_error("train")
    errors2 = ht.show_error("trainuncert", uncertainty=True)
    for batch_idx, (x, y, ana) in enumerate(train_loader):
        y, x = y.to(c.device), x.to(c.device)
        optim.zero_grad()
        model.zero_grad()
        output = model.fn_func(y)
        loss, loss_a = create_loss(output, x, loss_a_use=True)
        loss.backward()
        if c.train_uncert:
            # with uncertainty training, predictions sit at the even indices
            output = output[:, ::2]
        for i in range(output.shape[0]):
            errors.add_error(output[i][None, :], x[i][None, :])
        optim_step()

        if two_nets:
            optim2.zero_grad()
            model2.zero_grad()
            output2 = model2.fn_func(y)
            loss2, loss_a2 = create_loss(output2, x, loss_a_use=True,
                                         predict_uncert=True)
            loss2.backward()
            for i in range(output2.shape[0]):
                # even indices: predictions, odd indices: uncertainties
                errors2.add_error(output2[i][None, ::2], x[i][None, :],
                                  output2[i][None, 1::2])
            optim2.step()

        # print training loss
        if batch_idx % c.fn_pretrain_log_interval == 0:
            if two_nets:
                print(
                    f'\rTrain Epoch: {epoch}/{N_epochs-1} [{batch_idx * len(x)}/{len(train_loader.dataset)}'
                    f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f},{loss2.item():.6f},'
                    f' {torch.mean(loss_a2).item():.4f} Time: {time.perf_counter() - start_t:.1f}',
                    end='')
            else:
                print(
                    f'\rTrain Epoch: {epoch}/{N_epochs-1} [{batch_idx * len(x)}/{len(train_loader.dataset)}'
                    f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f},'
                    f' Time: {time.perf_counter() - start_t:.1f}',
                    end='')

        # print a test-style report after the configured fraction of the
        # epoch, then end the epoch
        tests_every = int(len(train_loader.dataset) / len(x) / c.fn_pretrain_number_of_tests)
        if batch_idx % tests_every == tests_every - 1:
            difference = prepare_data.x_to_params(
                output.detach().cpu().numpy()) - prepare_data.x_to_params(
                    x.detach().cpu().numpy())
            tot_difference = np.mean(np.abs(difference), axis=0)
            mean_difference = np.mean(difference, axis=0)
            print(f"Train errors {tot_difference} and with mean at {mean_difference}")
            mean_er = torch.mean(torch.abs(output - x), dim=0)
            print("mean error train", mean_er.detach().cpu().numpy())
            errors.print_error()
            if two_nets:
                errors2.print_error()
                print(f"\nTrain_loss {loss_a.detach().cpu().numpy()}")
                print(f"\nTrain_loss new {loss2.item():.6f}, "
                      f"{torch.mean(loss_a2).detach().cpu().numpy():.6f}"
                      f"{loss_a2.detach().cpu().numpy()}")
                viz.show_loss(loss2.item(), "trainuncert")
            break
    return loss.item()
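# The interleaved layout above (predictions at output2[:, ::2], uncertainties
# at output2[:, 1::2]) suggests that create_loss(..., predict_uncert=True)
# computes a heteroscedastic Gaussian negative log-likelihood. A minimal
# sketch under that assumption -- `_gaussian_nll_sketch` is hypothetical and
# not this repo's create_loss:
def _gaussian_nll_sketch(output, x):
    mu = output[:, ::2]          # predicted means
    log_sigma = output[:, 1::2]  # predicted log standard deviations
    # per-sample, per-dimension NLL of x under N(mu, sigma^2), up to a constant
    nll = 0.5 * ((x - mu) / torch.exp(log_sigma))**2 + log_sigma
    loss_a = torch.mean(nll, dim=0)  # per-dimension mean, analogous to loss_a
    return torch.mean(loss_a), loss_a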
def test_solver():
    model.eval()
    model2.eval()
    test_loss = 0
    test_loss2 = 0
    test_losses = np.zeros(c.x_dim)
    test_losses2 = np.zeros(c.x_dim)
    errors = ht.show_error("test")
    errors2 = ht.show_error("testuncert", uncertainty=True)
    with torch.no_grad():
        iterations = 0
        for x_test, y_test, ana_test in test_loader:
            y_test, x_test = y_test.to(c.device), x_test.to(c.device)
            test_output = model.fn_func(y_test)
            loss, loss_a = create_loss(test_output, x_test, loss_a_use=True)
            test_loss += loss.item()  # sum up batch loss
            test_losses += loss_a.detach().cpu().numpy()  # sum up per-dimension batch loss
            if c.train_uncert:
                # predictions sit at the even indices
                test_output = test_output[:, ::2]
            for i in range(test_output.shape[0]):
                errors.add_error(test_output[i][None, :], x_test[i][None, :])
                iterations += 1
            if two_nets:
                test_output = model2.fn_func(y_test)
                loss2, loss_a2 = create_loss(test_output[:, ::2], x_test,
                                             loss_a_use=True)
                test_loss2 += loss2.item()  # sum up batch loss
                test_losses2 += loss_a2.detach().cpu().numpy()
                for i in range(test_output.shape[0]):
                    errors2.add_error(test_output[i][None, ::2],
                                      x_test[i][None, :],
                                      test_output[i][None, 1::2])
    print(f"\nTotal number of Testsamples {iterations}")
    errors.print_error()
    test_loss /= len(test_loader)
    test_losses /= len(test_loader)
    print(f'Testloss: {test_loss} each {test_losses}')
    if two_nets:
        errors2.print_error()
        test_loss2 /= len(test_loader)
        test_losses2 /= len(test_loader)
        print(f"New testloss {test_loss2} each {test_losses2}")
        viz.show_loss(test_loss2, "testuncert")
    model.train()
    model2.train()
    return test_loss
def trainINN(i_epoch):
    loss_history = []
    data_iter = iter(c.train_ana_loader)

    # warm-up phase: train with a reduced learning rate before epoch 0,
    # then restore the initial rate at epoch 0
    if i_epoch < 0:
        for param_group in model.optimizer.param_groups:
            param_group['lr'] = c.lr_init * c.lr_reduce_factor
    if i_epoch == 0:
        for param_group in model.optimizer.param_groups:
            param_group['lr'] = c.lr_init

    print("INN_model_train_mode before train call", model.training)
    for param_group in model.optimizer.param_groups:
        print(f"Start Learningrate for epoch {i_epoch} is {param_group['lr']:.3e}")
    print(f"Learningrate for epoch {i_epoch} is {model.scheduler.get_lr()[0]:.3e}")

    iterator = tqdm.tqdm(enumerate(data_iter),
                         total=min(len(c.train_ana_loader), c.n_its_per_epoch),
                         leave=False,
                         mininterval=1.,
                         disable=(not c.progress_bar),
                         ncols=83)
    model.train()

    for i_batch, (x, cond, _) in iterator:
        cond, x = cond.to(c.device), x.to(c.device)
        model.optimizer.zero_grad()

        if c.do_rev:
            # this branch hasn't been updated for a while; don't expect it to work
            def sample_outputs(sigma, out_shape, batchsize=4):
                return [sigma * torch.cuda.FloatTensor(torch.Size((batchsize, o))).normal_()
                        for o in out_shape]

            z = sample_outputs(1., model.output_dimensions, c.batch_size)
            features = nets.model.features(cond)
            output = model.model(z, features, rev=True)
            jac = model.model.log_jacobian(run_forward=False)
            l = 3.5 * torch.mean((x - output)**2) - torch.mean(jac)
        else:
            # default case: maximum-likelihood loss of the conditional INN
            z, jac = model.forward(x, cond)
            zz = torch.sum(z**2, dim=1)
            neg_log_likeli = 0.5 * zz - jac
            l = torch.mean(neg_log_likeli)

        l.backward()
        model.optim_step()
        loss_history.append([l.item()])
        assert not np.isnan(l.item()), f"\nloss_history {loss_history}"

        if i_batch + 1 >= c.n_its_per_epoch:
            # somehow the data loader workers don't shut down automatically
            try:
                data_iter._shutdown_workers()
            except:
                pass
            iterator.close()
            break

    # visualize posterior samples for the last training batch
    ht.sample_posterior(cond, x, "train")
    epoch_losses = np.mean(np.array(loss_history), axis=0)
    assert not np.isnan(np.sum(epoch_losses)), loss_history
    print("Train loss", epoch_losses[0])
    viz.show_loss(epoch_losses[0], "train")
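# For context: once the INN is trained with the likelihood loss above,
# posterior samples x ~ p(x | y) come from the reverse pass with z drawn from
# N(0, I); this is presumably what ht.sample_posterior does. A hedged sketch
# -- the latent dimension `c.ndim_z` and the `rev=True` call signature are
# assumptions, not confirmed by this file:
def _sample_posterior_sketch(cond, n_samples=100):
    with torch.no_grad():
        z = torch.randn(n_samples, c.ndim_z, device=c.device)
        cond_rep = cond[0:1].expand(n_samples, -1)  # one condition, repeated
        x_samples = model.forward(z, cond_rep, rev=True)
    return x_samples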
# Fragment of an older epoch loop; the matching try-block around the loop is
# not part of this excerpt.
        for i in range(3):
            z = sample_outputs(c.sampling_temperature, model.output_dimensions)
            x_ab_sampled = model.combined_model.module.reverse_sample(z, cond)
            ims.extend(list(data.norm_lab_to_rgb(x_l, x_ab_sampled)))
            break

        if i_epoch >= c.pretrain_epochs * 2:
            model.weight_scheduler.step(epoch_losses[0])
            model.feature_scheduler.step(epoch_losses[0])

        viz.show_imgs(*ims)
        viz.show_loss(epoch_losses)

        if i_epoch > 0 and (i_epoch % c.checkpoint_save_interval) == 0:
            model.save(c.filename + '_checkpoint_%.4i'
                       % (i_epoch * (1 - c.checkpoint_save_overwrite)))

    model.save(c.filename)
except:
    if c.checkpoint_on_error:
        model.save(c.filename + '_ABORT')
    raise
finally:
    viz.signal_stop()
# Fragment of an older per-epoch loop body; i_batch, output, and loss_history
# come from the surrounding training loop that is not part of this excerpt.
        if i_batch + 1 >= c.n_its_per_epoch:
            # somehow the data loader workers don't shut down automatically
            try:
                data_iter._shutdown_workers()
            except:
                pass
            break

    model.weight_scheduler.step()
    epoch_losses = np.mean(np.array(loss_history), axis=0)
    epoch_losses[0] = min(epoch_losses[0], 0)  # cap the plotted loss at 0

    if i_epoch > 1 - c.pre_low_lr:
        viz.show_loss(epoch_losses, logscale=False)
    output_orig = output.cpu()
    viz.show_hist(output_orig)

    with torch.no_grad():
        samples = sample_outputs(c.sampling_temperature)
        if not c.colorize:
            cond = test_cond
        rev_imgs = model.model(samples, cond, rev=True)
        ims = [rev_imgs]
    viz.show_imgs(*list(data.unnormalize(i) for i in ims))
    model.model.zero_grad()
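# Note: sample_outputs is called above with a single argument, while the
# version defined inside trainINN takes (sigma, out_shape, batchsize). This
# fragment presumably relies on a module-level helper along these lines --
# a sketch, assuming model.output_dimensions and c.batch_size exist:
def sample_outputs(sigma):
    return [sigma * torch.randn(c.batch_size, o, device=c.device)
            for o in model.output_dimensions]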