x_values = []
x_values_2_scores = {}

network.eval()  # (set in eval mode, this affects BatchNorm and dropout)
for step, x in enumerate(val_loader):
    x = x.cuda().unsqueeze(1)  # (shape: (batch_size, 1))

    y_samples = np.linspace(-3.0, 3.0, num_samples)  # (shape: (num_samples, ))
    y_samples = y_samples.astype(np.float32)
    y_samples = torch.from_numpy(y_samples).cuda()  # (shape: (num_samples, ))

    x_features = network.feature_net(x)  # (shape: (batch_size, hidden_dim))
    scores = network.predictor_net(x_features, y_samples.expand(x.shape[0], -1))
    # (shape: (batch_size, num_samples))

    x_values.extend(x.squeeze(1).cpu().tolist())
    for i, x_val in enumerate(x):
        x_values_2_scores[x_val.item()] = scores[i, :].cpu().numpy()

print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")

num_x_values = float(len(x_values))
print(num_x_values)

KL = 0.0
for step, x_value in enumerate(x_values):
    scores = np.exp(x_values_2_scores[x_value].flatten())  # (shape: (num_samples, ))
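    # (The listing above is truncated at this point. A minimal sketch of one
    # plausible continuation, assuming the goal is a discretized KL divergence
    # between the normalized model density exp(f)/Z on the y grid and a known
    # ground-truth density; p_true() is a hypothetical helper, not part of the
    # original code.)
    y_grid = np.linspace(-3.0, 3.0, num_samples)  # (same grid as above)
    dy = y_grid[1] - y_grid[0]
    p_model = scores / (np.sum(scores) * dy)  # normalize exp(scores) on the grid

    p_gt = p_true(y_grid, x_value)  # hypothetical: true p(y | x) on the grid

    eps = 1e-12  # guard against log(0)
    KL += np.sum(p_gt * (np.log(p_gt + eps) - np.log(p_model + eps))) * dy

KL = KL / num_x_values
print("KL: %g" % KL)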
x_values = []
x_values_2_scores = {}

network.eval()  # (set in eval mode, this affects BatchNorm and dropout)
for step, x in enumerate(val_loader):
    if (step % 1000) == 0:
        print(step)

    x = x.cuda().unsqueeze(1)  # (shape: (batch_size, 1))

    y_samples = np.linspace(-3.0, 3.0, num_samples)  # (shape: (num_samples, ))
    y_samples = y_samples.astype(np.float32)
    y_samples = torch.from_numpy(y_samples).cuda()  # (shape: (num_samples, ))

    x_features = network.feature_net(x)  # (shape: (batch_size, hidden_dim))
    scores = network.predictor_net(x_features, y_samples.expand(x.shape[0], -1))
    # (shape: (batch_size, num_samples))

    x_values.extend(x.squeeze(1).cpu().tolist())
    for i, x_val in enumerate(x):
        x_values_2_scores[x_val.item()] = scores[i, :].cpu().numpy()

print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")

y_values = []
most_y = []
for step, x_value in enumerate(x_values):
    if (step % 1000) == 0:
        print(step)

    scores = np.exp(x_values_2_scores[x_value].flatten())  # (shape: (num_samples, ))
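    # (Truncated in the original. A minimal sketch of one plausible continuation:
    # the name most_y suggests storing, for each x, the grid point with the
    # highest model density; what y_values held is not recoverable from this
    # listing, so it is left unfilled.)
    y_grid = np.linspace(-3.0, 3.0, num_samples)  # (same grid as above)
    most_y.append(y_grid[np.argmax(scores)])  # mode of the (unnormalized) density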
print("###########################") print("model: %d/%d | epoch: %d/%d" % (i + 1, num_models, epoch + 1, num_epochs)) network.train( ) # (set in training mode, this affects BatchNorm and dropout) batch_losses = [] for step, (xs, ys) in enumerate(train_loader): xs = xs.cuda().unsqueeze(1) # (shape: (batch_size, 1)) ys = ys.cuda().unsqueeze(1) # (shape: (batch_size, 1)) ys.requires_grad_(True) x_features = network.feature_net( xs) # (shape: (batch_size, hidden_dim)) fs = network.predictor_net(x_features, ys) # (shape: (batch_size, 1)) fs = fs.squeeze(1) # (shape: (batch_size)) ######################################################################## # compute loss: ######################################################################## grad_y_fs = torch.autograd.grad(fs.sum(), ys, create_graph=True)[0] # (shape: (batch_size, 1)) (same as ys) trace_grad_y_grad_y_fs = torch.zeros(ys.size(0), device=ys.device) # (shape: (batch_size)) for index in range(ys.size(1)): # (grad_y_fs[:, index] has shape: (batch_size)) trace_grad_y_grad_y_fs += torch.autograd.grad( grad_y_fs[:, index].sum(), ys,
epoch_losses_train = []
for epoch in range(num_epochs):
    print("###########################")
    print("######## NEW EPOCH ########")
    print("###########################")
    print("model: %d/%d | epoch: %d/%d" % (i+1, num_models, epoch+1, num_epochs))

    network.train()  # (set in training mode, this affects BatchNorm and dropout)

    batch_losses = []
    for step, (xs, ys) in enumerate(train_loader):
        xs = xs.cuda().unsqueeze(1)  # (shape: (batch_size, 1))
        ys = ys.cuda().unsqueeze(1)  # (shape: (batch_size, 1))

        x_features = network.feature_net(xs)  # (shape: (batch_size, hidden_dim))
        scores_gt = network.predictor_net(x_features, ys)  # (shape: (batch_size, 1))
        scores_gt = scores_gt.squeeze(1)  # (shape: (batch_size))

        y_samples_zero, q_y_samples, q_ys = sample_gmm_centered(stds, num_samples=num_samples)
        y_samples_zero = y_samples_zero.cuda()  # (shape: (num_samples, 1))
        y_samples_zero = y_samples_zero.squeeze(1)  # (shape: (num_samples))
        q_y_samples = q_y_samples.cuda()  # (shape: (num_samples))
        y_samples = ys + y_samples_zero.unsqueeze(0)  # (shape: (batch_size, num_samples))
        q_y_samples = q_y_samples.unsqueeze(0)*torch.ones(y_samples.size()).cuda()  # (shape: (batch_size, num_samples))
        q_ys = q_ys[0]*torch.ones(xs.size(0)).cuda()  # (shape: (batch_size))

        scores_samples = network.predictor_net(x_features, y_samples)  # (shape: (batch_size, num_samples))

        ########################################################################
        # compute loss:
        ########################################################################
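        # (The loss itself is missing from the original listing. A minimal sketch
        # of one plausible completion, assuming a noise-contrastive estimation
        # (NCE) objective: the true y is classified against the num_samples noise
        # samples, with log q(.) correcting for the proposal density; the
        # optimizer name is an assumption.)
        exp_vals_gt = scores_gt - torch.log(q_ys)  # (shape: (batch_size))
        exp_vals_samples = scores_samples - torch.log(q_y_samples)  # (shape: (batch_size, num_samples))
        exp_vals = torch.cat([exp_vals_gt.unsqueeze(1), exp_vals_samples], dim=1)  # (shape: (batch_size, 1+num_samples))

        loss = -torch.mean(exp_vals_gt - torch.logsumexp(exp_vals, dim=1))

        batch_losses.append(loss.item())

        optimizer.zero_grad()  # (reset gradients)
        loss.backward()  # (compute gradients)
        optimizer.step()  # (perform optimization step)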
print("###########################") print("######## NEW EPOCH ########") print("###########################") print("model: %d/%d | epoch: %d/%d" % (i + 1, num_models, epoch + 1, num_epochs)) network.train( ) # (set in training mode, this affects BatchNorm and dropout) batch_losses = [] for step, (xs, ys) in enumerate(train_loader): xs = xs.cuda().unsqueeze(1) # (shape: (batch_size, 1)) ys = ys.cuda().unsqueeze(1) # (shape: (batch_size, 1)) x_features = network.feature_net( xs) # (shape: (batch_size, hidden_dim)) fs = network.predictor_net(x_features, ys) # (shape: (batch_size, 1)) fs = fs.squeeze(1) # (shape: (batch_size)) y_samples_zero, _ = sample_gmm_centered(stds, num_samples=num_samples) y_samples_zero = y_samples_zero.cuda() # (shape: (num_samples, 1)) y_samples_zero = y_samples_zero.squeeze( 1) # (shape: (num_samples)) y_samples = ys + y_samples_zero.unsqueeze( 0) # (shape: (batch_size, num_samples)) y_samples = y_samples.reshape(-1).unsqueeze( 1) # (shape: (batch_size*num_samples, 1)) x_features = x_features.view(xs.size(0), 1, -1).expand( -1, num_samples, -1) # (shape: (batch_size, num_samples, hidden_dim))