def _get_random_data(n, **tkwargs):
    train_x1 = torch.linspace(0, 0.95, n + 1, **tkwargs) + 0.05 * torch.rand(n + 1, **tkwargs)
    train_x2 = torch.linspace(0, 0.95, n, **tkwargs) + 0.05 * torch.rand(n, **tkwargs)
    train_y1 = torch.sin(train_x1 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x1)
    train_y2 = torch.cos(train_x2 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x2)
    return train_x1.unsqueeze(-1), train_x2.unsqueeze(-1), train_y1, train_y2
def _get_random_mt_data(**tkwargs):
    train_x = torch.linspace(0, 0.95, 10, **tkwargs) + 0.05 * torch.rand(10, **tkwargs)
    train_y1 = torch.sin(train_x * (2 * math.pi)) + torch.randn_like(train_x) * 0.2
    train_y2 = torch.cos(train_x * (2 * math.pi)) + torch.randn_like(train_x) * 0.2
    train_i_task1 = torch.full_like(train_x, dtype=torch.long, fill_value=0)
    train_i_task2 = torch.full_like(train_x, dtype=torch.long, fill_value=1)
    full_train_x = torch.cat([train_x, train_x])
    full_train_i = torch.cat([train_i_task1, train_i_task2])
    full_train_y = torch.cat([train_y1, train_y2])
    train_X = torch.stack([full_train_x, full_train_i.type_as(full_train_x)], dim=-1)
    train_Y = full_train_y
    return train_X, train_Y
def bisect_demo():
    """Bisect the LB/UB on specified columns.

    The key is to use scatter_() to convert indices into one-hot encodings.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)
    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    # convert into a one-hot encoding; use a bool mask rather than byte so that
    # torch.where() accepts it on recent PyTorch versions
    idxs = torch.zeros_like(lb).bool().scatter_(-1, idxs, True)
    print('Reorg idxs:', idxs)

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(idxs, mid, ub)   # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(idxs, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub
    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)
    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
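# A standalone hedged sketch of the scatter_() one-hot trick used in bisect_demo()
# above, on a tiny illustrative tensor (names here are hypothetical, not from the
# original code); torch is assumed to be imported.
lb_demo = torch.zeros(3, 4)
split_idxs = torch.tensor([[2], [0], [3]])  # Batch x 1 column indices to split on
one_hot = torch.zeros_like(lb_demo).bool().scatter_(-1, split_idxs, True)
# one_hot row i is True only at column split_idxs[i]; it can be fed to torch.where()
# to overwrite just the selected column, exactly as bisect_demo() does.
print(one_hot)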
def reparameterize(self, mu, logvar):
    if self.training:
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)
    else:
        return mu
def varlen_lstm_backward_setup(forward_output, seed=None):
    if seed:
        torch.manual_seed(seed)
    rnn_utils = torch.nn.utils.rnn
    sequences = forward_output[0]
    padded = rnn_utils.pad_sequence(sequences)
    grad = torch.randn_like(padded)
    return padded, grad
def forward(self, x):  # pylint: disable=arguments-differ
    mu, logsigma = self.encoder(x)
    sigma = logsigma.exp()
    eps = torch.randn_like(sigma)
    z = eps.mul(sigma).add_(mu)

    recon_x = self.decoder(z)
    return recon_x, mu, logsigma
def to_latent(obs, next_obs):
    """Transform observations to latent space.

    :args obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)
    :args next_obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)

    :returns: (latent_obs, latent_next_obs)
        - latent_obs: 3D torch tensor (BSIZE, SEQ_LEN, LSIZE)
        - latent_next_obs: 3D torch tensor (BSIZE, SEQ_LEN, LSIZE)
    """
    with torch.no_grad():
        obs, next_obs = [
            f.upsample(x.view(-1, 3, SIZE, SIZE), size=RED_SIZE,
                       mode='bilinear', align_corners=True)
            for x in (obs, next_obs)]

        (obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma) = [
            vae(x)[1:] for x in (obs, next_obs)]

        latent_obs, latent_next_obs = [
            (x_mu + x_logsigma.exp() * torch.randn_like(x_mu)).view(BSIZE, SEQ_LEN, LSIZE)
            for x_mu, x_logsigma in
            [(obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma)]]
    return latent_obs, latent_next_obs
def guide(batch, tag, hidden, label):
    softplus = torch.nn.Softplus()

    # embedding weight distribution priors
    embedding_mu = torch.randn_like(net.embedding.weight)
    embedding_sigma = torch.randn_like(net.embedding.weight)
    embedding_mu_param = pyro.param("embedding_mu", embedding_mu)
    embedding_sigma_param = softplus(pyro.param("embedding_sigma", embedding_sigma))
    embedding_prior = Normal(loc=embedding_mu_param, scale=embedding_sigma_param)

    # gru input-hidden weight distribution priors
    gruihw_mu = torch.randn_like(net.gru.weight_ih_l0)
    gruihw_sigma = torch.randn_like(net.gru.weight_ih_l0)
    gruihw_mu_param = pyro.param("gruihw_mu", gruihw_mu)
    gruihw_sigma_param = softplus(pyro.param("gruihw_sigma", gruihw_sigma))
    gruihw_prior = Normal(loc=gruihw_mu_param, scale=gruihw_sigma_param)

    # gru input-hidden bias distribution priors
    gruihb_mu = torch.randn_like(net.gru.bias_ih_l0)
    gruihb_sigma = torch.randn_like(net.gru.bias_ih_l0)
    gruihb_mu_param = pyro.param("gruihb_mu", gruihb_mu)
    gruihb_sigma_param = softplus(pyro.param("gruihb_sigma", gruihb_sigma))
    gruihb_prior = Normal(loc=gruihb_mu_param, scale=gruihb_sigma_param)

    # gru hidden-hidden weight distribution priors
    gruhhw_mu = torch.randn_like(net.gru.weight_hh_l0)
    gruhhw_sigma = torch.randn_like(net.gru.weight_hh_l0)
    gruhhw_mu_param = pyro.param("gruhhw_mu", gruhhw_mu)
    gruhhw_sigma_param = softplus(pyro.param("gruhhw_sigma", gruhhw_sigma))
    gruhhw_prior = Normal(loc=gruhhw_mu_param, scale=gruhhw_sigma_param)

    # gru hidden-hidden bias distribution priors
    gruhhb_mu = torch.randn_like(net.gru.bias_hh_l0)
    gruhhb_sigma = torch.randn_like(net.gru.bias_hh_l0)
    gruhhb_mu_param = pyro.param("gruhhb_mu", gruhhb_mu)
    gruhhb_sigma_param = softplus(pyro.param("gruhhb_sigma", gruhhb_sigma))
    gruhhb_prior = Normal(loc=gruhhb_mu_param, scale=gruhhb_sigma_param)

    # first fully connected layer weight distribution priors
    fc1w_mu = torch.randn_like(net.fc1.weight)
    fc1w_sigma = torch.randn_like(net.fc1.weight)
    fc1w_mu_param = pyro.param("fc1w_mu", fc1w_mu)
    fc1w_sigma_param = softplus(pyro.param("fc1w_sigma", fc1w_sigma))
    fc1w_prior = Normal(loc=fc1w_mu_param, scale=fc1w_sigma_param)

    # first fully connected layer bias distribution priors
    fc1b_mu = torch.randn_like(net.fc1.bias)
    fc1b_sigma = torch.randn_like(net.fc1.bias)
    fc1b_mu_param = pyro.param("fc1b_mu", fc1b_mu)
    fc1b_sigma_param = softplus(pyro.param("fc1b_sigma", fc1b_sigma))
    fc1b_prior = Normal(loc=fc1b_mu_param, scale=fc1b_sigma_param)

    # second fully connected layer weight distribution priors
    fc2w_mu = torch.randn_like(net.fc2.weight)
    fc2w_sigma = torch.randn_like(net.fc2.weight)
    fc2w_mu_param = pyro.param("fc2w_mu", fc2w_mu)
    fc2w_sigma_param = softplus(pyro.param("fc2w_sigma", fc2w_sigma))
    fc2w_prior = Normal(loc=fc2w_mu_param, scale=fc2w_sigma_param)

    # output layer bias distribution priors
    fc2b_mu = torch.randn_like(net.fc2.bias)
    fc2b_sigma = torch.randn_like(net.fc2.bias)
    fc2b_mu_param = pyro.param("fc2b_mu", fc2b_mu)
    fc2b_sigma_param = softplus(pyro.param("fc2b_sigma", fc2b_sigma))
    fc2b_prior = Normal(loc=fc2b_mu_param, scale=fc2b_sigma_param)

    priors = {
        'embedding.weight': embedding_prior,
        'gru.weight_ih_l0': gruihw_prior,
        'gru.bias_ih_l0': gruihb_prior,
        'gru.weight_hh_l0': gruhhw_prior,
        'gru.bias_hh_l0': gruhhb_prior,
        'fc1.weight': fc1w_prior,
        'fc1.bias': fc1b_prior,
        'fc2.weight': fc2w_prior,
        'fc2.bias': fc2b_prior
    }

    lifted_module = pyro.random_module("module", net, priors)
    return lifted_module()
def reparameterize(self, mu, log_var):
    std = torch.exp(log_var / 2)
    eps = torch.randn_like(std)
    return mu + eps * std
def sample_prediction(self, x):
    mu, sigma = self(x)
    eps = torch.randn_like(sigma)
    return mu + sigma * eps
def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(mu)
    return mu + eps * std
def reparameterize(self, mu, log_var):
    std = torch.exp(log_var / 2)
    eps = torch.randn_like(std)
    z = mu + eps * std
    return z
def reparameterize(mean, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mean + eps * std
def reparameterize(self, mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return eps.mul(std).add_(mu)
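# A hedged companion sketch (not part of the snippets above): the analytic KL term
# that usually accompanies a reparameterize() like the ones above when (mu, logvar)
# parameterize q(z|x) against a standard normal prior. The function name is
# illustrative; torch is assumed to be imported.
def kl_standard_normal(mu, logvar):
    # 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar), summed over the latent dimension
    return 0.5 * torch.sum(torch.exp(logvar) + mu.pow(2) - 1.0 - logvar, dim=-1)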
def calc_msssim(n_images, model, checkpoint):
    '''Calculate image diversity with the MS-SSIM (multi-scale structural similarity)
    metric and reconstruction quality with RMSE. Reconstructions (x_r), samples (x_p).

    Args:
        n_images (int): number of images to generate to compare against real
        model (model): vae model to encode and decode image tensors
        checkpoint (str): stem for checkpoint file to load

    Returns:
        avg_msssim (float): average MS-SSIM score for generated images
            (smaller is more diverse)
        avg_rmse (float): average root mean square error for recon to real
            (smaller is more accurate)
    '''
    # dataloader; ToTensor normalizes to [0, 1]
    trans = transforms.ToTensor()
    dataset = datasets.CelebA(transforms=trans)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utils.load_checkpoint(f'checkpoints/{checkpoint}.pt', model)
    model = model.to(device)

    # loop over loader and calc ms-ssim and rmse for each batch until hits n_images
    image_count = 0
    m_scores = []
    rmse_scores = []
    while image_count < n_images:
        for images in dataloader:
            images = images.to(device)

            # real image recon
            z_mu, z_log_var = model.encoder(images)
            z = model.reparameterize(z_mu, z_log_var)
            x_r = model.decoder(z)

            # calc rmse: sum of all pixel loss per image
            rmse = torch.sqrt(F.mse_loss(x_r, images, reduction='sum')) / len(images)
            rmse_scores.append(rmse.item())

            # sampled
            z_p = torch.randn_like(z)
            x_p = model.decoder(z_p)

            # calc ms-ssim from recon to sampled; a good model has low similarity
            # between recon and sampled, which indicates it is not memorizing
            m = ms_ssim(x_r, x_p, data_range=1.0, size_average=True, win_size=7)
            m_scores.append(m.item())

            # up counter and stop once enough images have been scored
            image_count += len(images)
            if image_count >= n_images:
                break

    # averaged results
    avg_msssim = sum(m_scores) / len(m_scores)
    avg_rmse = sum(rmse_scores) / len(rmse_scores)

    # write
    output = {'checkpoint': checkpoint, 'ms_ssim': avg_msssim, 'avg_rmse': avg_rmse}
    Path(f'assets/{checkpoint}.yaml').write_text(yaml.dump(output))

    # return the averages as documented above
    return avg_msssim, avg_rmse
def optimize_network(args, model, y, mask, mode, **kwargs):
    assert mode in ['train', 'test']
    print(y.shape)

    # load appropriate hyper-parameters
    if mode == 'train':
        n_epochs = args['n_train_epochs']
        n_epochs = 4000
        batch_size = args['batch_size']
        param_init = args['latent_param_init']
    elif mode == 'test':
        n_epochs = args['n_test_epochs']
        if args.get('test_batch_size') is not None:
            batch_size = args['test_batch_size']
        else:
            batch_size = args['batch_size']
        param_init = args['test_latent_param_init']
        # n_epochs = 1
        # batch_size = 50

    print(f"Mode: {mode}")
    print(f"Batch size: {batch_size}")

    n_points = y.size()[0]

    # initialize latent variables
    if param_init == 'pca':
        pca = PCA(model.latent_size)
        pca.fit(y.cpu())
        latents = torch.tensor(pca.explained_variance_ratio_,
                               dtype=torch.float, device=args['device'])
        latents = latents.repeat(n_points, 1)
        print(latents.size())
    elif param_init == 'train':
        assert mode != 'train'
        print("Initializing latents using training latents as mean", file=sys.stderr)
        train_latents = kwargs['train_latents']
        train_means = torch.mean(train_latents, 0)
        train_std = torch.std(train_latents, 0)
        latents = torch.tensor(
            np.random.normal(train_means, train_std, size=(n_points, model.latent_size)),
            device=args['device'])
    else:
        latents = model.init_latents(n_points, args['device'], param_init)

    # latent parameters to update
    latents.requires_grad = True
    latent_params = [latents]
    if args['model'] == 'vae_free':
        # randomly init log_var
        latent_log_var = torch.randn_like(latents, device=args['device'])
        latent_log_var.requires_grad = True
        latent_params.append(latent_log_var)

    epoch = 0
    if mode == 'test':
        # freeze the network weights
        model.freeze_hiddens()

    if mode == 'train':
        lr = args['net_lr']
        latent_lr = args['latent_param_lr']
        if args['use_adam']:
            net_optimizer = optim.Adam(model.parameters(), lr=lr)
            latent_optimizer = optim.Adam(latent_params, lr=latent_lr)
        else:
            net_optimizer = optim.SGD(model.parameters(), lr=lr)
            latent_optimizer = optim.SGD(latent_params, lr=latent_lr)
        # for reduce lr on plateau
        net_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            net_optimizer, mode='min', factor=0.5, patience=10, verbose=True)
        latent_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            latent_optimizer, mode='min', factor=0.5, patience=10, verbose=True)
        optimizers = [net_optimizer, latent_optimizer]
        schedulers = [net_scheduler, latent_scheduler]
        print(f"Optimizer: {net_optimizer}, {latent_optimizer}", file=sys.stderr)
    elif mode == 'test':
        latent_lr = args['test_latent_param_lr']
        if args['use_adam']:
            optimizer = optim.Adam(latent_params, lr=latent_lr)
        else:
            optimizer = optim.SGD(latent_params, lr=latent_lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=10, verbose=True)
        optimizers = [optimizer]
        schedulers = [scheduler]
        print(f"Test optimizer: {optimizer}", file=sys.stderr)

    # start optimization loop
    start_time = time.time()
    losses = []
    while True:
        epoch += 1
        order = np.random.permutation(n_points)
        cumu_loss = 0
        cumu_total_loss = 0
        cumu_kl_loss = 0
        n_batches = n_points // batch_size
        # model.set_verbose(False)
        for i in range(n_batches):
            # model.zero_grad()
            for op in optimizers:
                op.zero_grad()
            # net_optimizer.zero_grad()
            # latent_optimizer.zero_grad()
            idxes = order[i * batch_size:(i + 1) * batch_size]
            if args['model'] == 'vae_free':
                pred_y = model(latents[idxes], latent_log_var[idxes])
            elif args['sfm_transform']:
                pred_y, transform_mat = model(latents[idxes])
            else:
                pred_y = model(latents[idxes])
            # model.set_verbose(False)
            masked_train = y[idxes] * mask[idxes]

            # loss with masking
            loss = torch_mse_mask(y[idxes], pred_y, mask[idxes])
            if args['kl']:
                if args['model'] == 'vae':
                    z_var = torch.full_like(latents[idxes], args['log_var'])
                    kl_loss = 0.5 * torch.sum(
                        torch.exp(z_var) + latents[idxes]**2 - 1. - z_var) / batch_size
                    total_loss = loss + args['ratio_kl'] * kl_loss
                elif args['model'] == 'vae_free':
                    kl_loss = 0.5 * torch.sum(
                        torch.exp(latent_log_var[idxes]) + latents[idxes]**2
                        - 1. - latent_log_var[idxes])
                    kl_loss /= batch_size
                    total_loss = loss + args['ratio_kl'] * kl_loss
                else:
                    raise NotImplementedError
            else:
                kl_loss = 0.
                total_loss = loss
            # loss = loss_fn(pred_y, train_y[idxes])
            # loss *= train_mask[idxes]

            cumu_total_loss += float(total_loss)
            cumu_loss += float(loss)
            cumu_kl_loss += float(kl_loss)

            total_loss.backward()
            for op in optimizers:
                op.step()
            # net_optimizer.step()
            # latent_optimizer.step()

        curr_time = time.time() - start_time
        avg_loss = cumu_loss / n_batches
        avg_kl_loss = cumu_kl_loss / n_batches
        avg_total_loss = cumu_total_loss / n_batches
        print(
            "Epoch {} - Average loss: {:.6f}, Cumulative loss: {:.6f}, "
            "KL loss: {:.6f}, Average total loss: {:.6f} ({:.2f} s)".format(
                epoch, avg_loss, cumu_loss, avg_kl_loss, avg_total_loss, curr_time),
            file=sys.stderr)
        losses.append([float(avg_loss), float(avg_kl_loss), float(avg_total_loss)])

        # early stopping etc.
        if epoch >= n_epochs:
            print("Max number of epochs reached!", file=sys.stderr)
            break

        if args.get('reduce', False):
            for sch in schedulers:
                sch.step(cumu_loss)
            # net_scheduler.step(cumu_loss)
            # latent_scheduler.step(cumu_loss)

        sys.stderr.flush()
        sys.stdout.flush()

    if mode == 'train':
        # return final latent variables, to possibly initialize during testing
        if args['model'] == 'vae_free':
            train_latents = latents, latent_log_var
        else:
            train_latents = latents
        return train_latents, losses
    elif mode == 'test':
        print("Final test loss: {}".format(losses[-1]), file=sys.stderr)

        # get final predictions to get loss wrt unmasked test data
        all_pred = []
        with torch.no_grad():
            idxes = np.arange(n_points)
            n_batches = math.ceil(n_points / batch_size)
            for i in range(n_batches):
                idx = idxes[i * batch_size:(i + 1) * batch_size]
                if args['model'] == 'vae_free':
                    pred_y = model(latents[idx], latent_log_var[idx])
                elif args['sfm_transform']:
                    pred_y, transform_mat = model(latents[idx])
                else:
                    pred_y = model(latents[idx])
                all_pred.append(pred_y)
        all_pred = torch.cat(all_pred, dim=0)

        if kwargs['clean_y'] is not None:
            clean_y = kwargs['clean_y']
            # final_test_loss = float(loss_fn(all_pred * test_mask, clean_y * test_mask))
            # final_clean_loss = float(loss_fn(all_pred, clean_y))
            final_test_loss = float(torch_mse_mask(clean_y, all_pred, mask))
            final_clean_loss = float(
                torch_mse_mask(clean_y, all_pred, torch.ones_like(all_pred)))
            print("Masked test loss: {}".format(final_test_loss), file=sys.stderr)
            print("Clean test loss: {}".format(final_clean_loss), file=sys.stderr)
            mse = torch.mean(torch.mean((all_pred - clean_y)**2, -1), -1)
            print("Manual calculation: {}".format(mse), file=sys.stderr)

        if args['model'] == 'vae_free':
            test_latents = latents, latent_log_var
        else:
            test_latents = latents
        return losses, (final_test_loss, final_clean_loss), all_pred, test_latents
def mix_match(X, U, eval_net, K, T, alpha, mixup_mode, aug_factor):
    # X is labeled data of size BATCH_SIZE, and U is unlabeled data.
    # X is a list of tuples (data, label), and U is a list of data,
    # where data and label are numpy arrays of shape (C, D, H, W).
    # C of data is 1 and C of label is 2 (one hot).
    b = len(X)

    # step 1: Augmentation
    X_cap = [(augmentation(x[0], aug_factor), x[1]) for x in X]  # shape unchanged
    # U_cap = [[augmentation(u, aug_factor) for i in range(K)] for u in U]
    # U_cap is a list (length b) of lists (length K)
    U = torch.from_numpy(np.array(U))  # [b, 1, D, H, W]
    if GPU:
        U = U.cuda()
    U_cap = U.repeat(K, 1, 1, 1, 1)  # [K*b, 1, D, H, W]
    U_cap += torch.clamp(torch.randn_like(U_cap) * 0.1, -0.2, 0.2)  # augmented

    # step 2: label guessing
    with torch.no_grad():
        Y_u = eval_net(U_cap)
        Y_u = F.softmax(Y_u, dim=1)
        guessed = torch.zeros(U.size()).repeat(1, 2, 1, 1, 1)  # empty label [b, 2, D, H, W]
        if GPU:
            guessed = guessed.cuda()
        for i in range(K):
            guessed += Y_u[i * b:(i + 1) * b]
        guessed /= K

        # sharpening
        guessed = guessed**(1 / T)
        guessed = guessed / guessed.sum(dim=1, keepdim=True)
        guessed = guessed.repeat(K, 1, 1, 1, 1)

    guessed = guessed.detach().cpu().numpy()  # shape [b, 2, D, H, W]
    U_cap = U_cap.detach().cpu().numpy()
    U_cap = list(zip(U_cap, guessed))
    # Now we have X_cap, a list of (data, label) of length b,
    # and U_cap, a list of (data, guessed_label) of length K*b.

    # step 3: MixUp (original paper method)
    x_mixup_mode, u_mixup_mode = mixup_mode[0], mixup_mode[1]
    W = X_cap + U_cap  # length = b + b*K
    random.shuffle(W)
    if x_mixup_mode == 'w':
        X_prime = [mix_up(X_cap[i], W[i], alpha) for i in range(b)]
    elif x_mixup_mode == 'x':
        idxs = np.random.permutation(range(b))
        X_prime = [mix_up(X_cap[i], X_cap[idxs[i]], alpha) for i in range(b)]
    elif x_mixup_mode == 'u':
        idxs = np.random.permutation(range(b * K))[:b]
        X_prime = [mix_up(X_cap[i], U_cap[idxs[i]], alpha) for i in range(b)]
    elif x_mixup_mode == '_':
        X_prime = X_cap
    else:
        raise ValueError('wrong mixup_mode')

    if u_mixup_mode == 'w':
        U_prime = [mix_up(U_cap[i], W[b + i], alpha) for i in range(b * K)]
    elif u_mixup_mode == 'x':
        idxs = np.random.permutation(range(b * K)) % b
        U_prime = [mix_up(U_cap[i], X_cap[idxs[i]], alpha) for i in range(b * K)]
    elif u_mixup_mode == 'u':
        idxs = np.random.permutation(range(b * K))
        U_prime = [mix_up(U_cap[i], U_cap[idxs[i]], alpha) for i in range(b * K)]
    elif u_mixup_mode == '_':
        U_prime = U_cap
    else:
        raise ValueError('wrong mixup_mode')

    # if DEBUG:
    #     save_as_image(np.array([x[0] for x in U_prime]), f"../debug_output/u_prime_data")
    #     save_as_image(np.array([x[1][[1], :, :, :] for x in U_prime]), f"../debug_output/u_prime_label")
    return X_prime, U_prime
""" This is getting started with PyTorch REF --- https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py """ from __future__ import print_function import torch print("x----Construct an empty matrix----x") x = torch.empty(5, 3) print(x) print("x----Construct a zero matrix----x") x = torch.zeros(5, 3, dtype=torch.long) print(x) print("x----Construct a tensor from data----x") x = torch.tensor([5.5, 3]) print(x) print("x----Create a tensor from a tensor----x") x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes print(x) x = torch.randn_like(x, dtype=torch.float) # override dtype! print(x) # result has the same size print("x----Get size----x") print(x.size())
def __call__(self, x):
    return x + torch.randn_like(x) * self.std
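# Minimal hedged sketch of the surrounding transform class assumed by the __call__
# above; the class name AddGaussianNoise and its constructor are illustrative, not
# from the original code.
class AddGaussianNoise:
    def __init__(self, std=0.1):
        self.std = std

    def __call__(self, x):
        # same body as the snippet above: additive Gaussian noise scaled by std
        return x + torch.randn_like(x) * self.std

noisy = AddGaussianNoise(std=0.05)(torch.ones(2, 3))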
def reparameterize(self, mu, logvar):  # done
    std = torch.exp(0.5 * logvar)
    u = torch.randn_like(std)
    return mu + u * std
def get_target_action(self, next_obs_batch: torch.Tensor) -> torch.Tensor:
    target_action = self.policy_target(next_obs_batch)
    noise = (torch.randn_like(target_action) * self.noise_std).clamp(
        -self.noise_clip, self.noise_clip)
    return target_action + noise
def attack(self, model: nn.Module, inputs: torch.Tensor,
           labels_true: torch.Tensor) -> torch.Tensor:
    # gaussian = GaussianBlurConv(channels=3).to(DEVICE)
    batch_size = inputs.shape[0]
    delta = torch.zeros_like(inputs, requires_grad=True)

    # setup optimizer
    optimizer = optim.SGD([delta], lr=1, momentum=self.momentum)

    # for choosing best results
    best_loss = 1e4 * torch.ones(inputs.size(0), dtype=torch.float, device=self.device)
    best_delta = torch.zeros_like(inputs)

    for step in range(self.steps):
        if self.max_norm:
            delta.data.clamp_(-self.max_norm, self.max_norm)
            if self.quantize:
                delta.data.mul_(self.levels - 1).round_().div_(self.levels - 1)
        adv = inputs + delta
        div_adv = self.input_diversity(adv, low=self.low)

        logits = model(div_adv)
        ce_loss_true = F.cross_entropy(logits, labels_true, reduction='none')
        # ce_loss_target = F.cross_entropy(logits, labels_target, reduction='none')
        loss = self.loss_amp - ce_loss_true
        # if self.loss_type == 'psnr':
        #     psnrloss = self.psnr(delta)
        #     loss -= 0.05*psnrloss
        #     if (step+1) % 5 == 0:
        #         print("step:", str(step), ": loss = ", str(torch.mean(loss).item()),
        #               ": lpipscore = ", str(psnrloss.item()),
        #               ": ce_loss_true = ", str(torch.mean(ce_loss_true).item()))
        # if self.need_fid == True:
        #     fid = calculate_fid_given_paths(adv, inputs, DEVICE, 2048)
        #     loss += 0.5*fid
        #     print(str(step), " fid:", str(fid.item()),
        #           "ce_loss_true", str(torch.mean(ce_loss_true).item()))
        # loss += max(lpipscore, 0.2)*10*self.loss_amp

        is_better = loss < best_loss
        best_loss[is_better] = loss[is_better]
        best_delta[is_better] = delta.data[is_better]

        loss = torch.mean(loss)
        optimizer.zero_grad()
        loss.backward()

        # renorm gradient
        grad_norms = delta.grad.view(batch_size, -1).norm(p=2, dim=1)
        delta.grad.div_(grad_norms.view(-1, 1, 1, 1))
        # avoid nan or inf if gradient is 0
        if (grad_norms == 0).any():
            delta.grad[grad_norms == 0] = torch.randn_like(delta.grad[grad_norms == 0])
        # if self.need_gaussianBlur:
        #     delta.data = gaussian(delta.data)
        optimizer.step()

        # avoid out of bound
        delta.data.add_(inputs)
        delta.data.clamp_(0, 1).sub_(inputs)

    if self.return_delta:
        return best_delta
    else:
        advs = inputs + best_delta
        return advs
def sample(self):
    self.sampled = self.mean + self.stddev * torch.randn_like(self.mean)
def calculate_loss(self, t1, t2, t3):
    ## Because the loss is based on variational inference, we need to
    ## draw samples from the variational distribution in order to estimate
    ## the loss function.

    ## sample a state at time t3
    t3_z_mu, t3_z_logsigma = self.b_to_z(self.b[:, t3, :])
    # t3_z_logsigma = torch.clamp(t3_z_logsigma, min=-20, max=20)
    t3_z_epsilon = torch.randn_like(t3_z_mu)
    t3_z = t3_z_mu + torch.exp(t3_z_logsigma) * t3_z_epsilon

    ## sample a state at time t2 (note that the reparametrization trick is used)
    t2_qs_z_mu, t2_qs_z_logsigma = self.infer_z(
        torch.cat(
            (t3_z.new_zeros(self.b[:, t2, :].size()), self.b[:, t2, :],
             self.b[:, t3, :], t3_z.new_zeros(t3_z.size()), t3_z),
            dim=-1))
    # t2_qs_z_logsigma = torch.clamp(t2_qs_z_logsigma, min=-20, max=20)
    t2_qs_z_epsilon = torch.randn_like(t2_qs_z_mu)
    t2_qs_z = t2_qs_z_mu + torch.exp(t2_qs_z_logsigma) * t2_qs_z_epsilon

    t2_z_mu, t2_z_logsigma = self.b_to_z(self.b[:, t2, :])
    # t2_z_logsigma = torch.clamp(t2_z_logsigma, min=-20, max=20)
    t2_z_epsilon = torch.randn_like(t2_z_mu)
    t2_z = t2_z_mu + torch.exp(t2_z_logsigma) * t2_z_epsilon

    ## sample a state at time t1
    ## infer the state at time t1 based on states at time t2
    t1_qs_z_mu, t1_qs_z_logsigma = self.infer_z(
        torch.cat((self.b[:, t1, :], self.b[:, t2, :], self.b[:, t3, :],
                   t2_z, t3_z), dim=-1))
    # t1_qs_z_logsigma = torch.clamp(t1_qs_z_logsigma, min=-20, max=20)
    t1_qs_z_epsilon = torch.randn_like(t1_qs_z_mu)
    t1_qs_z = t1_qs_z_mu + torch.exp(t1_qs_z_logsigma) * t1_qs_z_epsilon

    #### After sampling states z from the variational distribution, we can calculate
    #### the loss.

    ## state distribution at time t1 based on belief at time t1
    t1_pb_z_mu, t1_pb_z_logsigma = self.b_to_z(self.b[:, t1, :])
    # t1_pb_z_logsigma = torch.clamp(t1_pb_z_logsigma, min=-20, max=20)

    ## state distribution at time t2 based on the state at time t1 and the state transition
    t2_t_z_mu, t2_t_z_logsigma = self.transition_z(t1_qs_z)
    # t2_t_z_logsigma = torch.clamp(t2_t_z_logsigma, min=-20, max=20)

    ## state distribution at time t3 based on the states at times t1, t2 and the state transition
    t3_t_z_mu, t3_t_z_logsigma = self.transition_z(t2_qs_z)
    # t3_t_z_logsigma = torch.clamp(t3_t_z_logsigma, min=-20, max=20)

    ## observation distributions at times t3 and t2 based on the state at that time
    t3_x_prob = self.z_to_x(t3_z).view(self.batch_size, -1)  # + 1e-8
    t2_x_prob = self.z_to_x(t2_z).view(self.batch_size, -1)  # + 1e-8

    #### start calculating the loss

    #### KL divergence between the z distribution at time t1 based on the variational
    #### distribution (inference model) and the z distribution at time t1 based on belief.
    #### This divergence is between two normal distributions and can be calculated analytically.

    ## KL divergence between t1_l2_pb_z and t1_l2_qs_z
    # loss = 0.5*torch.sum(((t1_pb_z_mu - t1_qs_z)/torch.exp(t1_pb_z_logsigma))**2, -1) + \
    #        torch.sum(t1_pb_z_logsigma, -1) - torch.sum(t1_qs_z_logsigma, -1)
    loss = kl_div_gaussian(t1_qs_z_mu, t1_qs_z_logsigma,
                           t1_pb_z_mu, t1_pb_z_logsigma)  # .mean()
    a = kl_div_gaussian(t1_qs_z_mu, t1_qs_z_logsigma, t1_pb_z_mu, t1_pb_z_logsigma)
    print("kl loss 1: ", a)

    #### The following terms estimate the KL divergence between the z distribution at time t2
    #### based on the variational distribution (inference model) and the z distribution at
    #### time t2 based on transition. In contrast with the above KL divergence for the z
    #### distribution at time t1, this KL divergence cannot be calculated analytically because
    #### the transition distribution depends on z_t1, which is sampled after z_t2.
    #### Therefore, the KL divergence is estimated using samples.

    ## state log probability at time t2: variational vs. transition distribution
    # loss += torch.sum(-0.5*t2_z_epsilon**2 - 0.5*t2_z_epsilon.new_tensor(2*np.pi) - t2_z_logsigma, dim=-1)
    loss += kl_div_gaussian(t2_qs_z_mu, t2_qs_z_logsigma,
                            t2_t_z_mu, t2_t_z_logsigma)  # .mean()
    a = kl_div_gaussian(t2_qs_z_mu, t2_qs_z_logsigma, t2_t_z_mu, t2_t_z_logsigma)
    print("kl loss 2: ", a)

    ## state log probability at time t3: belief-based minus transition-based
    # loss += torch.sum(0.5*((t2_z - t2_t_z_mu)/torch.exp(t2_t_z_logsigma))**2 + 0.5*t2_z.new_tensor(2*np.pi) + t2_t_z_logsigma, -1)
    loss += gaussian_log_prob(t3_z_mu, t3_z_logsigma, t3_z)  # .mean()
    a = gaussian_log_prob(t3_z_mu, t3_z_logsigma, t3_z)
    print("gaussian loss 1: ", a)

    loss += -gaussian_log_prob(t3_t_z_mu, t3_t_z_logsigma, t3_z)  # .mean()
    a = gaussian_log_prob(t3_t_z_mu, t3_t_z_logsigma, t3_z)
    print("gaussian loss 2: ", a)

    # loss -= F.binary_cross_entropy(t3_x_prob, self.x[:, t3, :])

    ## observation (reconstruction) losses at times t3 and t2
    # print("self.x size: ", self.x.size())
    self.x = self.x.view(self.batch_size, 20, -1)
    loss += torch.sum((self.x[:, t3, :] - t3_x_prob)**2, dim=-1)
    a = torch.sum((self.x[:, t3, :] - t3_x_prob)**2, dim=-1)
    print("reconstruct loss 1", a)
    loss += torch.sum((self.x[:, t2, :] - t2_x_prob)**2, dim=-1)
    a = torch.sum((self.x[:, t2, :] - t2_x_prob)**2, dim=-1)
    print("reconstruct loss 2", a)

    # loss += -torch.sum(self.x[:, t3, :]*torch.log(t3_x_prob) + (1 - self.x[:, t3, :])*torch.log(1 - t3_x_prob), -1)
    # loss += -torch.sum(self.x[:, t2, :]*torch.log(t2_x_prob) + (1 - self.x[:, t2, :])*torch.log(1 - t2_x_prob), -1)
    # loss += F.binary_cross_entropy(t3_x_prob, self.x[:, t3, :], reduction='sum') / self.batch_size

    loss = torch.mean(loss)
    return loss
def default_target_policy_smoothing_func(batch_action):
    """Add noise to actions for target policy smoothing."""
    noise = torch.clamp(0.2 * torch.randn_like(batch_action), -0.5, 0.5)
    return torch.clamp(batch_action + noise, -1, 1)
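# Hedged usage sketch with illustrative tensors only (torch assumed imported):
# smoothing keeps the target action inside the clamped [-1, 1] range used above.
batch_action = torch.rand(8, 4) * 2 - 1  # hypothetical action batch in [-1, 1]
smoothed = default_target_policy_smoothing_func(batch_action)
assert smoothed.min() >= -1 and smoothed.max() <= 1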
def reparameterize(log_mix, mean, log_std):
    epsilon = torch.randn_like(log_std.exp())
    recon = torch.sum(log_mix.exp() * (mean + log_std.exp() * epsilon), dim=1)
    recon = recon.view(-1, 32)
    return recon
def __init__(self, root: str, normal_class: int = 0, data_augmentation: bool = False,
             normalize: bool = False, outlier_exposure: bool = False,
             oe_n_classes: int = 100, seed: int = 0):
    super().__init__(root)

    self.image_size = (3, 32, 32)
    self.n_classes = 2  # 0: normal, 1: outlier
    self.shuffle = True
    random.seed(seed)  # set seed

    if outlier_exposure:
        self.normal_classes = None
        self.outlier_classes = list(range(0, 100))
        self.known_outlier_classes = tuple(
            random.sample(self.outlier_classes, oe_n_classes))
    else:
        # Define normal and outlier classes
        self.normal_classes = tuple([normal_class])
        self.outlier_classes = list(range(0, 100))
        self.outlier_classes.remove(normal_class)
        self.outlier_classes = tuple(self.outlier_classes)

    # CIFAR-100 preprocessing: feature scaling to [0, 1], data normalization, and data augmentation
    train_transform = []
    test_transform = []
    if data_augmentation:
        # only augment training data
        train_transform += [
            transforms.ColorJitter(brightness=0.01, contrast=0.01, saturation=0.01, hue=0.01),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomCrop(32, padding=4)
        ]
    train_transform += [transforms.ToTensor()]
    test_transform += [transforms.ToTensor()]
    if data_augmentation:
        train_transform += [
            transforms.Lambda(lambda x: x + 0.001 * torch.randn_like(x))
        ]
    if normalize:
        train_transform += [
            transforms.Normalize((0.491373, 0.482353, 0.446667),
                                 (0.247059, 0.243529, 0.261569))
        ]
        test_transform += [
            transforms.Normalize((0.491373, 0.482353, 0.446667),
                                 (0.247059, 0.243529, 0.261569))
        ]
    train_transform = transforms.Compose(train_transform)
    test_transform = transforms.Compose(test_transform)

    target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))

    # Get train set
    train_set = MyCIFAR100(root=self.root, train=True, transform=train_transform,
                           target_transform=target_transform, download=True)

    if outlier_exposure:
        idx = np.argwhere(
            np.isin(np.array(train_set.targets), self.known_outlier_classes))
        idx = idx.flatten().tolist()
        train_set.semi_targets[idx] = -1 * torch.ones(len(idx)).long()  # set outlier exposure labels

        # Subset train_set to selected classes
        self.train_set = Subset(train_set, idx)
        self.train_set.shuffle_idxs = False
        self.test_set = None
    else:
        # Subset train_set to normal_classes
        idx = np.argwhere(np.isin(np.array(train_set.targets), self.normal_classes))
        idx = idx.flatten().tolist()
        train_set.semi_targets[idx] = torch.zeros(len(idx)).long()
        self.train_set = Subset(train_set, idx)

        # Get test set
        self.test_set = MyCIFAR100(root=self.root, train=False, transform=test_transform,
                                   target_transform=target_transform, download=True)
def norm_one_gaussian(in_tensor):
    tensor_size = torch.numel(in_tensor)
    return torch.randn_like(in_tensor) / np.sqrt(tensor_size)
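# Hedged usage sketch: because each of the numel entries has variance 1/numel, the
# returned noise tensor has an expected L2 norm of roughly 1 (torch assumed imported).
probe = torch.zeros(64, 64)
noise = norm_one_gaussian(probe)
print(noise.shape, noise.norm().item())  # the norm fluctuates around 1.0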
def simple_backward_setup(output, seed=None):
    assert isinstance(output, torch.Tensor)
    if seed:
        torch.manual_seed(seed)
    grad_output = torch.randn_like(output)
    return output, grad_output
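# Hedged usage sketch: the returned (output, grad_output) pair can be fed straight
# into autograd, assuming `output` came from a differentiable computation.
x = torch.randn(3, 5, requires_grad=True)
out, grad_out = simple_backward_setup(x.exp(), seed=42)
out.backward(grad_out)
print(x.grad.shape)  # same shape as x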
def reparameterize(self, mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return eps.mul(std).add_(mu)
def reparameterize(self, mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + std * eps
def do_train(self, paths, dataset, optimiser, epochs, batch_size, step,
             lr=1e-4, valid_index=[], use_half=False, do_clip=False):
    if use_half:
        import apex
        optimiser = apex.fp16_utils.FP16_Optimizer(optimiser, dynamic_loss_scale=True)
    for p in optimiser.param_groups:
        p['lr'] = lr
    criterion = nn.NLLLoss().cuda()
    k = 0
    saved_k = 0
    pad_left = self.pad_left()
    pad_left_encoder = self.pad_left_encoder()
    pad_left_decoder = self.pad_left_decoder()
    if self.noise_x:
        extra_pad_right = 127
    else:
        extra_pad_right = 0
    pad_right = self.pad_right() + extra_pad_right
    window = 16 * self.total_scale()

    logger.log(
        f'pad_left={pad_left_encoder}|{pad_left_decoder}, pad_right={pad_right}, '
        f'total_scale={self.total_scale()}')

    for e in range(epochs):
        trn_loader = DataLoader(
            dataset,
            collate_fn=lambda batch: env.collate_samples(pad_left, window, pad_right, batch),
            batch_size=16,
            num_workers=0,
            shuffle=True,
            pin_memory=True)

        start = time.time()
        running_loss_c = 0.
        running_loss_f = 0.
        running_loss_vq = 0.
        running_loss_vqc = 0.
        running_entropy = 0.
        running_max_grad = 0.
        running_loss_ce_label = 0.
        running_max_grad_name = ""

        iters = len(trn_loader)

        # enumerate mfcc, mel, quant for search, mfcc for query, and label
        # search_wave16 : quant
        for i, (search_wave16, search_mel16, query_mfcc16, label) in enumerate(trn_loader):
            search_wave16 = search_wave16.cuda()
            search_mel16 = search_mel16.cuda()
            query_mfcc16 = query_mfcc16.cuda()
            label = label.cuda()

            coarse = (search_wave16 + 2**15) // 256
            fine = (search_wave16 + 2**15) % 256
            coarse_f = coarse.float() / 127.5 - 1.
            fine_f = fine.float() / 127.5 - 1.
            total_f = (search_wave16.float() + 0.5) / 32767.5

            if self.noise_y:
                noisy_f = total_f * (0.02 * torch.randn(total_f.size(0), 1).cuda()).exp() \
                    + 0.003 * torch.randn_like(total_f)
            else:
                noisy_f = total_f

            if use_half:
                coarse_f = coarse_f.half()
                fine_f = fine_f.half()
                noisy_f = noisy_f.half()

            x = torch.cat([
                coarse_f[:, pad_left - pad_left_decoder:-pad_right].unsqueeze(-1),
                fine_f[:, pad_left - pad_left_decoder:-pad_right].unsqueeze(-1),
                coarse_f[:, pad_left - pad_left_decoder + 1:1 - pad_right].unsqueeze(-1),
            ], dim=2)
            y_coarse = coarse[:, pad_left + 1:1 - pad_right]
            y_fine = fine[:, pad_left + 1:1 - pad_right]

            if self.noise_x:
                # Randomly translate the input to the encoder to encourage
                # translational invariance
                total_len = coarse_f.size(1)
                translated = []
                for j in range(coarse_f.size(0)):
                    shift = random.randrange(256) - 128
                    translated.append(
                        noisy_f[j, pad_left - pad_left_encoder + shift:
                                total_len - extra_pad_right + shift])
                translated = torch.stack(translated, dim=0)
            else:
                translated = noisy_f[:, pad_left - pad_left_encoder:]

            p_cf, vq_pen, encoder_pen, entropy, prediction = self.forward(
                x, translated, search_mel16, query_mfcc16, label)
            p_c, p_f = p_cf
            loss_c = criterion(p_c.transpose(1, 2).float(), y_coarse)
            loss_f = criterion(p_f.transpose(1, 2).float(), y_fine)
            # BCE on the sigmoid of the label prediction; the original line constructed
            # the modules incorrectly as nn.BCELoss(nn.Sigmoid(prediction), label)
            ce_loss = nn.BCELoss()(torch.sigmoid(prediction), label)
            encoder_weight = 0.01 * min(1, max(0.1, step / 1000 - 1))
            loss = loss_c + loss_f + vq_pen + encoder_weight * encoder_pen + ce_loss

            optimiser.zero_grad()
            if use_half:
                optimiser.backward(loss)
                if do_clip:
                    raise RuntimeError("clipping in half precision is not implemented yet")
            else:
                loss.backward()
                if do_clip:
                    max_grad = 0
                    max_grad_name = ""
                    for name, param in self.named_parameters():
                        if param.grad is not None:
                            param_max_grad = param.grad.data.abs().max()
                            if param_max_grad > max_grad:
                                max_grad = param_max_grad
                                max_grad_name = name
                            if 1000000 < param_max_grad:
                                logger.log(
                                    f'Very large gradient at {name}: {param_max_grad}')
                    if 100 < max_grad:
                        for param in self.parameters():
                            if param.grad is not None:
                                if 1000000 < max_grad:
                                    param.grad.data.zero_()
                                else:
                                    param.grad.data.mul_(100 / max_grad)
                    if running_max_grad < max_grad:
                        running_max_grad = max_grad
                        running_max_grad_name = max_grad_name
                    if 100000 < max_grad:
                        torch.save(self.state_dict(), "bad_model.pyt")
                        raise RuntimeError(
                            "Aborting due to crazy gradient (model saved to bad_model.pyt)")
            optimiser.step()

            running_loss_c += loss_c.item()
            running_loss_f += loss_f.item()
            running_loss_vq += vq_pen.item()
            running_loss_vqc += encoder_pen.item()
            running_entropy += entropy
            running_loss_ce_label += ce_loss.item()

            self.after_update()

            speed = (i + 1) / (time.time() - start)
            avg_loss_c = running_loss_c / (i + 1)
            avg_loss_f = running_loss_f / (i + 1)
            avg_loss_vq = running_loss_vq / (i + 1)
            avg_loss_vqc = running_loss_vqc / (i + 1)
            avg_entropy = running_entropy / (i + 1)
            avg_loss_ce = running_loss_ce_label / (i + 1)

            step += 1
            k = step // 1000

            # track cross entropy loss as well
            logger.status(
                f'Epoch: {e + 1}/{epochs} -- Batch: {i + 1}/{iters} -- Loss: c={avg_loss_c:#.4} '
                f'ce_label_loss={avg_loss_ce:#.4} f={avg_loss_f:#.4} vq={avg_loss_vq:#.4} '
                f'vqc={avg_loss_vqc:#.4} -- Entropy: {avg_entropy:#.4} -- Grad: '
                f'{running_max_grad:#.1} {running_max_grad_name} Speed: {speed:#.4} steps/sec -- Step: {k}k ')

        os.makedirs(paths.checkpoint_dir, exist_ok=True)
        torch.save(self.state_dict(), paths.model_path())
        np.save(paths.step_path(), step)
        logger.log_current_status()
        logger.log(f' <saved>; w[0][0] = {self.overtone.wavernn.gru.weight_ih_l0[0][0]}')
        if k > saved_k + 50:
            torch.save(self.state_dict(), paths.model_hist_path(step))
            saved_k = k
            self.do_generate(paths, step, optimiser, dataset.path, valid_index)
def safe_jac(cx):
    jac = torch.autograd.functional.jacobian(func, cx)
    jac = torch.randn_like(jac) * 1e-7 + jac
    return jac
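# Hedged context sketch: safe_jac presumably closes over a callable named `func`
# (the mapping being differentiated); the tiny randn_like jitter keeps downstream
# factorizations from choking on an exactly singular Jacobian. Illustrative use with
# a hypothetical func (torch assumed imported):
def func(x):
    return torch.stack([x[0] * x[1], x[0] ** 2])

x0 = torch.tensor([1.0, 2.0])
jac = safe_jac(x0)  # 2x2 Jacobian of func at x0, plus ~1e-7 Gaussian jitter
print(jac)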
###############################################################
# Construct a tensor directly from data:

x = torch.tensor([5.5, 3])
print(x)

###############################################################
# or create a tensor based on an existing tensor. These methods
# will reuse properties of the input tensor, e.g. dtype, unless
# new values are provided by user

x = x.new_ones(5, 3, dtype=torch.double)    # new_* methods take in sizes
print(x)

x = torch.randn_like(x, dtype=torch.float)  # override dtype!
print(x)                                    # result has the same size

###############################################################
# Get its size:

print(x.size())

###############################################################
# .. note::
#     ``torch.Size`` is in fact a tuple, so it supports all tuple operations.
#
# Operations
# ^^^^^^^^^^
# There are multiple syntaxes for operations. In the following
# example, we will take a look at the addition operation.
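###############################################################
# A brief continuation sketch (not part of the original tutorial excerpt): the two
# basic addition syntaxes that the comment above refers to, using x from above.

y = torch.rand(5, 3)
print(x + y)            # operator syntax
print(torch.add(x, y))  # functional syntax, same result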
        exp.append_episode(*ret, policy_params=params_)
        exp.save(results_filename)

        if it < n_rnd - 1:
            continue

        ps_it = it - n_rnd + 1

        def on_iteration(i, loss, states, actions, rewards, discount):
            writer.add_scalar('mc_pilco/episode_%d/training loss' % ps_it, loss, i)
            if i % 100 == 0:
                states = states.transpose(0, 1).cpu().detach().numpy()
                actions = actions.transpose(0, 1).cpu().detach().numpy()
                rewards = rewards.transpose(0, 1).cpu().detach().numpy()
                utils.plot_trajectories(states, actions, rewards, plot_samples=False)

        # train agent
        agent.fit(exp, H, 120, batch_size=N_particles)

        # plot rollout
        x0 = torch.tensor(exp.sample_states(N_particles, timestep=0)).to(
            agent.dyn.X.device).float()
        x0 = x0 + 1e-1 * x0.std(0) * torch.randn_like(x0)
        x0 = x0.detach()
        utils.plot_rollout(x0, agent.dyn, agent.actor_target, H)
        writer.add_scalar('robot/evaluation_loss', torch.tensor(ret[2]).sum(), ps_it + 1)
# fig, ax = plt.subplots(1, 2, figsize=(10, 5))
# ax[0].imshow(np.angle(psi_model[0].cpu()))
# ax[1].imshow(np.abs(psi_model[0].cpu()))
# plt.show()

# %%
Ap = Ap0.cuda()
q = q.cuda()

# %%
T = T1.unsqueeze(0).cuda()
r = th.from_numpy(r1).cuda()
r[1:] += th.randn_like(r[1:]) * 3
r[r < 0] = 0
r[r > 30] = 30
psi_model = psi_model.unsqueeze(0)

a_target = A(T, psi_model, r)
I_target = a_target**2

# %%
print(f'psi_model norm: {th.norm(psi_model)**2}')
print(f'I_target norm: {th.sum(I_target[0])}')

# %%
f, ax = plt.subplots()
ax.imshow(a_target[2].cpu())
plt.show()

# %%
plotmosaic(fftshift(a_target.cpu().numpy(), (1, 2)), cmap='viridis')
def reparameterize(self, mu, std):
    eps = torch.randn_like(std)
    return eps.mul(std).add_(mu)
def test_symeig(self):
    lazy_tensor = self.create_lazy_tensor().detach().requires_grad_(True)
    lazy_tensor_copy = lazy_tensor.clone().detach().requires_grad_(True)
    evaluated = self.evaluate_lazy_tensor(lazy_tensor_copy)

    # Perform forward pass
    evals_unsorted, evecs_unsorted = lazy_tensor.symeig(eigenvectors=True)
    evecs_unsorted = evecs_unsorted.evaluate()

    # since LazyTensor.symeig does not sort evals, we do this here for the check
    evals, idxr = torch.sort(evals_unsorted, dim=-1, descending=False)
    evecs = torch.gather(evecs_unsorted, dim=-1,
                         index=idxr.unsqueeze(-2).expand(evecs_unsorted.shape))

    evals_actual, evecs_actual = torch.symeig(evaluated.double(), eigenvectors=True)
    evals_actual = evals_actual.to(dtype=evaluated.dtype)
    evecs_actual = evecs_actual.to(dtype=evaluated.dtype)

    # Check forward pass
    self.assertAllClose(evals, evals_actual, rtol=1e-4, atol=1e-3)
    lt_from_eigendecomp = evecs @ torch.diag_embed(evals) @ evecs.transpose(-1, -2)
    self.assertAllClose(lt_from_eigendecomp, evaluated, rtol=1e-4, atol=1e-3)

    # if there are repeated evals, we'll skip checking the eigenvectors for those
    any_evals_repeated = False
    evecs_abs, evecs_actual_abs = evecs.abs(), evecs_actual.abs()
    for idx in itertools.product(*[range(b) for b in evals_actual.shape[:-1]]):
        eval_i = evals_actual[idx]
        # detach to avoid pytorch/pytorch#41389
        if torch.unique(eval_i.detach()).shape[-1] == eval_i.shape[-1]:
            self.assertAllClose(evecs_abs[idx], evecs_actual_abs[idx], rtol=1e-4, atol=1e-3)
        else:
            any_evals_repeated = True

    # Perform backward pass
    symeig_grad = torch.randn_like(evals)
    ((evals * symeig_grad).sum()).backward()
    ((evals_actual * symeig_grad).sum()).backward()

    # Check grads if there were no repeated evals
    if not any_evals_repeated:
        for arg, arg_copy in zip(lazy_tensor.representation(),
                                 lazy_tensor_copy.representation()):
            if arg_copy.requires_grad and arg_copy.is_leaf and arg_copy.grad is not None:
                self.assertAllClose(arg.grad, arg_copy.grad, rtol=1e-4, atol=1e-3)

    # Test with eigenvectors=False
    _, evecs = lazy_tensor.symeig(eigenvectors=False)
    self.assertIsNone(evecs)
def reparameterization(mu, logvar):
    std = torch.exp(logvar / 2)
    eps = torch.randn_like(std)
    z = mu + std * eps
    return z
def test_cuda(self, test_case):
    if not TEST_CUDA or not self.should_test_cuda:
        raise unittest.SkipTest('Excluded from CUDA tests')
    try:
        cpu_input = self._get_input()
        type_map = {'torch.DoubleTensor': torch.cuda.FloatTensor}
        gpu_input = to_gpu(cpu_input, type_map=type_map)

        cpu_module = self.constructor(*self.constructor_args)
        gpu_module = self.constructor(*self.constructor_args).float().cuda()
        cpu_param = test_case._get_parameters(cpu_module)
        gpu_param = test_case._get_parameters(gpu_module)
        for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
            gpu_p.data.copy_(cpu_p)

        test_case._zero_grad_input(cpu_input)
        test_case._zero_grad_input(gpu_input)
        test_case._zero_grad_parameters(cpu_module)
        test_case._zero_grad_parameters(gpu_module)
        cpu_output = test_case._forward(cpu_module, cpu_input)
        gpu_output = test_case._forward(gpu_module, gpu_input)
        test_case.assertEqual(cpu_output, gpu_output, self.precision)

        # Run backwards on CPU and GPU and compare results
        for i in range(5):
            cpu_gradOutput = cpu_output.clone().normal_()
            gpu_gradOutput = cpu_gradOutput.type('torch.cuda.FloatTensor')
            cpu_gradInput = test_case._backward(cpu_module, cpu_input, cpu_output, cpu_gradOutput)
            gpu_gradInput = test_case._backward(gpu_module, gpu_input, gpu_output, gpu_gradOutput)
            test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
            for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
                test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)

        # Run double-backwards on CPU and GPU and compare results
        if self.check_gradgrad and not self.FIXME_no_cuda_gradgrad_comparison:
            cpu_output = cpu_module(cpu_input)
            gpu_output = gpu_module(gpu_input)

            cpu_gradOutput = torch.randn_like(cpu_output, requires_grad=True)
            gpu_gradOutput = cpu_gradOutput.type_as(gpu_output).detach()
            gpu_gradOutput.requires_grad = True

            cpu_gradInputs = torch.autograd.grad(
                cpu_output,
                (cpu_input,) + tuple(cpu_module.parameters()),
                cpu_gradOutput,
                create_graph=True)
            gpu_gradInputs = torch.autograd.grad(
                gpu_output,
                (gpu_input,) + tuple(gpu_module.parameters()),
                gpu_gradOutput,
                create_graph=True)

            for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
                test_case.assertEqual(cpu_d_i, gpu_d_i, self.precision)

            # We mix output into the second backwards computation so that
            # torch.autograd.grad doesn't complain that some inputs
            # are unreachable (which can happen if you differentiate
            # only on the gradient).
            cpu_gg = torch.autograd.grad(
                cpu_output.sum() + sum(map(lambda x: x.sum(), cpu_gradInputs)),
                (cpu_input, cpu_gradOutput) + tuple(cpu_module.parameters()),
                retain_graph=True)
            gpu_gg = torch.autograd.grad(
                gpu_output.sum() + sum(map(lambda x: x.sum(), gpu_gradInputs)),
                (gpu_input, gpu_gradOutput) + tuple(gpu_module.parameters()),
                retain_graph=True)

            test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
            for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
                test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)

        self.test_noncontig(test_case, gpu_module, gpu_input)
    except NotImplementedError:
        pass
    # TODO: remove this after CUDA scatter_ is implemented
    except AttributeError as e:
        if len(e.args) == 1 and "'FloatTensor' object has no attribute 'scatter_'" in e.args[0]:
            pass
        else:
            raise
def __init__(self, mu, sigma):
    super().__init__()
    self.d = torch.distributions.Normal(mu, sigma)
    self.x = geoopt.ManifoldParameter(torch.randn_like(mu), manifold=geoopt.Stiefel())
def get_probe(self, real_data, fake_data):
    return real_data + self.scale * torch.randn_like(real_data)
def __init__(self, mu, sigma):
    super().__init__()
    self.d = torch.distributions.Normal(mu, sigma)
    self.x = torch.nn.Parameter(torch.randn_like(mu))
""" Basic tensor creation """ x = torch.empty(5, 3) x = torch.rand(5, 3) x = torch.zeros(5, 3, dtype=torch.long) ## Construct Tensor from data x = torch.tensor([5.5, 3]) #or create a tensor based on an existing tensor. #These methods will reuse properties of the input tensor, e.g. dtype, # unless new values are provided by user x = torch.randn_like(x, dtype=torch.float) print(x) ## Size is a tuple x = torch.rand(5, 3) print(x.size()); print (type(x.size())) """ #################### OPERATIONS ######################## Operations There are multiple syntaxes for operations. In the following example, we will take a look at the addition operation. """
        content_image_list.append(file)

vgg_style_losses = np.load('vgg_style_losses.npy')
vgg_style_losses = vgg_style_losses[()]

each_style_loss = np.zeros((len(style_image_list), len(content_image_list)))

for i_style in range(len(style_image_list)):
    for i_content in range(len(content_image_list)):
        print('processing content', i_content, ' style ', i_style)
        style = load_image(style_image_list[i_style]).to(device)
        content = load_image(content_image_list[i_content]).to(device)

        if TARGET_SOURCE == 'random':
            target = torch.randn_like(content).requires_grad_(True).to(device)
        elif TARGET_SOURCE == 'content':
            target = content.clone().requires_grad_(True).to(device)

        if ARCHITECTURE == 'vgg19_adding_conv1x1_h':
            style_weights = {'conv1_1': 1.0, 'conv2_2': 1.0, 'conv3_4': 1.0,
                             'conv4_4': 1.0, 'conv5_4': 1.0}
        elif ARCHITECTURE == 'vgg19_adding_conv1x1_h_removing_1conv3x3':
            style_weights = {'conv1_1': 1.0, 'conv2_2': 1.0, 'conv3_4': 1.0,
                             'conv4_4': 1.0, 'conv5_4': 1.0}