def setUp(self, length=3, factor=10, count=1000000, seed=None,
          dtype=torch.float64, device=None):
    '''Set up the test values.

    Args:
        length: Size of the vector.
        factor: Scale factor applied to the mean and standard deviation.
        count: Number of samples for the Monte-Carlo estimation.
        seed: Seed for the random number generator.
        dtype: The data type.
        device: The device on which tensors are allocated.
    '''
    if seed is not None:
        torch.manual_seed(seed)
    # variables
    self.A = torch.randn(length, length, dtype=dtype, device=device)
    self.b = torch.randn(length, dtype=dtype, device=device)
    # input mean and covariance
    self.mu = torch.randn(length, dtype=dtype, device=device) * factor
    self.cov = rand.definite(length, dtype=dtype, device=device,
                             positive=True, semi=False, norm=factor**2)
    # Monte-Carlo estimation of the output mean and variance
    normal = torch.distributions.MultivariateNormal(self.mu, self.cov)
    samples = normal.sample((count,))
    out_samples = samples.matmul(self.A.t()) + self.b
    self.mc_mu = torch.mean(out_samples, dim=0)
    self.mc_var = torch.var(out_samples, dim=0)
    self.mc_cov = cov(out_samples)
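# For reference, the Monte-Carlo moments estimated in setUp can be checked
# against the closed-form moments of a linear transform of a Gaussian:
# for y = A x + b with x ~ N(mu, cov), E[y] = A mu + b and Cov[y] = A cov A^T.
# A minimal standalone sketch of that check (sizes and tolerance are illustrative):
import torch

length, count = 3, 1000000
A = torch.randn(length, length, dtype=torch.float64)
b = torch.randn(length, dtype=torch.float64)
mu = torch.randn(length, dtype=torch.float64)
cov_in = torch.eye(length, dtype=torch.float64)  # any SPD matrix works
samples = torch.distributions.MultivariateNormal(mu, cov_in).sample((count,))
out = samples.matmul(A.t()) + b
assert torch.allclose(out.mean(dim=0), A.matmul(mu) + b, atol=1e-2)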
def expected(m, v):
    if v.dim() != 2:
        v = v.diag()
    normal = torch.distributions.MultivariateNormal(m, v)
    samples = normal.sample((type(self).count,)).clamp(min=0)
    mu = torch.mean(samples, dim=0) if args['mean'] else None
    var = torch.var(samples, dim=0) if variance else None
    cov = compute_cov(samples) if covariance else None
    return tuple(r for r in (mu, var, cov) if r is not None)
def forward(self, x):
    n = x.size(2) * x.size(3)
    t = x.view(x.size(0), x.size(1), n)
    # a second unsqueeze is needed so the (B, C) statistics broadcast over H and W
    mean = torch.mean(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x)
    # Calculate the biased var. torch.var returns the unbiased var.
    var = torch.var(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x) * ((n - 1) / float(n))
    scale_broadcast = self.weight.unsqueeze(1).unsqueeze(1).unsqueeze(0)
    scale_broadcast = scale_broadcast.expand_as(x)
    shift_broadcast = self.bias.unsqueeze(1).unsqueeze(1).unsqueeze(0)
    shift_broadcast = shift_broadcast.expand_as(x)
    out = (x - mean) / torch.sqrt(var + self.eps)
    out = out * scale_broadcast + shift_broadcast
    return out
def _run_forward(x, scale, shift, eps):
    # since we hand-roll instance norm, it doesn't perform well entirely in fp16
    n = x.size(2) * x.size(3)
    t = x.view(x.size(0), x.size(1), n)
    mean = torch.mean(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x)
    # Calculate the biased var. torch.var returns the unbiased var.
    var = torch.var(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x) * ((float(n) - 1) / float(n))
    scale_broadcast = scale.unsqueeze(1).unsqueeze(1).unsqueeze(0)
    scale_broadcast = scale_broadcast.expand_as(x)
    shift_broadcast = shift.unsqueeze(1).unsqueeze(1).unsqueeze(0)
    shift_broadcast = shift_broadcast.expand_as(x)
    out = (x - mean) / torch.sqrt(var + eps)
    out = out * scale_broadcast + shift_broadcast
    return out
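# The two hand-rolled forwards above implement instance normalization. As a
# sanity check, they should agree with torch.nn.functional.instance_norm when
# the same eps is used. A minimal sketch, assuming _run_forward above is in
# scope and affine parameters of shape (C,):
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8, 8)
scale, shift = torch.ones(3), torch.zeros(3)
ref = F.instance_norm(x, weight=scale, bias=shift, eps=1e-5)
out = _run_forward(x, scale, shift, eps=1e-5)
print(torch.allclose(out, ref, atol=1e-5))  # expected: True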
def _energy_variance(self, pos):
    el = self.local_energy(pos)
    return torch.mean(el), torch.var(el)
# data = pp[:, 0, :, :].view(2000, patch_size * patch_size)
if Print:
    print('input data: ', data.shape)
train_data = data.view(100, 20, patch_size * patch_size)

# Start training
start_time = time.process_time()
model = fit_AE(train_data)
print("Processing time = ", time.process_time() - start_time, " s")
print("Finishing ...")

data_test = pp[:, 1, :, :].view(2000, patch_size * patch_size)
average = torch.mean(data_test[0])
sigma = torch.var(data_test[0])  # note: torch.var returns the variance, not the standard deviation
print('average: ', average)
print('sigma: ', sigma)

out, aver = model(data_test[0])
print('output average: ', aver)
print('output: ', out.shape)
if visualize:
    Visualization(data_test[0].view(patch_size, patch_size),
                  out.view(patch_size, patch_size))
# -------------------------------------------------------------------------------------------
def forward(self, X):
    mu = torch.mean(X, dim=2)
    var = torch.var(X, dim=2)
    X_norm = torch.div(X - mu.view(X.shape[0], X.shape[1], 1),
                       torch.sqrt(var.view(X.shape[0], X.shape[1], 1) + 1e-8))
    out = self.gamma * X_norm + self.beta
    return out
def insertion_and_train(ins):
    model = myModel(ins, args.num_embeddings, args.embedding_dim,
                    commitment_cost, decay).to(device)
    # optimizer = optim.SGD(model.parameters(), args.learning_rate,
    #                       momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(model.parameters(), args.learning_rate)

    epochs_train_res_recon_error = []
    epochs_train_res_perplexity = []
    epochs_train_res_classif_loss = []
    epochs_train_res_vq_loss = []
    epochs_Acc1 = []
    epochs_Acc5 = []
    train_epochs_Acc1 = []
    train_epochs_Acc5 = []

    with torch.no_grad():
        vgg_model = model_vgg_cut(model.is_compression).to(device)

        ### Whitening on the training set
        for i, (images, target) in enumerate(
                DataLoader(training_data, batch_size=256, shuffle=True,
                           pin_memory=True)):  # 50000/256 ~ 200 steps
            images = images.to(device)
            x = vgg_model(images)
            b, c, h, w = x.size(0), x.size(1), x.size(2), x.size(3)
            if i == 0:
                mu = torch.zeros((c, h, w)).to(device)
            mu += (b / 50000) * torch.mean(x, dim=0, keepdim=False)

        for i, (images, target) in enumerate(
                DataLoader(training_data, batch_size=256, shuffle=True,
                           pin_memory=True)):  # 50000/256 ~ 200 steps
            images = images.to(device)
            x = vgg_model(images)
            b, c, h, w = x.size(0), x.size(1), x.size(2), x.size(3)
            if i == 0:
                sigma = torch.zeros((h * w, c, c)).to(device)
            x -= mu
            x = x.view(b, c, -1)
            sigma += (1 / 49999) * torch.matmul(
                x.permute(2, 1, 0).contiguous(),
                x.permute(2, 0, 1).contiguous()
            )  # / (c-1), since the row means were estimated from the data

        u, s, v = torch.svd(sigma.cpu())
        # sigma_inv_rac = torch.matmul(torch.matmul(u, torch.diag_embed(1/torch.sqrt(s+1e-5))), u.transpose(1, 2))
        # sigma_inv_rac = torch.matmul(torch.matmul(u, torch.diag_embed(1/torch.sqrt(s+1e-1))), u.transpose(1, 2))
        # sigma_inv_rac = torch.matmul(torch.matmul(u, torch.diag_embed(1/torch.sqrt(s+1e-9))), u.transpose(1, 2))

    for epoch in range(args.epochs):
        ### adapt lr
        adjust_learning_rate(optimizer, epoch)

        ### Switch to train mode
        model.train()
        train_res_recon_error = []
        train_res_perplexity = []
        train_res_classif_loss = []
        train_res_vq_loss = []
        print('%d epoch' % (epoch + 1))
        for i, (images, target) in enumerate(training_loader):  # 50000/256 ~ 200 steps
            images = images.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            vq_loss, output, perplexity, data_before, data_recon = model(
                images, mu.to(device), u.to(device), s.to(device))
            data_variance = torch.var(data_before)
            # print(data_variance.item())
            recon_error = F.mse_loss(data_recon, data_before) / data_variance
            loss = recon_error + vq_loss
            # print(output.shape, target.shape)
            classif_loss = nn.CrossEntropyLoss()(output, target)
            loss.backward()
            optimizer.step()

            train_res_recon_error.append(recon_error.item())
            train_res_perplexity.append(perplexity.item())
            train_res_classif_loss.append(classif_loss.item())
            train_res_vq_loss.append(vq_loss.item())

        print('%d epoch' % (epoch + 1))
        print('recon_error: %.3f' % np.mean(train_res_recon_error))
        print('perplexity: %.3f' % np.mean(train_res_perplexity))
        print('classif_loss: %.3f' % np.mean(train_res_classif_loss))
        print('vq_loss: %.3f' % np.mean(train_res_vq_loss))
        print()
        epochs_train_res_recon_error.append(np.mean(train_res_recon_error))
        epochs_train_res_perplexity.append(np.mean(train_res_perplexity))
        epochs_train_res_classif_loss.append(np.mean(train_res_classif_loss))
        epochs_train_res_vq_loss.append(np.mean(train_res_vq_loss))

        ### Evaluate on the train set
        model.eval()
        train_Acc1 = []
        train_Acc5 = []
        with torch.no_grad():
            for i, (images, target) in enumerate(training_loader):
                images = images.to(device)
                target = target.to(device)
                # compute output
                vq_loss, output, perplexity, data_before, data_recon = model(
                    images, mu.to(device), u.to(device), s.to(device))
                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                train_Acc1.append(acc1.cpu().numpy())
                train_Acc5.append(acc5.cpu().numpy())
        print('train_accuracy_top_1: %.3f' % np.mean(train_Acc1))
        print('train_accuracy_top_5: %.3f' % np.mean(train_Acc5))
        train_epochs_Acc1.append(np.mean(train_Acc1))
        train_epochs_Acc5.append(np.mean(train_Acc5))

        ### Evaluate on the validation set
        model.eval()
        Acc1 = []
        Acc5 = []
        with torch.no_grad():
            for i, (images, target) in enumerate(validation_loader):  # 10000/256 ~ 40 steps
                images = images.to(device)
                target = target.to(device)
                # compute output
                vq_loss, output, perplexity, data_before, data_recon = model(
                    images, mu.to(device), u.to(device), s.to(device))
                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                Acc1.append(acc1.cpu().numpy())
                Acc5.append(acc5.cpu().numpy())
        print('accuracy_top_1: %.3f' % np.mean(Acc1))
        print('accuracy_top_5: %.3f' % np.mean(Acc5))
        epochs_Acc1.append(np.mean(Acc1))
        epochs_Acc5.append(np.mean(Acc5))

    return (epochs_train_res_recon_error, epochs_train_res_perplexity,
            epochs_train_res_classif_loss, epochs_train_res_vq_loss,
            epochs_Acc1, epochs_Acc5, train_epochs_Acc1, train_epochs_Acc5)
def F_VAE_metric_disc(model, args, L=100):
    # each element in the two lists holds samples generated with one
    # factor of generation held fixed
    list_samples, list_test_samples, num_con_dims, accepted_disc_dims = \
        get_samples_F_VAE_metric_disc(model, L, 800, 800, args)
    num_genfactors = len(list_samples)
    n_latent = list_samples[0].shape[1]

    votes_per_factor = torch.zeros((num_genfactors, n_latent))
    for factor_id, samples in enumerate(list_samples):
        N = samples.shape[0]
        for idx in range(0, N, L):
            end_batch = min(idx + L, N)
            embds_var = torch.var(samples[idx:end_batch, :num_con_dims], dim=0)
            if len(accepted_disc_dims) > 0:
                embds_var_disc = compute_disc_embds_var(
                    samples[idx:end_batch, num_con_dims:], accepted_disc_dims)
                if len(embds_var_disc) > 1:
                    embds_var_disc = torch.cat(embds_var_disc).view(-1)
                else:
                    embds_var_disc = embds_var_disc[0].view(-1)
                embds_var = torch.cat([embds_var, embds_var_disc])
            argmin = torch.argmin(embds_var)
            votes_per_factor[factor_id, argmin] += 1

    # majority-vote classifier: assign each latent dimension to a factor
    classifier = torch.argmax(votes_per_factor, dim=0)
    acc = float(votes_per_factor[classifier, torch.arange(n_latent)].sum()) \
        / float(votes_per_factor.sum())

    # accumulate the test votes in a fresh tensor; the original reused
    # votes_per_factor here, which contaminated the test accuracy with the
    # training votes
    test_votes_per_factor = torch.zeros((num_genfactors, n_latent))
    for factor_id, samples in enumerate(list_test_samples):
        N = samples.shape[0]
        for idx in range(0, N, L):
            end_batch = min(idx + L, N)
            embds_var = torch.var(samples[idx:end_batch, :num_con_dims], dim=0)
            if len(accepted_disc_dims) > 0:
                embds_var_disc = compute_disc_embds_var(
                    samples[idx:end_batch, num_con_dims:], accepted_disc_dims)
                if len(embds_var_disc) > 1:
                    embds_var_disc = torch.cat(embds_var_disc).view(-1)
                else:
                    embds_var_disc = embds_var_disc[0].view(-1)
                embds_var = torch.cat([embds_var, embds_var_disc])
            argmin = torch.argmin(embds_var)
            test_votes_per_factor[factor_id, argmin] += 1

    acc_test = float(test_votes_per_factor[classifier, torch.arange(n_latent)].sum()) \
        / float(test_votes_per_factor.sum())
    return float(acc), acc_test
def single_point(self, with_tqdm=True, hdf5_group='single_point'):
    """Performs a single-point calculation.

    Args:
        with_tqdm (bool, optional): use tqdm for sampling. Defaults to True.
        hdf5_group (str, optional): hdf5 group where to store the data.
            Defaults to 'single_point'.

    Returns:
        SimpleNamespace: contains the local energy, positions, ...
    """
    logd(hvd.rank(), '')
    logd(hvd.rank(),
         '  Single Point Calculation : {nw} walkers | {ns} steps'.format(
             nw=self.sampler.nwalkers, ns=self.sampler.nstep))

    # check if we have to compute and store the grads
    grad_mode = torch.no_grad()
    if self.wf.kinetic == 'auto':
        grad_mode = torch.enable_grad()

    # distribute the calculation
    num_threads = 1
    hvd.broadcast_parameters(self.wf.state_dict(), root_rank=0)
    torch.set_num_threads(num_threads)

    with grad_mode:
        # sample the wave function
        pos = self.sampler(self.wf.pdf)
        if self.wf.cuda and pos.device.type == 'cpu':
            pos = pos.to(self.device)

        # compute energy/variance/error
        eloc = self.wf.local_energy(pos)
        e, s, err = (torch.mean(eloc), torch.var(eloc),
                     self.wf.sampling_error(eloc))

        # gather all data
        eloc_all = hvd.allgather(eloc, name='local_energies')
        e, s, err = (torch.mean(eloc_all), torch.var(eloc_all),
                     self.wf.sampling_error(eloc_all))

        # print
        if hvd.rank() == 0:
            log.options(style='percent').info(
                '  Energy   : %f +/- %f' % (e.detach().item(), err.detach().item()))
            log.options(style='percent').info(
                '  Variance : %f' % s.detach().item())

    # dump data to hdf5
    obs = SimpleNamespace(
        pos=pos,
        local_energy=eloc_all,
        energy=e,
        variance=s,
        error=err
    )

    # dump to file
    if hvd.rank() == 0:
        dump_to_hdf5(obs, self.hdf5file, root_name=hdf5_group)
        add_group_attr(self.hdf5file, hdf5_group, {'type': 'single_point'})

    return obs
def _run(self):
    print("=" * 80)
    print("New output dir with name %s" % self.plot_dir_name)

    # Make model #
    self._initialize()

    # Loop over epochs #
    print(self.epochs)
    for epoch in range(1, self.epochs + 1):
        print("Epoch %d/%d [%0.2f%%]"
              % (epoch, self.epochs, epoch / self.epochs * 100))

        # Loop over models #
        for model, integrator in self.integrator_dict.items():
            if model == "Cuba":
                cuba_res = self.cuba_integral[epoch * self.batch_size]  # [mean, error, chi2, dof]
                cuba_pts = self.cuba_points[epoch * self.batch_size]
                mean_wgt = cuba_res[0]
                err_wgt = cuba_res[1]
                x = np.array(cuba_pts)
                loss = 0.
                lr = 0.
            else:
                if model == "Uniform":
                    # Use uniform sampling
                    x = self.dist.sample((self.batch_size, ))
                    y = self.function(x)
                    x = x.data.numpy()
                    mean = torch.mean(y).item()
                    error = torch.sqrt(torch.var(y) / (self.batch_size - 1.)).item()
                    loss = 0.
                    lr = 0.
                else:
                    # Use NIS #
                    # Integrate on one epoch and produce results #
                    result_dict = integrator.train_one_step(
                        self.batch_size, lr=True, integral=True, points=True)
                    loss = result_dict['loss']
                    lr = result_dict['lr']
                    mean = result_dict['mean']
                    error = result_dict['uncertainty']
                    z = result_dict['z'].data.numpy()
                    x = result_dict['x'].data.numpy()

                # Record values #
                self.means[model].append(mean)
                self.errors[model].append(error)

                # Combine all means and errors #
                mean_wgt = np.sum(self.means[model] / np.power(self.errors[model], 2), axis=-1)
                err_wgt = np.sum(1. / (np.power(self.errors[model], 2)), axis=-1)
                mean_wgt /= err_wgt
                err_wgt = 1 / np.sqrt(err_wgt)

            # Record loss #
            self.loss[model] = loss
            self.loss['analytic'] = 0

            # Record and print #
            self.mean_wgt[model] = mean_wgt
            self.err_wgt[model] = err_wgt
            print("\t" + (model + ' ').ljust(25, '.')
                  + ("Loss = %0.8f" % loss).rjust(20, ' ')
                  + ("\t(LR = %0.8f)" % lr).ljust(20, ' ')
                  + ("Integral = %0.8f +/- %0.8f"
                     % (self.mean_wgt[model], self.err_wgt[model])))

        # Curves for all models #
        dict_loss = {}
        dict_val = {}
        dict_error = {}
        for model in self.mean_wgt.keys():
            dict_loss['$Loss^{%s}$' % model] = [self.loss[model], 0]
            dict_val['$I^{%s}$' % model] = [self.mean_wgt[model], 0]
            dict_error['$\sigma_{I}^{%s}$' % model] = [self.err_wgt[model], 0]

        self.visObject.AddCurves(x=epoch, x_err=0,
                                 title="Integral value", dict_val=dict_val)
        self.visObject.AddCurves(x=epoch, x_err=0,
                                 title="Integral uncertainty", dict_val=dict_error)

        # Plot function output #
        if epoch % self.save_plt_interval == 0:
            self.visObject.MakePlot(epoch)

    # Final printout #
    print("Models results")
    for model in self.mean_wgt.keys():
        print('..... ' + ('Model %s' % model).ljust(40, ' ')
              + 'Integral : %0.8f +/- %0.8f'
              % (self.mean_wgt[model], self.err_wgt[model]))
def forward(self, x):
    f_vars = torch.var(x, dim=(2, 3), keepdim=True)
    h = x / torch.sqrt(f_vars + 1e-5)
    out = self.alpha.view(-1, self.num_features, 1, 1) * h
    return out
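# The forward above normalizes by the per-channel spatial variance only, with
# no mean subtraction, similar in spirit to Filter Response Normalization
# (which uses the mean square instead of the variance). A minimal
# self-contained module sketch; the shape of the learnable `alpha` is an
# assumption, not fixed by the snippet:
import torch
import torch.nn as nn

class VarianceOnlyNorm(nn.Module):
    def __init__(self, num_features, eps=1e-5):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.alpha = nn.Parameter(torch.ones(num_features))

    def forward(self, x):
        f_vars = torch.var(x, dim=(2, 3), keepdim=True)
        h = x / torch.sqrt(f_vars + self.eps)
        return self.alpha.view(-1, self.num_features, 1, 1) * h

# e.g. norm = VarianceOnlyNorm(64); y = norm(torch.randn(8, 64, 32, 32))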
def forward(self, X):
    mu = torch.mean(X, dim=-1, keepdim=True)
    var = torch.var(X, dim=-1, keepdim=True)
    X_norm = torch.div(X - mu, torch.sqrt(var + 1e-8))
    out = self.gamma * X_norm + self.beta
    return out
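# This is layer normalization over the last dimension. Note that torch.var is
# unbiased by default, while torch.nn.functional.layer_norm uses the biased
# variance, so the two agree only approximately (closely for a large last
# dimension). A minimal check, assuming gamma/beta of shape (d,):
import torch
import torch.nn.functional as F

d = 512
X = torch.randn(4, d)
gamma, beta = torch.ones(d), torch.zeros(d)
ref = F.layer_norm(X, (d,), weight=gamma, bias=beta, eps=1e-8)
mu = X.mean(dim=-1, keepdim=True)
var = X.var(dim=-1, keepdim=True)  # unbiased
out = gamma * (X - mu) / torch.sqrt(var + 1e-8) + beta
print(torch.allclose(out, ref, atol=1e-2))  # expected: True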
def main(data_dir, source):
    random.seed(42)
    torch.manual_seed(42)

    class ClassificationDataset():
        def __init__(self, data_dir="data/classifier", source='ours'):
            super(ClassificationDataset, self).__init__()
            with open(f"{data_dir}/negative_{source}.pkl", 'rb') as f:
                vs, fs = pickle.load(f)
            self.datas = list(zip(vs, fs))
            self.N = len(self.datas)

        def __len__(self):
            return self.N

        def __getitem__(self, index):
            verts, faces = self.datas[index]
            for i in range(3):
                verts[:, i] = verts[:, i] - verts[:, i].mean()
                # verts[:, i] = verts[:, i] / verts[:, i].abs().mean()
            inputs = sample_surface(faces, verts.unsqueeze(0), 2500,
                                    return_normals=False)[0]
            return inputs

    dataset = ShapeNetDataset(
        root="/home/kai/data/pointnet.pytorch/shapenetcore_partanno_segmentation_benchmark_v0",
        classification=True,
        npoints=2500)
    num_classes = len(dataset.classes)
    # print('classes', num_classes)

    test_dataset = ClassificationDataset(source=source, data_dir=data_dir)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16,
                                              num_workers=6, shuffle=True)

    classifier = PointNetCls(k=num_classes, feature_transform=False)
    classifier.load_state_dict(
        torch.load("/home/kai/data/pointnet.pytorch/cls/cls_model_249.pth"))
    classifier.cuda()

    total_correct = 0
    total_testset = 0
    classifier = classifier.eval()
    embs = []
    if "table" in data_dir:
        class_idx = 15  # table
    else:
        class_idx = 4   # chair
    for i, points in tqdm(enumerate(test_loader, 0)):
        points = points.transpose(2, 1)
        points = points.cuda()
        pred, _, _, emb = classifier(points)
        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(class_idx).cpu().sum()
        eqs = torch.where(pred_choice == class_idx)
        embs.append(emb[eqs].detach().cpu())
        total_correct += correct.item()
        total_testset += points.size()[0]

    embs = torch.cat(embs, dim=0)
    # print(embs.size())
    return float(torch.var(embs, dim=0).mean()), total_correct / float(total_testset)
def explained_variance(returns, values):
    """Calculate how much of the variance in the returns the values explain."""
    exp_var = 1 - torch.var(returns - values) / torch.var(returns)
    return exp_var.item()
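# A quick illustration of the metric (the inputs are made up for the example):
# a perfect value function scores 1.0, and a constant predictor at the mean of
# the returns scores 0.0.
import torch

returns = torch.randn(1000)
print(explained_variance(returns, returns))                      # 1.0
print(explained_variance(returns, returns.mean().expand(1000)))  # 0.0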
def _energy_variance_error(self, pos):
    '''Return the energy, the variance, and the sampling error.'''
    el = self.local_energy(pos)
    return torch.mean(el), torch.var(el), self.sampling_error(el)
def _energy_variance(self, pos):
    '''Return energy and variance.'''
    el = self.local_energy(pos)
    return torch.mean(el), torch.var(el)
def add_summary(self, name, tensor, verbose_only=True):
    if (self.is_log_iteration() and self.is_logging_enabled()
            and self.is_verbose(verbose_only)):
        self._root.writer.add_scalar(f'{self._namespace}/{name}/mean',
                                     torch.mean(tensor), self.get_iteration())
        self._root.writer.add_scalar(f'{self._namespace}/{name}/var',
                                     torch.var(tensor), self.get_iteration())
def DTInst_losses(
        self,
        labels,
        reg_targets,
        logits_pred,
        reg_pred,
        ctrness_pred,
        mask_pred,
        mask_targets
):
    num_classes = logits_pred.size(1)
    labels = labels.flatten()

    pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
    num_pos_local = pos_inds.numel()
    num_gpus = get_world_size()
    total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
    num_pos_avg = max(total_num_pos / num_gpus, 1.0)

    # prepare one-hot targets
    class_target = torch.zeros_like(logits_pred)
    class_target[pos_inds, labels[pos_inds]] = 1

    class_loss = sigmoid_focal_loss_jit(
        logits_pred,
        class_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / num_pos_avg

    reg_pred = reg_pred[pos_inds]
    reg_targets = reg_targets[pos_inds]
    ctrness_pred = ctrness_pred[pos_inds]
    mask_pred = mask_pred[pos_inds]
    # mask_residual_pred = mask_residual[pos_inds]

    assert mask_pred.shape[0] == mask_targets.shape[0], \
        print("The number (positive) should be equal between "
              "masks_pred (prediction) and mask_targets (target).")

    ctrness_targets = compute_ctrness_targets(reg_targets)
    ctrness_targets_sum = ctrness_targets.sum()
    ctrness_norm = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)

    reg_loss = self.iou_loss(
        reg_pred,
        reg_targets,
        ctrness_targets
    ) / ctrness_norm

    ctrness_loss = F.binary_cross_entropy_with_logits(
        ctrness_pred,
        ctrness_targets,
        reduction="sum"
    ) / num_pos_avg

    total_mask_loss = 0.
    # from sparse coefficients to DTMs/images
    dtm_pred_, binary_pred_ = self.mask_encoding.decoder(mask_pred, is_train=True)
    # code_targets, dtm_targets, weight_maps, hd_maps = self.mask_encoding.encoder(mask_targets)
    # code_targets = self.mask_encoding.encoder(mask_targets)
    code_targets, code_targets_var, code_targets_kur = self.mask_encoding.encoder(mask_targets)
    # dtm_pred_ = mask_residual_pred  # + dtm_pred_init

    # if self.loss_on_mask:
    #     if 'mask_mse' in self.mask_loss_type:
    #         mask_loss = F.mse_loss(dtm_pred_, dtm_targets, reduction='none')
    #         mask_loss = mask_loss.sum(1) * ctrness_targets
    #         mask_loss = mask_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += mask_loss
    #     if 'weighted_mask_mse' in self.mask_loss_type:
    #         mask_loss = F.mse_loss(dtm_pred_, dtm_targets, reduction='none')
    #         mask_loss = torch.sum(mask_loss * weight_maps, 1) / torch.sum(weight_maps, 1) \
    #             * ctrness_targets * self.mask_size ** 2
    #         mask_loss = mask_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += mask_loss
    #     if 'mask_difference' in self.mask_loss_type:
    #         w_ = torch.abs(binary_pred_ * 1. - mask_targets * 1)  # 1's are inconsistent pixels
    #         md_loss = torch.sum(w_, 1) * ctrness_targets
    #         md_loss = md_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += md_loss
    #     if 'hd_one_side_binary' in self.mask_loss_type:  # the first attempt, not really accurate
    #         w_ = torch.abs(binary_pred_ * 1. - mask_targets * 1)  # 1's are inconsistent pixels in hd_maps
    #         hausdorff_loss = torch.sum(w_ * hd_maps, 1) / (torch.sum(w_, 1) + 1e-4) \
    #             * ctrness_targets * self.mask_size ** 2
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'hd_two_side_binary' in self.mask_loss_type:  # the first attempt, not really accurate
    #         w_ = torch.abs(binary_pred_ * 1. - mask_targets * 1)  # 1's are inconsistent pixels in hd_maps
    #         hausdorff_loss = torch.sum(w_ * (torch.clamp(dtm_pred_ ** 2, -0.1, 1.1)
    #                                          + torch.clamp(dtm_targets ** 2, -0.1, 1)), 1) \
    #             / (torch.sum(w_, 1) + 1e-4) * ctrness_targets * self.mask_size ** 2
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'hd_weighted_one_side_dtm' in self.mask_loss_type:
    #         dtm_diff = (dtm_pred_ - dtm_targets) ** 2
    #         hausdorff_loss = torch.sum(dtm_diff * weight_maps * hd_maps, 1) \
    #             / (torch.sum(weight_maps, 1) + 1e-4) * ctrness_targets * self.mask_size ** 2
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'hd_weighted_two_side_dtm' in self.mask_loss_type:
    #         dtm_diff = (dtm_pred_ - dtm_targets) ** 2
    #         hausdorff_loss = torch.sum(dtm_diff * weight_maps * (dtm_pred_ ** 2 + dtm_targets ** 2), 1) \
    #             / (torch.sum(weight_maps, 1) + 1e-4) * ctrness_targets * self.mask_size ** 2
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'hd_one_side_dtm' in self.mask_loss_type:
    #         dtm_diff = (dtm_pred_ - dtm_targets) ** 2
    #         hausdorff_loss = torch.sum(dtm_diff * hd_maps, 1) * ctrness_targets
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'hd_two_side_dtm' in self.mask_loss_type:
    #         dtm_diff = (dtm_pred_ - dtm_targets) ** 2
    #         hausdorff_loss = torch.sum(dtm_diff * (torch.clamp(dtm_pred_, -1.1, 1.1) ** 2
    #                                                + dtm_targets ** 2), 1) * ctrness_targets
    #         hausdorff_loss = hausdorff_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += hausdorff_loss
    #     if 'contour_dice' in self.mask_loss_type:
    #         # contour pixels with 0.05 tolerance
    #         pred_contour = (dtm_pred_ + 0.9 < 0.55) * 1. * (0.5 <= dtm_pred_ + 0.9)
    #         target_contour = (dtm_targets < 0.05) * 1. * (dtm_targets < 0.05)
    #         overlap_ = torch.sum(pred_contour * 2. * target_contour, 1)
    #         union_ = torch.sum(pred_contour ** 2, 1) + torch.sum(target_contour ** 2, 1)
    #         dice_loss = (1. - overlap_ / (union_ + 1e-4)) * ctrness_targets * self.mask_size ** 2
    #         dice_loss = dice_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += dice_loss
    #     if 'mask_dice' in self.mask_loss_type:
    #         overlap_ = torch.sum(binary_pred_ * 2. * mask_targets, 1)
    #         union_ = torch.sum(binary_pred_ ** 2, 1) + torch.sum(mask_targets ** 2, 1)
    #         dice_loss = (1. - overlap_ / (union_ + 1e-5)) * ctrness_targets * self.mask_size ** 2
    #         dice_loss = dice_loss.sum() / max(ctrness_norm * self.mask_size ** 2, 1.0)
    #         total_mask_loss += dice_loss

    if self.loss_on_code:
        # m*m mask labels --> n_components encoding labels
        if 'mse' in self.mask_loss_type:
            mask_loss = F.mse_loss(mask_pred, code_targets, reduction='none')
            mask_loss = mask_loss.sum(1) * ctrness_targets
            mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
            if self.mask_sparse_weight > 0.:
                if self.sparsity_loss_type == 'L1':
                    sparsity_loss = torch.sum(torch.abs(mask_pred), 1) * ctrness_targets
                    sparsity_loss = sparsity_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
                    mask_loss = mask_loss * self.mask_loss_weight + \
                        sparsity_loss * self.mask_sparse_weight
                elif self.sparsity_loss_type == 'weighted_L1':
                    # inactive codes, put L1 regularization on them
                    w_ = (torch.abs(code_targets) < 1e-4) * 1.
                    sparsity_loss = torch.sum(torch.abs(mask_pred) * w_, 1) * ctrness_targets
                    sparsity_loss = sparsity_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
                    mask_loss = mask_loss * self.mask_loss_weight + \
                        sparsity_loss * self.mask_sparse_weight
                elif self.sparsity_loss_type == 'weighted_L2':
                    # inactive codes, put L2 regularization on them
                    w_ = (torch.abs(code_targets) < 1e-4) * 1.
                    sparsity_loss = torch.sum(mask_pred ** 2. * w_, 1) / torch.sum(w_, 1) \
                        * ctrness_targets * self.num_codes
                    sparsity_loss = sparsity_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
                    mask_loss = mask_loss * self.mask_loss_weight + \
                        sparsity_loss * self.mask_sparse_weight
                else:
                    raise NotImplementedError
            total_mask_loss += mask_loss
        if 'smooth' in self.mask_loss_type:
            mask_loss = F.smooth_l1_loss(mask_pred, code_targets, reduction='none')
            mask_loss = mask_loss.sum(1) * ctrness_targets
            mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
            total_mask_loss += mask_loss
        if 'L1' in self.mask_loss_type:
            mask_loss = F.l1_loss(mask_pred, code_targets, reduction='none')
            mask_loss = mask_loss.sum(1) * ctrness_targets
            mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
            total_mask_loss += mask_loss
        if 'cosine' in self.mask_loss_type:
            mask_loss = loss_cos_sim(mask_pred, code_targets)
            mask_loss = mask_loss * ctrness_targets * self.num_codes
            mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
            total_mask_loss += mask_loss
        if 'kl_softmax' in self.mask_loss_type:
            mask_loss = loss_kl_div_softmax(mask_pred, code_targets)
            mask_loss = mask_loss.sum(1) * ctrness_targets * self.num_codes
            mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
            total_mask_loss += mask_loss
        if 'kurtosis' in self.mask_loss_type or 'variance' in self.mask_loss_type:
            mask_pred_m1 = torch.mean(mask_pred, dim=1, keepdim=True)
            mask_pred_m2 = torch.var(mask_pred, dim=1, keepdim=True) + 1e-4
            mask_pred_central = mask_pred - mask_pred_m1
            mask_pred_m4 = torch.mean(mask_pred_central ** 2. * mask_pred_central ** 2,
                                      dim=1, keepdim=True)
            if 'kurtosis' in self.mask_loss_type:
                mask_pred_kur = mask_pred_m4 / (mask_pred_m2 ** 2.) - 3.
                # mask_loss = F.mse_loss(
                mask_loss = F.l1_loss(
                    mask_pred_kur,
                    code_targets_kur,
                    reduction='none'
                )
                mask_loss = mask_loss.sum(1) * ctrness_targets * self.num_codes
                mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
                total_mask_loss += mask_loss * self.code_kur_weight
            if 'variance' in self.mask_loss_type:
                mask_pred_m2 = torch.mean(mask_pred_central ** 2., dim=1, keepdim=True) + 1e-4
                mask_pred_m2 = torch.sqrt(mask_pred_m2)
                # mask_loss = F.mse_loss(
                mask_loss = F.l1_loss(
                    mask_pred_m2,
                    code_targets_var,
                    reduction='none'
                )
                mask_loss = mask_loss.sum(1) * ctrness_targets * self.num_codes
                mask_loss = mask_loss.sum() / max(ctrness_norm * self.num_codes, 1.0)
                total_mask_loss += mask_loss * self.code_var_weight

    losses = {
        "loss_DTInst_cls": class_loss,
        "loss_DTInst_loc": reg_loss,
        "loss_DTInst_ctr": ctrness_loss,
        "loss_DTInst_mask": total_mask_loss
    }
    return losses, {}
def getdata():
    ### Load data
    # path = 'movielens-20m-dataset'
    path = 'ml-latest-small'
    print("===========================================================")
    print("Loading ratings")
    rating = pd.read_csv("./" + path + "/rating.csv")
    print(rating[:3])
    rating = np.array(rating)
    print("===========================================================")
    print("Loading movies")
    movie = pd.read_csv("./" + path + "/movie.csv")
    print(movie[:3])
    movie = np.array(movie)
    print("===========================================================")
    print("Loading tags")
    tag = pd.read_csv("./" + path + "/tag.csv")
    print(tag[:3])
    tag = np.array(tag)
    print("===========================================================")
    print("Loading genome_scores")
    genome_scores = pd.read_csv("./" + path + "/genome_scores.csv")
    print(genome_scores[:3])
    genome_scores = np.array(genome_scores)
    print("===========================================================")
    print("Loading genome_tags")
    genome_tags = pd.read_csv("./" + path + "/genome_tags.csv")
    print(genome_tags[:3])
    genome_tags = np.array(genome_tags)
    print("===========================================================")
    print("Ok (*^_^*)")

    ### Get the row index of a movieId
    dict_m = {1: 0}

    def getIndex(movieId):
        index = dict_m.get(movieId)
        if index is not None:
            return int(index)
        index = np.where(movie[:, 0] == movieId)[0]
        if index.size > 0:
            dict_m[movieId] = index[0]
            return int(index[0])
        else:
            return -1

    ### Get n_user and n_movie
    print("movie.shape", movie.shape)
    users1 = np.unique(rating[:, 0])
    print("users who rated movies:", users1.shape)
    users2 = np.unique(tag[:, 0])
    print("users who tagged movies:", users2.shape)
    users = np.unique(np.hstack((users1, users2)))
    print("number of users:", users.shape)
    n_user = users.shape[0]
    n_movie = movie.shape[0]

    ### Form data with shape (users, movies, features)
    n_f = 3
    X = torch.zeros(n_user, n_movie, n_f)
    # features: | rate | mean_rate | genre (int) |
    matrix_rate = np.zeros((n_movie, 2))  # running sum and count for the mean rate

    print(rating.shape)  # (20000263, 4)
    for i in range(rating.shape[0]):
        movieId, rate = rating[i, 1:3]
        movieId = int(movieId)
        movieIndex = getIndex(movieId)
        # skip unknown movies and non-numeric rates
        if movieIndex <= n_movie and movieIndex != -1 and not isinstance(rate, str):
            matrix_rate[movieIndex, 0] = matrix_rate[movieIndex, 0] + rate
            matrix_rate[movieIndex, 1] = matrix_rate[movieIndex, 1] + 1
        if i % 1000000 == 0:
            print('Counting mean rates /(ㄒoㄒ)/~~')
    zero_index = (matrix_rate[:, 1] == 0)
    matrix_rate[zero_index] = 1
    mean_rate = matrix_rate[:, 0] / matrix_rate[:, 1]

    # save each user's rate
    for i in range(rating.shape[0]):
        userId, movieId, rate = rating[i, 0:3]
        movieId = int(movieId)
        movieIndex = getIndex(movieId)
        # some movieIds in the ratings table are missing from the movie table
        if movieIndex <= n_movie and movieIndex != -1 and not isinstance(rate, str):
            X[userId - 1, movieIndex, 0] = rate
        if i % 1000000 == 0:
            print('Saving users rates /(ㄒoㄒ)/~~')

    # save mean_rate
    for i in range(n_movie):
        X[:, i, 1] = mean_rate[i]
        if i % 1000000 == 0:
            print('Saving mean rates /(ㄒoㄒ)/~~')

    genre_list = np.array(['Action', 'Adventure', 'Animation', 'Children', 'Comedy',
                           'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir',
                           'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                           'Thriller', 'War', 'Western'])
    # save genre as a bitmask encoded as a float
    for i in range(movie.shape[0]):
        movieId, genres = movie[i, (0, 2)]
        genres_split = genres.split('|')
        x = 0  # reset once per movie, not once per genre
        for genre in genres_split:
            index = np.where(genre_list == genre)[0]
            if index.size > 0:
                x = x + 2 ** index
        movieIndex = getIndex(movieId)
        if movieIndex <= n_movie and movieIndex != -1:
            X[:, movieIndex, 2] = float(x)

    ### Normalize the data
    X_org = X.clone()
    Xmean = torch.mean(X, 1, True)
    Xstd = torch.sqrt(torch.var(X, 1, True))  # torch.var returns the variance; take the sqrt for a std
    zero_index1 = (Xstd == 0)
    Xstd[zero_index1] = 1
    # normalize explicitly; the original called transforms.Normalize and discarded its result
    X = (X - Xmean) / Xstd
    print('Loading progress succeeded!!! *★,°*:.☆( ̄▽ ̄)/$:*.°★* 。', X.size())

    # estimate the number of features
    F = X_org[:, :, 0]
    # F.size() is torch.Size([671, 9125])
    F_0 = (F != 0)
    F_sum = torch.sum(F_0, 1)
    num_F = torch.sum(F_sum) // F_sum.size()[0]
    return X_org, X, num_F
def _forward(self, input):
    # exponential_average_factor is set to self.momentum (when it is
    # available) only so that it gets updated in the ONNX graph when this
    # node is exported to ONNX.
    if self.momentum is None:
        exponential_average_factor = 0.0
    else:
        exponential_average_factor = self.momentum

    if self.training and not self.freeze_bn and self.track_running_stats:
        # TODO: if statement only here to tell the jit to skip emitting this when it is None
        if self.num_batches_tracked is not None:
            self.num_batches_tracked += 1
            if self.momentum is None:  # use cumulative moving average
                exponential_average_factor = 1.0 / float(self.num_batches_tracked)
            else:  # use exponential moving average
                exponential_average_factor = self.momentum

    # we use running statistics from the previous batch, so this is an
    # approximation of the approach mentioned in the whitepaper, but we only
    # need to do one convolution in this case instead of two
    running_std = torch.sqrt(self.running_var + self.eps)
    scale_factor = self.gamma / running_std
    scaled_weight = self.weight * scale_factor.reshape([-1, 1, 1, 1])
    if self.bias is not None:
        zero_bias = torch.zeros_like(self.bias)
    else:
        zero_bias = torch.zeros(self.out_channels, device=scaled_weight.device)
    conv = self._conv_forward(input, self.weight_fake_quant(scaled_weight), zero_bias)

    if self.training and not self.freeze_bn:
        # recover the original conv output to get the original batch_mean and batch_var
        if self.bias is not None:
            conv_orig = conv / scale_factor.reshape([1, -1, 1, 1]) \
                + self.bias.reshape([1, -1, 1, 1])
        else:
            conv_orig = conv / scale_factor.reshape([1, -1, 1, 1])
        batch_mean = torch.mean(conv_orig, dim=[0, 2, 3])
        batch_var = torch.var(conv_orig, dim=[0, 2, 3], unbiased=False)
        n = float(conv_orig.numel() / conv_orig.size()[1])
        unbiased_batch_var = batch_var * (n / (n - 1))
        batch_rstd = torch.ones_like(
            batch_var, memory_format=torch.contiguous_format) / torch.sqrt(batch_var + self.eps)

        conv = (self.gamma * batch_rstd).reshape([1, -1, 1, 1]) * conv_orig + \
            (self.beta - self.gamma * batch_rstd * batch_mean).reshape([1, -1, 1, 1])
        self.running_mean = exponential_average_factor * batch_mean.detach() + \
            (1 - exponential_average_factor) * self.running_mean
        self.running_var = exponential_average_factor * unbiased_batch_var.detach() + \
            (1 - exponential_average_factor) * self.running_var
    else:
        if self.bias is None:
            conv = conv + (self.beta - self.gamma * self.running_mean /
                           running_std).reshape([1, -1, 1, 1])
        else:
            conv = conv + (self.gamma * (self.bias - self.running_mean) /
                           running_std + self.beta).reshape([1, -1, 1, 1])
    return conv
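# Note on the bias correction above: torch.var(..., unbiased=False) divides by
# n, while the running statistics are kept unbiased (divide by n - 1), hence
# the `batch_var * n / (n - 1)` factor. A quick check of the identity:
import torch

x = torch.randn(1000)
n = x.numel()
biased = torch.var(x, unbiased=False)
print(torch.allclose(biased * n / (n - 1), torch.var(x, unbiased=True)))  # True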
def forward(self, input):
    # Training mode
    if self.training:
        # First run a plain convolution to get activations, from which the
        # BN statistics are estimated
        output = F.conv2d(
            input=input,
            weight=self.weight,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups
        )
        # Update the BN statistics (batch and running)
        dims = [dim for dim in range(4) if dim != 1]
        batch_mean = torch.mean(output, dim=dims)
        batch_var = torch.var(output, dim=dims)
        with torch.no_grad():
            if self.first_bn == 0:
                self.first_bn.add_(1)
                self.running_mean.add_(batch_mean)
                self.running_var.add_(batch_var)
            else:
                self.running_mean.mul_(1 - self.momentum).add_(batch_mean * self.momentum)
                self.running_var.mul_(1 - self.momentum).add_(batch_var * self.momentum)
        # Fold BN into the conv parameters
        if self.bias is not None:
            bias = reshape_to_bias(self.beta + (self.bias - batch_mean) *
                                   (self.gamma / torch.sqrt(batch_var + self.eps)))
        else:
            # fold the batch statistics into the bias
            bias = reshape_to_bias(self.beta - batch_mean *
                                   (self.gamma / torch.sqrt(batch_var + self.eps)))
        # fold the running statistics into the weight
        weight = self.weight * reshape_to_weight(
            self.gamma / torch.sqrt(self.running_var + self.eps))
    # Evaluation mode
    else:
        # print(self.running_mean, self.running_var)
        # Fold BN (running statistics) into the conv parameters
        if self.bias is not None:
            bias = reshape_to_bias(self.beta + (self.bias - self.running_mean) *
                                   (self.gamma / torch.sqrt(self.running_var + self.eps)))
        else:
            # fold the running statistics into the bias
            bias = reshape_to_bias(self.beta - self.running_mean *
                                   (self.gamma / torch.sqrt(self.running_var + self.eps)))
        # fold the running statistics into the weight
        weight = self.weight * reshape_to_weight(
            self.gamma / torch.sqrt(self.running_var + self.eps))

    # Quantize the activations and the BN-folded weights
    if not self.first_layer:
        input = self.activation_quantizer(input)
    q_input = input
    q_weight = self.weight_quantizer(weight)

    # Quantized convolution
    if self.training:  # training mode
        output = F.conv2d(
            input=q_input,
            weight=q_weight,
            bias=self.bias,  # note: no folded bias here (self.bias may be None)
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups
        )
        # In training mode the weights were folded with the running statistics;
        # rescale so the output matches folding with the batch statistics
        # (running --> batch)
        output *= reshape_to_activation(torch.sqrt(self.running_var + self.eps) /
                                        torch.sqrt(batch_var + self.eps))
        output += reshape_to_activation(bias)
    else:  # evaluation mode
        output = F.conv2d(
            input=q_input,
            weight=q_weight,
            bias=bias,  # note: the folded bias is applied here, giving the full conv+bn
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups
        )
    return output
def score(self, X, y, X_val=None, y_val=None, task=None):
    y = y.clone().detach().to(self.device).double()
    return 1.0 - th.mean((self.predict(X) - y)**2) / th.var(y)
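# The score above is the coefficient of determination R^2: one minus the mean
# squared error normalized by the variance of the targets. Strictly, th.var is
# unbiased while the MSE divides by n, so it matches the textbook R^2 only up
# to an (n-1)/n factor. A standalone illustration of the formula:
import torch as th

y = th.randn(100, dtype=th.double)
pred = y.mean().expand_as(y)  # constant predictor at the target mean
r2 = 1.0 - th.mean((pred - y) ** 2) / th.var(y)
print(float(r2))  # ~0.01: just above zero, because th.var is unbiased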
def update(self, x):
    batch_mean = torch.mean(x, axis=0)
    batch_var = torch.var(x, axis=0)
    batch_count = x.shape[0]
    self.update_from_moments(batch_mean, batch_var, batch_count)
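# update_from_moments is not shown in this snippet; in RL running-statistics
# helpers (e.g. the baselines-style RunningMeanStd) it typically merges the
# batch moments into the running ones with the parallel-variance formula of
# Chan et al. A sketch under that assumption:
import torch

class RunningMeanStd:
    """Minimal running-statistics holder matching update() above (assumed API)."""
    def __init__(self, shape):
        self.mean = torch.zeros(shape)
        self.var = torch.ones(shape)
        self.count = 1e-4  # small prior count to avoid division by zero

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        # parallel combination of two sample means and variances
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + delta ** 2 * self.count * batch_count / tot_count
        self.mean, self.var, self.count = new_mean, m2 / tot_count, tot_count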
def forward(self, user, item_i, item_j):
    users_embedding = self.embed_user.weight
    items_embedding = self.embed_item.weight

    gcn1_users_embedding = (
        torch.sparse.mm(self.user_item_matrix, items_embedding)
        + users_embedding.mul(self.d_i_train))  # *2. #+ users_embedding
    gcn1_items_embedding = (
        torch.sparse.mm(self.item_user_matrix, users_embedding)
        + items_embedding.mul(self.d_j_train))  # *2. #+ items_embedding

    gcn2_users_embedding = (
        torch.sparse.mm(self.user_item_matrix, gcn1_items_embedding)
        + gcn1_users_embedding.mul(self.d_i_train))  # *2. + users_embedding
    gcn2_items_embedding = (
        torch.sparse.mm(self.item_user_matrix, gcn1_users_embedding)
        + gcn1_items_embedding.mul(self.d_j_train))  # *2. + items_embedding

    gcn3_users_embedding = (
        torch.sparse.mm(self.user_item_matrix, gcn2_items_embedding)
        + gcn2_users_embedding.mul(self.d_i_train))  # *2. + gcn1_users_embedding
    gcn3_items_embedding = (
        torch.sparse.mm(self.item_user_matrix, gcn2_users_embedding)
        + gcn2_items_embedding.mul(self.d_j_train))  # *2. + gcn1_items_embedding

    gcn4_users_embedding = (
        torch.sparse.mm(self.user_item_matrix, gcn3_items_embedding)
        + gcn3_users_embedding.mul(self.d_i_train))  # *2. + gcn1_users_embedding
    gcn4_items_embedding = (
        torch.sparse.mm(self.item_user_matrix, gcn3_users_embedding)
        + gcn3_items_embedding.mul(self.d_j_train))  # *2. + gcn1_items_embedding

    gcn_users_embedding = torch.cat(
        (users_embedding, gcn1_users_embedding, gcn2_users_embedding,
         gcn3_users_embedding, gcn4_users_embedding), -1)  # +gcn4_users_embedding
    gcn_items_embedding = torch.cat(
        (items_embedding, gcn1_items_embedding, gcn2_items_embedding,
         gcn3_items_embedding, gcn4_items_embedding), -1)  # +gcn4_items_embedding

    # per-layer embedding statistics, for logging
    g0_mean = torch.mean(users_embedding)
    g0_var = torch.var(users_embedding)
    g1_mean = torch.mean(gcn1_users_embedding)
    g1_var = torch.var(gcn1_users_embedding)
    g2_mean = torch.mean(gcn2_users_embedding)
    g2_var = torch.var(gcn2_users_embedding)
    g3_mean = torch.mean(gcn3_users_embedding)
    g3_var = torch.var(gcn3_users_embedding)
    g4_mean = torch.mean(gcn4_users_embedding)
    g4_var = torch.var(gcn4_users_embedding)
    # g5_mean = torch.mean(gcn5_users_embedding)
    # g5_var = torch.var(gcn5_users_embedding)
    g_mean = torch.mean(gcn_users_embedding)
    g_var = torch.var(gcn_users_embedding)

    i0_mean = torch.mean(items_embedding)
    i0_var = torch.var(items_embedding)
    i1_mean = torch.mean(gcn1_items_embedding)
    i1_var = torch.var(gcn1_items_embedding)
    i2_mean = torch.mean(gcn2_items_embedding)
    i2_var = torch.var(gcn2_items_embedding)
    i3_mean = torch.mean(gcn3_items_embedding)
    i3_var = torch.var(gcn3_items_embedding)
    i4_mean = torch.mean(gcn4_items_embedding)
    i4_var = torch.var(gcn4_items_embedding)
    # i5_mean = torch.mean(gcn5_items_embedding)
    # i5_var = torch.var(gcn5_items_embedding)
    i_mean = torch.mean(gcn_items_embedding)
    i_var = torch.var(gcn_items_embedding)
    # pdb.set_trace()

    str_user = str(round(g0_mean.item(), 7)) + ' '
    str_user += str(round(g0_var.item(), 7)) + ' '
    str_user += str(round(g1_mean.item(), 7)) + ' '
    str_user += str(round(g1_var.item(), 7)) + ' '
    str_user += str(round(g2_mean.item(), 7)) + ' '
    str_user += str(round(g2_var.item(), 7)) + ' '
    str_user += str(round(g3_mean.item(), 7)) + ' '
    str_user += str(round(g3_var.item(), 7)) + ' '
    str_user += str(round(g4_mean.item(), 7)) + ' '
    str_user += str(round(g4_var.item(), 7)) + ' '
    # str_user += str(round(g5_mean.item(), 7)) + ' '
    # str_user += str(round(g5_var.item(), 7)) + ' '
    str_user += str(round(g_mean.item(), 7)) + ' '
    str_user += str(round(g_var.item(), 7)) + ' '

    str_item = str(round(i0_mean.item(), 7)) + ' '
    str_item += str(round(i0_var.item(), 7)) + ' '
    str_item += str(round(i1_mean.item(), 7)) + ' '
    str_item += str(round(i1_var.item(), 7)) + ' '
    str_item += str(round(i2_mean.item(), 7)) + ' '
    str_item += str(round(i2_var.item(), 7)) + ' '
    str_item += str(round(i3_mean.item(), 7)) + ' '
    str_item += str(round(i3_var.item(), 7)) + ' '
    str_item += str(round(i4_mean.item(), 7)) + ' '
    str_item += str(round(i4_var.item(), 7)) + ' '
    # str_item += str(round(i5_mean.item(), 7)) + ' '
    # str_item += str(round(i5_var.item(), 7)) + ' '
    str_item += str(round(i_mean.item(), 7)) + ' '
    str_item += str(round(i_var.item(), 7)) + ' '

    print(str_user)
    print(str_item)
    return gcn_users_embedding, gcn_items_embedding, str_user, str_item
def concord_cc2(r1, r2):
    mean_pred = torch.mean((r1 - torch.mean(r1)) * (r2 - torch.mean(r2)))
    return (2 * mean_pred) / (torch.var(r1) + torch.var(r2)
                              + (torch.mean(r1) - torch.mean(r2)) ** 2)
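# concord_cc2 computes Lin's concordance correlation coefficient,
# rho_c = 2 * cov(r1, r2) / (var(r1) + var(r2) + (mean(r1) - mean(r2))^2).
# Note the numerator here is the biased covariance (mean of products) while
# torch.var is unbiased, so the CCC of a signal with itself comes out at
# (n - 1) / n rather than exactly 1:
import torch

x = torch.randn(1000)
print(concord_cc2(x, x))  # ~0.999 for n = 1000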
def get_samples_F_VAE_metric_v2(model, L, num_votes, args, used_smaples=None):
    """This is the variant that is used in the paper."""
    dataset_loader = return_data(args)
    N = len(dataset_loader.dataset)  # number of data samples
    K = args.z_dim                   # number of latent variables
    nparams = 2
    qz_params = torch.Tensor(N, K, nparams)

    n = 0
    with torch.no_grad():
        for xs, _ in dataset_loader:
            batch_size = xs.shape[0]
            qz_params[n:n + batch_size] = model.module.encoder(xs.cuda()).view(
                batch_size, model.module.z_dim, nparams).data
            n += batch_size

    mu, logstd_var = qz_params.select(-1, 0), qz_params.select(-1, 1)
    z = model.module.reparam(mu, logstd_var)
    KLDs = model.module.kld_unit_guassians_per_sample(mu, logstd_var).mean(0)

    # discard latent dimensions with a small KLD / small global variance
    # idx = torch.where(KLDs > 1e-2)[0]
    global_var = torch.var(mu, axis=0)
    idx = torch.where(global_var > 5e-2)[0]
    mu = mu[:, idx]
    K = mu.shape[1]
    list_samples = []
    global_var = global_var[idx]

    if args.dataset == 'dsprites':
        if used_smaples is None:
            used_smaples = []
            factors = [3, 6, 40, 32, 32]
            for f in factors:
                used_smaples.append([0 for _ in range(f)])
        # 5 is the number of generative factors
        num_votes_per_factor = num_votes / 5
        num_samples_per_factor = int(num_votes_per_factor * L)
        mu = mu.view(3, 6, 40, 32, 32, K)

        # for factor in range(3):
        #     if factor == 0:
        #         shape_fixed = mu[0, :, :, :, :].view(1, 6*40*32*32, K)
        #     else:
        #         shape_fixed = torch.cat(
        #             [shape_fixed, mu[factor, :, :, :, :].view(1, 6*40*32*32, K)], dim=0)

        shape_fixed = torch.zeros((num_samples_per_factor, K))
        for idx in range(0, num_samples_per_factor, L):
            fixed = torch.randint(0, 3, (1, ))
            shape_fixed[idx:idx + L] = mu[fixed, :, :, :, :].view(
                6 * 40 * 32 * 32, K)[torch.randint(0, 6 * 40 * 32 * 32, (L, )), :]
        list_samples.append(shape_fixed)
        del shape_fixed

        scale_fixed = torch.zeros((num_samples_per_factor, K))
        for idx in range(0, num_samples_per_factor, L):
            fixed = torch.randint(0, 6, (1, ))
            scale_fixed[idx:idx + L] = mu[:, fixed, :, :, :].view(
                3 * 40 * 32 * 32, K)[torch.randint(0, 3 * 40 * 32 * 32, (L, )), :]
        list_samples.append(scale_fixed)
        del scale_fixed

        orientation_fixed = torch.zeros((num_samples_per_factor, K))
        for idx in range(0, num_samples_per_factor, L):
            fixed = torch.randint(0, 40, (1, ))
            orientation_fixed[idx:idx + L] = mu[:, :, fixed, :, :].view(
                3 * 6 * 32 * 32, K)[torch.randint(0, 3 * 6 * 32 * 32, (L, )), :]
        list_samples.append(orientation_fixed)
        del orientation_fixed

        posx_fixed = torch.zeros((num_samples_per_factor, K))
        for idx in range(0, num_samples_per_factor, L):
            fixed = torch.randint(0, 32, (1, ))
            posx_fixed[idx:idx + L] = mu[:, :, :, fixed, :].view(
                3 * 6 * 40 * 32, K)[torch.randint(0, 3 * 6 * 40 * 32, (L, )), :]
        list_samples.append(posx_fixed)
        del posx_fixed

        posy_fixed = torch.zeros((num_samples_per_factor, K))
        for idx in range(0, num_samples_per_factor, L):
            # the original line here was `idx = used_smaples[4][fixed]`, which
            # overwrote the loop index with a stale value; drawing a fresh
            # fixed position matches the other four factors
            fixed = torch.randint(0, 32, (1, ))
            posy_fixed[idx:idx + L] = mu[:, :, :, :, fixed].view(
                3 * 6 * 40 * 32, K)[torch.randint(0, 3 * 6 * 40 * 32, (L, )), :]
        list_samples.append(posy_fixed)
        del posy_fixed
    else:
        pass

    return list_samples, global_var, used_smaples
def getDistilData(teacher_model,
                  dataset,
                  batch_size,
                  num_batch=1,
                  for_inception=False):
    """
    Generate distilled data according to the BatchNorm statistics in the
    pretrained single-precision model. Currently only supports a single GPU.

    teacher_model: pretrained single-precision model
    dataset: the name of the dataset
    batch_size: the batch size of the generated distilled data
    num_batch: the number of batches of generated distilled data
    for_inception: whether the data is for Inception, whose input size is
        299 rather than 224
    """
    # initialize distilled data with random noise according to the dataset
    dataloader = getRandomData(dataset=dataset,
                               batch_size=batch_size,
                               for_inception=for_inception)

    eps = 1e-6
    # initialize hooks and the single-precision model
    hooks, hook_handles, bn_stats, refined_gaussian = [], [], [], []
    if torch.cuda.is_available():
        teacher_model = teacher_model.cuda()
    teacher_model = teacher_model.eval()

    # get the number of BatchNorm layers in the model
    layers = sum([
        1 if isinstance(layer, nn.BatchNorm2d) else 0
        for layer in teacher_model.modules()
    ])

    for n, m in teacher_model.named_modules():
        if isinstance(m, nn.Conv2d) and len(hook_handles) < layers:
            # register hooks on the convolutional layers to get the intermediate
            # output after convolution and before BatchNorm
            hook = output_hook()
            hooks.append(hook)
            hook_handles.append(m.register_forward_hook(hook.hook))
        if isinstance(m, nn.BatchNorm2d):
            # get the statistics in the BatchNorm layers
            if torch.cuda.is_available():
                bn_stats.append(
                    (m.running_mean.detach().clone().flatten().cuda(),
                     torch.sqrt(m.running_var + eps).detach().clone().flatten().cuda()))
            else:
                bn_stats.append(
                    (m.running_mean.detach().clone().flatten(),
                     torch.sqrt(m.running_var + eps).detach().clone().flatten()))
    assert len(hooks) == len(bn_stats)

    for i, gaussian_data in enumerate(dataloader):
        if i == num_batch:
            break
        # initialize the criterion, optimizer, and scheduler
        if torch.cuda.is_available():
            gaussian_data = gaussian_data.cuda()
        gaussian_data.requires_grad = True
        crit = nn.CrossEntropyLoss().cuda()
        optimizer = optim.Adam([gaussian_data], lr=0.5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         min_lr=1e-4,
                                                         verbose=False,
                                                         patience=100)
        if torch.cuda.is_available():
            input_mean = torch.zeros(1, 3).cuda()
            input_std = torch.ones(1, 3).cuda()
        else:
            input_mean = torch.zeros(1, 3)
            input_std = torch.ones(1, 3)

        for it in range(500):
            teacher_model.zero_grad()
            optimizer.zero_grad()
            for hook in hooks:
                hook.clear()
            output = teacher_model(gaussian_data)
            mean_loss = 0
            std_loss = 0

            # compute the loss according to the BatchNorm statistics and the
            # statistics of the intermediate outputs
            for cnt, (bn_stat, hook) in enumerate(zip(bn_stats, hooks)):
                tmp_output = hook.outputs
                bn_mean, bn_std = bn_stat[0], bn_stat[1]
                tmp_mean = torch.mean(tmp_output.view(tmp_output.size(0),
                                                      tmp_output.size(1), -1),
                                      dim=2)
                tmp_std = torch.sqrt(
                    torch.var(tmp_output.view(tmp_output.size(0),
                                              tmp_output.size(1), -1),
                              dim=2) + eps)
                mean_loss += own_loss(bn_mean, tmp_mean)
                std_loss += own_loss(bn_std, tmp_std)
            tmp_mean = torch.mean(gaussian_data.view(gaussian_data.size(0), 3, -1),
                                  dim=2)
            tmp_std = torch.sqrt(
                torch.var(gaussian_data.view(gaussian_data.size(0), 3, -1),
                          dim=2) + eps)
            mean_loss += own_loss(input_mean, tmp_mean)
            std_loss += own_loss(input_std, tmp_std)
            total_loss = mean_loss + std_loss

            # update the distilled data
            total_loss.backward()
            optimizer.step()
            scheduler.step(total_loss.item())

        refined_gaussian.append(gaussian_data.detach().clone())

    for handle in hook_handles:
        handle.remove()
    return refined_gaussian
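# A possible invocation of the routine above (illustrative; the ResNet-18
# teacher and the 'imagenet' dataset name are assumptions, and the resulting
# tensor shapes depend on what getRandomData produces):
import torchvision.models as models

teacher = models.resnet18(pretrained=True)
distilled_batches = getDistilData(teacher, dataset='imagenet',
                                  batch_size=32, num_batch=2)
# distilled_batches is a list of num_batch tensors of synthetic images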
# convert to torch tensors
wd_tensor_x = torch.from_numpy(wine_data_x)
wd_tensor_y = torch.from_numpy(wine_data_y)
# print(wd_tensor_y)

# torch can provide one-hot encoding, mostly used for qualitative labels
# (colors etc.):
# target_onehot = torch.zeros(wd_tensor_y.shape[0], 10)
# target_onehot.scatter_(1, wd_tensor_y.unsqueeze(1), 1.0)

# 2: Normalize your data
d_mean = torch.mean(wd_tensor_x, dim=0)  # dim=0: the reduction is performed along dimension 0
d_var = torch.var(wd_tensor_x, dim=0)
d_normalized = (wd_tensor_x - d_mean) / torch.sqrt(d_var)
# print(d_normalized)

# 3: separate the target data (bad, mid, good)
bad_indexes = torch.le(wd_tensor_y, 3)  # boolean mask of all y values less than or equal to 3
# print(bad_indexes.shape, bad_indexes.dtype, bad_indexes.sum())
bad_data = wd_tensor_x[bad_indexes]  # boolean-mask indexing based on the condition
mid_data = wd_tensor_x[torch.gt(wd_tensor_y, 3) & torch.le(wd_tensor_y, 7)]
good_data = wd_tensor_x[torch.gt(wd_tensor_y, 7)]

# calculate the per-column means
bad_mean = torch.mean(bad_data, dim=0)  # the dimension must be given
mid_mean = torch.mean(mid_data, dim=0)
def variance(self, pos):
    '''Variance of the energy at the sampling points.'''
    return torch.var(self.local_energy(pos))
def compute_uncertainty_batch(model,
                              input_images,
                              input_states,
                              actions,
                              targets=None,
                              car_sizes=None,
                              npred=200,
                              n_models=10,
                              Z=None,
                              dirname=None,
                              detach=True,
                              compute_total_loss=False):
    """
    Compute the variance over n_models predictions per input + action.

    :param model: predictive model
    :param input_images: input context states (traffic + lanes)
    :param input_states: input states (position + velocity)
    :param actions: expert / policy actions (longitudinal + transverse acceleration)
    :param npred: number of future predictions
    :param n_models: number of predictions per given input + action
    :param Z: predictive model latent samples
    :param detach: do not retain the computational graph
    :param compute_total_loss: return the overall loss
    :return:
    """
    bsize = input_images.size(0)
    if Z is None:
        Z = model.sample_z(bsize * npred, method='fp')
        if type(Z) is list:
            Z = Z[0]
        Z = Z.view(bsize, npred, -1)

    input_images.unsqueeze_(0)
    input_states.unsqueeze_(0)
    actions.unsqueeze_(0)
    Z_rep = Z.unsqueeze(0)

    input_images = input_images.expand(n_models, bsize, model.opt.ncond, 3,
                                       model.opt.height, model.opt.width)
    input_states = input_states.expand(n_models, bsize, model.opt.ncond, 4)
    actions = actions.expand(n_models, bsize, npred, 2)
    Z_rep = Z_rep.expand(n_models, bsize, npred, -1)

    input_images = input_images.contiguous()
    input_states = input_states.contiguous()
    actions = actions.contiguous()
    Z_rep = Z_rep.contiguous()

    input_images = input_images.view(bsize * n_models, model.opt.ncond, 3,
                                     model.opt.height, model.opt.width)
    input_states = input_states.view(bsize * n_models, model.opt.ncond, 4)
    actions = actions.view(bsize * n_models, npred, 2)
    Z_rep = Z_rep.view(n_models * bsize, npred, -1)

    model.train()  # turn on dropout, for uncertainty estimation

    pred_images, pred_states, pred_costs = [], [], []
    for t in range(npred):
        z = Z_rep[:, t]
        pred_image, pred_state = model.forward_single_step(
            input_images, input_states, actions[:, t], z)
        if detach:
            pred_image.detach_()
            pred_state.detach_()
        input_images = torch.cat((input_images[:, 1:], pred_image), 1)
        input_states = torch.cat((input_states[:, 1:], pred_state.unsqueeze(1)), 1)
        pred_images.append(pred_image)
        pred_states.append(pred_state)

    if npred > 1:
        pred_images = torch.stack(pred_images, 1).squeeze()
        pred_states = torch.stack(pred_states, 1).squeeze()
    else:
        pred_images = torch.stack(pred_images, 1)[:, 0]
        pred_states = torch.stack(pred_states, 1)[:, 0]

    if hasattr(model, 'cost'):
        pred_costs = model.cost(pred_images.view(-1, 3, 117, 24),
                                pred_states.data.view(-1, 4))
        pred_costs = pred_costs.view(n_models, bsize, npred, 2)
        pred_costs = pred_costs[:, :, :, 0] + model.opt.lambda_l * pred_costs[:, :, :, 1]
        if detach:
            pred_costs.detach_()
    # pred_costs, _ = utils.proximity_cost(
    #     pred_images, pred_states.data,
    #     car_sizes.unsqueeze(0).expand(n_models, bsize, 2).contiguous().view(n_models * bsize, 2),
    #     unnormalize=True, s_mean=model.stats['s_mean'], s_std=model.stats['s_std'])

    pred_images = pred_images.view(n_models, bsize, npred, -1)
    pred_states = pred_states.view(n_models, bsize, npred, -1)
    pred_costs = pred_costs.view(n_models, bsize, npred, -1)

    # use the variance rather than the standard deviation, since the latter
    # is not differentiable at 0 due to the sqrt
    pred_images_var = torch.var(pred_images, 0).mean(2)
    pred_states_var = torch.var(pred_states, 0).mean(2)
    pred_costs_var = torch.var(pred_costs, 0).mean(2)
    pred_costs_mean = torch.mean(pred_costs, 0)

    pred_images = pred_images.view(n_models * bsize, npred, 3,
                                   model.opt.height, model.opt.width)
    pred_states = pred_states.view(n_models * bsize, npred, 4)

    if hasattr(model, 'value_function'):
        pred_v = model.value_function(
            pred_images[:, -model.value_function.opt.ncond:],
            pred_states[:, -model.value_function.opt.ncond:].data)
        if detach:
            pred_v = pred_v.data
        pred_v = pred_v.view(n_models, bsize)
        pred_v_var = torch.var(pred_v, 0).mean()
        pred_v_mean = torch.mean(pred_v, 0)
    else:
        pred_v_mean = torch.zeros(bsize).cuda()
        pred_v_var = torch.zeros(bsize).cuda()

    if compute_total_loss:
        # this is the uncertainty loss of the different terms together. We
        # don't include the uncertainty of the value function; it's normal to
        # have high uncertainty there.
        u_loss_costs = torch.relu((pred_costs_var - model.u_costs_mean) /
                                  model.u_costs_std - model.opt.u_hinge)
        u_loss_states = torch.relu((pred_states_var - model.u_states_mean) /
                                   model.u_states_std - model.opt.u_hinge)
        u_loss_images = torch.relu((pred_images_var - model.u_images_mean) /
                                   model.u_images_std - model.opt.u_hinge)
        total_u_loss = u_loss_costs.mean() + u_loss_states.mean() + u_loss_images.mean()
    else:
        total_u_loss = None

    return (pred_images_var, pred_states_var, pred_costs_var, pred_v_var,
            pred_costs_mean, pred_v_mean, total_u_loss)
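# The function above estimates epistemic uncertainty by Monte-Carlo dropout:
# the model is kept in train() mode so dropout stays active, the same input is
# replicated n_models times, and torch.var across the replicas measures their
# disagreement. A minimal self-contained illustration of the idea (the toy
# network is an assumption, not the model used above):
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Dropout(0.5), nn.Linear(64, 1))
net.train()  # keep dropout active at "inference" time
x = torch.randn(1, 4).expand(10, 4)  # one input, 10 stochastic replicas
with torch.no_grad():
    preds = net(x)
print(torch.var(preds, dim=0))  # variance across replicas: an uncertainty proxy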
def _value_function(self, data_dict):
    values = data_dict['values']
    rewards = data_dict['rewards']
    explained_variance = 1 - torch.var(rewards - values) / torch.var(rewards)
    return explained_variance.item()
def check():
    x = t.torch()
    assert tn.relative_error(tn.mean(t), torch.mean(x)) <= 1e-3
    assert tn.relative_error(tn.var(t), torch.var(x)) <= 1e-3
    assert tn.relative_error(tn.norm(t), torch.norm(x)) <= 1e-3
def forward(self, x, v, v_q, candidates, writer=None, i=None):
    assert v_q.shape[0] == x.shape[0]

    # Obtain a (bs x node_state_dim) embedding for each query pose
    embedded_query_frames = self.composer(x, v, v_q, unsqueeze=False)
    embedded_query_frames = embedded_query_frames.view(-1, embedded_query_frames.shape[2])
    assert embedded_query_frames.shape == (
        v_q.shape[0] * v_q.shape[1], 256 + (self.number_of_coordinates_copies * 7))

    if writer:
        writer.add_scalar('mean of norm of raw embedding with concat coords (train)',
                          torch.mean(torch.norm(embedded_query_frames, dim=1, keepdim=True)), i)
        writer.add_scalar('std of norm of raw embedding with concat coords (train)',
                          torch.std(torch.norm(embedded_query_frames, dim=1, keepdim=True)), i)
        writer.add_scalar('var of norm of raw embedding with concat coords (train)',
                          torch.var(torch.norm(embedded_query_frames, dim=1, keepdim=True)), i)

    # Feed through a 2-layer feed-forward network to reduce to size 254
    embedded_query_frames = self.post_embedding_processor(embedded_query_frames)
    assert embedded_query_frames.shape == (v_q.shape[0] * v_q.shape[1], 254)

    # Convolutional encoding of the candidates without the composer, to size 254
    candidates_embeddings = self.candidates_encoder(candidates)
    assert candidates_embeddings.shape == (candidates.shape[0], 254)

    return embedded_query_frames, candidates_embeddings