def fit_transform(self, X, y):
    """Fit a discriminant projection on ``(X, y)`` and return ``X`` projected.

    The projection is built from the eigenvectors of ``S_w^{-1} S_b`` where
    ``S_w`` is the class-prior-weighted sum of per-class covariances and
    ``S_b = cov(X) - S_w``. The ``self.n_dims`` eigenvectors with the largest
    (real part of the) eigenvalues are stored in ``self.w``.

    Args:
        X: 2-D tensor of shape ``(N, d)``.
        y: 1-D label tensor; ``y == c`` is used as a row mask on ``X``.

    Returns:
        ``X @ self.w``.

    NOTE(review): ``cov`` comes from outside this block; it is assumed to
    take a ``(d, n)`` tensor and return a ``(d, d)`` tensor with the dtype
    and device of its input -- confirm against its definition.
    """
    N, d = X.shape
    classes = y.unique()

    # Within-class scatter. Allocate from X so the accumulator shares X's
    # dtype and device (torch.zeros would always give CPU / default dtype).
    S_w = X.new_zeros(d, d)
    for c in classes:
        x_c = X[y == c]
        pi_c = x_c.shape[0] / N  # empirical class prior
        S_w += pi_c * cov(x_c.t())

    # Between-class scatter: total covariance minus the within-class part.
    C = cov(X.t())
    S_b = C - S_w
    M = S_w.inverse() @ S_b

    # Eigen-decomposition. torch.eig / Tensor.eig is deprecated and absent in
    # recent PyTorch, so use torch.linalg.eig when available.
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "eig"):
        eigvals, eigvecs = torch.linalg.eig(M)
        # Results are complex; keep the real parts, as the legacy code did
        # for the eigenvalues.
        eigvals, eigvecs = eigvals.real, eigvecs.real
    else:
        # Legacy API: eigenvalues come back as (d, 2) [real, imag] pairs.
        eigvals, eigvecs = M.eig(True)
        eigvals = eigvals[:, 0]

    indices = eigvals.sort(descending=True)[1][:self.n_dims]
    self.w = eigvecs[:, indices]
    return X @ self.w
def minvar_nls_loo(sim):
    """Leave-one-out estimate built from a quadratic system ``P z = -q``.

    For every row ``k`` of ``sim.X`` the covariance of the remaining rows is
    eigendecomposed, the held-out row is projected onto those eigenvectors,
    and the resulting terms are accumulated into ``P`` and ``q``.

    Args:
        sim: object exposing ``shape == (T, N)`` and a 2-D array ``X``.

    Returns:
        The element-wise reciprocal ``1 / z`` of the solution of ``P z = -q``.

    NOTE(review): ``cov`` and ``eig`` are defined outside this block; ``eig``
    is assumed to return ``(eigenvalues, eigenvectors)`` -- confirm.
    """
    n_rows, n_cols = sim.shape
    data = sim.X

    quad_term = np.zeros((n_cols, n_cols))
    lin_term = np.zeros(n_cols)

    for held_out in range(n_rows):
        # Every row index except the held-out one.
        kept_rows = [row for row in range(n_rows) if row != held_out]
        _, vecs = eig(cov(data[kept_rows, :]))

        x_out = data[held_out].reshape(n_cols, 1)
        proj = vecs.T @ x_out @ x_out.T @ vecs
        alpha = vecs.T @ np.ones(n_cols)
        alpha_diag = np.diag(alpha)

        quad_term += alpha_diag @ proj.T @ proj @ alpha_diag
        lin_term += -alpha_diag @ proj.T @ alpha

    solution = np.linalg.solve(quad_term, -lin_term)
    return 1 / solution
def hook_fn(self, module, input, output):
    """Forward hook: record a covariance matrix and its ``symeig`` result.

    The layer output is average-pooled over all dimensions after the first
    two, reshaped to ``(batch, -1, 1)``, and the first two batch entries are
    concatenated along dim 1 before being handed to ``cov``. The covariance
    is appended to ``self.covariance_matrices`` and ``torch.symeig(covm)``
    to ``self.eigenvalues``.

    NOTE(review): the guard only checks for a non-empty batch but index 1 is
    read, so a batch of exactly one sample raises ``IndexError`` -- confirm
    the intended behaviour. ``torch.symeig`` is deprecated in recent PyTorch.
    """
    # Kernel equal to the spatial extent => global average pooling.
    spatial_extent = output.size()[2:]
    pooled = nn.AvgPool2d(spatial_extent)(output)

    per_sample = pooled.view(pooled.size()[0], -1, 1)
    if len(per_sample) > 0:
        first, second = per_sample[0], per_sample[1]
        per_sample = torch.cat((first, second), dim=1)

    covm = cov(per_sample)
    self.covariance_matrices.append(covm)
    self.eigenvalues.append(torch.symeig(covm))
def fit_transform(self, X):
    """Fit a principal-component projection on ``X`` and return ``X`` projected.

    The column mean is stored in ``self.mean``; the ``self.n_dims``
    eigenvectors of the covariance with the largest (real part of the)
    eigenvalues are stored in ``self.w``.

    Args:
        X: 2-D tensor whose rows are samples. It is NOT modified; centring
            is done on a copy.

    Returns:
        ``(X - self.mean) @ self.w``.

    NOTE(review): ``cov`` comes from outside this block and is assumed to
    accept the transposed (features x samples) tensor -- confirm.
    """
    import torch  # local import: only needed for the torch.linalg lookup below

    self.mean = X.mean(0, keepdim=True)
    # Out-of-place subtraction: the original ``X -= self.mean`` silently
    # centred the caller's tensor as a side effect.
    X = X - self.mean
    C = cov(X.t())

    # torch.eig / Tensor.eig is deprecated and absent in recent PyTorch, so
    # use torch.linalg.eig when available.
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "eig"):
        eigvals, eigvecs = torch.linalg.eig(C)
        eigvals, eigvecs = eigvals.real, eigvecs.real
    else:
        # Legacy API: eigenvalues come back as (d, 2) [real, imag] pairs.
        eigvals, eigvecs = C.eig(True)
        eigvals = eigvals[:, 0]

    indices = eigvals.sort(descending=True)[1][:self.n_dims]
    self.w = eigvecs[:, indices]
    return X @ self.w
def __init__(self, n=100, N=1000, T=1.00, a=-0.4):
    """Store the simulation parameters and derived grid quantities.

    Args:
        n: number of time steps (per the original comments).
        N: number of paths.
        T: maturity.
        a: alpha, i.e. H - 0.5.
    """
    # Plain parameters
    self.T = T
    self.n = n
    self.a = a
    self.N = N

    # Derived quantities
    self.dt = 1.0 / n                   # step size
    self.s = int(n * T)                 # total number of steps
    grid = np.linspace(0, T, self.s + 1)
    self.t = grid[np.newaxis, :]        # time grid with a leading axis

    # Hybrid scheme correlation structure
    self.e = np.array([0, 0])
    self.c = utils.cov(a, n)
def stepTraining(self, batch_x):
    r"""Run one optimiser step on ``batch_x`` and return the loss values.

    Objective, as written in the original source:

        \mathbb{E}_{q_{\phi}(z \mid x )} \log p_\theta(x \mid z)
        - \mathrm{KL}(q_{\phi}(z \mid x ) \| p(z))
        - \lambda (\sum_{i \neq j} cov( \mu(x) )_{ij}^2
                   + 10 * \sum_i ( cov( \mu(x) )_{ii} - 1)^2 )

    The encoder ``self.E`` output is split in two along dim 1 into a mean and
    a log-variance, a latent is drawn with ``self._reparametrize`` and decoded
    by ``self.G``.

    Args:
        batch_x: input batch; its first dimension is the batch size.

    Returns:
        dict of Python floats with keys ``reconstruction_loss``,
        ``disentangled_loss``, ``cov_matching_loss`` and ``total_loss``.
    """
    n_samples = batch_x.size()[0]
    batch_x = batch_x.to(self.device)

    self.G.train()
    self.E.train()

    with torch.enable_grad():
        # Encode: first half of the output is the mean, second the log-variance.
        mu, log_var = torch.chunk(self.E(batch_x), 2, dim=1)
        latent = self._reparametrize(mu, log_var)

        # Decode
        recon = self.G(latent)

        # The two usual VAE terms, averaged over the batch.
        reconstruction_loss = F.mse_loss(recon, batch_x, reduction='sum')
        reconstruction_loss = reconstruction_loss.div(n_samples)
        disentangled_loss = self._kl_divergence(mu, log_var).div(n_samples)

        # Moment matching: squared strictly-upper-triangular covariance
        # entries plus 10x the squared deviation of each variance from 1.
        off_diagonal = utils.cov(mu.t()).triu(diagonal=1).pow(2).sum()
        diagonal = 10 * torch.var(mu, 0).sub(1).pow(2).sum()
        cov_matching_loss = off_diagonal + diagonal

        total_loss = (reconstruction_loss + disentangled_loss
                      + self.lambda_ * cov_matching_loss)

        self.vae_optimizer.zero_grad()
        total_loss.backward()
        self.vae_optimizer.step()

    return {
        'reconstruction_loss': reconstruction_loss.item(),
        'disentangled_loss': disentangled_loss.item(),
        'cov_matching_loss': cov_matching_loss.item(),
        'total_loss': total_loss.item(),
    }
# NOTE(review): fragment of a larger script -- n_l, args, dist_mean, dist_cov,
# Ps1..Ps4, mahalanobis_dist and cov_est are defined outside this view, and
# the loop body continues past the last visible statement.
err_t4 = []
err_t5 = []
for i, n in enumerate(n_l):
    args.n = int(n)
    # Per-sample-size accumulators for the error of each estimator.
    non_pr = []
    covs_t1 = []
    covs_t2 = []
    covs_t3 = []
    covs_t4 = []
    covs_t5 = []
    print(n)
    # NOTE(review): this inner loop reuses ``i`` and so overwrites the outer
    # enumerate index -- confirm the outer ``i`` is not needed afterwards.
    for i in range(100):
        if i % 50 == 0:
            print(i)
        # Draw args.n samples from the reference Gaussian.
        X = torch.distributions.MultivariateNormal(dist_mean, dist_cov).sample((args.n,))
        # Baseline: utils.cov on the raw samples.
        non_pr.append(mahalanobis_dist(utils.cov(X.clone()), dist_cov))
        # Each configuration below sets args.t / args.rho and then calls
        # cov_est on a fresh clone of X.
        args.t = 1
        args.rho = Ps1
        covs_t1.append(mahalanobis_dist(cov_est(X.clone(), args), dist_cov))
        args.t = 2
        args.rho = Ps2
        covs_t2.append(mahalanobis_dist(cov_est(X.clone(), args), dist_cov))
        args.t = 3
        args.rho = Ps3
        covs_t3.append(mahalanobis_dist(cov_est(X.clone(), args), dist_cov))
        args.t = 4
        args.rho = Ps4
def cov_est(self):
    """Compute the sample covariance of ``self.X`` and its eigendecomposition.

    Stores the covariance in ``self.S`` and the two values returned by
    ``eig`` in ``self.lam`` and ``self.U`` (eigenvalues and eigenvectors,
    per the original docstring).
    """
    sample_cov = cov(self.X)
    self.S = sample_cov
    self.lam, self.U = eig(sample_cov)
def doc_word_embed_content_noise(content_path,
                                 noise_path,
                                 whiten_path=None,
                                 content_lines=None,
                                 noise_lines=None,
                                 opt=None):
    """Embed the words of a content and a noise document, optionally whitened.

    Both documents are embedded with ``doc_word_embed_sen``; the noise
    embeddings are appended after the content embeddings. When whitening is
    enabled and ``whiten_path`` is given, the stacked embeddings are
    transformed using the covariance of the embeddings of ``whiten_path``.

    Args:
        content_path: passed to the embedder for the content document.
        noise_path: passed to the embedder for the noise document.
        whiten_path: document whose embedding covariance drives whitening;
            whitening is skipped when ``None``.
        content_lines: forwarded as ``content_lines`` for the content document.
        noise_lines: forwarded as ``content_lines`` for the noise document.
        opt: optional options object; ``opt.whiten`` toggles whitening
            (whitening defaults to on when ``opt`` is ``None``).

    Returns:
        ``(words_ar, word_embeds, noise_idx)`` where ``noise_idx`` holds the
        row indices ``len(content_word_embeds) .. len(word_embeds) - 1``.

    NOTE(review): the original indentation was lost; the nesting below
    (in particular which statements sit inside the ``if False:`` debug
    sections) is reconstructed and should be confirmed.
    """
    no_add_set = set()
    doc_word_embed_f = doc_word_embed_sen
    content_words_ar, content_word_embeds = doc_word_embed_f(
        content_path, no_add_set, content_lines=content_lines)
    # NOTE(review): words_set is never read in this function.
    words_set = set(content_words_ar)
    # The set of content words is passed as the embedder's second argument;
    # presumably words that must not be added again -- verify against
    # doc_word_embed_sen.
    noise_words_ar, noise_word_embeds = doc_word_embed_f(
        noise_path, set(content_words_ar), content_lines=noise_lines)
    # content_words_ar is extended in place, so words_ar aliases it.
    content_words_ar.extend(noise_words_ar)
    words_ar = content_words_ar
    word_embeds = torch.cat((content_word_embeds, noise_word_embeds), dim=0)
    whitening = opt.whiten if opt is not None else True  #True #April, temporary normalize by inlier covariance!
    if whitening and whiten_path is not None:
        #use an article of data in the inliers topic to whiten data.
        whiten_ar, whiten_word_embeds = doc_word_embed_f(
            whiten_path, set()
        )  #, content_lines=content_lines)#,content_lines=content_lines) ######april!!
        whiten_cov = utils.cov(whiten_word_embeds)
        # Hard-coded switch: the TruncatedSVD branch below is currently dead.
        fast_whiten = False  #True
        if not fast_whiten:
            U, D, V_t = linalg.svd(whiten_cov)
            #D_avg = D.mean() #D[len(D)//2]
            #print('D_avg! {}'.format(D_avg))
            # Whitening matrix: pinv(diag(sqrt(D))) @ U^T.
            cov_inv = torch.from_numpy(
                np.matmul(linalg.pinv(np.diag(np.sqrt(D))),
                          U.transpose())).to(utils.device)
            #cov_inv = torch.from_numpy(np.matmul(U, np.matmul(linalg.pinv(np.diag(np.sqrt(D))), V_t))).to(utils.device)
            # NOTE(review): word_embeds0 is never read afterwards.
            word_embeds0 = word_embeds
            #change multiplication order!
            word_embeds = torch.mm(cov_inv, word_embeds.t()).t()
            # Disabled debugging block (never executes).
            if False:
                after_cov = utils.cov(word_embeds)
                U1, D1, V_t1 = linalg.svd(after_cov)
                pdb.set_trace()
                content_whitened = torch.mm(cov_inv,
                                            content_word_embeds.t()).t()
                after_cov2 = utils.cov(content_whitened)
                _, D1, _ = linalg.svd(after_cov2)
                print('after whitening D {}'.format(D1[:7]))
        else:
            #### faster whitening
            # Rescale only along the top 30 components of the covariance and
            # leave the residual (X - projected) untouched.
            sv = decom.TruncatedSVD(30)
            sv.fit(whiten_cov.cpu().numpy())
            top_evals, top_evecs = sv.singular_values_, sv.components_
            top_evals = torch.from_numpy(1 / np.sqrt(top_evals)).to(
                utils.device)
            top_evecs = torch.from_numpy(top_evecs).to(utils.device)
            #pdb.set_trace()
            X = word_embeds
            projected = torch.mm(top_evecs.t() / (top_evecs**2).sum(-1),
                                 torch.mm(top_evecs, X.t())).t()
            #eval_ones = torch.eye(len(top_evals), device=top_evals.device)
            ##projected = torch.mm(torch.mm(top_evecs.t(), eval_ones), torch.mm(top_evecs, X.t())).t()
            #(d x k) * (k x d) * (d x n), project onto and squeeze the components along top evecs
            ##word_embeds = torch.mm((top_evecs/top_evals.unsqueeze(-1)).t(), torch.mm(top_evecs, X.t())).t() + (X-torch.mm(top_evecs.t(), torch.mm(top_evecs, X.t()) ).t())
            #pdb.set_trace()
            ##word_embeds = torch.mm((top_evecs/(top_evals*(top_evecs**2).sum(-1)).unsqueeze(-1)).t(), torch.mm(top_evecs, X.t())).t() + (X-projected )
            #word_embeds = torch.mm((top_evecs/(top_evals*(top_evecs**2).sum(-1)).unsqueeze(-1)).t(), torch.mm(top_evecs, X.t())).t() + (X-projected )
            word_embeds = torch.mm(torch.mm(top_evecs.t(), top_evals.diag()),
                                   torch.mm(top_evecs, X.t())).t() + (X - projected)
    # Rows contributed by the noise document follow the content rows.
    noise_idx = torch.LongTensor(
        list(range(len(content_word_embeds),
                   len(word_embeds)))).to(utils.device)
    # Disabled debugging / analysis block (never executes).
    if False:
        #normalize per direction
        word_embeds_norm = ((word_embeds - word_embeds.mean(0))**2).sum(
            dim=1, keepdim=True).sqrt()
        debug_top_dir = False
        if debug_top_dir:
            w1 = (content_word_embeds - word_embeds.mean(0)
                  )  #/word_embeds_norm[:len(content_word_embeds)]
            w2 = (noise_word_embeds - word_embeds.mean(0)
                  )  #/word_embeds_norm[len(content_word_embeds):]
            mean_diff = ((w1.mean(0) - w2.mean(0))**2).sum().sqrt()
            w1_norm = (w1**2).sum(-1).sqrt().mean()
            w2_norm = (w2**2).sum(-1).sqrt().mean()
            X = (word_embeds - word_embeds.mean(0))  #/word_embeds_norm
            cov = torch.mm(X.t(), X) / word_embeds.size(0)
            U, D, V_t = linalg.svd(cov.cpu().numpy())
            U1 = torch.from_numpy(U[1]).to(utils.device)
            mean1_dir = w1.mean(0)
            mean1_proj = (mean1_dir * U1).sum()
            mean2_dir = w2.mean(0)
            mean2_proj = (mean2_dir * U1).sum()
            diff_proj = ((mean1_dir - mean2_dir) * U1).sum()
            #plot histogram of these projections
            proj1 = (w1 * U1).sum(-1)
            proj2 = (w2 * U1).sum(-1)
            utils.hist(proj1, 'inliers')
            utils.hist(proj2, 'outliers')
            pdb.set_trace()
        #word_embeds=(word_embeds - word_embeds.mean(0))/word_embeds_norm
    return words_ar, word_embeds, noise_idx
def alpha16(df):
    """Alpha#16.

    Formula: (-1 * rank(covariance(rank(high), rank(volume), 5)))
    """
    ranked_high = u.rank(df.high)
    ranked_volume = u.rank(df.volume)
    rolling_cov = u.cov(ranked_high, ranked_volume, 5)
    return (-1 * u.rank(rolling_cov))
def alpha13(df):
    """Alpha#13.

    Formula: (-1 * rank(covariance(rank(close), rank(volume), 5)))
    """
    ranked_close = u.rank(df.close)
    ranked_volume = u.rank(df.volume)
    rolling_cov = u.cov(ranked_close, ranked_volume, 5)
    return (-1 * u.rank(rolling_cov))
def sample_batch(self, batch_size, target_rng=255.):
    """
    Sample a batch of synthetic images and binary labels.

    Each image is drawn by placing up to ``self.max_objects`` objects on a
    jittered location grid; object count, object size, texture ("dynamic
    range") and grid scale are sampled with ``rsample`` from distributions
    produced by ``self.sample_lambda0_r``.

    batch_size: (int) size of batch
    target_rng: converted to a float tensor if it is not one already; it is
        not read again in this method.

    Returns
        image_batch: (tensor) images, repeated to 3 channels and normalized
            with ``self.norm_mean`` / ``self.norm_std``
        label_batch: (tensor) squeezed ``torch.long`` labels

    Hold object properties constant for now across +/- samples. Fix later.

    NOTE(review): the original indentation was lost; the nesting below is
    reconstructed. In particular, the second family dispatch for
    ``object_sizes`` is placed inside the ``else`` branch -- confirm.
    NOTE(review): the original docstring also listed a returned ``params``
    dict, but only two values are returned.
    """
    if not torch.is_tensor(target_rng):
        target_rng = torch.tensor(target_rng).float()
    # Siamese mode keeps two image planes per sample in a trailing axis.
    if self.siamese:
        image_batch = torch.zeros(
            (batch_size, self.img_size, self.img_size, 2),
            requires_grad=self.batch_grad)
    else:
        image_batch = torch.zeros(
            (batch_size, self.img_size, self.img_size),
            requires_grad=self.batch_grad)
    image_batch = image_batch.to(self.device)
    label_batch = torch.zeros((batch_size, 1),
                              dtype=torch.long,
                              device=self.device)

    # ---- Number of objects: turned into a per-slot 0/1 mask ----
    num_object_ps = self.sample_lambda0_r(batch_size=batch_size,
                                          d=self.dists[0])
    num_objects = num_object_ps.rsample([batch_size]).abs()
    if self.dists[0]['family'] == 'categorical':
        num_objects = self.st_op(num_objects)
        obj_cat = torch.arange(1,
                               num_objects.shape[-1] + 1,
                               dtype=num_objects.dtype,
                               requires_grad=True).to(self.device)
        obj_cat = obj_cat.reshape(1, -1, 1, 1)
        obj_cat = obj_cat.repeat(batch_size, 1, 1, 1)
        num_objects = (obj_cat * num_objects.reshape(
            batch_size, self.max_objects, 1, 1)).sum(1, keepdims=True)
        num_objects = torch.abs(
            torch.clamp(-(obj_cat - self.min_objects - num_objects), 0, 1))
    elif self.dists[0]['family'] == 'relaxed_bernoulli':
        # Resampled without .abs() for this family.
        num_objects = num_object_ps.rsample([batch_size])
        num_objects = self.st_op(num_objects)
        # The first min_objects slots are always switched on.
        num_objects[:, :self.min_objects] = 1.
    elif ('gaussian' in self.dists[0]['family']
          or 'normal' in self.dists[0]['family']):
        # Forward value is the rounded sample; the detached difference keeps
        # the gradient of the unrounded sample.
        num_objects = (num_objects.round() -
                       num_objects).detach() + num_objects
        num_objects = torch.clamp(num_objects.reshape(-1, 1, 1, 1),
                                  self.min_objects, self.max_objects)
        obj_cat = torch.arange(1,
                               self.max_objects + 1,
                               dtype=num_objects.dtype,
                               requires_grad=True).to(self.device)
        obj_cat = obj_cat.reshape(1, -1, 1, 1)
        obj_cat = obj_cat.repeat(batch_size, 1, 1, 1)
        num_objects = torch.abs(
            torch.clamp(-(obj_cat - self.min_objects + 1 - num_objects), 0,
                        1))  # noqa
    else:
        raise NotImplementedError(self.dists[0]['family'])

    # ---- Texture: tanh-squashed sample per object and pixel ----
    dynamic_range_ps = self.sample_lambda0_r(
        batch_size=batch_size,
        d=self.dists[2],
        offset=self.min_dynamic_range)  # Dist object... used to have + 2
    dynamic_range = torch.tanh(
        dynamic_range_ps.rsample(
            (batch_size, self.max_objects, self.img_size, self.img_size)))

    # ---- Object sizes ----
    object_size_ps = self.sample_lambda0_r(batch_size=batch_size,
                                           d=self.dists[1],
                                           offset=1)
    if self.one_object_size_per_batch:
        object_sizes = object_size_ps.rsample([batch_size]).abs()
        if self.dists[1]['family'] == 'categorical':
            object_sizes = self.argmax(self.st_op(object_sizes))
    else:
        object_sizes = object_size_ps.rsample(
            [batch_size, self.max_objects]).abs()
        if self.dists[1]['family'] == 'categorical':
            object_sizes = self.st_op(object_sizes)
            object_sizes = self.argmax(object_sizes)
        elif ('gaussian' in self.dists[1]['family']
              or 'normal' in self.dists[1]['family']):
            # Same round-with-gradient trick as for num_objects.
            object_sizes = (object_sizes.round() -
                            object_sizes).detach() + object_sizes
        else:
            raise NotImplementedError(self.dists[1]['family'])
    object_sizes = object_sizes + self.min_object_size
    object_radiuses = torch.clamp(object_sizes, self.min_object_size,
                                  self.max_object_size)

    # Pixel coordinate grids, one copy per object slot.
    y_range = torch.arange(0, self.img_size).to(self.device)  # v1
    x_range = torch.arange(0, self.img_size).to(self.device)  # v1
    yys, xxs = torch.meshgrid(y_range, x_range)  # v1
    yys = yys.unsqueeze(0).repeat(self.max_objects, 1, 1).float()  # v1
    xxs = xxs.unsqueeze(0).repeat(self.max_objects, 1, 1).float()  # v1
    gau = self.sample_lambda0_r(d=self.dists[3], batch_size=batch_size)

    # Object location grids -- See (1) below for explanation
    cyys, cxxs = torch.meshgrid(torch.arange(self.grid_res),
                                torch.arange(self.grid_res))
    # Upper clamp applied to object coordinates below.
    adj_ceil = self.img_size - self.max_object_size
    # y_offset = (self.img_size - cyys.max()) / 2
    # x_offset = (self.img_size - cxxs.max()) / 2
    # cyys = cyys + y_offset
    # cxxs = cxxs + x_offset
    loc_grid = torch.stack([cyys.reshape(-1),
                            cxxs.reshape(-1)]).to(self.device)
    for bidx in range(batch_size):
        # Sample size of objects
        object_radius = object_radiuses[bidx]
        # Coin flip for the label.
        lab = (torch.rand(1) > .5).float()
        if lab == 1 and not self.one_object_size_per_batch:
            # In-place write on an index of object_radiuses.
            object_radius[1] = object_radius[0]  # Copy the sizes

        # (1) Create a grid of locations, where objects will be placed
        # Random uniform per location, then select the self.max_objects top locations
        # Scale the positions of the grid (plus random jitter)
        # Choose the selected object locations in the masking step below
        positions = torch.rand(loc_grid.shape[1],
                               requires_grad=False,
                               device=self.device)
        position_thresh = torch.argsort(positions)[:self.max_objects]

        # Gradient for spatial scale comes from here:
        # coords = loc_grid[position_thresh]
        loc_scale = gau.rsample([2])  # .abs()
        # Forward value is ceil(loc_scale); gradient flows as identity.
        loc_scale = (loc_scale.ceil() - loc_scale).detach() + loc_scale
        coords = loc_grid * loc_scale.reshape(-1, 1)
        max_coords = coords.max(1)[0]
        # Offsets that centre the scaled grid inside the image.
        y_offset = ((self.img_size - max_coords[0]) / 2).floor()
        x_offset = ((self.img_size - max_coords[1]) / 2).floor()
        coords = coords[:, position_thresh] + torch.stack(
            (y_offset, x_offset)).reshape(-1, 1)
        coords = torch.clamp(coords, 0, adj_ceil)

        # Draw objects
        by = coords[0].reshape(self.max_objects, 1, 1)
        bx = coords[1].reshape(self.max_objects, 1, 1)
        # Squared distance of every pixel to each object centre.
        obj_d = torch.pow(yys - by, 2) + torch.pow(xxs - bx, 2)
        # NOTE(review): obj_d is a squared distance while object_radius is
        # compared against it directly (no squaring) -- confirm intended.
        if self.one_object_size_per_batch:
            obj_mask = torch.clamp(
                ((object_radius.reshape(1, 1, 1) + 1) - obj_d), 0, 1)
        else:
            obj_mask = torch.clamp(
                ((object_radius.reshape(self.max_objects, 1, 1) + 1) -
                 obj_d), 0, 1)
        obj = obj_mask * dynamic_range[bidx]
        if lab == 1:
            # Copy object 0's texture values onto object 1's nonzero pixels.
            q_idx = torch.nonzero(obj[0])  # Query
            t_idx = torch.nonzero(obj[1])  # Target
            same_tex = dynamic_range[bidx, 0, q_idx[:, 0], q_idx[:, 1]]
            obj[1, t_idx[:, 0], t_idx[:, 1]] = same_tex

        # Mask to only show num_objects locations
        if self.dists[0]['family'] == 'categorical':
            obj = obj * num_objects[bidx]
        else:
            obj = obj * num_objects[bidx].reshape(self.max_objects, 1, 1)

        # Aggregate the batch
        if self.siamese:
            image_batch[bidx, ..., 0] = obj[0]
            image_batch[bidx, ..., 1] = obj[1:].sum(0)
        else:
            image_batch[bidx] = obj.sum(0)

        # Change task to SR if requested
        if self.task == 'sr':
            # Label from the orientation of the principal axis of the
            # visible objects' coordinates.
            masked_coords = coords.detach() * num_objects[bidx].detach(
            ).squeeze(-1)  # noqa
            masked_coords = masked_coords[torch.nonzero(
                masked_coords.sum(-1))]  # noqa
            masked_coords = masked_coords.reshape(-1, 2)
            # NOTE(review): torch.eig is deprecated in favour of
            # torch.linalg.eig and absent from recent PyTorch releases.
            es, vs = torch.eig(utils.cov(masked_coords), eigenvectors=True)
            # theta = torch.atan2(v[1, 0], v[0, 0]) * (180. / math.pi)
            sorted_es = torch.argsort(es[:, 0], dim=0,
                                      descending=True)  # Only real part
            vs = vs[:, sorted_es]  # Column vectors
            theta = torch.atan2(torch.abs(vs[1, 0]),
                                vs[0, 0]) * (180. / math.pi)
            lab = 0
            # NOTE(review): atan2 with a non-negative first argument yields
            # an angle in [0, 180], so the 225-315 range cannot match.
            if theta >= 45 and theta < 135 or theta >= 225 and theta < 315:
                lab = 1  # what is the elegant way of doing this ^^
        label_batch[bidx] = lab

    # Hardcode the normalization
    # Add a channel axis and repeat it 3 times, then map (x + 1) / 2 before
    # mean/std normalization.
    image_batch = torch.repeat_interleave(image_batch.unsqueeze(1), 3, dim=1)
    image_batch = (image_batch + 1.) / 2.
    image_batch = image_batch - self.norm_mean
    image_batch = image_batch / self.norm_std
    # image_batch = utils.normalize_fun(
    #     image_batch,
    #     reshape=self.reshape,
    #     mean=self.norm_mean,
    #     std=self.norm_std)
    # # Convert labels to one-hot
    # y = torch.eye(self.num_classes).to(self.device)
    # label_batch = y[label_batch].squeeze(1).long()
    del yys  # v1
    del xxs  # v1
    del y_range, x_range
    return image_batch, label_batch.squeeze()
def main():
    """Train an energy-based model on a 2-D toy dataset.

    Parses command-line options, builds the EBM and (for ``--mode lsd``) a
    critic network, then alternates critic and model updates. Progress is
    logged every ``--log_freq`` iterations, a checkpoint is written every
    ``--save_freq`` iterations and after the final iteration, and a figure is
    saved every ``--viz_freq`` iterations under ``<save>/figs``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data',
                        choices=[
                            'swissroll', '8gaussians', 'pinwheel', 'circles',
                            'moons', '2spirals', 'checkerboard', 'rings'
                        ],
                        type=str,
                        default='moons')
    parser.add_argument('--niters', type=int, default=10000)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--test_batch_size', type=int, default=1000)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight_decay', type=float, default=0)
    parser.add_argument('--critic_weight_decay', type=float, default=0)
    parser.add_argument('--save', type=str, default='/tmp/test_lsd')
    parser.add_argument('--mode',
                        type=str,
                        default="lsd",
                        choices=['lsd', 'sm'])
    parser.add_argument('--viz_freq', type=int, default=100)
    parser.add_argument('--save_freq', type=int, default=10000)
    parser.add_argument('--log_freq', type=int, default=100)
    parser.add_argument('--base_dist', action="store_true")
    parser.add_argument('--c_iters', type=int, default=5)
    parser.add_argument('--l2', type=float, default=10.)
    parser.add_argument('--exact_trace', action="store_true")
    parser.add_argument('--n_steps', type=int, default=10)
    args = parser.parse_args()

    # logger
    utils.makedirs(args.save)
    logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    # fit a gaussian to the training data
    init_size = 1000
    init_batch = sample_data(args, init_size).requires_grad_()
    mu, std = init_batch.mean(0), init_batch.std(0)
    base_dist = distributions.Normal(mu, std)

    # neural nets
    critic = networks.SmallMLP(2, n_out=2)
    net = networks.SmallMLP(2)

    ebm = EBM(net, base_dist if args.base_dist else None)
    ebm.to(device)
    critic.to(device)

    # for sampling
    # Distribution.sample_n is deprecated; sample((n,)) is its documented
    # equivalent.
    init_fn = lambda: base_dist.sample((args.test_batch_size,))
    cov = utils.cov(init_batch)
    sampler = HMCSampler(ebm,
                         .3,
                         5,
                         init_fn,
                         device=device,
                         covariance_matrix=cov)

    logger.info(ebm)
    logger.info(critic)

    # optimizers
    optimizer = optim.Adam(ebm.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay,
                           betas=(.0, .999))
    critic_optimizer = optim.Adam(critic.parameters(),
                                  lr=args.lr,
                                  betas=(.0, .999),
                                  weight_decay=args.critic_weight_decay)

    time_meter = utils.RunningAverageMeter(0.98)
    loss_meter = utils.RunningAverageMeter(0.98)

    ebm.train()
    end = time.time()
    for itr in range(args.niters):

        optimizer.zero_grad()
        critic_optimizer.zero_grad()

        x = sample_data(args, args.batch_size)
        x.requires_grad_()

        if args.mode == "lsd":
            # our method

            # compute dlogp(x)/dx
            logp_u = ebm(x)
            sq = keep_grad(logp_u.sum(), x)
            fx = critic(x)
            # compute (dlogp(x)/dx)^T * f(x)
            sq_fx = (sq * fx).sum(-1)

            # compute/estimate Tr(df/dx)
            if args.exact_trace:
                tr_dfdx = exact_jacobian_trace(fx, x)
            else:
                tr_dfdx = approx_jacobian_trace(fx, x)

            stats = (sq_fx + tr_dfdx)
            loss = stats.mean()  # estimate of S(p, q)
            l2_penalty = (
                fx * fx).sum(1).mean() * args.l2  # penalty to enforce f \in F

            # adversarial! The critic is updated on every iteration except
            # each (c_iters + 1)-th one, which updates the model instead.
            if args.c_iters > 0 and itr % (args.c_iters + 1) != 0:
                (-1. * loss + l2_penalty).backward()
                critic_optimizer.step()
            else:
                loss.backward()
                optimizer.step()

        elif args.mode == "sm":
            # score matching for reference
            fx = ebm(x)
            dfdx = torch.autograd.grad(fx.sum(),
                                       x,
                                       retain_graph=True,
                                       create_graph=True)[0]
            eps = torch.randn_like(dfdx)  # use hutchinson here as well
            epsH = torch.autograd.grad(dfdx,
                                       x,
                                       grad_outputs=eps,
                                       create_graph=True,
                                       retain_graph=True)[0]

            trH = (epsH * eps).sum(1)
            norm_s = (dfdx * dfdx).sum(1)

            loss = (trH + .5 * norm_s).mean()
            loss.backward()
            optimizer.step()
        else:
            assert False

        loss_meter.update(loss.item())
        time_meter.update(time.time() - end)

        if itr % args.log_freq == 0:
            log_message = (
                'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.4f}({:.4f})'.
                format(itr, time_meter.val, time_meter.avg, loss_meter.val,
                       loss_meter.avg))
            logger.info(log_message)

        # The last loop index is niters - 1; the previous test against
        # args.niters could never fire, so the final model was not saved.
        if itr % args.save_freq == 0 or itr == args.niters - 1:
            ebm.cpu()
            utils.makedirs(args.save)
            torch.save({
                'args': args,
                'state_dict': ebm.state_dict(),
            }, os.path.join(args.save, 'checkpt.pth'))
            ebm.to(device)

        if itr % args.viz_freq == 0:
            # plot data, model samples and the critic embedding
            plt.clf()
            npts = 100
            p_samples = toy_data.inf_train_gen(args.data, batch_size=npts**2)
            q_samples = sampler.sample(args.n_steps)

            ebm.cpu()

            x_enc = critic(x)
            xes = x_enc.detach().cpu().numpy()
            # Rescale the critic outputs to [-4, 4] for plotting.
            trans = xes.min()
            scale = xes.max() - xes.min()
            xes = (xes - trans) / scale * 8 - 4

            plt.figure(figsize=(4, 4))
            visualize_transform(
                [p_samples, q_samples.detach().cpu().numpy(), xes],
                ["data", "model", "embed"], [ebm], ["model"],
                npts=npts)

            fig_filename = os.path.join(args.save, 'figs',
                                        '{:04d}.png'.format(itr))
            utils.makedirs(os.path.dirname(fig_filename))
            plt.savefig(fig_filename)
            plt.close()

            ebm.to(device)
        end = time.time()

    logger.info('Training has finished, can I get a yeet?')