def log_uniform_candidate_sampler(self, targets, choice_func=_choice):
    # returns sampled, true_expected_count, sampled_expected_count
    # targets = (batch_size, )
    #
    # samples = (n_samples, )
    # true_expected_count = (batch_size, )
    # sampled_expected_count = (n_samples, )
    # see: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.h
    #      https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.cc
    # algorithm: keep track of number of tries when doing sampling,
    #   then expected count is
    #   -expm1(num_tries * log1p(-p))
    #   = (1 - (1-p)^num_tries) where p is self._probs[id]
    np_sampled_ids, num_tries = choice_func(self._num_words, self._num_samples)
    sampled_ids = torch.from_numpy(np_sampled_ids).to(targets.device)

    # Compute expected count = (1 - (1-p)^num_tries) = -expm1(num_tries * log1p(-p))
    # P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
    target_probs = torch.log((targets.float() + 2.0) / (targets.float() + 1.0)) / self._log_num_words_p1
    target_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-target_probs)) - 1.0)
    sampled_probs = torch.log((sampled_ids.float() + 2.0) / (sampled_ids.float() + 1.0)) / self._log_num_words_p1
    sampled_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-sampled_probs)) - 1.0)

    sampled_ids.requires_grad_(False)
    target_expected_count.requires_grad_(False)
    sampled_expected_count.requires_grad_(False)

    return sampled_ids, target_expected_count, sampled_expected_count
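# A minimal, self-contained sketch (not part of the sampler above) checking that the
# expected-count expression -expm1(num_tries * log1p(-p)) matches the closed form
# 1 - (1 - p)^num_tries under the log-uniform class probability
# p = (log(c + 2) - log(c + 1)) / log(range_max + 1). All names and values here are illustrative.
import math
import torch

range_max = 1000
num_tries = 50
classes = torch.tensor([0, 5, 100, 999], dtype=torch.float)
p = torch.log((classes + 2.0) / (classes + 1.0)) / math.log(range_max + 1)
expected_stable = -torch.expm1(num_tries * torch.log1p(-p))
expected_naive = 1.0 - (1.0 - p) ** num_tries
assert torch.allclose(expected_stable, expected_naive, atol=1e-5)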
def _kl_uniform_gumbel(p, q):
    common_term = q.scale / (p.high - p.low)
    high_loc_diff = (p.high - q.loc) / q.scale
    low_loc_diff = (p.low - q.loc) / q.scale
    t1 = common_term.log() + 0.5 * (high_loc_diff + low_loc_diff)
    t2 = common_term * (torch.exp(-high_loc_diff) - torch.exp(-low_loc_diff))
    return t1 - t2
def bbox_transform_inv(boxes, deltas):
    # Inputs should both be tensors or both be Variables, and live on the same device
    if len(boxes) == 0:
        return deltas.detach() * 0

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1)
    pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1)
    pred_w = torch.exp(dw) * widths.unsqueeze(1)
    pred_h = torch.exp(dh) * heights.unsqueeze(1)

    pred_boxes = torch.cat(
        [_.unsqueeze(2) for _ in [pred_ctr_x - 0.5 * pred_w,
                                  pred_ctr_y - 0.5 * pred_h,
                                  pred_ctr_x + 0.5 * pred_w,
                                  pred_ctr_y + 0.5 * pred_h]], 2).view(len(boxes), -1)

    return pred_boxes
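# Hedged shape sketch (assumes the function above is importable as written): boxes are (N, 4)
# corner coordinates and deltas are (N, 4*K) regression outputs for K classes; the result is an
# (N, 4*K) tensor of decoded corner boxes. The values below are illustrative only.
import torch

boxes = torch.tensor([[10., 10., 50., 30.], [0., 0., 20., 20.]])
deltas = torch.zeros(2, 8)  # K = 2 classes; zero deltas keep the original centers and sizes
pred = bbox_transform_inv(boxes, deltas)
assert pred.shape == (2, 8)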
def model(num_particles):
    with pyro.iarange("particles", num_particles):
        q3 = pyro.param("q3", torch.tensor(pi3, requires_grad=True))
        q4 = pyro.param("q4", torch.tensor(0.5 * (pi1 + pi2), requires_grad=True))
        z = pyro.sample("z", dist.Normal(q3, 1.0).expand_by([num_particles]))
        zz = torch.exp(z) / (1.0 + torch.exp(z))
        pyro.sample("y", dist.Bernoulli(q4 * zz))
def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):
    num_wrong = wrong.size(1)
    batch_size = feat.size(0)

    feat = feat.view(-1, self.ninp, 1)
    right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
    wrong_dis = torch.bmm(wrong, feat)
    batch_wrong_dis = torch.bmm(batch_wrong, feat)

    wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)), 1) \
        + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)), 1)

    loss_dis = torch.sum(torch.log(wrong_score + 1))
    loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

    # `fake` is an optional tensor, so compare against None instead of relying on its
    # (ambiguous) truth value.
    if fake is not None:
        fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
        fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

        margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
        loss_fake = torch.sum(margin_score)
        loss_dis += loss_fake
        loss_norm += fake.norm()

    loss = (loss_dis + 0.1 * loss_norm) / batch_size
    if fake is not None:
        return loss, loss_fake.data[0] / batch_size
    else:
        return loss
def guide():
    mu_q = pyro.param("mu_q",
                      Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2), requires_grad=True))
    log_sig_q = pyro.param("log_sig_q",
                           Variable(self.analytic_log_sig_n.data - 0.29 * torch.ones(2), requires_grad=True))
    mu_q_prime = pyro.param("mu_q_prime", Variable(torch.Tensor([-0.34, 0.52]), requires_grad=True))
    kappa_q = pyro.param("kappa_q", Variable(torch.Tensor([0.74]), requires_grad=True))
    log_sig_q_prime = pyro.param("log_sig_q_prime",
                                 Variable(-0.5 * torch.log(1.2 * self.lam0.data), requires_grad=True))
    sig_q, sig_q_prime = torch.exp(log_sig_q), torch.exp(log_sig_q_prime)
    mu_latent_dist = dist.Normal(mu_q, sig_q, reparameterized=repa2)
    mu_latent = pyro.sample("mu_latent", mu_latent_dist,
                            baseline=dict(use_decaying_avg_baseline=use_decaying_avg_baseline))
    mu_latent_prime_dist = dist.Normal(kappa_q.expand_as(mu_latent) * mu_latent + mu_q_prime,
                                       sig_q_prime, reparameterized=repa1)
    pyro.sample("mu_latent_prime", mu_latent_prime_dist,
                baseline=dict(nn_baseline=mu_prime_baseline,
                              nn_baseline_input=mu_latent,
                              use_decaying_avg_baseline=use_decaying_avg_baseline))
    return mu_latent
def guide():
    pyro.module("mymodule", pt_guide)
    mu_q, tau_q = torch.exp(pt_guide.mu_q_log), torch.exp(pt_guide.tau_q_log)
    sigma = torch.pow(tau_q, -0.5)
    pyro.sample("mu_latent",
                dist.Normal(mu_q, sigma, reparameterized=reparameterized),
                baseline=dict(use_decaying_avg_baseline=True))
def get_positive_expectation(p_samples, measure, average=True):
    log_2 = math.log(2.)

    if measure == 'GAN':
        Ep = -F.softplus(-p_samples)
    elif measure == 'JSD':
        Ep = log_2 - F.softplus(-p_samples)
    elif measure == 'X2':
        Ep = p_samples ** 2
    elif measure == 'KL':
        Ep = p_samples + 1.
    elif measure == 'RKL':
        Ep = -torch.exp(-p_samples)
    elif measure == 'DV':
        Ep = p_samples
    elif measure == 'H2':
        Ep = 1. - torch.exp(-p_samples)
    elif measure == 'W1':
        Ep = p_samples
    else:
        raise_measure_error(measure)

    if average:
        return Ep.mean()
    else:
        return Ep
def predictive_elbo(self, x, k, s):
    # No pW or qW

    self.B = x.size()[0]  # batch size
    # self.k = k  # number of z samples aka particles P
    # self.s = s  # number of W samples

    elbo1s = []
    for i in range(s):

        Ws, logpW, logqW = self.sample_W()  # _ , [1], [1]

        mu, logvar = self.encode(x)  # [B,Z]
        z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]

        x_hat = self.decode(Ws, z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, x)  # [P,B]

        elbo = logpx + logpz - logqz  # [P,B]
        if k > 1:
            max_ = torch.max(elbo, 0)[0]  # [B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

        # elbo1 = elbo1  # + (logpW - logqW)*.00000001  # [B], logp(x|W)p(w)/q(w)
        elbo1s.append(elbo)

    elbo1s = torch.stack(elbo1s)  # [S,B]
    if s > 1:
        max_ = torch.max(elbo1s, 0)[0]  # [B]
        elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo1s)  # [1]
    return elbo  # , logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
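# A small, hedged sketch (illustrative, not from the model above): the max-subtraction used for
# the importance-weighted bound is a numerically stable "log-mean-exp"; it agrees with
# torch.logsumexp(elbo, 0) - log(k) over the particle dimension.
import math
import torch

k, B = 5, 3
elbo = torch.randn(k, B) * 10  # fake per-particle log-weights, shape [P, B]
max_ = torch.max(elbo, 0)[0]
stable = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_
assert torch.allclose(stable, torch.logsumexp(elbo, 0) - math.log(k), atol=1e-5)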
def guide(num_particles):
    q1 = pyro.param("q1", torch.tensor(pi1, requires_grad=True))
    q2 = pyro.param("q2", torch.tensor(pi2, requires_grad=True))
    with pyro.iarange("particles", num_particles):
        z = pyro.sample("z", dist.Normal(q2, 1.0).expand_by([num_particles]))
        zz = torch.exp(z) / (1.0 + torch.exp(z))
        pyro.sample("y", dist.Bernoulli(q1 * zz))
def get_negative_expectation(q_samples, measure, average=True):
    log_2 = math.log(2.)

    if measure == 'GAN':
        Eq = F.softplus(-q_samples) + q_samples
    elif measure == 'JSD':
        Eq = F.softplus(-q_samples) + q_samples - log_2
    elif measure == 'X2':
        Eq = -0.5 * ((torch.sqrt(q_samples ** 2) + 1.) ** 2)
    elif measure == 'KL':
        Eq = torch.exp(q_samples)
    elif measure == 'RKL':
        Eq = q_samples - 1.
    elif measure == 'DV':
        Eq = log_sum_exp(q_samples, 0) - math.log(q_samples.size(0))
    elif measure == 'H2':
        Eq = torch.exp(q_samples) - 1.
    elif measure == 'W1':
        Eq = q_samples
    else:
        raise_measure_error(measure)

    if average:
        return Eq.mean()
    else:
        return Eq
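# Illustrative usage sketch (assumes the two helpers above are importable as written): for a
# critic score T(x, y), an f-divergence-style mutual-information objective is estimated as
# E_p[positive expectation] - E_q[negative expectation] for the chosen measure, e.g. 'JSD'.
# The score tensors below are hypothetical placeholders.
import torch

pos_scores = torch.randn(128)  # critic scores on paired (positive) samples
neg_scores = torch.randn(128)  # critic scores on shuffled (negative) samples
mi_estimate = get_positive_expectation(pos_scores, 'JSD') - get_negative_expectation(neg_scores, 'JSD')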
def encode_and_logprob(self, x):

    for i in range(len(self.first_half_weights) - 1):
        x = self.act_func(self.first_half_weights[i](x))

        # pre_act = self.first_half_weights[i](x)  # [B,D]
        # # pre_act_with_noise = Variable(torch.randn(1, self.arch_2[i][1]).type(self.dtype)) * pre_act
        # probs = torch.ones(1, self.arch_2[i][1]) * .5
        # pre_act_with_noise = Variable(torch.bernoulli(probs).type(self.dtype)) * pre_act
        # x = self.act_func(pre_act_with_noise)

    mean = self.first_half_weights[-1](x)
    logvar = self.q_logvar(x)
    # print (logvar)

    # Sample
    eps = Variable(torch.randn(1, self.z_size))  # .type(self.dtype))
    # x = (torch.sqrt(torch.exp(W_logvars)) * eps) + W_means
    x = (torch.exp(.5 * logvar) * eps) + mean

    logq = -torch.mean(logvar.sum(1) + ((x - mean).pow(2) / torch.exp(logvar)).sum(1))
    logp = torch.mean(x.pow(2).sum(1))

    return x, logq + logp
def sample(self, mu, logvar, k):
    # print (mu)
    # print (logvar)
    if torch.cuda.is_available():
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_()).cuda()  # [P,B,Z]
        # print (mu.size())
        # print (logvar.size())
        # print (eps.size())
        z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size).cuda()),
                          Variable(torch.zeros(self.B, self.z_size)).cuda())  # [P,B]
        # logqz = lognormal(z, mu, logvar)
        logqz = lognormal(z, Variable(mu.data), Variable(logvar.data))
    else:
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  # [P,B,Z]
        z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size)),
                          Variable(torch.zeros(self.B, self.z_size)))  # [P,B]
        logqz = lognormal(z, mu, logvar)
    return z, logpz, logqz
def forward(self, true_binary, rule_masks, raw_logits):
    if cmd_args.loss_type == 'binary':
        exp_pred = torch.exp(raw_logits) * rule_masks
        norm = torch.sum(exp_pred, 2, keepdim=True)
        prob = torch.div(exp_pred, norm)
        return F.binary_cross_entropy(prob, true_binary) * cmd_args.max_decode_steps

    if cmd_args.loss_type == 'perplexity':
        return my_perp_loss(true_binary, rule_masks, raw_logits)

    if cmd_args.loss_type == 'vanilla':
        exp_pred = torch.exp(raw_logits) * rule_masks + 1e-30
        norm = torch.sum(exp_pred, 2, keepdim=True)
        prob = torch.div(exp_pred, norm)

        ll = torch.abs(torch.sum(true_binary * prob, 2))
        mask = 1 - rule_masks[:, :, -1]
        logll = mask * torch.log(ll)
        loss = -torch.sum(logll) / true_binary.size()[1]
        return loss

    print('unknown loss type %s' % cmd_args.loss_type)
    raise NotImplementedError
def guide():
    alpha_q_log = pyro.param("alpha_q_log",
                             Variable(self.log_alpha_n.data + 0.17, requires_grad=True))
    beta_q_log = pyro.param("beta_q_log",
                            Variable(self.log_beta_n.data - 0.143, requires_grad=True))
    alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
    pyro.sample("p_latent", dist.beta, alpha_q, beta_q)
    pyro.map_data("aaa", self.data, lambda i, x: None, batch_size=self.batch_size)
def mmd(Mxx, Mxy, Myy, sigma):
    scale = Mxx.mean()
    Mxx = torch.exp(-Mxx / (scale * 2 * sigma * sigma))
    Mxy = torch.exp(-Mxy / (scale * 2 * sigma * sigma))
    Myy = torch.exp(-Myy / (scale * 2 * sigma * sigma))
    mmd = math.sqrt(Mxx.mean() + Myy.mean() - 2 * Mxy.mean())
    return mmd
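# Hedged usage sketch (assumes the helper above is importable): Mxx, Mxy, Myy are taken here to
# be pairwise squared-distance matrices between two sample sets, so the Gaussian-kernel MMD
# estimate is sqrt( E[k(x,x')] + E[k(y,y')] - 2 E[k(x,y)] ). Variable names are illustrative.
import torch

x = torch.randn(64, 10)        # samples from the first distribution
y = torch.randn(64, 10) + 1.0  # samples from a shifted second distribution
Mxx = torch.cdist(x, x) ** 2
Mxy = torch.cdist(x, y) ** 2
Myy = torch.cdist(y, y) ** 2
score = mmd(Mxx, Mxy, Myy, sigma=1.0)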
def optimize_cnt(worm_img, skel_prev, skel_width, segment_length, n_epochs = 1000): #this is the variable that is going t obe modified skel_r = skel_prev.data #+ torch.zeros(*skel_prev.size()).normal_() skel_r = torch.nn.Parameter(skel_r) optimizer = optim.Adam([skel_r], lr=0.1) for ii in range(n_epochs): skel_map = get_skel_map(skel_r, skel_width) #skel_map += 1e-3 p_w = (skel_map*worm_img) skel_map_inv = (-skel_map).add_(1) worm_img_inv = (-worm_img).add_(1) p_bng = (skel_map_inv*worm_img_inv) #p_bng = torch.sqrt(p_bng) #c_loss = F.binary_cross_entropy(p_w, p_bng) c_loss = -(p_bng*torch.log(p_w + 1.e-3) + p_w*torch.log(p_bng + 1.e-3)).mean() ds = skel_r[1:] - skel_r[:-1] dds = ds[1:] - ds[:-1] #seg_mean = seg_sizes.mean() cont_loss = ds.norm(p=2) curv_loss = dds.norm(p=2) seg_sizes = ((ds).pow(2)).sum(1).sqrt() d1 = seg_sizes-segment_length*0.9 d2 = seg_sizes-segment_length*1.5 seg_loss = (torch.exp(-d1) + torch.exp(d2)).mean() #(seg_sizes-segment_length).cosh().mean() #seg_loss = ((seg_sizes - segment_length)).cosh().mean() #seg_mean_loss = ((seg_mean-seg_sizes).abs() + 1e-5).mean() loss = 100*c_loss + 50*seg_loss + cont_loss + curv_loss #loss = 50*c_loss + seg_loss optimizer.zero_grad() loss.backward() #torch.nn.utils.clip_grad_norm([skel_r], 0.001) optimizer.step() if ii % 250 == 0: print(ii, loss.data[0], c_loss.data[0], seg_loss.data[0], cont_loss.data[0], curv_loss.data[0] ) return skel_r, skel_map
def guide():
    alpha_q_log = pyro.param(
        "alpha_q_log",
        Variable(self.log_alpha_n.data + 0.17, requires_grad=True))
    beta_q_log = pyro.param(
        "beta_q_log",
        Variable(self.log_beta_n.data - 0.143, requires_grad=True))
    alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
    pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q)
def guide():
    alpha_q_log = pyro.param("alpha_q_log",
                             Variable(self.log_alpha_n.data + 0.17, requires_grad=True))
    beta_q_log = pyro.param("beta_q_log",
                            Variable(self.log_beta_n.data - 0.143, requires_grad=True))
    alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
    p_latent = pyro.sample("p_latent", dist.beta, alpha_q, beta_q,
                           baseline=dict(use_decaying_avg_baseline=True))
    return p_latent
def guide():
    alpha_q_log = pyro.param(
        "alpha_q_log",
        Variable(self.alpha_q_log_0.clone(), requires_grad=True),
        tags="guide")
    beta_q_log = pyro.param(
        "beta_q_log",
        Variable(self.beta_q_log_0.clone(), requires_grad=True),
        tags="guide")
    alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
    pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q)
def sample(self, fc_feats, att_feats, opt={}): sample_max = opt.get('sample_max', 1) beam_size = opt.get('beam_size', 1) temperature = opt.get('temperature', 1.0) if beam_size > 1: return self.sample_beam(fc_feats, att_feats, opt) batch_size = fc_feats.size(0) state = self.init_hidden(batch_size) # embed fc and att feats fc_feats = self.fc_embed(fc_feats) _att_feats = self.att_embed(att_feats.view(-1, self.att_feat_size)) att_feats = _att_feats.view(*(att_feats.size()[:-1] + (self.rnn_size,))) # Project the attention feats first to reduce memory and computation comsumptions. p_att_feats = self.ctx2att(att_feats.view(-1, self.rnn_size)) p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,))) seq = [] seqLogprobs = [] for t in range(self.seq_length + 1): if t == 0: # input <bos> it = fc_feats.data.new(batch_size).long().zero_() elif sample_max: sampleLogprobs, it = torch.max(logprobs.data, 1) it = it.view(-1).long() else: if temperature == 1.0: prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1) else: # scale logprobs by temperature prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu() it = torch.multinomial(prob_prev, 1).cuda() sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions it = it.view(-1).long() # and flatten indices for downstream processing xt = self.embed(Variable(it, requires_grad=False)) if t >= 1: # stop when all finished if t == 1: unfinished = it > 0 else: unfinished = unfinished * (it > 0) if unfinished.sum() == 0: break it = it * unfinished.type_as(it) seq.append(it) #seq[t] the input of t+2 time step seqLogprobs.append(sampleLogprobs.view(-1)) output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state) logprobs = F.log_softmax(self.logit(output)) return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
def _gaussian_kl_divergence(self, p, q):
    p_mean = p[0][:Z_DIM]
    p_logstd = p[0][Z_DIM:]
    p_var = T.sqrt(T.exp(p_logstd))
    q_mean = q[0][:Z_DIM]
    q_logstd = q[0][Z_DIM:]
    q_var = T.sqrt(T.exp(q_logstd))
    kl = (T.log(q_var / p_var) + (p_var + (p_mean - q_mean) * (p_mean - q_mean)) / q_var - 1) * 0.5
    return T.sum(kl)
def model():
    alpha_p_log = pyro.param(
        "alpha_p_log",
        Variable(self.alpha_p_log_0.clone(), requires_grad=True),
        tags="model")
    beta_p_log = pyro.param(
        "beta_p_log",
        Variable(self.beta_p_log_0.clone(), requires_grad=True),
        tags="model")
    alpha_p, beta_p = torch.exp(alpha_p_log), torch.exp(beta_p_log)
    lambda_latent = pyro.sample("lambda_latent", dist.gamma, alpha_p, beta_p)
    pyro.observe("obs", dist.poisson, self.data, lambda_latent)
    return lambda_latent
def guide():
    mu_q_log = pyro.param(
        "mu_q_log",
        Variable(self.log_mu_n.data + 0.17, requires_grad=True))
    tau_q_log = pyro.param(
        "tau_q_log",
        Variable(self.log_tau_n.data - 0.143, requires_grad=True))
    mu_q, tau_q = torch.exp(mu_q_log), torch.exp(tau_q_log)
    pyro.sample("mu_latent", dist.normal, mu_q, torch.pow(tau_q, -0.5))
def mean_kl(self, new_dist_info, old_dist_info):
    old_log_std = old_dist_info[2]
    new_log_std = new_dist_info[2]
    old_std = torch.exp(old_log_std)
    new_std = torch.exp(new_log_std)

    old_mean = old_dist_info[1]
    new_mean = new_dist_info[1]

    Nr = (old_mean - new_mean) ** 2 + old_std ** 2 - new_std ** 2
    Dr = 2 * new_std ** 2 + 1e-8

    sample_kl = torch.sum(Nr / Dr + new_log_std - old_log_std, dim=1)
    return torch.mean(sample_kl)
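# Hedged verification sketch (not part of the original class): the expression above is the
# diagonal-Gaussian KL(old || new) per dimension,
#   log(sigma_new / sigma_old) + (sigma_old^2 + (mu_old - mu_new)^2) / (2 sigma_new^2) - 1/2,
# summed over dimensions, so it should agree with torch.distributions.kl_divergence.
import torch
from torch.distributions import Normal, kl_divergence

old_mean, new_mean = torch.randn(8, 4), torch.randn(8, 4)
old_log_std, new_log_std = torch.randn(8, 4) * 0.1, torch.randn(8, 4) * 0.1
old_std, new_std = old_log_std.exp(), new_log_std.exp()

manual = torch.sum(((old_mean - new_mean) ** 2 + old_std ** 2 - new_std ** 2) / (2 * new_std ** 2)
                   + new_log_std - old_log_std, dim=1).mean()
reference = kl_divergence(Normal(old_mean, old_std), Normal(new_mean, new_std)).sum(dim=1).mean()
assert torch.allclose(manual, reference, atol=1e-5)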
def plot_dist2(n_components, mixture_weights, true_mixture_weights, exp_dir, name=''): # mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0) rows = 1 cols = 1 fig = plt.figure(figsize=(10+cols,4+rows), facecolor='white') #, dpi=150) col =0 row = 0 ax = plt.subplot2grid((rows,cols), (row,col), frameon=False, colspan=1, rowspan=1) # xs = np.linspace(-9,205, 300) xs = np.linspace(-10,n_components*10 +5, 300) sum_ = np.zeros(len(xs)) # C = 20 for c in range(n_components): m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float()) ys = [] for x in xs: component_i = (torch.exp(m.log_prob(x) )* mixture_weights[c]).detach().cpu().numpy() ys.append(component_i) ys = np.reshape(np.array(ys), [-1]) sum_ += ys ax.plot(xs, ys, label='', c='orange') ax.plot(xs, sum_, label='current', c='r') sum_ = np.zeros(len(xs)) for c in range(n_components): m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float()) ys = [] for x in xs: component_i = (torch.exp(m.log_prob(x) )* true_mixture_weights[c]).detach().cpu().numpy() ys.append(component_i) ys = np.reshape(np.array(ys), [-1]) sum_ += ys ax.plot(xs, ys, label='', c='c') ax.plot(xs, sum_, label='true', c='b') ax.legend() ax.set_title(str(mixture_weights) +'\n'+str(true_mixture_weights), size=8, family='serif') # save_dir = home+'/Documents/Grad_Estimators/GMM/' plt_path = exp_dir+'gmm_plot_dist'+name+'.png' plt.savefig(plt_path) print ('saved training plot', plt_path) plt.close()
def bbox_transform(self, boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0), clip_value=4.135166556742356): """Forward transform that maps proposal boxes to predicted ground-truth boxes using bounding-box regression deltas. See bbox_transform_inv for a description of the weights argument. """ if boxes.size(0) == 0: return None #return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) # get boxes dimensions and centers widths = boxes[:, 2] - boxes[:, 0] + 1.0 heights = boxes[:, 3] - boxes[:, 1] + 1.0 ctr_x = boxes[:, 0] + 0.5 * widths ctr_y = boxes[:, 1] + 0.5 * heights wx, wy, ww, wh = weights dx = deltas[:, 0::4] / wx dy = deltas[:, 1::4] / wy dw = deltas[:, 2::4] / ww dh = deltas[:, 3::4] / wh clip_value = Variable(torch.FloatTensor([clip_value])) if boxes.is_cuda: clip_value = clip_value.cuda() # Prevent sending too large values into np.exp() dw = torch.min(dw,clip_value) dh = torch.min(dh,clip_value) pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) pred_w = torch.exp(dw) * widths.unsqueeze(1) pred_h = torch.exp(dh) * heights.unsqueeze(1) # pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) # x1 pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w # y1 pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w - 1 # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h - 1 pred_boxes = torch.cat((pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2),1) return pred_boxes
def sample(self, mu, logvar, k):
    if torch.cuda.is_available():
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_()).cuda()  # [P,B,Z]
        z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size).cuda()),
                          Variable(torch.zeros(self.B, self.z_size)).cuda())  # [P,B]
        logqz = lognormal(z, mu, logvar)
    else:
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  # [P,B,Z]
        z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size)),
                          Variable(torch.zeros(self.B, self.z_size)))  # [P,B]
        logqz = lognormal(z, mu, logvar)
    return z, logpz, logqz
def forward(self, x, k, s): self.B = x.size()[0] #batch size # self.k = k #number of z samples aka particles P # self.s = s #number of W samples elbo1s = [] logprobs = [[] for _ in range(5)] for i in range(s): Ws, logpW, logqW = self.sample_W() #_ , [1], [1] mu, logvar = self.encode(x) #[B,Z] z, logpz, logqz = self.sample_z(mu, logvar, k=k) #[P,B,Z], [P,B] x_hat = self.decode(Ws, z) #[P,B,X] logpx = log_bernoulli(x_hat, x) #[P,B] elbo = logpx + logpz - logqz #[P,B] if k>1: max_ = torch.max(elbo, 0)[0] #[B] elbo1 = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B] elbo = elbo + (logpW*.000001) - (logqW*self.qW_weight) #[B], logp(x|W)p(w)/q(w) elbo1s.append(elbo) logprobs[0].append(torch.mean(logpx)) logprobs[1].append(torch.mean(logpz)) logprobs[2].append(torch.mean(logqz)) logprobs[3].append(torch.mean(logpW)) logprobs[4].append(torch.mean(logqW)) elbo1s = torch.stack(elbo1s) #[S,B] if s>1: max_ = torch.max(elbo1s, 0)[0] #[B] elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_ #[B] elbo = torch.mean(elbo1s) #[1] #for printing # logpx = torch.mean(logpx) # logpz = torch.mean(logpz) # logqz = torch.mean(logqz) # self.x_hat_sigmoid = F.sigmoid(x_hat) logprobs2 = [torch.mean(torch.stack(aa)) for aa in logprobs] return elbo, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
def rsample(self, sample_shape=torch.Size()):
    # Implements parallel batched accept-reject sampling.
    x = self.propose(sample_shape) if sample_shape else self.propose()
    log_prob_accept = self.log_prob_accept(x)
    probs = torch.exp(log_prob_accept).clamp_(0.0, 1.0)
    done = torch.bernoulli(probs).byte()
    while not done.all():
        proposed_x = self.propose(sample_shape) if sample_shape else self.propose()
        log_prob_accept = self.log_prob_accept(proposed_x)
        prob_accept = torch.exp(log_prob_accept).clamp_(0.0, 1.0)
        accept = torch.bernoulli(prob_accept).byte() & ~done
        if accept.any():
            x[accept] = proposed_x[accept]
            done |= accept
    return x
def conditional_lognormal_loss(model, x, t, e, pdf_u, pdf_c, hr_loss=False, imbalance_loss=False, elbo=True, risk=1): shape, scale, logits = model.forward(x) lossf = [] losss = [] k_ = shape b_ = scale loss_neg = 0 for g in range(model.k): mu = k_[:, g] sigma = b_[:, g] f = -sigma - 0.5 * np.log(2 * np.pi) f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma)) s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2)) s = 0.5 - 0.5 * torch.erf(s) s = torch.log(s) lossf.append(f) losss.append(s) # negative partial log likelihood hr = f - s loss_neg += PartialLogLikelihood()(hr, e) losss = torch.stack(losss, dim=1) lossf = torch.stack(lossf, dim=1) if elbo: lossg = nn.Softmax(dim=1)(logits) losss = lossg * losss lossf = lossg * lossf losss = losss.sum(dim=1) lossf = lossf.sum(dim=1) else: lossg = nn.LogSoftmax(dim=1)(logits) losss = lossg + losss lossf = lossg + lossf losss = torch.logsumexp(losss, dim=1) lossf = torch.logsumexp(lossf, dim=1) if imbalance_loss: try: idx_time = t.int().cpu().detach().numpy() idx_time[idx_time >= 10] = 9 pdf_u_ = torch.tensor(pdf_u).cuda() pdf_c_ = torch.tensor(pdf_c).cuda() lossf = lossf * ((1 - pdf_u_[idx_time]).exp()) losss = losss * ((1 - pdf_c_[idx_time]).exp()) except: pass uncens = np.where(e.cpu().data.numpy() == int(risk))[0] cens = np.where(e.cpu().data.numpy() != int(risk))[0] ll = lossf[uncens].sum() + model.discount * losss[cens].sum() if hr_loss and e.sum() > 0: return -ll / float(len(uncens) + len(cens)) + loss_neg * model.gamma else: return -ll / float(len(uncens) + len(cens))
def logisticloss(D):
    """ k-way logistic loss """
    return torch.log2(1 + (torch.exp(D)).squeeze(-1).sum(-1))
def forward( self, x: torch.LongTensor, x_lengths: torch.LongTensor, y_lengths: torch.LongTensor, y: torch.FloatTensor = None, dr: torch.IntTensor = None, pitch: torch.FloatTensor = None, aux_input: Dict = { "d_vectors": None, "speaker_ids": None }, # pylint: disable=unused-argument ) -> Dict: """Model's forward pass. Args: x (torch.LongTensor): Input character sequences. x_lengths (torch.LongTensor): Input sequence lengths. y_lengths (torch.LongTensor): Output sequnce lengths. Defaults to None. y (torch.FloatTensor): Spectrogram frames. Only used when the alignment network is on. Defaults to None. dr (torch.IntTensor): Character durations over the spectrogram frames. Only used when the alignment network is off. Defaults to None. pitch (torch.FloatTensor): Pitch values for each spectrogram frame. Only used when the pitch predictor is on. Defaults to None. aux_input (Dict): Auxiliary model inputs for multi-speaker training. Defaults to `{"d_vectors": 0, "speaker_ids": None}`. Shapes: - x: :math:`[B, T_max]` - x_lengths: :math:`[B]` - y_lengths: :math:`[B]` - y: :math:`[B, T_max2]` - dr: :math:`[B, T_max]` - g: :math:`[B, C]` - pitch: :math:`[B, 1, T]` """ g = self._set_speaker_input(aux_input) # compute sequence masks y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).float() x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]), 1).float() # encoder pass o_en, x_mask, g, x_emb = self._forward_encoder(x, x_mask, g) # duration predictor pass if self.args.detach_duration_predictor: o_dr_log = self.duration_predictor(o_en.detach(), x_mask) else: o_dr_log = self.duration_predictor(o_en, x_mask) o_dr = torch.clamp(torch.exp(o_dr_log) - 1, 0, self.max_duration) # generate attn mask from predicted durations o_attn = self.generate_attn(o_dr.squeeze(1), x_mask) # aligner o_alignment_dur = None alignment_soft = None alignment_logprob = None alignment_mas = None if self.use_aligner: o_alignment_dur, alignment_soft, alignment_logprob, alignment_mas = self._forward_aligner( x_emb, y, x_mask, y_mask) alignment_soft = alignment_soft.transpose(1, 2) alignment_mas = alignment_mas.transpose(1, 2) dr = o_alignment_dur # pitch predictor pass o_pitch = None avg_pitch = None if self.args.use_pitch: o_pitch_emb, o_pitch, avg_pitch = self._forward_pitch_predictor( o_en, x_mask, pitch, dr) o_en = o_en + o_pitch_emb # decoder pass o_de, attn = self._forward_decoder( o_en, dr, x_mask, y_lengths, g=None) # TODO: maybe pass speaker embedding (g) too outputs = { "model_outputs": o_de, # [B, T, C] "durations_log": o_dr_log.squeeze(1), # [B, T] "durations": o_dr.squeeze(1), # [B, T] "attn_durations": o_attn, # for visualization [B, T_en, T_de'] "pitch_avg": o_pitch, "pitch_avg_gt": avg_pitch, "alignments": attn, # [B, T_de, T_en] "alignment_soft": alignment_soft, "alignment_mas": alignment_mas, "o_alignment_dur": o_alignment_dur, "alignment_logprob": alignment_logprob, "x_mask": x_mask, "y_mask": y_mask, } return outputs
def log_sum_exp_batch(vecs):
    maxi = torch.max(vecs, 1)[0]
    maxi_bc = maxi[:, None].repeat(1, vecs.shape[1])
    recti_ = torch.log(torch.sum(torch.exp(vecs - maxi_bc), 1))
    return maxi + recti_
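# Hedged sketch (illustrative): the max-shift above is the standard numerically stable
# log-sum-exp; for a 2-D input it should match torch.logsumexp along dim=1.
import torch

vecs = torch.randn(4, 7) * 50  # large magnitudes to exercise the stabilization
assert torch.allclose(log_sum_exp_batch(vecs), torch.logsumexp(vecs, dim=1), atol=1e-4)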
def conditional_distributions_loss(model, x, t, e, pdf_u, pdf_c, hr_loss=False, imbalance_loss=False, elbo=True, risk='1'): shape_weibull, scale_weibull, gates_weibull, shape_lognormal, scale_lognormal, logits_lognormal, attention_weights = model.forward( x) lossf_lognormal = [] losss_lognormal = [] hr_lognormal = [] for g in range(model.k): mu = shape_lognormal[:, g] sigma = scale_lognormal[:, g] f = -sigma - 0.5 * np.log(2 * np.pi) f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma)) s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2)) s = 0.5 - 0.5 * torch.erf(s) s = torch.log(s) lossf_lognormal.append(f) losss_lognormal.append(s) # negative partial log likelihood hr_lognormal.append(f - s) losss_lognormal = torch.stack(losss_lognormal, dim=1) lossf_lognormal = torch.stack(lossf_lognormal, dim=1) hr_lognormal = torch.stack(hr_lognormal, dim=1) if elbo: lossg_lognormal = nn.Softmax(dim=1)(logits_lognormal) losss_lognormal = lossg_lognormal * losss_lognormal lossf_lognormal = lossg_lognormal * lossf_lognormal losss_lognormal = losss_lognormal.sum(dim=1) lossf_lognormal = lossf_lognormal.sum(dim=1) hr_lognormal = lossg_lognormal * hr_lognormal hr_lognormal = hr_lognormal.sum(dim=1) else: lossg_lognormal = nn.LogSoftmax(dim=1)(logits_lognormal) losss_lognormal = lossg_lognormal + losss_lognormal lossf_lognormal = lossg_lognormal + lossf_lognormal losss_lognormal = torch.logsumexp(losss_lognormal, dim=1) lossf_lognormal = torch.logsumexp(lossf_lognormal, dim=1) # Weibull distriubtion shapes_weibull, scales_weibull = shape_weibull.exp(), ( -scale_weibull).exp() lossf_weibull, losss_weibull = [], [] hr_weibull = [] for idx in range(model.k): eta = shapes_weibull[:, idx] beta = scales_weibull[:, idx] log_s_weibull = -(torch.pow(t / beta, eta)) log_f_weibull = torch.log(eta) - torch.log(beta) + ( (eta - 1) * (-torch.log(beta) + torch.log(t))) log_f_weibull = log_f_weibull + log_s_weibull lossf_weibull.append(log_f_weibull) losss_weibull.append(log_s_weibull) # negative partial log likelihood hr_weibull.append(torch.log(eta / beta * (t / beta)**(eta - 1))) losss_weibull = torch.stack(losss_weibull, dim=1) lossf_weibull = torch.stack(lossf_weibull, dim=1) hr_weibull = torch.stack(hr_weibull, dim=1) if elbo: lossg_weibull = nn.Softmax(dim=1)(gates_weibull) losss_weibull = lossg_weibull * losss_weibull lossf_weibull = lossg_weibull * lossf_weibull losss_weibull = losss_weibull.sum(dim=1) lossf_weibull = lossf_weibull.sum(dim=1) hr_weibull = hr_weibull * lossg_weibull hr_weibull = hr_weibull.sum(dim=1) else: lossg_weibull = nn.LogSoftmax(dim=1)(gates_weibull) losss_weibull = lossg_weibull + losss_weibull lossf_weibull = lossg_weibull + lossf_weibull losss_weibull = torch.logsumexp(losss_weibull, dim=1) lossf_weibull = torch.logsumexp(lossf_weibull, dim=1) # Combine lossf, losss = torch.stack([lossf_lognormal, lossf_weibull], dim=1), torch.stack( [losss_lognormal, losss_weibull], dim=1) weights = nn.Softmax(dim=1)(attention_weights) #hr = torch.stack([hr_weibull, hr_lognormal], dim=1) hr = torch.stack( [lossf_lognormal - losss_lognormal, lossf_weibull - losss_weibull], dim=1) hr = hr * weights hr = hr.sum(dim=1) loss_neg = PartialLogLikelihood()(hr, e) lossf = lossf * weights losss = losss * weights lossf = lossf.sum(dim=1) losss = losss.sum(dim=1) # if imbalance_loss: try: idx_time = t.int().cpu().detach().numpy() pdf_u_ = torch.tensor(pdf_u).cuda() pdf_c_ = torch.tensor(pdf_c).cuda() lossf = lossf * (1 - pdf_u_[idx_time]) #.exp() losss = losss * (1 - pdf_c_[idx_time]) 
#.exp() except: pass uncens = np.where(e.cpu().data.numpy() == int(risk))[0] cens = np.where(e.cpu().data.numpy() != int(risk))[0] ll = lossf[uncens].sum() + model.discount * losss[cens].sum() if hr_loss and e.sum() > 0: return -ll / float(len(uncens) + len(cens)) + loss_neg * model.gamma else: return -ll / float(len(uncens) + len(cens))
def e(self, s): return torch.exp(self.clamp * 0.636 * torch.atan(s / self.clamp))
def mutal_info(self, factors = ['shape', 'scale', 'rotation', 'x', 'y']): nsamps_per_factor = 100 per_class_cnt = {} n_factors = len(self.latent_sizes) fig = plt.figure(figsize=(5, 2*n_factors)) # fig.tight_layout() plt.subplots_adjust(hspace=.5) for fac_id in range(n_factors): n_fac_classes = self.latent_sizes[fac_id] for i in range(n_fac_classes): per_class_cnt.update({i: 0}) dl = DataLoader( self.data_loader.dataset, batch_size=100, shuffle=True, pin_memory=True) # randomly select images (with 100 different samples per class for the fixed factor) fixed_XA = [] for fac_class in range(n_fac_classes): indices = np.where(self.latent_classes[:, fac_id] == fac_class)[0] np.random.shuffle(indices) per_class_idx = indices[:nsamps_per_factor] for i in per_class_idx: img, _ = dl.dataset.__getitem__(i) if self.cuda: img = img.cuda() # img = img.squeeze(0) fixed_XA.append(img) fixed_XA = torch.stack(fixed_XA, dim=0) q = self.enc(fixed_XA, num_samples=1) batch_dim = 1 # for my model batch_size = q[self.latents['private']].value.shape[1] z_private = q[self.latents['private']].value.unsqueeze(batch_dim + 1).transpose(batch_dim, 0) z_shared = q[self.latents['shared']].value.unsqueeze(batch_dim + 1).transpose(batch_dim, 0) q_ziCx_private = torch.exp(q[self.latents['private']].dist.log_prob(z_private).transpose(1, batch_dim + 1).squeeze(2)) q_ziCx_shared = torch.exp(q[self.latents['shared']].dist.log_prob(z_shared).transpose(1, batch_dim + 1).squeeze(2)) q_ziCx = torch.cat((q_ziCx_private, q_ziCx_shared), dim=2) latent_dim = q_ziCx.shape[-1] mi_zi_y = torch.tensor([.0] * latent_dim) for k in range(n_fac_classes): q_ziCxk = q_ziCx[k * nsamps_per_factor:(k + 1) * nsamps_per_factor, k * nsamps_per_factor:(k + 1) * nsamps_per_factor, :] marg_q_ziCxk = q_ziCxk.sum(1) mi_zi_y += (marg_q_ziCxk * (np.log(batch_size / nsamps_per_factor) + torch.log(marg_q_ziCxk) - torch.log( q_ziCx[k * nsamps_per_factor:(k + 1) * nsamps_per_factor, :, :].sum(1)))).mean(0) mi_zi_y = mi_zi_y / batch_size print(mi_zi_y) my_xticks = [] for i in range(latent_dim): my_xticks.append('z' + str(i+1)) ax = fig.add_subplot(n_factors, 1, fac_id + 1) ax.bar(range(latent_dim), mi_zi_y.detach().cpu().numpy()) ax.set_title(factors[fac_id]) plt.xticks(range(latent_dim), my_xticks) plt.show()
def _distribution(self, obs):
    mu = self.mu_net(obs)
    std = torch.exp(self.log_std)
    return Normal(mu, std)
def train_smovement(train_loader, glow, nn_theta, loss_fn, optimizer, scheduler, epoch): print( "ID: exp12_1 testing lr 1e-4 and only one step movement, no glow loss with random patch" ) global global_step loss_meter = AverageMeter() # loss_fn_glow = GlowLoss() for net in glow: net.train() for net in nn_theta: net.train() with tqdm(total=len(train_loader.dataset)) as progress_bar: for itr, sequence in enumerate(train_loader): sequence = sequence.to(device) b_s = sequence.size(0) # start_index = torch.LongTensor(1).random_(0, 2) # random_patch = sequence[:, start_index:start_index + 2, :, :, :] random_patch = [] for n in range(b_s): start_index = torch.LongTensor(1).random_(0, 2) random_patch.append(sequence[n, start_index:start_index + 2, :, :, :]) random_patch = torch.stack(random_patch, dim=0) t0_zi, _, sldj_0 = flow_forward(random_patch[:, 0, :, :, :], glow) # z_glow = recover_z_shape(t0_zi) # loss_glow = loss_fn_glow(z_glow, sldj_0) t1_zi_out, t1_zi_h, sldj_1 = flow_forward( random_patch[:, 1, :, :, :], glow) h12 = t1_zi_h.l3 mu_l3, logsigma_l3 = nn_theta.l3(t0_zi.l3, h12) g3 = Normal(loc=mu_l3, scale=torch.exp(logsigma_l3)) h1 = t1_zi_h.l2 mu_l2, logsigma_l2 = nn_theta.l2(t0_zi.l2, h1) g2 = Normal(loc=mu_l2, scale=torch.exp(logsigma_l2)) mu_l1, logsigma_l1 = nn_theta.l1(t0_zi.l1) g1 = Normal(loc=mu_l1, scale=torch.exp(logsigma_l1)) total_loss = loss_fn(g1, g2, g3, z=t1_zi_out, sldj=sldj_1, input_dim=random_patch[:, 1, :, :, :].size()) # total_loss = loss #+ loss_glow total_loss.backward() clip_grad_value(optimizer) optimizer.step() optimizer.zero_grad() if scheduler is not None: scheduler.step(global_step) loss_meter.update(total_loss.item(), b_s) progress_bar.set_postfix(nll=loss_meter.avg, bpd=bits_per_dim( random_patch[:, 1, :, :, :], loss_meter.avg), lr=optimizer.param_groups[0]['lr']) progress_bar.update(b_s) global_step += 1 print("global step:", global_step) torch.cuda.empty_cache() #save_model(glow, nn_theta, optimizer, scheduler, epoch, PATH) save_model(glow, nn_theta, optimizer, epoch, PATH) writer.add_scalar('data/train_loss', loss_meter.avg, epoch) writer.add_scalar('data/lr', get_lr(optimizer), epoch) context = next(iter(train_loader)).cuda() flow_inverse_smovement(context, glow, nn_theta, epoch)
def evaluate(val_dataset, model, nll_crit, mse_crit, opt): # set mode model.eval() # predict predictions = [] overall_nll = 0 overall_teacher_forcing_acc, overall_teacher_forcing_cnt = 0, 0 overall_mse = 0 Nav_nll = {'object': 0, 'room': 0} Nav_cnt = {'object': 0, 'room': 0} Nav_teacher_forcing_acc = {'object': 0, 'room': 0} Nav_teacher_forcing_cnt = {'object': 0, 'room': 0} for ix in range(len(val_dataset)): # data = {qid, path_ix, house, id, type, phrase, phrase_emb, ego_feats, next_feats, res_feats, # action_inputs, action_outputs, action_masks, ego_imgs} data = val_dataset[ix] ego_feats = torch.from_numpy(data['ego_feats']).cuda().unsqueeze(0) # (1, L, 3200) phrase_embs = torch.from_numpy(data['phrase_emb']).cuda().unsqueeze(0) # (1, 300) action_inputs = torch.from_numpy(data['action_inputs']).cuda().unsqueeze(0) # (1, L) action_outputs = torch.from_numpy(data['action_outputs']).cuda().unsqueeze(0) # (1, L) action_masks = torch.from_numpy(data['action_masks']).cuda().unsqueeze(0) # (1, L) # forward logprobs, _, pred_feats, _ = model(ego_feats, phrase_embs, action_inputs) # (1, L, #actions), (1, L, 3200) nll_loss = nll_crit(logprobs, action_outputs, action_masks) nll_loss = nll_loss.item() mse_loss = 0 if opt['use_next']: next_feats = torch.from_numpy(data['next_feats']).cuda().unsqueeze(0) # (1, L, 3200) mse_loss = mse_crit(pred_feats, next_feats, action_masks) mse_loss = mse_loss.item() if opt['use_residual']: res_feats = torch.from_numpy(data['res_feats']).cuda().unsqueeze(0) # (1, L, 3200) mse_loss = mse_crit(pred_feats, res_feats, action_masks) mse_loss = mse_loss.item() pred_acts = logprobs[0].argmax(1) # (L, ) # entry entry = {} entry['qid'] = data['qid'] entry['house'] = data['house'] entry['id'] = data['id'] entry['type'] = data['type'] entry['path_ix'] = data['path_ix'] entry['pred_acts'] = pred_acts.tolist() # list of L actions entry['pred_acts_probs'] = torch.exp(logprobs[0]).tolist() # (L, #actions) entry['gd_acts'] = action_outputs[0].tolist() # list of L actions entry['nll_loss'] = nll_loss entry['mse_loss'] = mse_loss # accumulate predictions.append(entry) Nav_nll[data['type']] += nll_loss Nav_cnt[data['type']] += 1 acc, cnt = 0, 0 for pa, ga in zip(entry['pred_acts'], entry['gd_acts']): if pa == ga: acc += 1 cnt += 1 if ga == 3: break Nav_teacher_forcing_acc[data['type']] += acc Nav_teacher_forcing_cnt[data['type']] += cnt overall_nll += nll_loss overall_mse += mse_loss overall_teacher_forcing_acc += acc overall_teacher_forcing_cnt += cnt # print if ix % 10 == 0: print('(%s/%s)qid[%s], id[%s], type[%s], nll_loss=%.3f, mse_loss=%.3f' % \ (ix+1, len(val_dataset), entry['qid'], entry['id'], entry['type'], nll_loss, mse_loss)) # summarize overall_nll /= len(val_dataset) overall_mse /= len(val_dataset) overall_teacher_forcing_acc /= overall_teacher_forcing_cnt for _type in ['object', 'room']: Nav_nll[_type] /= (Nav_cnt[_type]+1e-5) Nav_teacher_forcing_acc[_type] /= (Nav_teacher_forcing_cnt[_type]+1e-5) # return return predictions, overall_nll, overall_teacher_forcing_acc, overall_mse, Nav_nll, Nav_teacher_forcing_acc
def load_explicit_dH(self): # write down explicit 3 rd derivatives # write down explicit 3 rd derivatives out = torch.zeros(self.dim, self.dim, self.dim) tau = numpy.asscalar(torch.exp(self.beta[self.J + 1]).data.numpy()) mu = numpy.asscalar(self.beta[self.J].data.numpy()) theta_tilde = self.beta[:(self.J)].data theta = theta_tilde * tau + mu sigma = self.sigma.data y = self.y.data # case 1 # dH_i i = (0,..,self.J-1] #dtheta_tilde dtheta_tilde dtau' out[:self.J, :self.J, self.J + 1] = torch.diag(-2 * tau * tau / (sigma * sigma)) # dtheta_tilde dmu dtau' out[:self.J, self.J, self.J + 1] = -tau / (sigma * sigma) # dtheta_tilde dtau' dtau' out[:self.J, self.J + 1, self.J + 1] = tau * (y - mu - 4 * theta_tilde * tau) / (sigma * sigma) # fill in rest out[:self.J, self.J + 1, :self.J].copy_(out[:self.J, :self.J, self.J + 1]) out[:self.J, self.J + 1, self.J].copy_(out[:self.J, self.J, self.J + 1]) # case 2 # dH_i i = self.J , beta_i = mu # dmu dtheta_tilde dtau' out[self.J, :self.J, self.J + 1] = -tau / (sigma * sigma) # dmu dtau' dtau' out[self.J, self.J + 1, self.J + 1] = -(theta_tilde / (sigma * sigma)).sum() * tau # fill in out[self.J, self.J + 1, :self.J].copy_(out[self.J, :self.J, self.J + 1]) # case 3 # dH_i i = self.J+1 , beta_i = tau' case_3matrix = torch.zeros(self.dim, self.dim) # dtau' dtheta_tilde dtheta_tilde case_3matrix[:self.J, :self.J].copy_( torch.diag(-2 * tau * tau / (sigma * sigma))) # dtau' dtheta_tilde dmu case_3matrix[:self.J, self.J] = (-tau) / (sigma * sigma) # dtau' dtheta_tilde dtau' case_3matrix[:self.J, self.J + 1] = tau * (y - mu - 4 * tau * theta_tilde) / (sigma * sigma) # dtau' dmu dtau' case_3matrix[self.J, self.J + 1] = -(theta_tilde / (sigma * sigma)).sum() * tau # dtau'dtau'dtau' case_3matrix[self.J + 1, self.J + 1] = 200*tau*tau*(tau*tau-25)/((tau*tau+25)**3) \ + (theta_tilde*tau*(y-mu-4*tau*theta_tilde)/(sigma*sigma)).sum() # fill in case_3matrix[self.J, :self.J].copy_(case_3matrix[:self.J, self.J]) case_3matrix[self.J + 1, :self.J].copy_(case_3matrix[:self.J, self.J + 1]) case_3matrix[self.J + 1, self.J] = case_3matrix[self.J, self.J + 1] out[self.J + 1, :, :].copy_(case_3matrix) out = -out return (out)
def forward(self, x):
    val = torch.exp(-torch.pow(x - self.mu, 2) / (2 * self.sigma**2))
    return val
def train(self, data_dir, epochs, learning_rate): image_datasets, dataloaders, class_to_idx = self.load_data(data_dir) criterion = nn.NLLLoss() optimizer = optim.Adam(self.model.classifier.parameters(), lr=learning_rate) # gpu or cpu self.model.to(self.device) # start training train_losses = [] test_losses = [] for e in range(epochs): running_train_loss = 0 self.model.train() for images, labels in dataloaders['train']: images, labels = images.to(self.device), labels.to(self.device) optimizer.zero_grad() # get log probs log_ps = self.model.forward(images) # get loss loss = criterion(log_ps, labels) running_train_loss += loss.item() # print(f'running_train_loss: {running_train_loss}') # back propagation loss.backward() # adjust weights optimizer.step() else: self.model.eval() running_test_loss = 0 accuracy = 0 with torch.no_grad(): for images, labels in dataloaders['test']: images, labels = images.to(self.device), labels.to(self.device) # get log probs log_ps = self.model.forward(images) # get loss test_loss = criterion(log_ps, labels) running_test_loss += test_loss.item() # print(f'running_test_loss: {running_test_loss}') # turn log probs into real probs ps = torch.exp(log_ps) # calc accuracy top_p, top_class = ps.topk(1, dim=1) equals = top_class == labels.view(*top_class.shape) accuracy += torch.mean(equals.type(torch.FloatTensor)).item() n_test_batches = len(dataloaders['test']) n_train_batches = len(dataloaders['train']) epoch_train_loss = running_train_loss / n_train_batches epoch_test_loss = running_test_loss / n_test_batches train_losses.append(epoch_train_loss) test_losses.append(epoch_test_loss) print(f'Epoch: {e+1}/{epochs}', f'Training Loss {epoch_train_loss:{0}.{4}}', f'Validation Loss {epoch_test_loss:{0}.{4}}', f'Accuracy {(accuracy / n_test_batches):{0}.{4}}' ) #return e+1, train_losses, test_losses self.final_epoch = e+1 self.train_losses = train_losses self.test_losses = test_losses self.class_to_idx = class_to_idx
def log_sum_exp(vec):
    max_score = vec[0, argmax(vec)]
    max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1])
    return max_score + torch.log(torch.sum(torch.exp(vec - max_score_broadcast)))
def sample(self, deterministic=False):
    if deterministic:
        return self.mean
    else:
        return Variable(torch.randn(self.mean.size())) * torch.exp(self.log_var) + self.mean
def sampling(self, mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return eps.mul(std).add_(mu)
def ppo_step(self, weights): cloned_policy = copy.deepcopy(self.policy) for i, weight in enumerate(cloned_policy.parameters()): try: weight.data.copy_(weights[i]) except: weight.data.copy_(weights[i].data) optimizer = optim.Adam(cloned_policy.parameters(), lr=self.ppo_learning_rate) for _ in range(self.n_seq): # s_t, a_t, b(s_t) = v(s_t), \pi_{\theta_{\text{old}}}(a_t|s_t), R_t(\tau) states, actions, rewards, values, logprobs, returns = self.env_function( cloned_policy, max_steps=self.max_steps, gamma=self.gamma) #, stochastic=False) # \hat{A_t}(\tau) = R_t(\tau) - b(s_t) advantages = returns - values advantages = (advantages - advantages.mean()) / advantages.std() for update in range(self.n_updates): sampler = BatchSampler(SubsetRandomSampler( list(range(advantages.shape[0]))), batch_size=self.batch_size, drop_last=False) for i, index in enumerate(sampler): sampled_states = utils.to_var(states[index]) sampled_actions = utils.to_var(actions[index]) sampled_logprobs = utils.to_var(logprobs[index]) sampled_returns = utils.to_var(returns[index]) sampled_advs = utils.to_var(advantages[index]) # v(s_t), \pi_\theta(a_t|s_t), H(\pi(a_t, |a_t)) new_values, new_logprobs, dist_entropy = cloned_policy.evaluate( sampled_states, sampled_actions) ratio = torch.exp(new_logprobs - sampled_logprobs) # print(ratio.sum()) sampled_advs = sampled_advs.view(-1, 1) surrogate1 = ratio * sampled_advs surrogate2 = torch.clamp(ratio, 1 - self.clip, 1 + self.clip) * sampled_advs policy_loss = -torch.min(surrogate1, surrogate2).mean() # # \dfrac{\pi_\theta(a_t|s_t)}{\pi_{\theta_{\text{old}}}(a_t|s_t)} # ratio1 = torch.exp(new_logprobs - sampled_logprobs) # # [\dfrac{\pi_\theta(a_t|s_t)}{\pi_{\theta_{\text{old}}}(a_t|s_t)}]_{\text{clip}} # ratio2 = ratio1.clamp(1-self.clip, 1+self.clip) # # \min\{.,[.]_{\text{clip}}\} # ratio = torch.min(ratio1, ratio2) # # \min\{. \,[.]_{\text{clip}}\} # policy_loss = -sampled_advs.detach() * ratio sampled_returns = sampled_returns.view(-1, 1) new_values = new_values.view(-1, 1) # \frac{1}{2}(v(s_t) - R_t(\tau))^2 value_loss = F.mse_loss(new_values, sampled_returns) loss = policy_loss.mean() + value_loss.mean( ) - self.ent_coeff * dist_entropy.mean() optimizer.zero_grad() loss.backward() optimizer.step() rewards = self.env_function(cloned_policy, stochastic=False, render=False, reward_only=True) new_weights = list(cloned_policy.parameters()) return rewards, new_weights
def test_ntxent_loss(self): temperature = 0.1 loss_funcA = NTXentLoss(temperature=temperature) loss_funcB = NTXentLoss(temperature=temperature, distance=LpDistance()) for dtype in TEST_DTYPES: embedding_angles = [0, 20, 40, 60, 80] embeddings = torch.tensor( [c_f.angle_to_coord(a) for a in embedding_angles], requires_grad=True, dtype=dtype, ).to(self.device) # 2D embeddings labels = torch.LongTensor([0, 0, 1, 1, 2]) lossA = loss_funcA(embeddings, labels) lossB = loss_funcB(embeddings, labels) pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)] neg_pairs = [ (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 4), (3, 0), (3, 1), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), ] total_lossA, total_lossB = 0, 0 for a1, p in pos_pairs: anchor, positive = embeddings[a1], embeddings[p] numeratorA = torch.exp( torch.matmul(anchor, positive) / temperature) numeratorB = torch.exp( -torch.sqrt(torch.sum( (anchor - positive)**2)) / temperature) denominatorA = numeratorA.clone() denominatorB = numeratorB.clone() for a2, n in neg_pairs: if a2 == a1: negative = embeddings[n] else: continue denominatorA += torch.exp( torch.matmul(anchor, negative) / temperature) denominatorB += torch.exp( -torch.sqrt(torch.sum( (anchor - negative)**2)) / temperature) curr_lossA = -torch.log(numeratorA / denominatorA) curr_lossB = -torch.log(numeratorB / denominatorB) total_lossA += curr_lossA total_lossB += curr_lossB total_lossA /= len(pos_pairs) total_lossB /= len(pos_pairs) rtol = 1e-2 if dtype == torch.float16 else 1e-5 self.assertTrue(torch.isclose(lossA, total_lossA, rtol=rtol)) self.assertTrue(torch.isclose(lossB, total_lossB, rtol=rtol))
def erf_approx(self, x):
    exp = -x * x * (4 / math.pi + self.a_for_erf * x * x) / (1 + self.a_for_erf * x * x)
    return torch.sign(x) * torch.sqrt(1 - torch.exp(exp))
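# Hedged sketch (not from the original class): this has the form of Winitzki's closed-form
# approximation erf(x) ~= sign(x) * sqrt(1 - exp(-x^2 (4/pi + a x^2) / (1 + a x^2))).
# With the commonly used constant a ~= 0.147 it stays close to torch.erf; `a` below is an
# assumed stand-in for self.a_for_erf.
import math
import torch

a = 0.147
x = torch.linspace(-3, 3, 101)
inner = -x * x * (4 / math.pi + a * x * x) / (1 + a * x * x)
approx = torch.sign(x) * torch.sqrt(1 - torch.exp(inner))
assert torch.max(torch.abs(approx - torch.erf(x))) < 1e-2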
def log_likelihood_ratio(self, x, new_dist):
    ll_new = new_dist.log_likelihood(x)
    ll_old = self.log_likelihood(x)
    return torch.exp(ll_new - ll_old)
def forward(self, input): return torch.exp(input)
def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std
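# Hedged usage sketch: the reparameterization trick above draws z ~ N(mu, exp(logvar)) while
# keeping gradients with respect to mu and logvar; it plays the same role as
# torch.distributions.Normal(mu, std).rsample(). Shapes below are illustrative.
import torch

mu = torch.zeros(16, 8, requires_grad=True)
logvar = torch.zeros(16, 8, requires_grad=True)
z = reparameterize(mu, logvar)
z.sum().backward()  # gradients reach mu and logvar through the sample
assert mu.grad is not None and logvar.grad is not None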
def train_model(args): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') device_ids=[0,1,2,3] batch_size=args.batch_size input_channels = 1 out_channels = [args.out_channels1, args.out_channels2] kernel_size_cnn = [[args.kernel_size_cnn1, args.kernel_size_cnn2],[args.kernel_size_cnn2, args.kernel_size_cnn1]] stride_size_cnn = [[args.stride_size_cnn1, args.stride_size_cnn2],[args.stride_size_cnn2, args.stride_size_cnn1]] kernel_size_pool = [[args.kernel_size_pool1, args.kernel_size_pool2],[args.kernel_size_pool2, args.kernel_size_pool1]] stride_size_pool = [[args.stride_size_pool1, args.stride_size_pool2],[args.stride_size_pool2, args.stride_size_pool1]] hidden_dim=200 num_layers=2 dropout=0 num_labels=4 hidden_dim_lstm=200 epoch_num=50 num_layers_lstm=2 nfft=[512,1024] weight = args.weight model = MultiSpectrogramModel(input_channels,out_channels, kernel_size_cnn, stride_size_cnn, kernel_size_pool, stride_size_pool, hidden_dim,num_layers,dropout,num_labels, batch_size, hidden_dim_lstm,num_layers_lstm,device, nfft, weight, False) print("============================ Number of parameters ====================================") print(str(sum(p.numel() for p in model.parameters() if p.requires_grad))) path="batch_size:{};out_channels:{};kernel_size_cnn:{};stride_size_cnn:{};kernel_size_pool:{};stride_size_pool:{}; weight:{}".format(args.batch_size,out_channels,kernel_size_cnn,stride_size_cnn,kernel_size_pool,stride_size_pool, weight) with open("/scratch/speech/models/classification/spec_multi_joint_stats_weight.txt","a+") as f: f.write("\n"+"============ model starts ===========") f.write("\n"+"model_parameters: "+str(sum(p.numel() for p in model.parameters() if p.requires_grad))+"\n"+path+"\n") model.cuda() model=DataParallel(model,device_ids=device_ids) model.train() # Use Adam as the optimizer with learning rate 0.01 to make it fast for testing purposes optimizer = optim.Adam(model.parameters(),lr=0.001) optimizer2=optim.SGD(model.parameters(), lr=0.1) scheduler = ReduceLROnPlateau(optimizer=optimizer,factor=0.5, patience=2, threshold=1e-3) #scheduler2=ReduceLROnPlateau(optimizer=optimizer2, factor=0.5, patience=2, threshold=1e-3) #scheduler2 =CosineAnnealingLR(optimizer2, T_max=300, eta_min=0.0001) scheduler3 =MultiStepLR(optimizer, [5,10,15],gamma=0.1) # Load the training data training_data = IEMOCAP(name='mel', nfft=nfft, train=True) train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True, collate_fn=my_collate, num_workers=0, drop_last=True) testing_data = IEMOCAP(name='mel', nfft=nfft, train=False) test_loader = DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=True, collate_fn=my_collate, num_workers=0,drop_last=True) #print("=================") #print(len(training_data)) #print("===================") test_acc=[] train_acc=[] test_loss=[] train_loss=[] for epoch in range(epoch_num): # again, normally you would NOT do 300 epochs, it is toy data #print("===================================" + str(epoch+1) + "==============================================") losses = 0 correct=0 model.train() for j, (input_lstm, input1, input2, target, seq_length) in enumerate(train_loader): #if (j+1)%20==0: #print("=================================Train Batch"+ str(j+1)+str(weight)+"===================================================") model.zero_grad() losses_batch,correct_batch= model(input_lstm, input1, input2, target, seq_length) loss = torch.mean(losses_batch,dim=0) correct_batch=torch.sum(correct_batch,dim=0) losses += loss.item() * 
batch_size loss.backward() weight=model.module.state_dict()["weight"] weight=torch.exp(10*weight)/(1+torch.exp(10*weight)).item() optimizer.step() correct += correct_batch.item() accuracy=correct*1.0/((j+1)*batch_size) losses=losses / ((j+1)*batch_size) #scheduler3.step() losses_test = 0 correct_test = 0 #torch.save(model.module.state_dict(), "/scratch/speech/models/classification/spec_full_joint_checkpoint_epoch_{}.pt".format(epoch+1)) model.eval() with torch.no_grad(): for j,(input_lstm, input1, input2, target, seq_length) in enumerate(test_loader): #if (j+1)%10==0: print("=================================Test Batch"+ str(j+1)+ "===================================================") #input_lstm = pad_sequence(sequences=input_lstm,batch_first=True) losses_batch,correct_batch= model(input_lstm,input1, input2, target, seq_length) loss = torch.mean(losses_batch,dim=0) correct_batch=torch.sum(correct_batch,dim=0) losses_test += loss.item() * batch_size correct_test += correct_batch.item() #print("how many correct:", correct_test) accuracy_test = correct_test * 1.0 / ((j+1)*batch_size) losses_test = losses_test / ((j+1)*batch_size) # data gathering test_acc.append(accuracy_test) train_acc.append(accuracy) test_loss.append(losses_test) train_loss.append(losses) print("Epoch: {}-----------Training Loss: {} -------- Testing Loss: {} -------- Training Acc: {} -------- Testing Acc: {}".format(epoch+1,losses,losses_test, accuracy, accuracy_test)+"\n") with open("/scratch/speech/models/classification/spec_multi_joint_stats_weight.txt","a+") as f: #f.write("Epoch: {}-----------Training Loss: {} -------- Testing Loss: {} -------- Training Acc: {} -------- Testing Acc: {}".format(epoch+1,losses,losses_test, accuracy, accuracy_test)+"\n") if epoch==epoch_num-1: f.write("Best Accuracy:{:06.5f}".format(max(test_acc))+"\n") f.write("Average Top 10 Accuracy:{:06.5f}".format(np.mean(np.sort(np.array(test_acc))[-10:]))+"\n") f.write("=============== model ends ==================="+"\n") print("success:{}, Best Accuracy:{}".format(path,max(test_acc)))
def sigmoid(z): return 1 / (1 + torch.exp(-z))
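# Hedged note (assumes the helper above is importable): this is the logistic function; it matches
# torch.sigmoid, which is the more convenient built-in for tensors.
import torch

z = torch.linspace(-5, 5, 11)
assert torch.allclose(sigmoid(z), torch.sigmoid(z))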
def k2(kesi, f_x, f_y, mean_logk, lamda):
    logk = mean_logk + torch.sum(torch.sqrt(lamda) * f_x * f_y * kesi, 1)
    kk = torch.exp(logk)
    return kk
def reparameterize(self, mu, logvar):  # done
    std = torch.exp(0.5 * logvar)
    u = torch.randn_like(std)
    return mu + u * std
def evaluate_meta_parameters(model, test_loader, args, train=False, name=None, n_recons=8, n_steps=100):
    print(' - Evaluate meta parameters.')
    latent_dims = model.ae_model.latent_dims
    fig = plt.figure(figsize=(10, 20))
    outer = gridspec.GridSpec(latent_dims + 1, 3, wspace=0.2, hspace=0.4)
    # Select 32 random points from the test set
    fixed_data, fixed_params, fixed_meta, fixed_audio = next(iter(test_loader))
    in_data = fixed_data[np.random.randint(0, fixed_data.shape[0], size=(32))].to(args.device)
    # Find corresponding params
    _, in_data, _ = model.ae_model(in_data)
    if (args.semantic_dim > -1):
        in_data, _ = model.disentangling(in_data)
    z_var = 0
    for l in range(latent_dims):
        var_z = torch.linspace(-4, 4, n_steps)
        fake_batch = torch.zeros(n_steps, latent_dims)
        fake_batch[:, l] = var_z
        fake_batch = fake_batch.to(args.device)
        # Generate VAE outputs
        x_tilde_full = model.ae_model.decode(fake_batch)
        # Perform regression
        out = model.regression_model(fake_batch)
        if (args.loss in ['multinomial']):
            tmp = out.view(out.shape[0], -1, latent_dims).max(dim=1)[1]
            out = tmp.float() / (args.n_classes - 1.)
        if (args.loss in ['multi_mse']):
            out = out.view(out.shape[0], -1, latent_dims)
            out = out[:, -1, :]
        # Select parameters
        var_param = out.std(dim=0)
        idx = torch.argsort(var_param, descending=True)
        # To keep coloring consistent, blank out all parameters beyond the 5 most varying
        out[:, idx[5:]] = torch.zeros(out.shape[0], len(idx[5:])).to(out.device)
        ax = plt.Subplot(fig, outer[l * 3])
        ax.plot(out.detach().cpu().numpy())
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if (hasattr(args, 'z_vars')):
            z_var = args.z_vars[l].item()
        ax.set_title('$z_{' + str(l) + '}$ - %.2f - %.3f' % ((z_var), (var_param[idx[:5]].mean().item())))
        fig.add_subplot(ax)
        # Reconstruct a handful of points
        fake_batch = torch.zeros(n_recons, latent_dims)
        fake_batch[:, l] = torch.linspace(-4, 4, n_recons)
        fake_batch = fake_batch.to(args.device)
        # Reconstruct with the VAE
        x_tilde = model.ae_model.decode(fake_batch)
        # Reconstruct with the synth engine
        if (args.synthesize == True and train == False and ((var_param[idx[:5]].mean().item() > 0.15) or ((args.semantic_dim > -1) and (l == 0)))):
            out_batch = model.regression_model(fake_batch)
            if (args.loss in ['multinomial']):
                tmp = out_batch.view(out_batch.shape[0], -1, args.latent_dims).max(dim=1)[1]
                out_batch = tmp.float() / (args.n_classes - 1.)
            if (args.loss in ['multi_mse']):
                out_batch = out_batch.view(out_batch.shape[0], -1, args.latent_dims)
                out_batch = out_batch[:, -1, :]
            print(' - Generate audio for latent ' + str(l))
            from synth.synthesize import synthesize_batch
            audio = synthesize_batch(out_batch.cpu(), test_loader.dataset.final_params, args.engine, args.generator, args.param_defaults, args.rev_idx, orig_wave=None, name=None)
            save_batch_audio(audio, args.base_audio + '_meta_parameters_z' + str(l) + '_v' + str(var_param[idx[:5]].mean().item()))
            # Now check how this parameter acts on various sounds
            n_ins = 32 if ((args.semantic_dim > -1) and (l == 0)) else 4
            for s in range(n_ins):
                print(' - Generate audio for meta-modified ' + str(s))
                tmp_data = in_data[s].clone().unsqueeze(0).repeat(n_recons, 1)
                tmp_data[:, l] = torch.linspace(-4, 4, n_recons)
                tmp_data = model.regression_model(tmp_data)
                if (args.loss in ['multinomial']):
                    tmp = tmp_data.view(tmp_data.shape[0], -1, args.latent_dims).max(dim=1)[1]
                    tmp_data = tmp.float() / (args.n_classes - 1.)
                if (args.loss in ['multi_mse']):
                    tmp_data = tmp_data.view(tmp_data.shape[0], -1, args.latent_dims)
                    tmp_data = tmp_data[:, -1, :]
                # Synthesize meta-modified test example
                audio = synthesize_batch(tmp_data.cpu(), test_loader.dataset.final_params, args.engine, args.generator, args.param_defaults, args.rev_idx, orig_wave=None, name=None)
                save_batch_audio(audio, args.base_audio + '_meta_parameters_z' + str(l) + '_b' + str(s))
        if len(x_tilde.shape) > 3:
            x_tilde = x_tilde[:, 0]
        inner = gridspec.GridSpecFromSubplotSpec(1, 8, subplot_spec=outer[l * 3 + 1], wspace=0.1, hspace=0.1)
        for n in range(n_recons):
            ax = plt.Subplot(fig, inner[n])
            ax.imshow(x_tilde[n].detach().cpu().numpy(), aspect='auto')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            fig.add_subplot(ax)
        # Unscale and un-log output
        x_tilde_full = (x_tilde_full * test_loader.dataset.vars["mel"]) + test_loader.dataset.means["mel"]
        if (args.data in ['mel', "mel_mfcc"]):
            x_tilde_full = torch.exp(x_tilde_full)
        x_tilde_full = x_tilde_full[:, 0]
        # Compute descriptors
        descs = compute_descriptors(x_tilde_full.detach().cpu().numpy())
        ax = plt.Subplot(fig, outer[l * 3 + 2])
        ax.plot(descs)
        fig.add_subplot(ax)
    # Just fake plots for legends
    fake = torch.linspace(1, len(idx), len(idx)).repeat(out.shape[0], 1)
    ax = plt.Subplot(fig, outer[latent_dims * 3])
    ax.plot(fake.numpy())
    ax.legend(test_loader.dataset.final_params)
    fig.add_subplot(ax)
    fake = torch.linspace(1, len(descriptors), len(descriptors)).repeat(out.shape[0], 1)
    ax = plt.Subplot(fig, outer[latent_dims * 3 + 2])
    ax.plot(fake.numpy())
    ax.legend(descriptors)
    fig.add_subplot(ax)
    # Save the final figure
    if (name is not None):
        plt.savefig(name + '_meta_parameters.pdf')
        plt.close()
    if (train == False and name is None):
        plt.savefig(args.base_img + '_meta_parameters.pdf')
        plt.close()
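# A stripped-down sketch of the latent-traversal pattern used in
# evaluate_meta_parameters, with a toy linear decoder standing in for
# model.ae_model.decode (the sizes and the traversed dimension are made up):
# one latent dimension is swept over [-4, 4] while the others stay at zero,
# then every point of the sweep is decoded.
import torch
import torch.nn as nn

latent_dims, n_steps, l = 8, 100, 3        # hypothetical sizes and traversed dim
toy_decoder = nn.Linear(latent_dims, 64)   # stand-in for the VAE decoder

fake_batch = torch.zeros(n_steps, latent_dims)
fake_batch[:, l] = torch.linspace(-4, 4, n_steps)
x_tilde = toy_decoder(fake_batch)          # shape (n_steps, 64)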
running_loss += loss.item()
if steps % print_every == 0:
    test_loss = 0
    accuracy = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model.forward(inputs)
            batch_loss = criterion(logps, labels)
            test_loss += batch_loss.item()
            # Calculate accuracy
            ps = torch.exp(logps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
    print(f"Epoch {epoch+1}/{epochs}.. "
          f"Train loss: {running_loss/print_every:.3f}.. "
          f"Test loss: {test_loss/len(testloader):.3f}.. "
          f"Test accuracy: {accuracy/len(testloader):.3f}")
    running_loss = 0
    model.train()

PATH = Path("./model_q1.pth")
torch.save(model, PATH)
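# A self-contained sketch of the accuracy computation above, assuming the model
# ends in LogSoftmax so logps holds log-probabilities (the tensors here are
# made up for illustration).
import torch

logps = torch.log_softmax(torch.randn(5, 10), dim=1)  # fake log-probabilities
labels = torch.randint(0, 10, (5,))
ps = torch.exp(logps)                                  # back to probabilities
top_p, top_class = ps.topk(1, dim=1)                   # predicted class per row
equals = top_class == labels.view(*top_class.shape)
accuracy = torch.mean(equals.type(torch.FloatTensor)).item()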
def run_SAM(in_data, skeleton=None, is_mixed=False, device="cpu",
            train=10000, test=1, batch_size=-1, lr_gen=.001, lr_disc=.01,
            lambda1=0.001, lambda2=0.0000001, nh=None, dnh=None,
            verbose=True, losstype="fgan", functionalComplexity="n_hidden_units",
            sampletype="sigmoidproba", dagstart=0, dagloss=False,
            dagpenalization=0.05, dagpenalization_increase=0.0,
            categorical_threshold=50, linear=False,
            numberHiddenLayersG=2, numberHiddenLayersD=2, idx=0):
    list_nodes = list(in_data.columns)
    if is_mixed:
        onehotdata = []
        for i in range(len(list_nodes)):
            # print(pd.get_dummies(in_data.iloc[:, i]).values.shape[1])
            if pd.get_dummies(in_data.iloc[:, i]).values.shape[1] < categorical_threshold:
                onehotdata.append(pd.get_dummies(in_data.iloc[:, i]).values)
            else:
                onehotdata.append(scale(in_data.iloc[:, [i]].values))
        cat_sizes = [i.shape[1] for i in onehotdata]
        data = np.concatenate(onehotdata, 1)
    else:
        data = scale(in_data[list_nodes].values)
        cat_sizes = None

    nb_var = len(list_nodes)
    data = data.astype('float32')
    data = th.from_numpy(data).to(device)
    if batch_size == -1:
        batch_size = data.shape[0]
    lambda1 = lambda1 / data.shape[0]
    lambda2 = lambda2 / data.shape[0]
    rows, cols = data.size()

    # Get the list of indexes to ignore
    if skeleton is not None:
        skeleton = th.from_numpy(skeleton.astype('float32'))

    sam = SAM_generators((batch_size, cols), nh, skeleton=skeleton,
                         cat_sizes=cat_sizes, linear=linear,
                         numberHiddenLayersG=numberHiddenLayersG).to(device)
    sam.reset_parameters()
    g_optimizer = th.optim.Adam(list(sam.parameters()), lr=lr_gen)

    if losstype != "mse":
        discriminator = SAM_discriminator(
            cols, dnh, numberHiddenLayersD,
            mask=sam.categorical_matrix,
        ).to(device)
        discriminator.reset_parameters()
        d_optimizer = th.optim.Adam(discriminator.parameters(), lr=lr_disc)
        criterion = th.nn.BCEWithLogitsLoss()
    else:
        criterion = th.nn.MSELoss()
        disc_loss = th.zeros(1)

    if sampletype == "sigmoid":
        graph_sampler = SimpleMatrixConnection(len(list_nodes), mask=skeleton).to(device)
    elif sampletype == "sigmoidproba":
        graph_sampler = MatrixSampler(len(list_nodes), mask=skeleton, gumble=False).to(device)
    elif sampletype == "gumbleproba":
        graph_sampler = MatrixSampler(len(list_nodes), mask=skeleton, gumble=True).to(device)
    else:
        raise ValueError('Unknown Graph sampler')

    graph_sampler.weights.data.fill_(2)
    graph_optimizer = th.optim.Adam(graph_sampler.parameters(), lr=lr_gen)

    if not linear and functionalComplexity == "n_hidden_units":
        neuron_sampler = MatrixSampler((nh, len(list_nodes)), mask=False, gumble=True).to(device)
        neuron_optimizer = th.optim.Adam(list(neuron_sampler.parameters()), lr=lr_gen)

    _true = th.ones(1).to(device)
    _false = th.zeros(1).to(device)
    output = th.zeros(len(list_nodes), len(list_nodes)).to(device)

    data_iterator = DataLoader(data, batch_size=batch_size, shuffle=True, drop_last=True)

    # RUN
    if verbose:
        pbar = tqdm(range(train + test))
    else:
        pbar = range(train + test)

    for epoch in pbar:
        for i_batch, batch in enumerate(data_iterator):
            if losstype != "mse":
                d_optimizer.zero_grad()

            # Train the discriminator
            drawn_graph = graph_sampler()
            if not linear and functionalComplexity == "n_hidden_units":
                drawn_neurons = neuron_sampler()

            if linear or functionalComplexity != "n_hidden_units":
                generated_variables = sam(batch, drawn_graph)
            else:
                generated_variables = sam(batch, drawn_graph, drawn_neurons)

            if losstype != "mse":
                disc_vars_d = discriminator(generated_variables.detach(), batch)
                true_vars_disc = discriminator(batch)

                if losstype == "gan":
                    disc_loss = sum([criterion(gen, _false.expand_as(gen)) for gen in disc_vars_d]) / nb_var \
                                + criterion(true_vars_disc, _true.expand_as(true_vars_disc))
                    # Gen losses per generator: multiply by the number of channels
                elif losstype == "fgan":
                    disc_loss = th.mean(th.exp(disc_vars_d - 1), [0, 2]).sum() / nb_var - th.mean(true_vars_disc)

                disc_loss.backward()
                d_optimizer.step()

            ### OPTIMIZING THE GENERATORS
            g_optimizer.zero_grad()
            graph_optimizer.zero_grad()
            if not linear and functionalComplexity == "n_hidden_units":
                neuron_optimizer.zero_grad()

            if losstype == "mse":
                gen_loss = criterion(generated_variables, batch)
            else:
                disc_vars_g = discriminator(generated_variables, batch)
                if losstype == "gan":
                    # Gen losses per generator: multiply by the number of channels
                    gen_loss = sum([criterion(gen, _true.expand_as(gen)) for gen in disc_vars_g])
                elif losstype == "fgan":
                    gen_loss = -th.mean(th.exp(disc_vars_g - 1), [0, 2]).sum()

            filters = graph_sampler.get_proba()
            struc_loss = lambda1 * drawn_graph.sum()

            if linear:
                func_loss = 0
            else:
                if functionalComplexity == "n_hidden_units":
                    func_loss = lambda2 * drawn_neurons.sum()
                elif functionalComplexity == "l2_norm":
                    l2_reg = th.Tensor([0.]).to(device)
                    for param in sam.parameters():
                        l2_reg += th.norm(param)
                    func_loss = lambda2 * l2_reg

            regul_loss = struc_loss + func_loss

            # Optional: prune edges and sam parameters before dag search
            if dagloss and epoch > train * dagstart:
                dag_constraint = notears_constr(filters * filters)
                # dag_constraint = notears_constr(drawn_graph)
                loss = gen_loss + regul_loss + (dagpenalization + (epoch - train * dagstart) * dagpenalization_increase) * dag_constraint
            else:
                loss = gen_loss + regul_loss

            if verbose and epoch % 20 == 0 and i_batch == 0:
                pbar.set_postfix(gen=gen_loss.item() / cols,
                                 disc=disc_loss.item(),
                                 regul_loss=regul_loss.item(),
                                 tot=loss.item())

            if epoch < train + test - 1:
                loss.backward()

            if epoch >= train:
                output.add_(filters.data)

            g_optimizer.step()
            graph_optimizer.step()
            if not linear and functionalComplexity == "n_hidden_units":
                neuron_optimizer.step()

    return output.div_(test).cpu().numpy()
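# A hedged usage sketch for run_SAM, assuming the surrounding module also provides
# SAM_generators, SAM_discriminator, MatrixSampler, notears_constr and the imports
# used above (th, np, pd, scale, DataLoader, tqdm). The data, epoch counts and
# hidden sizes below are illustrative, not recommended settings.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x = rng.normal(size=500)
df = pd.DataFrame({"x": x, "y": 2 * x + rng.normal(size=500)})
# Returns a (n_vars, n_vars) matrix of edge scores averaged over the `test` epochs.
adj = run_SAM(df, train=500, test=1, nh=20, dnh=200, verbose=False)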
def image_histogram2d(
    image: torch.Tensor,
    min: float = 0.0,
    max: float = 255.0,
    n_bins: int = 256,
    bandwidth: Optional[float] = None,
    centers: Optional[torch.Tensor] = None,
    return_pdf: bool = False,
    kernel: str = "triangular",
    eps: float = 1e-10,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Estimate the histogram of the input image(s).

    The calculation uses triangular kernel density estimation.

    Args:
        image: Input tensor to compute the histogram with shape
            :math:`(H, W)`, :math:`(C, H, W)` or :math:`(B, C, H, W)`.
        min: Lower end of the interval (inclusive).
        max: Upper end of the interval (inclusive). Ignored when :attr:`centers` is specified.
        n_bins: The number of histogram bins. Ignored when :attr:`centers` is specified.
        bandwidth: Smoothing factor. If not specified or equal to -1,
            :math:`bandwidth = (max - min) / n\_bins`.
        centers: Centers of the bins with shape :math:`(n_bins,)`. If not specified
            or empty, it is calculated as centers of equal width bins of [min, max] range.
        return_pdf: If True, also return probability densities for each bin.
        kernel: kernel to perform kernel density estimation
            (``'triangular'``, ``'gaussian'``, ``'uniform'``, ``'epanechnikov'``).

    Returns:
        Computed histogram of shape :math:`(bins)`, :math:`(C, bins)` or :math:`(B, C, bins)`.
        Computed probability densities of the same shape if return_pdf is ``True``;
        a tensor of zeros with the shape of the histogram otherwise.
    """
    if image is not None and not isinstance(image, torch.Tensor):
        raise TypeError(f"Input image type is not a torch.Tensor. Got {type(image)}.")

    if centers is not None and not isinstance(centers, torch.Tensor):
        raise TypeError(f"Bins' centers type is not a torch.Tensor. Got {type(centers)}.")

    if centers is not None and len(centers.shape) > 0 and centers.dim() != 1:
        raise ValueError(f"Bins' centers must be a torch.Tensor of the shape (n_bins,). Got {centers.shape}.")

    if not isinstance(min, float):
        raise TypeError(f"Type of lower end of the range is not a float. Got {type(min)}.")

    if not isinstance(max, float):
        raise TypeError(f"Type of upper end of the range is not a float. Got {type(max)}.")

    if not isinstance(n_bins, int):
        raise TypeError(f"Type of number of bins is not an int. Got {type(n_bins)}.")

    if bandwidth is not None and not isinstance(bandwidth, float):
        raise TypeError(f"Bandwidth type is not a float. Got {type(bandwidth)}.")

    if not isinstance(return_pdf, bool):
        raise TypeError(f"Return_pdf type is not a bool. Got {type(return_pdf)}.")

    if bandwidth is None:
        bandwidth = (max - min) / n_bins
    if centers is None:
        centers = min + bandwidth * (torch.arange(n_bins, device=image.device, dtype=image.dtype).float() + 0.5)
    centers = centers.reshape(-1, 1, 1, 1, 1)
    u = torch.abs(image.unsqueeze(0) - centers) / bandwidth
    if kernel == "triangular":
        mask = (u <= 1).to(u.dtype)
        kernel_values = (1 - u) * mask
    elif kernel == "gaussian":
        kernel_values = torch.exp(-0.5 * u ** 2)
    elif kernel == "uniform":
        mask = (u <= 1).to(u.dtype)
        kernel_values = torch.ones_like(u, dtype=u.dtype, device=u.device) * mask
    elif kernel == "epanechnikov":
        mask = (u <= 1).to(u.dtype)
        kernel_values = (1 - u ** 2) * mask
    else:
        raise ValueError(f"Kernel must be 'triangular', 'gaussian', 'uniform' or 'epanechnikov'. Got {kernel}.")

    hist = torch.sum(kernel_values, dim=(-2, -1)).permute(1, 2, 0)
    if return_pdf:
        normalization = torch.sum(hist, dim=-1, keepdim=True) + eps
        pdf = hist / normalization
        if image.dim() == 2:
            hist = hist.squeeze()
            pdf = pdf.squeeze()
        elif image.dim() == 3:
            hist = hist.squeeze(0)
            pdf = pdf.squeeze(0)
        return hist, pdf

    if image.dim() == 2:
        hist = hist.squeeze()
    elif image.dim() == 3:
        hist = hist.squeeze(0)

    return hist, torch.zeros_like(hist, dtype=hist.dtype, device=hist.device)