def generate_samples(self, init_stroke, max_len):
    prev_state = None
    prev_strokes = []
    init_stroke = init_stroke.unsqueeze(0).unsqueeze(0)
    for i in range(max_len):
        e, pi, mu1, mu2, sig1, sig2, ro, prev_state = self.forward(
            init_stroke, prev_state)
        # squeeze: 1 x seq_len x dim -> seq_len x dim
        e = e.squeeze(0)
        samples = self.multibivariate_sampling(
            pi.squeeze(0), mu1.squeeze(0), mu2.squeeze(0),
            sig1.squeeze(0), sig2.squeeze(0), ro.squeeze(0))
        # sample the end-of-stroke bit
        e = Bernoulli(e).sample()
        init_stroke = torch.cat((e, samples.cuda()), 1)
        prev_strokes.append(init_stroke)
        init_stroke = init_stroke.unsqueeze(0)
    return torch.stack(prev_strokes, 1)
def __init__(self, n_features, mid_dim, embed_agents,
             policy_type='epsilon_greedy', epsilon_greedy=0.1,
             eval_epsilon_greedy=0.0):
    super(Speaker, self).__init__()
    self._embed_agents = embed_agents
    self._epsilon_greedy = epsilon_greedy
    self._eval_epsilon_greedy = eval_epsilon_greedy
    self._n_features = n_features
    self._mid_dim = mid_dim
    self._n_layers = 2
    self._policy_type = policy_type
    # Used to generate agent embedding.
    self._lstm = nn.LSTM(n_features, mid_dim, batch_first=True,
                         num_layers=self._n_layers)
    # Q-learning.
    self._Q = nn.Sequential(nn.Linear(n_features + mid_dim, mid_dim),
                            nn.BatchNorm1d(mid_dim), nn.ELU(),
                            nn.Linear(mid_dim, n_features))
    self._h = nn.Parameter(torch.empty((self._n_layers, 1, mid_dim)))
    nn.init.uniform_(self._h, -0.1, 0.1)
    self._c = nn.Parameter(torch.empty((self._n_layers, 1, mid_dim)))
    nn.init.uniform_(self._c, -0.1, 0.1)
    # Used to embed state.
    self._state = nn.Sequential(nn.Linear(n_features, n_features),
                                nn.BatchNorm1d(n_features), nn.ELU(),
                                nn.Linear(n_features, n_features),
                                nn.BatchNorm1d(n_features), nn.ELU())
    # Attribute selection policy.
    self._selection_policy = nn.Sequential(
        nn.Linear(n_features + mid_dim, mid_dim),
        nn.BatchNorm1d(mid_dim), nn.ELU(),
        nn.Linear(mid_dim, n_features), nn.Softmax(dim=1))
    self._log_probs = None
    # Epsilon-greedy attribute selection policy.
    self._epsilon = Bernoulli(torch.tensor([epsilon_greedy]))
    self._eval_epsilon = Bernoulli(torch.tensor([eval_epsilon_greedy]))
def forward(self, batch_inputs):
    # Embed each feature
    merged_input = []
    # Get the longest feature; assume the longest column is the one
    # that receives word dropout below.
    max_words = max(self.feature_lengths)
    for input, feature_len in zip(batch_inputs, self.feature_lengths):
        concat_sentence = torch.tensor(input, dtype=torch.long)
        embeddings = self.embeddings(concat_sentence)
        if feature_len == max_words:
            # Success rate (probability of keeping a word) is 1 - dropout rate
            bernoulli = Bernoulli(1 - self.dropout_rate)
            rw = bernoulli.sample(
                torch.Size((embeddings.shape[0], embeddings.shape[1]))).numpy()
            # Zero out the embeddings where rw is zero
            embeddings = torch.from_numpy(np.expand_dims(rw, 2)) * embeddings
        merged_input.append(embeddings)
    # Final output
    final_input = torch.cat(merged_input, dim=1)
    final_input = final_input.view(len(final_input), -1)
    out = torch.tanh(self.linear1(final_input))
    out = torch.tanh(self.linear2(out))
    out = torch.tanh(self.linear3(out))
    out = torch.tanh(self.linear4(out))
    out = F.relu(self.linear5(out))
    out = self.output_layer(out)
    return out
def __init__(self, args, model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
             decay_factor=1., attack_ball='Linf', eps=0.3, eps_iter=0.01,
             n_iter=50, clip_max=1., clip_min=0.):
    super(DIM, self).__init__(model, loss_fn=loss_fn, eps=eps, nb_iter=n_iter,
                              decay_factor=decay_factor, eps_iter=eps_iter,
                              clip_min=clip_min, clip_max=clip_max)
    self.model = model
    self.eps = eps
    self.eps_iter = eps_iter
    self.n_iter = n_iter
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.attack_ball = attack_ball
    self.momentum = args.momentum
    self.transform_prob = args.transform_prob
    # With probability transform_prob, apply the input diversity transform.
    self.apply_transform = Bernoulli(torch.tensor([self.transform_prob]))
    self.resize_factor = args.resize_factor
    self.args = args
def entropy_loss(arch_params):
    loss = []
    for arch_param in arch_params:
        probs = Bernoulli(logits=arch_param)
        loss.append(probs.entropy().mean())
    loss = torch.mean(torch.stack(loss))
    return loss
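# Usage sketch for entropy_loss (not from the original source): the shapes
# below are illustrative; any list of real-valued logit tensors works.
import torch

arch_params = [torch.randn(4, 8), torch.randn(4, 16)]
reg = entropy_loss(arch_params)  # scalar: average Bernoulli entropy across groups
print(reg.item())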
def generate_samples(self, init_stroke, char, max_len):
    # char: 1 x char_len
    prev_state = None
    prev_offset = None
    prev_w = None
    init_stroke = init_stroke.unsqueeze(0).unsqueeze(0).float().cuda()  # 1 x 1 x 3
    char_mask = torch.ones_like(char)
    strokes = []
    for i in range(max_len):
        e, pi, mu1, mu2, sig1, sig2, ro, prev_state, phi, prev_offset, prev_w = \
            self.forward(init_stroke, char, char_mask, prev_state,
                         prev_offset, prev_w)
        e = e.squeeze(0)
        sample_mixture = self.multibivariate_sampling(
            pi.squeeze(0), mu1.squeeze(0), mu2.squeeze(0),
            sig1.squeeze(0), sig2.squeeze(0), ro.squeeze(0))
        e = Bernoulli(e).sample()
        init_stroke = torch.cat((e, sample_mixture.cuda()), 1)  # 1 x 3
        strokes.append(init_stroke)
        init_stroke = init_stroke.unsqueeze(0)
        # Stop once attention has moved past the last character.
        if phi.max(1)[1].item() > char.shape[1] - 1:
            break
    return torch.stack(strokes, 1)
def __init__(self, params, alpha=0.001, B=100, p=5, sigma=1, delta=0.1,
             eta=0.01):
    if alpha < 0.0:
        raise ValueError("Invalid learning rate: {}".format(alpha))
    if B < 0.0:
        raise ValueError("Invalid B value: {}".format(B))
    if p < 0.0:
        raise ValueError("Invalid p value: {}".format(p))
    if sigma < 0.0:
        raise ValueError("Invalid sigma value: {}".format(sigma))
    if delta < 0.0:
        raise ValueError("Invalid delta value: {}".format(delta))
    if eta < 0.0:
        raise ValueError("Invalid eta value: {}".format(eta))
    self.delta = delta
    self.eta = eta
    defaults = dict(params=params, alpha=alpha, B=B, p=p, sigma=sigma,
                    delta=delta, eta=eta)
    super(Natasha2, self).__init__(params, defaults)
    self.bern = Bernoulli(torch.tensor([0.5]))
def __init__(self, **kwargs):
    self.gamma = kwargs.get("gamma", 0.5)
    self.epsilon = kwargs.get("epsilon", 0.1)
    self.explore = Bernoulli(torch.tensor(self.epsilon))
    self.visits = [0, 0, 0]
    self.rewards = [0., 0., 0.]
def probabalistic_greedy(self, rewards, resample_flag=False):
    # compute normalized importance weights (softmax over summed rewards)
    iw = torch.sum(rewards, dim=1)
    iw = iw - torch.max(iw)
    iw = torch.exp(iw)
    iw = iw.reshape(-1)
    iw = iw / torch.sum(iw)
    # if we are not re-sampling during this call, return them directly
    if not resample_flag:
        return iw
    # otherwise, prune the weights stochastically and renormalize
    next_iw_dist = Bernoulli(iw ** self.alpha)
    # keep each weight with probability iw**alpha, zero it otherwise
    iw = next_iw_dist.sample() * iw
    # rescale everything
    iw = iw / torch.sum(iw)
    return iw
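# Standalone sketch of the resampling branch above, with a hypothetical
# alpha standing in for self.alpha; torch.softmax performs the same
# stabilized exp-normalize used in the method.
import torch
from torch.distributions import Bernoulli

alpha = 2.0
rewards = torch.randn(8, 5)
iw = torch.softmax(rewards.sum(dim=1).reshape(-1), dim=0)
keep = Bernoulli(iw ** alpha).sample()  # keep weight i with prob iw_i**alpha
iw = keep * iw
iw = iw / torch.sum(iw)  # renormalize (assumes at least one weight survives)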
def __init__(self, in_dim, device, z_dim=64, noise_dim=[150, 100, 50]):
    super(SIVAE, self).__init__()
    self.noise = Bernoulli(probs=0.5)
    self.z_dim = z_dim
    self.noise_dim = noise_dim
    self.device = device
    self.hiddel_l3 = nn.Sequential(nn.Linear(in_dim + noise_dim[0], 500),
                                   nn.ReLU(), nn.Linear(500, 500), nn.ReLU(),
                                   nn.Linear(500, noise_dim[0]), nn.ReLU())
    self.hiddel_l2 = nn.Sequential(
        nn.Linear(in_dim + noise_dim[0] + noise_dim[1], 500), nn.ReLU(),
        nn.Linear(500, 500), nn.ReLU(),
        nn.Linear(500, noise_dim[1]), nn.ReLU())
    self.hiddel_l1 = nn.Sequential(
        nn.Linear(in_dim + noise_dim[1] + noise_dim[2], 500), nn.ReLU(),
        nn.Linear(500, 500), nn.ReLU(),
        nn.Linear(500, 500), nn.ReLU())
    self.mu = nn.Linear(500, z_dim)
    self.z_logvar = nn.Sequential(nn.Linear(in_dim, 500), nn.ReLU(),
                                  nn.Linear(500, 500), nn.ReLU(),
                                  nn.Linear(500, z_dim))
    self.decoder = nn.Sequential(nn.Linear(z_dim, 500), nn.ReLU(),
                                 nn.Linear(500, 500), nn.ReLU(),
                                 nn.Linear(500, 500), nn.ReLU(),
                                 nn.Linear(500, in_dim))
def optimizer_step(self, sample):
    (sample_observation_initial_context, sample_action_T,
     sample_next_observation_T, sample_reward_T) = sample
    image_probs, reward_probs = self.model.forward_multiple(
        sample_observation_initial_context, sample_action_T)
    # reward loss
    true_reward = numerical_reward_to_bit_array(
        sample_reward_T, self.reward_prediction_bits, self.use_cuda)
    reward_loss = self.reward_criterion(reward_probs, true_reward)
    # image loss
    reconstruction_loss = self.frame_criterion(image_probs,
                                               sample_next_observation_T)
    loss = reconstruction_loss + self.reward_loss_coef * reward_loss
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # The minimal cross entropy between distributions p and q is the entropy
    # of p, so when they are equal the loss equals the entropy of p.
    true_entropy = Bernoulli(probs=sample_next_observation_T).entropy()
    normalized_frame_loss = reconstruction_loss - true_entropy.mean()
    return (normalized_frame_loss, reward_loss), (image_probs, reward_probs)
def forward(self, seq, seq_lens):
    if self.training:
        # mask is 1 (keep the token) with probability self.word_dropout
        word_dropout = Bernoulli(self.word_dropout).sample(seq.shape)
        word_dropout = word_dropout.type(torch.LongTensor)
        seq = seq.cpu()
        seq = seq * word_dropout
        seq = seq.cuda()
    embedded_seq = self.embed(seq)
    embedded_seq = self.input_dropout(embedded_seq)
    encoder_input = nn.utils.rnn.pack_padded_sequence(embedded_seq, seq_lens,
                                                      batch_first=True,
                                                      enforce_sorted=False)
    encoder_hidden, (h_0, c_0) = self.encoder(encoder_input)
    encoder_hidden, _ = nn.utils.rnn.pad_packed_sequence(encoder_hidden,
                                                         batch_first=True)
    encoder_hidden = self.output_dropout(encoder_hidden)
    final_hidden = encoder_hidden[torch.arange(encoder_hidden.size(0)),
                                  seq_lens - 1, :]
    # TODO Highway layers
    return final_hidden, encoder_hidden
class ucbJanken():
    '''UCB algorithm with epsilon-greedy selection

    kwargs:
        gamma (float): exploration constant
        epsilon (float): probability of choosing randomly
        reset_prob (float): probability of resetting
    '''

    def __init__(self, **kwargs):
        self.gamma = kwargs.get("gamma", 0.5)
        self.epsilon = kwargs.get("epsilon", 0.1)
        self.reset_prob = kwargs.get("reset_prob", 0.2)
        self.coin = Bernoulli(torch.tensor(self.reset_prob))
        self.explore = Bernoulli(torch.tensor(self.epsilon))
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    def __str__(self):
        return f"ucb: gamma = {self.gamma:.3f}, epsilon = {self.epsilon:.3f}"

    def observe(self, move, reward):
        m = move.item() if isinstance(move, torch.Tensor) else move
        r = reward.item() if isinstance(reward, torch.Tensor) else reward
        flip = self.coin.sample()
        if flip.item() == 1:
            self.reset()
        self.rewards[m] += r

    def ucb(self, m):
        if self.visits[m] == 0:
            return 0
        return self.rewards[m] / self.visits[m] \
            + self.gamma * sqrt(sum(self.visits)) / self.visits[m]

    def throw(self):
        if sum(self.visits) == 0:
            m = randint(0, 2)
        else:
            r = self.explore.sample()
            if r.item() == 1:
                m = randint(0, 2)
            else:
                m = max(MOVES, key=self.ucb)
        self.visits[m] += 1
        return torch.tensor(m)

    def reset(self):
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    @property
    def dist(self):
        if sum(self.visits) == 0:
            return 1 / 3 * torch.ones(3)
        best = max(MOVES, key=self.ucb)
        d = torch.zeros(3)
        d[best] = 1.0
        d = (1 - self.epsilon) * d + (self.epsilon / 3.0) * torch.ones(3)
        return d
def classify(self, p):
    # break ties at p == 0.5 with a fair coin
    be = Bernoulli(torch.tensor([0.5]))
    if p < 0.5:
        return 0
    elif p > 0.5:
        return 1
    else:
        return be.sample()
def bald_acq(obj_samples):
    # the output of objective is of shape num_samples x batch_shape x d_out
    mean_p = obj_samples.mean(dim=0)
    posterior_entropies = Bernoulli(mean_p).entropy().squeeze(-1)
    sample_entropies = Bernoulli(obj_samples).entropy()
    conditional_entropies = sample_entropies.mean(dim=0).squeeze(-1)
    return posterior_entropies - conditional_entropies
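# Quick shape check for bald_acq on synthetic posterior samples
# (50 draws, batch of 4 points, d_out = 1; these sizes are made up).
import torch

obj_samples = torch.rand(50, 4, 1)
scores = bald_acq(obj_samples)
print(scores.shape)  # torch.Size([4]); larger = more disagreement across draws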
def test1():
    from torch.distributions.bernoulli import Bernoulli
    # Creates a Bernoulli distribution parameterized by probs
    dist = Bernoulli(torch.tensor([0.1, 0.5, 0.9]))
    # Samples are binary (0 or 1); each entry takes the value 1 with
    # its probability p
    dist.sample()
    # >>> e.g. tensor([0., 0., 1.])
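# A companion check (not from the original): log_prob and entropy use the
# same parameterization, so the values below follow directly from p.
def test2():
    from torch.distributions.bernoulli import Bernoulli
    dist = Bernoulli(torch.tensor([0.1, 0.5, 0.9]))
    dist.log_prob(torch.tensor([0., 1., 1.]))
    # >>> tensor([-0.1054, -0.6931, -0.1054])
    dist.entropy()
    # >>> tensor([0.3251, 0.6931, 0.3251])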
def __init__(self, reset_prob=0.5):
    if isinstance(reset_prob, torch.Tensor):
        self.coin = Bernoulli(reset_prob)
        self.reset_prob = reset_prob.item()
    else:
        self.reset_prob = reset_prob
        self.coin = Bernoulli(torch.tensor(reset_prob))
    self.move = randint(0, 2)
def fast_jl_mat(self, m, n):
    # n must be a power of 2 for scipy.linalg.hadamard
    bern = Bernoulli(probs=0.5)
    D = torch.diag(bern.sample([n]) * 2 - 1)  # random +/-1 sign flips
    H = torch.tensor(hadamard(n)).float()
    P = self.sampling_mat(m, n)
    U = P.matmul(H.matmul(D)) / np.sqrt(m)
    return U
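# Standalone sketch of the same construction, with a hypothetical uniform
# row-sampling matrix standing in for self.sampling_mat.
import numpy as np
import torch
from scipy.linalg import hadamard
from torch.distributions import Bernoulli

m, n = 16, 64  # n a power of 2, as hadamard requires
D = torch.diag(Bernoulli(probs=0.5).sample([n]) * 2 - 1)
H = torch.tensor(hadamard(n)).float()
P = torch.eye(n)[torch.randint(0, n, (m,))]  # sample m rows uniformly
U = P.matmul(H.matmul(D)) / np.sqrt(m)
print(U.matmul(torch.randn(n)).shape)  # torch.Size([16])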
def schedule_sample(prev_logit, prev_tgt, epsilon):
    prev_out = torch.argmax(prev_logit, dim=1, keepdim=True)
    prev_choices = torch.cat([prev_out, prev_tgt], dim=1)  # [B, 2]
    batch_size = prev_choices.size(0)
    prob = Bernoulli(torch.tensor([epsilon] * batch_size).unsqueeze(1))
    # sampling: 1 selects the ground-truth token, 0 the model prediction
    sample = prob.sample().long().to(prev_tgt.device)
    next_inp = torch.gather(prev_choices, 1, sample)
    return next_inp
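# Sanity check with toy tensors: with epsilon = 1 the gather always picks
# column 1, i.e. the ground-truth token; with epsilon = 0, the argmax.
import torch

prev_logit = torch.tensor([[0.1, 2.0, 0.3],
                           [1.5, 0.2, 0.1]])
prev_tgt = torch.tensor([[2], [0]])
print(schedule_sample(prev_logit, prev_tgt, 1.0))  # tensor([[2], [0]])
print(schedule_sample(prev_logit, prev_tgt, 0.0))  # tensor([[1], [0]])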
def compute_log_pdf_bernoulli(self, fs_samples, target_matrix):
    """
    :param fs_samples: real-valued function samples (logits) squashed
        through a sigmoid to give Bernoulli probabilities
    :param target_matrix: binary targets with the same shape as fs_samples
    :return: elementwise Bernoulli log-probabilities of the targets
    """
    dist = Bernoulli(torch.sigmoid(fs_samples))
    log_pdf = dist.log_prob(target_matrix)
    return log_pdf
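# Equivalence check (a sketch, not part of the source): for binary targets,
# this log-pdf is the negative of binary cross-entropy with logits.
import torch
import torch.nn.functional as F
from torch.distributions import Bernoulli

fs = torch.randn(4, 3)
tgt = torch.randint(0, 2, (4, 3)).float()
log_pdf = Bernoulli(torch.sigmoid(fs)).log_prob(tgt)
bce = F.binary_cross_entropy_with_logits(fs, tgt, reduction='none')
print(torch.allclose(log_pdf, -bce, atol=1e-5))  # True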
def optimality(self, probabilities):
    # sample some Bernoulli rvs under the distribution over probabilities
    optimality_tensor = torch.zeros(
        (self.sample_size, self.trajectory_length, 1))
    for t in range(self.trajectory_length):
        for j in range(self.sample_size):
            optim_dist = Bernoulli(probabilities[t])
            optimality_tensor[j, t, 0] = optim_dist.sample()
    return optimality_tensor
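# A vectorized equivalent of the nested loops above (a sketch): Bernoulli
# broadcasts over the probability vector, and sample((k,)) draws k
# independent trajectories at once.
import torch
from torch.distributions import Bernoulli

sample_size, trajectory_length = 8, 5
probabilities = torch.rand(trajectory_length)
optimality_tensor = Bernoulli(probabilities).sample(
    (sample_size,)).unsqueeze(-1)
print(optimality_tensor.shape)  # torch.Size([8, 5, 1])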
def MoG_sample(self):
    # pick one of the two Gaussian components with a fair coin
    prob = torch.ones(self.input_shape) * .5
    bern = Bernoulli(prob)
    b = bern.sample().cuda()
    eps = torch.zeros_like(b).normal_().cuda()
    z1 = self.mean1 + self.logsd * eps
    z2 = self.mean2 + self.logsd * eps
    z = b * z1 + (1. - b) * z2
    return z
def reward_forward(self, prob, locations, orig_window_length, full_image,
                   other_full_image):
    """
    forward with policy gradient

    :param prob: probability maps
    :param locations: locations recording where the patches are extracted
    :param orig_window_length: original patch count, used to calculate the
        replication times
    :param full_image: ground truth full image
    :param other_full_image: another ground truth full image
    :return:
    """
    # Bernoulli sampling
    batch_size = prob.size(0)
    bernoulli_dist = Bernoulli(prob)
    samples = bernoulli_dist.sample()
    log_probs = bernoulli_dist.log_prob(samples)
    # put the sampled patches back into the full images
    with torch.no_grad():
        repeat_times = int(np.ceil(batch_size / orig_window_length))
        target_full_images = other_full_image.repeat(repeat_times, 1, 1, 1)
        inpaint_full_images = full_image.repeat(repeat_times, 1, 1, 1)
        # j-th full image
        j = 0
        for batch_idx in range(batch_size):
            sample = samples[batch_idx]
            y1, x1, y2, x2 = locations[batch_idx]
            inpaint_full_images[j, :, y1:y2, x1:x2] = sample.detach()
            if (batch_idx + 1) % orig_window_length == 0:
                j += 1
        # calculate the reward over the re-composed and ground truth images
        rewards = self.forward(inpaint_full_images, target_full_images)
    # broadcast the rewards to each element of the feature maps
    broadcast_rewards = torch.zeros(batch_size, 1)
    broadcast_rewards = broadcast_rewards.to(device)
    # j-th full image
    j = 0
    for batch_idx in range(batch_size):
        broadcast_rewards[batch_idx] = rewards[j]
        if (batch_idx + 1) % orig_window_length == 0:
            j += 1
    broadcast_rewards = broadcast_rewards.view(broadcast_rewards.size(0),
                                               1, 1, 1)
    image_size = prob.size(2)
    broadcast_rewards = broadcast_rewards.repeat(1, 1, image_size, image_size)
    return log_probs, broadcast_rewards
def get_action(self, state):
    all_hp_probs, all_anchor_probs = self.forward(state)
    all_anchor_act, all_hp_act = [], []
    for layer_anchor_probs in all_anchor_probs:
        anchor_sampler = Bernoulli(layer_anchor_probs)
        layer_anchor_act = anchor_sampler.sample()
        all_anchor_act.append(layer_anchor_act)
    for hp_probs in all_hp_probs:
        sampler = OneHotCategorical(logits=hp_probs)
        all_hp_act.append(sampler.sample())
    return all_hp_act, all_anchor_act
def action(self, x):
    x = T.from_numpy(x).double().unsqueeze(0)
    message_means, message_sds, action_probs = self.forward(x)
    action_dbn = Bernoulli(action_probs)
    action = action_dbn.sample()
    message_dbn = Normal(message_means, message_sds)
    message = message_dbn.sample()
    log_prob = action_dbn.log_prob(action) + \
        message_dbn.log_prob(message).sum()
    x = T.cat((message[0, :], action[0].double()))
    return x, log_prob
def sequential_data_preparation(input_batch, input_keep=1, start_index=2,
                                end_index=3, dropout_index=1,
                                device=get_device()):
    """
    Sequential Training Data Builder.

    Args:
        input_batch (torch.Tensor): Batch of padded sequences, output of
            nn.utils.rnn.pad_sequence(batch) of size
            `[sequence length, batch_size, 1]`.
        input_keep (float): The probability not to drop input sequence tokens
            according to a Bernoulli distribution with p = input_keep.
            Defaults to 1.
        start_index (int): The index of the sequence start token.
        end_index (int): The index of the sequence end token.
        dropout_index (int): The index of the dropout token. Defaults to 1.
        device (torch.device): Device to be used.

    Returns:
        (torch.Tensor, torch.Tensor, torch.Tensor): encoder_seq, decoder_seq,
        target_seq

        encoder_seq is a batch of padded input sequences starting with the
        start_index, of size `[sequence length +1, batch_size]`.
        decoder_seq is like encoder_seq but word dropout is applied (so if
        input_keep == 1, then decoder_seq = encoder_seq).
        target_seq (torch.Tensor): Batch of padded target sequences ending
        in the end_index, of size `[sequence length +1, batch_size]`.
    """
    batch_size = input_batch.shape[1]
    input_batch = input_batch.long().to(device)
    decoder_batch = input_batch.clone()
    # apply token dropout if keep != 1
    if input_keep != 1:
        # build dropout indices consisting of dropout_index
        dropout_indices = torch.LongTensor(
            dropout_index * torch.ones(1, batch_size).numpy())
        # mask for token dropout
        mask = Bernoulli(input_keep).sample((input_batch.shape[0], ))
        mask = torch.LongTensor(mask.numpy())
        dropout_loc = np.where(mask == 0)[0]
        decoder_batch[dropout_loc] = dropout_indices
    end_padding = torch.LongTensor(torch.zeros(1, batch_size).numpy())
    target_seq = torch.cat((input_batch[1:, :], end_padding), dim=0)
    target_seq = copy.deepcopy(target_seq).to(device)
    return input_batch, decoder_batch, target_seq
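# Usage sketch with a toy batch. The docstring above says
# [sequence length, batch_size, 1], but the concatenation with end_padding
# only type-checks for a 2-D [sequence length, batch_size] tensor, which is
# what this sketch uses; token ids 0-3 match the defaults (0 = padding,
# 1 = dropout, 2 = start, 3 = end).
import torch

batch = torch.tensor([[2, 2],
                      [5, 7],
                      [6, 3],
                      [3, 0]])  # [seq_len = 4, batch_size = 2]
enc, dec, tgt = sequential_data_preparation(
    batch, input_keep=0.8, device=torch.device('cpu'))
# enc == batch; dec has some tokens replaced by the dropout index;
# tgt is enc shifted up one step with zero padding appended.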
def __init__(self, base_classifier: torch.nn.Module, num_classes: int,
             calibrated_alpha: float, K: int):
    """
    :param base_classifier: maps from [batch x channel x height x width]
        to [batch x num_classes]
    :param num_classes: number of output classes
    :param calibrated_alpha: the noise level hyperparameter
    """
    self.base_classifier = base_classifier
    self.num_classes = num_classes
    self.calibrated_alpha = calibrated_alpha
    self.K = K
    self.m = Bernoulli(torch.tensor([self.calibrated_alpha]).cuda())
def forward(self, target, output):
    """
    :param target: initial input (B, C, W, H)
    :param output: reconstructed input logits (B, C, W, H)
    :return: mean Bernoulli reconstruction loss (negative log-likelihood)
    """
    dist = Bernoulli(logits=output)
    rec_loss = -dist.log_prob(target)
    rec_loss = torch.mean(rec_loss.sum(dim=[1, 2, 3]))
    return rec_loss
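# Standalone check of the loss body (a sketch, not the module itself);
# targets are binary so that Bernoulli.log_prob support validation passes.
import torch
from torch.distributions import Bernoulli

target = torch.randint(0, 2, (2, 1, 4, 4)).float()  # B, C, W, H
output = torch.randn(2, 1, 4, 4)                    # raw decoder logits
rec = -Bernoulli(logits=output).log_prob(target)
rec = rec.sum(dim=[1, 2, 3]).mean()  # sum over pixels, mean over batch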
def __init__(self, **kwargs):
    self.delta = kwargs.get("delta", 0.35)
    self.epsilon = kwargs.get("epsilon", 0.3)
    reset_prob = kwargs.get("reset_prob", 0.05)
    self.coin = Bernoulli(torch.tensor(reset_prob))
    self.means = [0., 0., 0.]
    self.arms = {0, 1, 2}
    self.not_played = [0, 1, 2]
    self.thresh = int(log(3.0 / self.delta))
    self.round = 1
    self.best = None
def __init__(self, reset_prob=0.015, **kwargs):
    # expected reset time = 1 / reset_prob
    if reset_prob == 0:
        self.coin = None
    else:
        self.coin = Bernoulli(torch.tensor(reset_prob))
    self.dists = kwargs.get("dists")
    self.bias = kwargs.get("bias")
    if self.dists:
        self.policy = Categorical(self.dists.pop(0))
    else:
        self.policy = self.rand_dist()
def f(self, x, z, logits, hard=False):
    B = x.shape[0]
    # image likelihood given b
    x_hat = self.generator.forward(z)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)
    # add uniform dequantization noise to the pixels
    x_noise = torch.clamp(
        x + torch.FloatTensor(x.shape).uniform_(0., 1. / 256.).cuda(),
        min=1e-5, max=1 - 1e-5)
    logpx = beta.log_prob(x_noise)  # [120, 3, 112, 112]
    logpx = torch.sum(logpx.view(B, -1), 1)
    # prior is constant; for q(b|x), we just want to increase its entropy
    if hard:
        dist = Bernoulli(logits=logits)
    else:
        dist = RelaxedBernoulli(torch.Tensor([1.]).cuda(), logits=logits)
    logqb = dist.log_prob(z.detach())
    logqb = torch.sum(logqb, 1)
    return logpx, logqb, alpha
def make_decisions(logits):
    # Decision 1
    dist1 = Bernoulli(logits=logits[:, 0])
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)
    # Decision 2: which Bernoulli parameterizes it depends on decision 1
    if b1 == 0:
        dist2 = Bernoulli(logits=logits[:, 1])
    else:
        dist2 = Bernoulli(logits=logits[:, 2])
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)
    return b1, logprob1, b2, logprob2
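# Minimal driver for make_decisions (illustrative only): batch size 1 keeps
# the scalar `if b1 == 0` comparison well-defined, and gradients of the
# log-probs flow back into whichever logits were used.
import torch

logits = torch.zeros(1, 3, requires_grad=True)
b1, lp1, b2, lp2 = make_decisions(logits)
(lp1 + lp2).sum().backward()
print(logits.grad)  # nonzero only in the columns that parameterized a decision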
dist = Bernoulli(bern_param)
samps = []
grads = []
logprobgrads = []
for i in range(n):
    samp = dist.sample()
    logprob = dist.log_prob(samp.detach())
    logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param,),
                                      retain_graph=True)[0]
    samps.append(samp.numpy())
    # REINFORCE estimator: (f(b) - baseline) * grad log p(b)
    grads.append((f(samp.numpy()) - 0.) * logprobgrad.numpy())
    logprobgrads.append(logprobgrad.numpy())
C = 3
N = 2000
prelogits = torch.zeros([B, C])
logits = prelogits - logsumexp(prelogits)
logits.requires_grad_(True)
grads = []
for i in range(N):
    # Decision 1
    dist1 = Bernoulli(logits=logits[:, 0])
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)
    if b1 == 0:
        dist2 = Bernoulli(logits=logits[:, 1])
    else:
        dist2 = Bernoulli(logits=logits[:, 2])
    # Decision 2
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)
    if b1 == 0 and b2 == 0:
print()
print('REINFORCE')
print('Value:', val)
print()

optim = torch.optim.Adam([bern_param], lr=.004)
steps = []
losses = []
for step in range(total_steps):
    dist = Bernoulli(logits=bern_param)
    optim.zero_grad()
    bs = []
    for i in range(20):
        samps = dist.sample()
        bs.append(H(samps))
    bs = torch.FloatTensor(bs).unsqueeze(1)
    logprob = dist.log_prob(bs)
    # REINFORCE surrogate loss; review the pytorch_toy and the RL code
    # to see how PG was done
    loss = torch.mean(f(bs) * logprob)
def forward(self, grad_est_type, x=None, warmup=1., inf_net=None):
    outputs = {}
    B = x.shape[0]

    # Sample from the relaxed Bernoulli posterior
    z, logits, logqz = self.q.sample(x)

    if isnan(logqz).any():
        print(torch.sum(isnan(logqz).float()).data.item())
        print(torch.mean(logits).data.item())
        print(torch.max(logits).data.item())
        print(torch.min(logits).data.item())
        print(torch.max(z).data.item())
        print(torch.min(z).data.item())
        raise ValueError('NaN in logqz')

    # Compute discrete ELBO
    b = harden(z).detach()
    logpx_b, logq_b, alpha1 = self.f(x, b, logits, hard=True)
    fhard = (logpx_b - logq_b).detach()

    if grad_est_type == 'SimpLAX':
        # Control variate
        logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
        fsoft = logpx_z.detach()
        c = self.surr(x, z).view(B)
        # REINFORCE with control variate
        Adv = (fhard - fsoft - c).detach()
        cost1 = Adv * logqz
        # Unbiased gradient of fhard/elbo
        cost_all = cost1 + c + fsoft
        # Surrogate loss
        surr_cost = torch.abs(fhard - fsoft - c)

    elif grad_est_type == 'RELAX':
        # p(z|b)
        theta = logit_to_prob(logits)
        v = torch.rand(z.shape[0], z.shape[1]).cuda()
        v_prime = v * (b - 1.) * (theta - 1.) + b * (v * theta + 1. - theta)
        z_tilde = logits + torch.log(v_prime) - torch.log1p(-v_prime)
        z_tilde = torch.sigmoid(z_tilde)
        # Control variate
        logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
        fsoft = logpx_z.detach()
        c_ztilde = self.surr(x, z_tilde).view(B)
        c_z = self.surr(x, z).view(B)
        # REINFORCE with control variate
        dist_bern = Bernoulli(logits=logits)
        logqb = dist_bern.log_prob(b.detach())
        logqb = torch.sum(logqb, 1)
        Adv = (fhard - fsoft - c_ztilde).detach()
        cost1 = Adv * logqb
        # Unbiased gradient of fhard/elbo
        cost_all = cost1 + fsoft + c_z - c_ztilde
        # Surrogate loss
        surr_cost = torch.abs(fhard - fsoft - c_ztilde)

    elif grad_est_type == 'SimpLAX_nosoft':
        # Control variate
        logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
        c = self.surr(x, z).view(B)
        # REINFORCE with control variate
        Adv = (fhard - c).detach()
        cost1 = Adv * logqz
        # Unbiased gradient of fhard/elbo
        cost_all = cost1 + c
        # Surrogate loss
        surr_cost = torch.abs(fhard - c)

    elif grad_est_type == 'RELAX_nosoft':
        # p(z|b)
        theta = logit_to_prob(logits)
        v = torch.rand(z.shape[0], z.shape[1]).cuda()
        v_prime = v * (b - 1.) * (theta - 1.) + b * (v * theta + 1. - theta)
        z_tilde = logits + torch.log(v_prime) - torch.log1p(-v_prime)
        z_tilde = torch.sigmoid(z_tilde)
        # Control variate
        logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
        c_ztilde = self.surr(x, z_tilde).view(B)
        c_z = self.surr(x, z).view(B)
        # REINFORCE with control variate
        dist_bern = Bernoulli(logits=logits)
        logqb = dist_bern.log_prob(b.detach())
        logqb = torch.sum(logqb, 1)
        Adv = (fhard - c_ztilde).detach()
        cost1 = Adv * logqb
        # Unbiased gradient of fhard/elbo
        cost_all = cost1 + c_z - c_ztilde
        # Surrogate loss
        surr_cost = torch.abs(fhard - c_ztilde)

    outputs['logpx'] = torch.mean(logpx_b)
    outputs['x_recon'] = alpha1
    outputs['welbo'] = torch.mean(cost_all)
    outputs['elbo'] = torch.mean(logpx_b - logq_b - 138.63)
    outputs['z'] = z
    outputs['logpz'] = torch.zeros(1)
    outputs['logqz'] = torch.mean(logq_b)
    outputs['surr_cost'] = torch.mean(surr_cost)
    outputs['fhard'] = torch.mean(fhard)
    outputs['logq_z'] = torch.mean(logq_z)
    outputs['logits'] = logits

    return outputs
reinforce_cat_grad_stds = []
for theta in thetas:
    print()
    print('theta:', theta)
    bern_param = torch.tensor([theta], requires_grad=True)
    dist = Bernoulli(bern_param)
    samps = []
    grads = []
    logprobgrads = []
    for i in range(n):
        samp = dist.sample()
        logprob = dist.log_prob(samp.detach())
        logprobgrad = torch.autograd.grad(outputs=logprob,
                                          inputs=(bern_param,),
                                          retain_graph=True)[0]
        samps.append(samp.numpy())
        # REINFORCE estimator: (f(b) - baseline) * grad log p(b)
        grads.append((f(samp.numpy()) - 0.) * logprobgrad.numpy())
        logprobgrads.append(logprobgrad.numpy())
# thetas = np.linspace(.97, .999, 12)
reinforce_grad_means = []
reinforce_grad_stds = []
pz_grad_means = []
pz_grad_stds = []
for theta in thetas:
    print('theta:', theta)
    bern_param = torch.tensor([theta], requires_grad=True)
    dist = Bernoulli(bern_param)
    samps = []
    grads = []
    logprobgrads = []
    for i in range(n):
        samp = dist.sample()
        logprob = dist.log_prob(samp.detach())
        logprobgrad = torch.autograd.grad(outputs=logprob,
                                          inputs=(bern_param,),
                                          retain_graph=True)[0]
        samps.append(samp.numpy())
        grads.append((f(samp.numpy()) - 0.) * logprobgrad.numpy())
        logprobgrads.append(logprobgrad.numpy())