def communication_channel(self, env, agent_a, agent_b, target, perception):
    # add perceptual noise
    noise = th.float_var(
        Normal(
            torch.zeros(self.batch_size, self.perception_dim),
            torch.ones(self.batch_size, self.perception_dim) *
            self.perception_noise).sample())
    perception = perception + noise

    # generate message, with noise on the communication channel
    msg_logits = agent_a(perception=perception)
    noise = th.float_var(
        Normal(torch.zeros(self.batch_size, self.msg_dim),
               torch.ones(self.batch_size, self.msg_dim) *
               self.com_noise).sample())
    msg_probs = F.softmax(msg_logits + noise, dim=1)
    msg_dist = Categorical(msg_probs)
    msg = msg_dist.sample()

    # interpret message and sample a guess
    guess_logits = agent_b(msg=msg)
    guess_probs = F.softmax(guess_logits, dim=1)
    m = Categorical(guess_probs)
    guess = m.sample()

    # compute reward
    if self.reward_func == 'regier_reward':
        CIELAB_guess = env.chip_index2CIELAB(guess.data)
        reward = env.regier_reward(perception, CIELAB_guess,
                                   bw_boost=self.bw_boost)
    elif self.reward_func == 'abs_dist':
        diff = torch.abs(target.unsqueeze(dim=1) - guess.unsqueeze(dim=1))
        reward = 1 - (diff.float() / 100)

    # compute loss and update model
    if self.loss_type == 'REINFORCE':
        sender_loss = -(reward *
                        msg_dist.log_prob(msg)).sum() / self.batch_size
        receiver_loss = -(reward *
                          m.log_prob(guess)).sum() / self.batch_size
        loss = receiver_loss + sender_loss
    elif self.loss_type == 'CrossEntropyLoss':
        loss = self.criterion_receiver(guess_logits, target.squeeze())

    return loss
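# Illustrative sketch (not part of the training code): the score-function
# (REINFORCE) estimator used above. Sampling from a Categorical blocks
# gradients, so the gradient of the expected reward is estimated as
# E[reward * grad log p(action)] via a surrogate loss. All names here are
# made up for the example.
def _reinforce_sketch():
    logits = torch.randn(4, 10, requires_grad=True)  # batch of 4, 10 actions
    dist = Categorical(F.softmax(logits, dim=1))
    action = dist.sample()               # non-differentiable sample
    reward = torch.rand(4)               # stand-in for the environment reward
    loss = -(reward * dist.log_prob(action)).mean()  # REINFORCE surrogate
    loss.backward()                      # gradients reach the logits
    return logits.grad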
def play(self, env, agent_a, agent_b):
    agent_a = th.cuda(agent_a)
    agent_b = th.cuda(agent_b)

    optimizer = optim.Adam(
        list(agent_a.parameters()) + list(agent_b.parameters()))

    for i in range(self.max_epochs):
        optimizer.zero_grad()

        color_codes, colors = env.mini_batch(batch_size=self.batch_size)
        color_codes = th.long_var(color_codes)
        colors = th.float_var(colors)

        loss = self.communication_channel(env, agent_a, agent_b,
                                          color_codes, colors)
        loss.backward()
        optimizer.step()

        # printing status
        if self.print_interval != 0 and ((i + 1) % self.print_interval == 0):
            self.print_status(loss)

        if self.evaluate_interval != 0 and (
                (i + 1) % self.evaluate_interval == 0):
            self.evaluate(env, agent_a)

    return agent_a.cpu()
def communication_channel(self, env, agent_a, agent_b, target, perception):
    # add perceptual noise (training only)
    if self.training_mode:
        noise = th.float_var(
            Normal(
                torch.zeros(self.batch_size, self.perception_dim),
                torch.ones(self.batch_size, self.perception_dim) *
                self.perception_noise).sample())
        perception = perception + noise

    # sample message
    probs = agent_a(perception=perception)
    m = Categorical(probs)
    msg = m.sample()

    # interpret message
    guess = agent_b(msg=msg)

    # compute reward
    if self.reward_func == 'basic_reward':
        reward = env.basic_reward(target, guess)
    elif self.reward_func == 'regier_reward':
        reward = env.regier_reward(perception, guess)
    elif self.reward_func == 'number_reward':
        reward = env.number_reward(target, guess)
    elif self.reward_func == 'inverted_reward':
        reward = env.inverted_number(target, guess)

    self.sum_reward += reward.sum()

    # compute loss
    self.loss_sender = self.sender_loss_multiplier * (
        (-m.log_prob(msg) * reward).sum() / self.batch_size)
    self.loss_receiver = self.criterion_receiver(guess, target)

    return self.loss_receiver + self.loss_sender
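# Design note on the variant above: the two agents are trained
# asymmetrically. Only the sender's discrete message choice needs the
# REINFORCE term (its sample blocks gradients), while the receiver's output
# is scored directly against the target with criterion_receiver, a
# lower-variance supervised signal.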
def play(self, env, agent_a, agent_b):
    agent_a = th.cuda(agent_a)
    agent_b = th.cuda(agent_b)

    receiver_opt = optim.Adam(list(agent_b.parameters()))
    optimizer = optim.Adam(
        list(agent_a.parameters()) + list(agent_b.parameters()))

    for i in range(self.max_epochs):
        # update only the receiver for 50 batches before each joint update
        for j in range(50):
            color_codes, colors = env.mini_batch(
                batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)

            receiver_loss, _, _ = self.communication_channel(
                env, agent_a, agent_b, color_codes, colors)
            receiver_loss.backward()
            receiver_opt.step()
            receiver_opt.zero_grad()

        self.board_reward = 0
        optimizer.zero_grad()

        color_codes, colors = env.mini_batch(batch_size=self.batch_size)
        color_codes = th.long_var(color_codes)
        colors = th.float_var(colors)

        receiver_loss, sender_loss, entropy_loss = self.communication_channel(
            env, agent_a, agent_b, color_codes, colors)
        loss = receiver_loss + sender_loss + entropy_loss
        loss.backward()
        optimizer.step()

        # update tensorboard
        if self.print_interval != 0 and ((i + 1) % self.print_interval == 0):
            self.tensorboard_update(i, env, agent_a, agent_b)

        # printing status
        if self.print_interval != 0 and ((i + 1) % self.print_interval == 0):
            if self.loss_type == 'REINFORCE':
                self.print_status(loss)

        if self.evaluate_interval != 0 and (
                (i + 1) % self.evaluate_interval == 0):
            self.evaluate(env, agent_a)
def word2number(self, agent):
    # decode every one-hot message once and penalize numbers that more than
    # one word maps to
    msg = th.float_var(np.eye(agent.msg_dim))
    guess_logits = agent(msg=msg)
    guess_probs = F.softmax(guess_logits, dim=1)
    _, guess = guess_probs.max(1)
    guess = guess.data.numpy()
    duplicates = [
        item for item, count in Counter(guess).items() if count > 1
    ]
    return -len(duplicates) * 2
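# Note: word2number feeds the identity matrix through the receiver, so each
# of the msg_dim one-hot messages is decoded exactly once, and returns -2
# for every number that two or more words map to. The 'abs_penalty' reward
# in communication_channel below adds this term to the reward, discouraging
# the receiver from collapsing several words onto one number.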
def play(self, env, agent_a, agent_b):
    agent_a = th.cuda(agent_a)
    agent_b = th.cuda(agent_b)

    optimizer = optim.Adam(list(agent_a.parameters()) +
                           list(agent_b.parameters()),
                           lr=0.0001)

    for i in range(self.max_epochs):
        optimizer.zero_grad()

        # Agent a sends a message
        color_codes, colors = env.mini_batch(batch_size=self.batch_size)
        color_codes = th.long_var(color_codes)
        colors = th.float_var(colors)
        loss1 = self.communication_channel(env, agent_a, agent_b,
                                           color_codes, colors)
        loss1.backward()

        # Agent b sends a message
        color_codes, colors = env.mini_batch(batch_size=self.batch_size)
        color_codes = th.long_var(color_codes)
        colors = th.float_var(colors)
        loss2 = self.communication_channel(env, agent_b, agent_a,
                                           color_codes, colors)
        loss2.backward()

        # backpropagate the accumulated gradients from both games
        optimizer.step()
        loss = loss1 + loss2

        # printing status
        if self.print_interval != 0 and ((i + 1) % self.print_interval == 0):
            self.print_status(loss)

        if self.evaluate_interval != 0 and (
                (i + 1) % self.evaluate_interval == 0):
            self.evaluate(env, agent_a)

    return agent_a.cpu()
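# Design note: both games run before the single optimizer.step(), so the
# gradients from agent_a-as-sender and agent_b-as-sender accumulate and the
# two agents are trained symmetrically in both roles within one update.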
def agent_language_map(env, a):
    V = {}
    a = th.cuda(a)
    perception_indices, perceptions = env.full_batch()
    if isinstance(perceptions, np.ndarray):
        perceptions = th.float_var(
            torch.tensor(perceptions, dtype=torch.float32))

    probs = a(perception=perceptions)
    _, terms = probs.max(1)

    for perception_index in perception_indices:
        V[perception_index] = terms[perception_index].item()

    return list(V.values())
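# agent_language_map returns, for every chip in the environment, the argmax
# word the sender uses for it, i.e. the agent's naming partition. This is
# the V that, e.g., compute_term_usage consumes in tensorboard_update below,
# and the agent-side analogue of the human mode maps the environment loads.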
def compute_gibson_cost(self, env, a):
    _, perceptions = env.full_batch()
    if isinstance(perceptions, np.ndarray):
        perceptions = th.float_var(
            torch.tensor(perceptions, dtype=torch.float32))
    perceptions = perceptions.cpu()

    all_terms = th.long_var(range(a.msg_dim), False)

    p_WC = F.softmax(a(perception=perceptions), dim=1).t().data.numpy()
    p_CW = F.softmax(a(msg=all_terms), dim=1).data.numpy()

    S = -np.diag(np.matmul(p_WC.transpose(), np.log2(p_CW)))
    avg_S = S.sum() / len(S)  # expectation assuming uniform prior

    # debug code
    # s = 0
    # c = 43
    # for w in range(a.msg_dim):
    #     s += -p_WC[w, c] * np.log2(p_CW[w, c])
    # print(S[c] - s)

    return S, avg_S
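# The quantity computed above is, for each color chip c,
#     S(c) = -sum_w p(w | c) * log2 p(c | w)
# the expected surprisal of recovering chip c from the word w the agent
# would use for it; avg_S averages S over chips, i.e. the expectation under
# a uniform prior over chips (the commented debug check verifies one entry).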
def play(self, env, agent_a, agent_b):
    agent_a = th.cuda(agent_a)
    agent_b = th.cuda(agent_b)

    optimizer = optim.Adam(list(agent_a.parameters()) +
                           list(agent_b.parameters()),
                           lr=0.0001)

    for i in range(self.max_epochs):
        optimizer.zero_grad()

        color_codes, colors = env.mini_batch(batch_size=self.batch_size)
        color_codes = th.long_var(color_codes)
        colors = th.float_var(colors)

        loss = self.communication_channel(env, agent_a, agent_b,
                                          color_codes, colors)
        loss.backward()
        optimizer.step()

        # printing status
        if self.print_interval != 0 and ((i + 1) % self.print_interval == 0):
            self.print_status(loss)

        if self.evaluate_interval != 0 and (
                (i + 1) % self.evaluate_interval == 0):
            self.evaluate(env, agent_a)

    return agent_a.cpu()
def __init__(self, wcs_path='data/') -> None:
    super().__init__()

    # fetch the WCS data files into wcs_path
    baseurl = 'http://www1.icsi.berkeley.edu/wcs/data/'
    self.get_data(baseurl + 'cnum-maps/cnum-vhcm-lab-new.txt',
                  wcs_path + 'cnum-vhcm-lab-new.txt')
    self.get_data(baseurl + '20021219/txt/term.txt', wcs_path + 'term.txt')
    self.get_data(baseurl + '20041016/txt/dict.txt', wcs_path + 'dict.txt')

    self.color_chips = pd.read_csv(wcs_path + 'cnum-vhcm-lab-new.txt',
                                   sep='\t')
    self.cielab_map = th.float_var(
        self.color_chips[['L*', 'a*', 'b*']].values)

    self.term = pd.read_csv(
        wcs_path + 'term.txt',
        sep='\t',
        names=['lang_num', 'spkr_num', 'chip_num', 'term_abrev'])
    self.dict = pd.read_csv(
        wcs_path + 'dict.txt',
        sep='\t',
        skiprows=[0],
        names=['lang_num', 'term_num', 'term', 'term_abrev'])
    self.term_nums = pd.merge(
        self.term,
        self.dict.drop_duplicates(subset=['lang_num', 'term_abrev']),
        how='inner',
        on=['lang_num', 'term_abrev'])

    self.human_mode_maps = self.compute_human_mode_maps(wcs_path)

    self.plot_with_colors(V=None,
                          save_to_path=wcs_path + 'mode_maps/empty_map.png')
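# Data layout, as read above: cnum-vhcm-lab-new.txt gives each WCS color
# chip's CIELAB (L*, a*, b*) coordinates; term.txt holds one naming response
# per (language, speaker, chip); dict.txt maps each language's term
# abbreviations to term numbers. The merge on (lang_num, term_abrev)
# attaches a term number to every naming response in self.term_nums.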
def communication_channel(self, env, agent_a, agent_b, target, perception):
    # add perceptual noise
    noise = th.float_var(
        Normal(
            torch.zeros(self.batch_size, self.perception_dim),
            torch.ones(self.batch_size, self.perception_dim) *
            self.perception_noise).sample())
    perception = perception + noise

    # generate message, with noise on the communication channel
    msg_logits = agent_a(perception=perception)
    noise = th.float_var(
        Normal(torch.zeros(self.batch_size, self.msg_dim),
               torch.ones(self.batch_size, self.msg_dim) *
               self.com_noise).sample())
    msg_probs = F.softmax(msg_logits + noise, dim=1)
    msg_dist = Categorical(msg_probs)
    msg = msg_dist.sample()

    # interpret message and sample a guess
    guess_logits = agent_b(msg=msg)
    guess_probs = F.softmax(guess_logits, dim=1)
    m = Categorical(guess_probs)
    guess = m.sample()

    # reconstruction (sanity check): the sender decodes its own message,
    # adding a consistency term to the loss. Open question: CrossEntropy or
    # REINFORCE here? With CrossEntropy this becomes a standard autoencoder.
    recon_logits = agent_a(msg=msg)
    recon_probs = F.softmax(recon_logits, dim=1)
    recon_dist = Categorical(recon_probs)
    recon_guess = recon_dist.sample()
    recon_diff = torch.abs(target - recon_guess.unsqueeze(dim=1))
    recon_reward = 1 - (recon_diff.float() / 100)
    recon_loss = 0.5 * (-recon_dist.log_prob(recon_guess) *
                        recon_reward).sum() / self.batch_size

    # compute reward
    if self.reward_func == 'regier_reward':
        CIELAB_guess = env.chip_index2CIELAB(guess.data)
        reward = env.regier_reward(perception, CIELAB_guess,
                                   bw_boost=self.bw_boost)
    elif self.reward_func == 'abs_dist':
        diff = torch.abs(target - guess.unsqueeze(dim=1))
        reward = 1 - (diff.float() / 100)
    elif self.reward_func == 'exp_reward':
        diff = torch.abs(target - guess.unsqueeze(dim=1))
        reward = 2**(-diff.float())
    elif self.reward_func == 'number_reward':
        reward = env.number_reward(target, guess)
    elif self.reward_func == 'inverted_reward':
        reward = env.inverted_number(target, guess)
    elif self.reward_func == 'interval_reward':
        reward = env.interval_reward(target, guess)
    elif self.reward_func == 'target_reward':
        reward = env.target_reward(target, guess)
    elif self.reward_func == 'sim_index':
        reward = env.sim_index(target, guess)

    self.sum_reward += reward.sum()
    self.board_reward = reward

    # compute loss and update model
    if self.loss_type == 'REINFORCE':
        sender_loss = (-msg_dist.log_prob(msg) *
                       reward).sum() / self.batch_size
        receiver_loss = (-m.log_prob(guess) *
                         reward).sum() / self.batch_size
        loss = receiver_loss + sender_loss + recon_loss
    elif self.loss_type == 'CrossEntropyLoss':
        loss = self.criterion_receiver(guess_logits, target.squeeze())

    return loss
def communication_channel(self, env, agent_a, agent_b, target, perception):
    # add perceptual noise
    noise = th.float_var(
        Normal(
            torch.zeros(self.batch_size, self.perception_dim),
            torch.ones(self.batch_size, self.perception_dim) *
            self.perception_noise).sample())
    perception = perception + noise

    # generate message, with noise on the communication channel
    msg_logits = agent_a(perception=perception)
    noise = th.float_var(
        Normal(torch.zeros(self.batch_size, self.msg_dim),
               torch.ones(self.batch_size, self.msg_dim) *
               self.com_noise).sample())
    msg_probs = F.softmax(msg_logits + noise, dim=1)
    msg_dist = Categorical(msg_probs)
    msg = msg_dist.sample()

    # interpret message and sample a guess; the receiver conditions on the
    # full message distribution (msg_probs), while the sampled msg is only
    # used for the sender's log-probability below
    guess_logits = agent_b(msg=msg_probs)
    guess_probs = F.softmax(guess_logits, dim=1)
    m = Categorical(guess_probs)
    guess = m.sample()

    # compute reward
    if self.reward_func == 'regier_reward':
        CIELAB_guess = env.chip_index2CIELAB(guess.data)
        reward = env.regier_reward(perception, CIELAB_guess,
                                   bw_boost=self.bw_boost)
    elif self.reward_func == 'abs_dist':
        diff = torch.abs(target - (1 + guess.unsqueeze(dim=1)))
        reward = 1 - (diff.float() / 100)
    elif self.reward_func == 'abs_penalty':
        diff = torch.abs(target - (1 + guess.unsqueeze(dim=1)))
        reward = 1 - (diff.float() / 100)
        # penalize the receiver for assigning more than one word to a number
        reward = reward + env.word2number(agent_b)
    elif self.reward_func == 'exp_reward':
        diff = torch.abs(target - guess.unsqueeze(dim=1))
        reward = 2**(-0.1 * diff.float())
    elif self.reward_func == 'sim_index':
        reward = env.sim_index(target, guess)

    self.sum_reward += reward.sum()
    self.board_reward = reward

    # compute loss and update model
    if self.loss_type == 'REINFORCE':
        # update the running-mean baseline used for variance reduction
        self.n_points += 1
        self.baseline += (reward.mean() - self.baseline) / self.n_points

        sender_loss = (-msg_dist.log_prob(msg) *
                       (reward - self.baseline)).sum() / self.batch_size
        receiver_loss = (-m.log_prob(guess) *
                         (reward - self.baseline)).sum() / self.batch_size
        entropy_loss = -(self.entropy_coef *
                         (1 * msg_dist.entropy().mean() +
                          3 * m.entropy().mean()))

        # for tensorboard logging
        self.sender_loss += sender_loss
        self.receiver_loss += receiver_loss

        return receiver_loss, sender_loss, entropy_loss
    elif self.loss_type == 'CrossEntropyLoss':
        loss = self.criterion_receiver(guess_logits, target.squeeze())

        # for tensorboard logging
        self.receiver_loss += loss

        # return zero sender and entropy terms so that callers can sum the
        # three losses uniformly
        zero = th.float_var(torch.zeros(1))
        return loss, zero, zero
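# Illustrative sketch (not part of the training code): the baseline above is
# a running mean of the per-batch mean rewards, b_k = b_{k-1} +
# (r_k - b_{k-1}) / k. Subtracting a baseline leaves the REINFORCE gradient
# unbiased, since E[grad log p(a)] = 0, while reducing its variance.
def _running_mean_sketch(rewards):
    baseline, n = 0.0, 0
    for r in rewards:
        n += 1
        baseline += (r - baseline) / n  # same update as self.baseline above
    return baseline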
def tensorboard_update(self, epoch, env, a_agent, b_agent):
    # log scalars
    writer.add_scalar(
        'Loss/sender_loss',
        self.sender_loss / (self.print_interval * self.batch_size), epoch)
    writer.add_scalar(
        'Loss/receiver_loss',
        self.receiver_loss / (self.print_interval * self.batch_size), epoch)
    writer.add_scalar(
        'Metrics/Reward_' + str(self.reward_func),
        self.board_reward.sum() / (self.print_interval * self.batch_size),
        epoch)

    # log evaluation metrics
    V = evaluate.agent_language_map(env, a_agent)

    # term usage
    terms_used = evaluate.compute_term_usage(V=V)[-1]
    writer.add_scalar('Metrics/term_usage', terms_used, epoch)

    # produce partition (number environment)
    partition = self.compute_ranges(V)
    writer.add_text('Partition', str(partition), epoch)
    writer.flush()

    self.sender_loss = 0
    self.receiver_loss = 0

    # receiver guesses for each one-hot message
    msg = th.float_var(np.eye(a_agent.msg_dim))
    guess_logits = b_agent(msg=msg)
    guess_probs = F.softmax(guess_logits, dim=1)
    _, guess = guess_probs.max(1)
    writer.add_text('Receiver guesses', str(guess + 1), epoch)

    # plot per-chip word probabilities and per-message guess probabilities
    index, perception = env.full_batch()
    prob = F.softmax(a_agent(th.float_var(perception)), dim=1)
    prob = prob.detach().numpy()
    guess_probs = guess_probs.detach().numpy()

    for i in range(perception.shape[0]):
        fig, ax = plt.subplots(figsize=(5, 5))
        plt.plot(range(a_agent.msg_dim), prob[i, :])
        writer.add_figure('prob_words' + str(i + 1) + '/sender', fig, epoch)

    for i in range(guess_probs.shape[0]):
        fig, ax = plt.subplots(figsize=(5, 5))
        plt.plot(range(guess_probs.shape[1]), guess_probs[i, :])
        writer.add_figure('prob_guess' + str(i + 1) + '/receiver', fig,
                          epoch)