def main(params):
    opts = get_params(params)
    device = opts.device
    force_eos = opts.force_eos == 1

    if opts.probs == "uniform":
        probs = []
        probs_by_att = np.ones(opts.n_values)
        probs_by_att /= probs_by_att.sum()
        for i in range(opts.n_attributes):
            probs.append(probs_by_att)

    probs_attributes = [1] * opts.n_attributes

    agent_1 = AgentBaseline2(vocab_size=opts.vocab_size, n_features=opts.n_features,
                             max_len=opts.max_len, embed_dim=opts.sender_embedding,
                             sender_hidden_size=opts.sender_hidden,
                             receiver_hidden_size=opts.receiver_hidden,
                             sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
                             sender_num_layers=opts.sender_num_layers,
                             receiver_num_layers=opts.receiver_num_layers,
                             force_eos=force_eos)
    agent_1.load_state_dict(torch.load(opts.agent_1_weights, map_location=torch.device('cpu')))
    agent_1.to(device)

    agent_2 = AgentBaseline2(vocab_size=opts.vocab_size, n_features=opts.n_features,
                             max_len=opts.max_len, embed_dim=opts.sender_embedding,
                             sender_hidden_size=opts.sender_hidden,
                             receiver_hidden_size=opts.receiver_hidden,
                             sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
                             sender_num_layers=opts.sender_num_layers,
                             receiver_num_layers=opts.receiver_num_layers,
                             force_eos=force_eos)
    agent_2.load_state_dict(torch.load(opts.agent_2_weights, map_location=torch.device('cpu')))
    agent_2.to(device)

    noise_robustness_score_1 = compute_noise_robustness(agent_1, agent_2, opts.n_sampling,
                                                        opts.noise_prob, opts.max_len,
                                                        opts.n_features, device)

    np.save(opts.dir_save + '/training_info/noise_robustness_1.npy', noise_robustness_score_1)

    core.close()
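
# The script above reduces to a single scalar saved to disk. For reference, below is a
# minimal, self-contained sketch of the kind of channel corruption such a robustness probe
# relies on. It is an illustration only: compute_noise_robustness is the repo's own routine
# and may corrupt messages differently.
import numpy as np

def noisy_channel(messages, noise_prob, vocab_size, rng=None):
    """Replace each symbol of an integer message array by a uniformly random symbol
    with probability noise_prob (a simple symmetric-channel model)."""
    rng = rng if rng is not None else np.random.default_rng(0)
    corrupted = messages.copy()
    flip = rng.random(messages.shape) < noise_prob   # which positions get corrupted
    corrupted[flip] = rng.integers(0, vocab_size, size=int(flip.sum()))
    return corrupted
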
def main(params):
    print(torch.cuda.is_available())
    opts = get_params(params)
    print(opts, flush=True)
    device = opts.device
    force_eos = opts.force_eos == 1

    # Distribution of the inputs
    if opts.probs == "uniform":
        probs = []
        probs_by_att = np.ones(opts.n_values)
        probs_by_att /= probs_by_att.sum()
        for i in range(opts.n_attributes):
            probs.append(probs_by_att)

    if opts.probs == "entropy_test":
        probs = []
        for i in range(opts.n_attributes):
            probs_by_att = np.ones(opts.n_values)
            probs_by_att[0] = 1 + (1 * i)
            probs_by_att /= probs_by_att.sum()
            probs.append(probs_by_att)

    if opts.probs_attributes == "uniform":
        probs_attributes = [1] * opts.n_attributes

    if opts.probs_attributes == "uniform_indep":
        probs_attributes = [0.2] * opts.n_attributes

    if opts.probs_attributes == "echelon":
        #probs_attributes = [1. - 0.2 * i for i in range(opts.n_attributes)]
        #probs_attributes = [0.7 + 0.3 / (i + 1) for i in range(opts.n_attributes)]
        probs_attributes = [1., 0.95, 0.9, 0.85]

    print("Probability by attribute is:", probs_attributes)

    compo_dataset = build_compo_dataset(opts.n_values, opts.n_attributes)

    if opts.split_proportion < 1.:
        train_split = np.random.RandomState(opts.random_seed).choice(
            opts.n_values ** opts.n_attributes,
            size=int(opts.split_proportion * (opts.n_values ** opts.n_attributes)),
            replace=False)
        test_split = []
        for j in range(opts.n_values ** opts.n_attributes):
            if j not in train_split:
                test_split.append(j)
        test_split = np.array(test_split)
    else:
        train_split = test_split = np.arange(opts.n_values ** opts.n_attributes)

    train_loader = OneHotLoaderCompositionality(dataset=compo_dataset, split=train_split,
                                                n_values=opts.n_values, n_attributes=opts.n_attributes,
                                                batch_size=opts.batch_size,
                                                batches_per_epoch=opts.batches_per_epoch,
                                                probs=probs, probs_attributes=probs_attributes)

    # single batches with 1s on the diag
    #test_loader = TestLoaderCompositionality(dataset=compo_dataset,n_values=opts.n_values,n_attributes=opts.n_attributes)
    test_loader = TestLoaderCompositionality(dataset=compo_dataset, split=test_split,
                                             n_values=opts.n_values, n_attributes=opts.n_attributes,
                                             batch_size=opts.batch_size,
                                             batches_per_epoch=opts.batches_per_epoch,
                                             probs=probs, probs_attributes=probs_attributes)

    agents = {}
    optim_params = {}
    loss_weights = {}
    speaker_parameters = {}
    listener_parameters = {}

    sender_hiddens = [128, 1024, 512, 256, 128, 64, 32, 16, 8]
    receiver_hiddens = [128, 1024, 512, 256, 128, 64, 32, 16, 8]

    for i in range(max(opts.N_speakers, opts.N_listeners)):
        agent = AgentBaselineCompositionality(vocab_size=opts.vocab_size,
                                              n_attributes=opts.n_attributes,
                                              n_values=opts.n_values,
                                              max_len=opts.max_len,
                                              embed_dim=opts.sender_embedding,
                                              sender_hidden_size=sender_hiddens[i],
                                              receiver_hidden_size=receiver_hiddens[i],
                                              sender_cell=opts.sender_cell,
                                              receiver_cell=opts.receiver_cell,
                                              sender_num_layers=opts.sender_num_layers,
                                              receiver_num_layers=opts.receiver_num_layers,
                                              force_eos=force_eos)
        agents["agent_{}".format(i)] = agent
        optim_params["agent_{}".format(i)] = {"length_cost": 0.,
                                              "sender_entropy_coeff": opts.sender_entropy_coeff,
                                              "receiver_entropy_coeff": opts.receiver_entropy_coeff}
        loss_weights["agent_{}".format(i)] = {"self": 0., "cross": 1., "imitation": 0.}
        if i < opts.N_speakers:
            speaker_parameters["agent_{}".format(i)] = list(agent.agent_sender.parameters()) + \
                list(agent.sender_norm_h.parameters()) + \
                list(agent.sender_norm_c.parameters()) + \
                list(agent.hidden_to_output.parameters()) + \
                list(agent.sender_embedding.parameters()) + \
                list(agent.sender_cells.parameters())
        if i < opts.N_listeners:
            listener_parameters["agent_{}".format(i)] = list(agent.agent_receiver.parameters()) + \
                list(agent.receiver_cell.parameters()) + \
                list(agent.receiver_embedding.parameters())

    game_init = ForwardPassSpeakerMultiAgent(Agents=agents,
                                             n_attributes=opts.n_attributes,
                                             n_values=opts.n_values,
                                             loss_imitation=loss_message_imitation,
                                             optim_params=optim_params,
                                             device=device)

    game = DialogReinforceCompositionalityMultiAgent(Agents=agents,
                                                     n_attributes=opts.n_attributes,
                                                     n_values=opts.n_values,
                                                     loss_understanding=loss_understanding_compositionality,
                                                     optim_params=optim_params,
                                                     baseline_mode=opts.baseline_mode,
                                                     reward_mode=opts.reward_mode,
                                                     loss_weights=loss_weights,
                                                     device=device)

    # Optimizers
    optimizer_speaker = {}
    optimizer_listener = {}
    for i in range(max(opts.N_speakers, opts.N_listeners)):
        if i < opts.N_speakers:
            optimizer_speaker["agent_{}".format(i)] = core.build_optimizer(
                list(speaker_parameters["agent_{}".format(i)]), lr=opts.sender_lr)
        if i < opts.N_listeners:
            optimizer_listener["agent_{}".format(i)] = core.build_optimizer(
                list(listener_parameters["agent_{}".format(i)]), lr=opts.receiver_lr)

    if opts.K_random:
        Ks_speakers = [np.random.rand() for _ in range(opts.N_speakers)]
        Ks_listeners = [np.random.rand() for _ in range(opts.N_listeners)]
    else:
        Ks_speakers = [1] * opts.N_speakers
        Ks_listeners = [1] * opts.N_listeners

    # Create trainers
    list_speakers = list(range(opts.N_speakers))
    list_listeners = list(range(opts.N_listeners))

    trainer_init = TrainerInitMultiagent(game=game_init, optimizer_speaker=optimizer_speaker,
                                         list_speakers=list_speakers, save_probs_eval=opts.save_probs,
                                         train_data=train_loader, validation_data=test_loader,
                                         callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    trainer = TrainerDialogMultiAgentPair(game=game, optimizer_speaker=optimizer_speaker,
                                          optimizer_listener=optimizer_listener,
                                          list_speakers=list_speakers, list_listeners=list_listeners,
                                          save_probs_eval=opts.save_probs,
                                          N_listener_sampled=opts.N_listener_sampled,
                                          step_ratio=opts.step_ratio,
                                          Ks_speakers=Ks_speakers, Ks_listeners=Ks_listeners,
                                          train_data=train_loader, validation_data=test_loader,
                                          callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    # Create save dir
    if not path.exists(opts.dir_save):
        os.system("mkdir {}".format(opts.dir_save))
        os.system("mkdir -p {}/models {}/training_info {}/messages {}/accuracy {}/test".format(
            opts.dir_save, opts.dir_save, opts.dir_save, opts.dir_save, opts.dir_save))

    # Save splits and scaling factors
    np.save(opts.dir_save + '/training_info/train_split.npy', train_split)
    np.save(opts.dir_save + '/training_info/test_split.npy', test_split)
    np.save(opts.dir_save + '/training_info/Ks_speakers.npy', Ks_speakers)
    np.save(opts.dir_save + '/training_info/Ks_listeners.npy', Ks_listeners)

    # Main losses
    training_losses = []
    eval_losses = []
    training_entropy = []
    training_loss_cross = []
    eval_loss_cross = []

    # Initialization phase: train the speakers alone for a few epochs
    for epoch in range(10):
        print("Epoch: " + str(epoch))
        if epoch % 10 == 0 and opts.N_speakers < 4:
            compute_similarity = True
        else:
            compute_similarity = opts.compute_similarity

        # Train
        list_train_loss, list_train_rest = trainer_init.train(n_epochs=1)

        # Eval
        eval_loss, eval_rest = trainer.eval()

        print("Train")
        if epoch == 0:
            messages = [np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
                        for _ in range(max(opts.N_speakers, opts.N_listeners))]
        messages, accuracy_vectors, similarity_messages = dump_compositionality_multiagent(
            trainer.game, compo_dataset, train_split, list_speakers, list_listeners,
            opts.n_attributes, opts.n_values, device, epoch,
            past_messages=messages, compute_similarity=compute_similarity)
        np_messages = {agent: convert_messages_to_numpy(messages[agent]) for agent in messages}

        print("Test")
        if epoch == 0:
            messages_test = [np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
                             for _ in range(max(opts.N_speakers, opts.N_listeners))]
        messages_test, accuracy_vectors_test, similarity_messages_test = dump_compositionality_multiagent(
            trainer.game, compo_dataset, test_split, list_speakers, list_listeners,
            opts.n_attributes, opts.n_values, device, epoch,
            past_messages=messages_test, compute_similarity=compute_similarity)
        np_messages_test = {agent: convert_messages_to_numpy(messages_test[agent])
                            for agent in messages_test}

        # Save models
        if epoch % 20 == 0:
            for agent in agents:
                torch.save(agents[agent].state_dict(),
                           f"{opts.dir_save}/models/{agent}_weights_{epoch}.pth")

    # Main training phase
    for epoch in range(opts.n_epochs):
        print("Epoch: " + str(epoch))
        if epoch % 10 == 0 and opts.N_speakers < 4:
            compute_similarity = True
        else:
            compute_similarity = opts.compute_similarity

        # Train
        list_train_loss, list_train_rest = trainer.train(n_epochs=1)

        # Eval
        eval_loss, eval_rest = trainer.eval()

        # Store results
        training_losses.append(list_train_loss[-1])
        eval_losses.append(eval_loss)
        training_entropy = [-1] * max(opts.N_speakers, opts.N_listeners)
        training_loss_cross = [-1] * max(opts.N_speakers, opts.N_listeners)
        eval_loss_cross = [-1] * max(opts.N_speakers, opts.N_listeners)

        for i in range(max(opts.N_speakers, opts.N_listeners)):
            if "sender_entropy_{}".format(i) in list_train_rest[-1]:
                training_entropy[i] = list_train_rest[-1]["sender_entropy_{}".format(i)]
            if "loss_{}".format(i) in list_train_rest[-1]:
                training_loss_cross[i] = list_train_rest[-1]["loss_{}".format(i)]
            if "loss_{}".format(i) in eval_rest:
                eval_loss_cross[i] = eval_rest["loss_{}".format(i)]

        print("Train")
        if epoch == 0:
            messages = [np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
                        for _ in range(max(opts.N_speakers, opts.N_listeners))]
        messages, accuracy_vectors, similarity_messages = dump_compositionality_multiagent(
            trainer.game, compo_dataset, train_split, list_speakers, list_listeners,
            opts.n_attributes, opts.n_values, device, epoch,
            past_messages=messages, compute_similarity=compute_similarity)
        np_messages = {agent: convert_messages_to_numpy(messages[agent]) for agent in messages}

        print("Test")
        if epoch == 0:
            messages_test = [np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
                             for _ in range(max(opts.N_speakers, opts.N_listeners))]
        messages_test, accuracy_vectors_test, similarity_messages_test = dump_compositionality_multiagent(
            trainer.game, compo_dataset, test_split, list_speakers, list_listeners,
            opts.n_attributes, opts.n_values, device, epoch,
            past_messages=messages_test, compute_similarity=compute_similarity)
        np_messages_test = {agent: convert_messages_to_numpy(messages_test[agent])
                            for agent in messages_test}

        # Save models
        if epoch % 20 == 0:
            for agent in agents:
                torch.save(agents[agent].state_dict(),
                           f"{opts.dir_save}/models/{agent}_weights_{epoch}.pth")

        # Save training info
        if epoch % 10 == 0:
            np.save(opts.dir_save + '/training_info/training_loss_{}.npy'.format(epoch), training_losses)
            np.save(opts.dir_save + '/training_info/eval_loss_{}.npy'.format(epoch), eval_losses)
            np.save(opts.dir_save + '/training_info/training_entropy_{}.npy'.format(epoch), training_entropy)
            np.save(opts.dir_save + '/training_info/training_loss_cross_{}.npy'.format(epoch), training_loss_cross)
            np.save(opts.dir_save + '/training_info/eval_loss_cross_{}.npy'.format(epoch), eval_loss_cross)
            np.save(opts.dir_save + '/training_info/similarity_languages_{}.npy'.format(epoch), similarity_messages)
            np.save(opts.dir_save + '/training_info/similarity_languages_test_{}.npy'.format(epoch),
                    similarity_messages_test)

            # Save accuracy/message results
            messages_to_be_saved = np.stack([fill_to_max_len(np_messages[agent], opts.max_len)
                                             for agent in np_messages])
            accuracy_vectors_to_be_saved = np.zeros((len(list_speakers), len(list_listeners),
                                                     len(train_split), opts.n_attributes))
            for i, agent_speaker in enumerate(accuracy_vectors):
                for j, agent_listener in enumerate(accuracy_vectors[agent_speaker]):
                    accuracy_vectors_to_be_saved[i, j, :, :] = accuracy_vectors[agent_speaker][agent_listener]
            np.save(opts.dir_save + '/messages/messages_{}.npy'.format(epoch), messages_to_be_saved)
            np.save(opts.dir_save + '/accuracy/accuracy_{}.npy'.format(epoch), accuracy_vectors_to_be_saved)

            # Test set
            messages_test_to_be_saved = np.stack([fill_to_max_len(np_messages_test[agent], opts.max_len)
                                                  for agent in np_messages_test])
            accuracy_vectors_test_to_be_saved = np.zeros((len(list_speakers), len(list_listeners),
                                                          len(test_split), opts.n_attributes))
            for i, agent_speaker in enumerate(accuracy_vectors_test):
                for j, agent_listener in enumerate(accuracy_vectors_test[agent_speaker]):
                    accuracy_vectors_test_to_be_saved[i, j, :, :] = accuracy_vectors_test[agent_speaker][agent_listener]
            np.save(opts.dir_save + '/test/messages_test_{}.npy'.format(epoch), messages_test_to_be_saved)
            np.save(opts.dir_save + '/test/accuracy_test_{}.npy'.format(epoch), accuracy_vectors_test_to_be_saved)

    core.close()
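
# The test split above is built with a Python loop and `j not in train_split` membership
# tests, which is quadratic in the number of inputs. Below is a sketch of an equivalent,
# vectorized construction of the same partition (same opts fields assumed):
import numpy as np

def make_splits(n_inputs, proportion, seed):
    rng = np.random.RandomState(seed)
    train = rng.choice(n_inputs, size=int(proportion * n_inputs), replace=False)
    test = np.setdiff1d(np.arange(n_inputs), train)  # sorted complement of the train indices
    return train, test

# Usage: train_split, test_split = make_splits(opts.n_values ** opts.n_attributes,
#                                              opts.split_proportion, opts.random_seed)
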
def main(params):
    print(torch.cuda.is_available())
    opts = get_params(params)
    print(opts, flush=True)
    device = opts.device
    force_eos = opts.force_eos == 1

    compo_dataset = build_compo_dataset(opts.n_values, opts.n_attributes)
    split = np.sort(np.load(opts.dataset_split))

    with open(opts.agents_weights, "rb") as fp:
        agents_weights = pickle.load(fp)

    agents = {}
    for i in range(len(agents_weights)):
        agent = AgentBaselineCompositionality(vocab_size=opts.vocab_size,
                                              n_attributes=opts.n_attributes,
                                              n_values=opts.n_values,
                                              max_len=opts.max_len,
                                              embed_dim=opts.sender_embedding,
                                              sender_hidden_size=opts.sender_hidden,
                                              receiver_hidden_size=opts.receiver_hidden,
                                              sender_cell=opts.sender_cell,
                                              receiver_cell=opts.receiver_cell,
                                              sender_num_layers=opts.sender_num_layers,
                                              receiver_num_layers=opts.receiver_num_layers,
                                              force_eos=force_eos)
        agent.load_state_dict(torch.load(agents_weights[i], map_location=torch.device('cpu')))
        agent.to(device)
        agents["agent_{}".format(i)] = agent

    if opts.by_position:
        # By-position policies, indexed as (input, position, symbol)
        policies = estimate_policy(agents=agents, compo_dataset=compo_dataset, split=split,
                                   n_sampling=opts.n_sampling, vocab_size=opts.vocab_size,
                                   max_len=opts.max_len, n_attributes=opts.n_attributes,
                                   n_values=opts.n_values, device=device, by_position=True)

        # Mean per-position symbol entropy for each agent
        for agent in policies:
            mean_entropy = 0.
            for i in range(np.shape(policies[agent])[0]):
                for j in range(np.shape(policies[agent])[1]):
                    probs = [policies[agent][i, j, k] for k in range(np.shape(policies[agent])[2])]
                    mean_entropy += entropy(probs, base=10)
            mean_entropy /= (np.shape(policies[agent])[0] * np.shape(policies[agent])[1])
            np.save(opts.dir_save + '/training_info/entropy_by_pos_{}.npy'.format(agent),
                    np.array(mean_entropy))

        # Pairwise KL divergence and L2 distance between agents' policies
        KL_mat = np.zeros((len(policies) - 1, len(policies) - 1))
        L2_mat = np.zeros((len(policies) - 1, len(policies) - 1))
        for a1, agent_1 in enumerate(policies):
            for a2, agent_2 in enumerate(policies):
                if agent_1 != "mean_policy" and agent_2 != "mean_policy":
                    mean_KL = 0.
                    mean_L2 = 0.
                    for i in range(len(policies[agent_1])):
                        for j in range(np.shape(policies[agent_1])[1]):
                            probs_1 = [policies[agent_1][i, j, k] for k in range(np.shape(policies[agent_1])[2])]
                            probs_2 = [policies[agent_2][i, j, k] for k in range(np.shape(policies[agent_2])[2])]
                            mean_KL += entropy(np.array(probs_1) + 1e-16, qk=np.array(probs_2) + 1e-16, base=10)
                            mean_L2 += np.sqrt(np.sum((np.array(probs_1) - np.array(probs_2)) ** 2))
                    mean_KL /= (np.shape(policies[agent_1])[0] * np.shape(policies[agent_1])[1])
                    mean_L2 /= (np.shape(policies[agent_1])[0] * np.shape(policies[agent_1])[1])
                    KL_mat[a1, a2] = mean_KL
                    L2_mat[a1, a2] = mean_L2
        np.save(opts.dir_save + '/training_info/KLdiv.npy', np.array(KL_mat))
        np.save(opts.dir_save + '/training_info/L2.npy', np.array(L2_mat))

        # Divergence of each agent's policy from the mean policy
        KL_mean_mat = np.zeros(len(policies) - 1)
        L2_mean_mat = np.zeros(len(policies) - 1)
        for a1, agent_1 in enumerate(policies):
            for a2, agent_2 in enumerate(policies):
                if agent_1 == "mean_policy" and agent_2 != "mean_policy":
                    mean_KL = 0.
                    mean_L2 = 0.
                    for i in range(len(policies[agent_1])):
                        for j in range(np.shape(policies[agent_1])[1]):
                            probs_1 = [policies[agent_1][i, j, k] for k in range(np.shape(policies[agent_1])[2])]
                            probs_2 = [policies[agent_2][i, j, k] for k in range(np.shape(policies[agent_2])[2])]
                            mean_KL += entropy(np.array(probs_1) + 1e-16, qk=np.array(probs_2) + 1e-16, base=10)
                            mean_L2 += np.sqrt(np.sum((np.array(probs_1) - np.array(probs_2)) ** 2))
                    mean_KL /= (np.shape(policies[agent_1])[0] * np.shape(policies[agent_1])[1])
                    mean_L2 /= (np.shape(policies[agent_1])[0] * np.shape(policies[agent_1])[1])
                    KL_mean_mat[a2] = mean_KL
                    L2_mean_mat[a2] = mean_L2
        np.save(opts.dir_save + '/training_info/KLdiv_meanpol.npy', np.array(KL_mean_mat))
        np.save(opts.dir_save + '/training_info/L2_meanpol.npy', np.array(L2_mean_mat))

    else:
        policies = estimate_policy(agents=agents, compo_dataset=compo_dataset, split=split,
                                   n_sampling=opts.n_sampling, vocab_size=opts.vocab_size,
                                   max_len=opts.max_len, n_attributes=opts.n_attributes,
                                   n_values=opts.n_values, device=device)

        # Mean entropy over whole-message distributions
        for agent in policies:
            mean_entropy = 0.
            for i in range(len(policies[agent])):
                probs = [policies[agent][i][m] for m in policies[agent][i]]
                mean_entropy += entropy(probs, base=10)
            mean_entropy /= len(policies[agent])
            np.save(opts.dir_save + '/training_info/entropy_{}.npy'.format(agent),
                    np.array(mean_entropy))

        compositionality = estimate_compositionality(agents=agents, compo_dataset=compo_dataset,
                                                     split=split, n_sampling=opts.n_sampling,
                                                     n_indices=1000, vocab_size=opts.vocab_size,
                                                     max_len=opts.max_len,
                                                     n_attributes=opts.n_attributes,
                                                     n_values=opts.n_values, device=device)
        np.save(opts.dir_save + '/training_info/compositionality.npy', np.array(compositionality))

    core.close()
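
# The nested Python loops above rebuild per-position probability vectors one list at a time.
# Assuming, as the indexing implies, that each by-position policy is an array of shape
# (n_inputs, max_len, vocab_size), the same averaged divergences can be computed with a few
# vectorized numpy operations. This is a sketch of an equivalent computation, not a drop-in
# replacement taken from the repo:
import numpy as np

def mean_kl_l2(p, q, eps=1e-16):
    """Mean KL divergence (base 10) and mean L2 distance between two policies,
    averaged over inputs and message positions."""
    p = np.asarray(p) + eps
    q = np.asarray(q) + eps
    p = p / p.sum(axis=-1, keepdims=True)  # renormalize, as scipy.stats.entropy does
    q = q / q.sum(axis=-1, keepdims=True)
    kl = np.sum(p * np.log10(p / q), axis=-1)    # KL per (input, position)
    l2 = np.sqrt(np.sum((p - q) ** 2, axis=-1))  # Euclidean gap per (input, position)
    return kl.mean(), l2.mean()
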
def main(params):
    print(torch.cuda.is_available())
    opts = get_params(params)
    print(opts, flush=True)
    device = opts.device
    force_eos = opts.force_eos == 1

    # Distribution of the inputs
    if opts.probs == "uniform":
        probs = []
        probs_by_att = np.ones(opts.n_values)
        probs_by_att /= probs_by_att.sum()
        for i in range(opts.n_attributes):
            probs.append(probs_by_att)

    if opts.probs == "entropy_test":
        probs = []
        for i in range(opts.n_attributes):
            probs_by_att = np.ones(opts.n_values)
            probs_by_att[0] = 1 + (1 * i)
            probs_by_att /= probs_by_att.sum()
            probs.append(probs_by_att)

    if opts.probs_attributes == "uniform":
        probs_attributes = [1] * opts.n_attributes

    if opts.probs_attributes == "uniform_indep":
        probs_attributes = [0.2] * opts.n_attributes

    if opts.probs_attributes == "echelon":
        #probs_attributes = [1. - 0.2 * i for i in range(opts.n_attributes)]
        #probs_attributes = [0.7 + 0.3 / (i + 1) for i in range(opts.n_attributes)]
        probs_attributes = [1., 0.95, 0.9, 0.85]

    print("Probability by attribute is:", probs_attributes)

    compo_dataset = build_compo_dataset(opts.n_values, opts.n_attributes)

    train_split = np.random.RandomState(opts.random_seed).choice(
        opts.n_values ** opts.n_attributes,
        size=int(opts.split_proportion * (opts.n_values ** opts.n_attributes)),
        replace=False)
    test_split = []
    for j in range(opts.n_values ** opts.n_attributes):
        if j not in train_split:
            test_split.append(j)
    test_split = np.array(test_split)

    train_loader = OneHotLoaderCompositionality(dataset=compo_dataset, split=train_split,
                                                n_values=opts.n_values, n_attributes=opts.n_attributes,
                                                batch_size=opts.batch_size,
                                                batches_per_epoch=opts.batches_per_epoch,
                                                probs=probs, probs_attributes=probs_attributes)

    # single batches with 1s on the diag
    #test_loader = TestLoaderCompositionality(dataset=compo_dataset,n_values=opts.n_values,n_attributes=opts.n_attributes)
    test_loader = TestLoaderCompositionality(dataset=compo_dataset, split=test_split,
                                             n_values=opts.n_values, n_attributes=opts.n_attributes,
                                             batch_size=opts.batch_size,
                                             batches_per_epoch=opts.batches_per_epoch,
                                             probs=probs, probs_attributes=probs_attributes)

    agent_1 = AgentBaselineCompositionality(vocab_size=opts.vocab_size,
                                            n_attributes=opts.n_attributes,
                                            n_values=opts.n_values,
                                            max_len=opts.max_len,
                                            embed_dim=opts.sender_embedding,
                                            sender_hidden_size=opts.sender_hidden,
                                            receiver_hidden_size=opts.receiver_hidden,
                                            sender_cell=opts.sender_cell,
                                            receiver_cell=opts.receiver_cell,
                                            sender_num_layers=opts.sender_num_layers,
                                            receiver_num_layers=opts.receiver_num_layers,
                                            force_eos=force_eos)

    agent_2 = AgentBaselineCompositionality(vocab_size=opts.vocab_size,
                                            n_attributes=opts.n_attributes,
                                            n_values=opts.n_values,
                                            max_len=opts.max_len,
                                            embed_dim=opts.sender_embedding,
                                            sender_hidden_size=opts.sender_hidden,
                                            receiver_hidden_size=opts.receiver_hidden,
                                            sender_cell=opts.sender_cell,
                                            receiver_cell=opts.receiver_cell,
                                            sender_num_layers=opts.sender_num_layers,
                                            receiver_num_layers=opts.receiver_num_layers,
                                            force_eos=force_eos)

    # Define game
    optim_params = {"length_cost": 0.,
                    "sender_entropy_coeff_1": opts.sender_entropy_coeff,
                    "receiver_entropy_coeff_1": opts.receiver_entropy_coeff,
                    "sender_entropy_coeff_2": opts.sender_entropy_coeff,
                    "receiver_entropy_coeff_2": opts.receiver_entropy_coeff}

    if opts.optim_mode == "cross":
        loss_weights = {"self": 0., "cross": 1., "imitation": 0.}
    elif opts.optim_mode == "cross+self":
        loss_weights = {"self": 1., "cross": 1., "imitation": 0.}
    else:
        loss_weights = {"self": 1., "cross": 1., "imitation": 1.}
    #loss_weights = {"self": opts.self_weight, "cross": opts.cross_weight, "imitation": opts.imitation_weight}

    game = DialogReinforceCompositionality(Agent_1=agent_1,
                                           Agent_2=agent_2,
                                           n_attributes=opts.n_attributes,
                                           n_values=opts.n_values,
                                           loss_understanding=loss_understanding_compositionality,
                                           optim_params=optim_params,
                                           baseline_mode=opts.baseline_mode,
                                           reward_mode=opts.reward_mode,
                                           loss_weights=loss_weights,
                                           device=device)

    # Create optimizers and trainer
    if opts.model == "expe_1":
        optimizer = core.build_optimizer(list(game.parameters()))
        trainer = TrainerDialogCompositionality(n_attributes=opts.n_attributes, n_values=opts.n_values,
                                                game=game, optimizer=optimizer,
                                                train_data=train_loader, validation_data=test_loader,
                                                callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    elif opts.model == "expe_lr":
        speaker_parameters = list(game.agent_1.agent_sender.parameters()) + \
            list(game.agent_1.sender_norm_h.parameters()) + \
            list(game.agent_1.sender_norm_c.parameters()) + \
            list(game.agent_1.hidden_to_output.parameters()) + \
            list(game.agent_1.sender_embedding.parameters()) + \
            list(game.agent_1.sender_cells.parameters()) + \
            list(game.agent_2.agent_sender.parameters()) + \
            list(game.agent_2.sender_norm_h.parameters()) + \
            list(game.agent_2.sender_norm_c.parameters()) + \
            list(game.agent_2.hidden_to_output.parameters()) + \
            list(game.agent_2.sender_embedding.parameters()) + \
            list(game.agent_2.sender_cells.parameters())
        listener_parameters = list(game.agent_1.agent_receiver.parameters()) + \
            list(game.agent_1.receiver_cell.parameters()) + \
            list(game.agent_1.receiver_embedding.parameters()) + \
            list(game.agent_2.agent_receiver.parameters()) + \
            list(game.agent_2.receiver_cell.parameters()) + \
            list(game.agent_2.receiver_embedding.parameters())
        # SGD
        #optimizer_speaker = torch.optim.SGD(speaker_parameters, lr=opts.sender_lr, momentum=0.9, nesterov=False)
        #optimizer_listener = torch.optim.SGD(listener_parameters, lr=opts.receiver_lr, momentum=0.9, nesterov=False)
        # Single optimizer with per-group learning rates (Adam assumed, matching
        # core.build_optimizer's default), so that TrainerDialog receives one optimizer
        optimizer = torch.optim.Adam([{"params": speaker_parameters, "lr": opts.sender_lr},
                                      {"params": listener_parameters, "lr": opts.receiver_lr}])
        # Create trainer
        trainer = TrainerDialog(game=game, optimizer=optimizer, train_data=train_loader,
                                validation_data=test_loader,
                                callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    elif opts.model == "expe_step":
        speaker_parameters = list(game.agent_1.agent_sender.parameters()) + \
            list(game.agent_1.sender_norm_h.parameters()) + \
            list(game.agent_1.sender_norm_c.parameters()) + \
            list(game.agent_1.hidden_to_output.parameters()) + \
            list(game.agent_1.sender_embedding.parameters()) + \
            list(game.agent_1.sender_cells.parameters()) + \
            list(game.agent_2.agent_sender.parameters()) + \
            list(game.agent_2.sender_norm_h.parameters()) + \
            list(game.agent_2.sender_norm_c.parameters()) + \
            list(game.agent_2.hidden_to_output.parameters()) + \
            list(game.agent_2.sender_embedding.parameters()) + \
            list(game.agent_2.sender_cells.parameters())
        listener_parameters = list(game.agent_1.agent_receiver.parameters()) + \
            list(game.agent_1.receiver_cell.parameters()) + \
            list(game.agent_1.receiver_embedding.parameters()) + \
            list(game.agent_2.agent_receiver.parameters()) + \
            list(game.agent_2.receiver_cell.parameters()) + \
            list(game.agent_2.receiver_embedding.parameters())
        # SGD
        #optimizer_speaker = torch.optim.SGD(speaker_parameters, lr=opts.sender_lr, momentum=0.9, nesterov=False)
        #optimizer_listener = torch.optim.SGD(listener_parameters, lr=opts.receiver_lr, momentum=0.9, nesterov=False)
        optimizer_speaker = core.build_optimizer(list(speaker_parameters), lr=opts.sender_lr)
        optimizer_listener = core.build_optimizer(list(listener_parameters), lr=opts.receiver_lr)
        # Create trainer
        trainer = TrainerDialogAsymStep(game=game, optimizer_speaker=optimizer_speaker,
                                        optimizer_listener=optimizer_listener,
                                        N_speaker=opts.N_speaker, N_listener=opts.N_listener,
                                        train_data=train_loader, validation_data=test_loader,
                                        callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    elif opts.model == "expe_single_listener":
        game = DialogReinforceCompositionalitySingleListener(Agent_1=agent_1,
                                                             Agent_2=agent_2,
                                                             n_attributes=opts.n_attributes,
                                                             n_values=opts.n_values,
                                                             loss_understanding=loss_understanding_compositionality,
                                                             optim_params=optim_params,
                                                             baseline_mode=opts.baseline_mode,
                                                             reward_mode=opts.reward_mode,
                                                             loss_weights=loss_weights,
                                                             device=device)
        speaker_parameters = list(game.agent_1.agent_sender.parameters()) + \
            list(game.agent_1.sender_norm_h.parameters()) + \
            list(game.agent_1.sender_norm_c.parameters()) + \
            list(game.agent_1.hidden_to_output.parameters()) + \
            list(game.agent_1.sender_embedding.parameters()) + \
            list(game.agent_1.sender_cells.parameters()) + \
            list(game.agent_2.agent_sender.parameters()) + \
            list(game.agent_2.sender_norm_h.parameters()) + \
            list(game.agent_2.sender_norm_c.parameters()) + \
            list(game.agent_2.hidden_to_output.parameters()) + \
            list(game.agent_2.sender_embedding.parameters()) + \
            list(game.agent_2.sender_cells.parameters())
        # Only agent_1 acts as listener in this setting
        listener_parameters = list(game.agent_1.agent_receiver.parameters()) + \
            list(game.agent_1.receiver_cell.parameters()) + \
            list(game.agent_1.receiver_embedding.parameters())
        # SGD
        #optimizer_speaker = torch.optim.SGD(speaker_parameters, lr=opts.sender_lr, momentum=0.9, nesterov=False)
        #optimizer_listener = torch.optim.SGD(listener_parameters, lr=opts.receiver_lr, momentum=0.9, nesterov=False)
        optimizer_speaker = core.build_optimizer(list(speaker_parameters), lr=opts.sender_lr)
        optimizer_listener = core.build_optimizer(list(listener_parameters), lr=opts.receiver_lr)
        # Create trainer
        trainer = TrainerDialogAsymStep(game=game, optimizer_speaker=optimizer_speaker,
                                        optimizer_listener=optimizer_listener,
                                        N_speaker=opts.N_speaker, N_listener=opts.N_listener,
                                        train_data=train_loader, validation_data=test_loader,
                                        callbacks=[EarlyStopperAccuracy(opts.early_stopping_thr)])

    else:
        raise ValueError("Model not indicated")

    # Create save dir
    if not path.exists(opts.dir_save):
        os.system("mkdir {}".format(opts.dir_save))
        os.system("mkdir -p {}/models {}/training_info {}/messages {}/accuracy {}/test".format(
            opts.dir_save, opts.dir_save, opts.dir_save, opts.dir_save, opts.dir_save))

    # Save train split
    np.save(opts.dir_save + '/training_info/train_split.npy', train_split)
    np.save(opts.dir_save + '/training_info/test_split.npy', test_split)

    # Main losses
    training_losses = []
    eval_losses = []
    training_entropy_1 = []
    training_entropy_2 = []
    training_loss_12 = []
    eval_loss_12 = []
    training_loss_21 = []
    eval_loss_21 = []

    # Specific losses
    training_loss_self_11 = []
    training_loss_cross_12 = []
    training_loss_imitation_12 = []
    training_loss_self_22 = []
    training_loss_cross_21 = []
    training_loss_imitation_21 = []
    eval_loss_self_11 = []
    eval_loss_cross_12 = []
    eval_loss_imitation_12 = []
    eval_loss_self_22 = []
    eval_loss_cross_21 = []
    eval_loss_imitation_21 = []

    # Linguistic
    similarity_languages = []
    similarity_languages_test = []

    for epoch in range(int(opts.n_epochs)):
        print("Epoch: " + str(epoch))
        if epoch % 10 == 0:
            compute_similarity = True
        else:
            compute_similarity = opts.print_metrics

        # Train
        list_train_loss, list_train_rest = trainer.train(n_epochs=1)

        # Eval
        eval_loss, eval_rest = trainer.eval()

        # Store results
        training_losses.append(list_train_loss[-1])
        eval_losses.append(eval_loss)
        training_entropy_1.append(list_train_rest[-1]["sender_entropy_1"])
        training_entropy_2.append(list_train_rest[-1]["sender_entropy_2"])
        training_loss_12.append(list_train_rest[-1]["loss_1"])
        eval_loss_12.append(eval_rest["loss_1"])
        training_loss_21.append(list_train_rest[-1]["loss_2"])
        eval_loss_21.append(eval_rest["loss_2"])
        training_loss_self_11.append(list_train_rest[-1]["loss_self_11"])
        training_loss_cross_12.append(list_train_rest[-1]["loss_cross_12"])
        training_loss_self_22.append(list_train_rest[-1]["loss_self_22"])
        training_loss_cross_21.append(list_train_rest[-1]["loss_cross_21"])
        eval_loss_self_11.append(eval_rest["loss_self_11"])
        eval_loss_cross_12.append(eval_rest["loss_cross_12"])
        eval_loss_self_22.append(eval_rest["loss_self_22"])
        eval_loss_cross_21.append(eval_rest["loss_cross_21"])

        if opts.print_metrics:
            print("Train")
        if epoch == 0:
            messages_1 = np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
            messages_2 = np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
        messages_1, messages_2, acc_vec_1, acc_vec_2, acc_vec_11, acc_vec_22, similarity_messages = dump_compositionality(
            trainer.game, compo_dataset, train_split, opts.n_attributes, opts.n_values,
            device, False, epoch, past_messages_1=messages_1, past_messages_2=messages_2,
            compute_similarity=compute_similarity, print_metrics=opts.print_metrics)
        np_messages_1 = convert_messages_to_numpy(messages_1)
        np_messages_2 = convert_messages_to_numpy(messages_2)
        similarity_languages.append(similarity_messages)

        if opts.print_metrics:
            print("Test")
        if epoch == 0:
            messages_1_test = np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
            messages_2_test = np.zeros((opts.n_values ** opts.n_attributes, opts.max_len))
        messages_1_test, messages_2_test, acc_vec_1_test, acc_vec_2_test, acc_vec_11_test, acc_vec_22_test, similarity_messages_test = dump_compositionality(
            trainer.game, compo_dataset, test_split, opts.n_attributes, opts.n_values,
            device, False, epoch, past_messages_1=messages_1_test, past_messages_2=messages_2_test,
            compute_similarity=compute_similarity, print_metrics=opts.print_metrics)
        np_messages_1_test = convert_messages_to_numpy(messages_1_test)
        np_messages_2_test = convert_messages_to_numpy(messages_2_test)
        similarity_languages_test.append(similarity_messages_test)

        #game.optim_params["sender_entropy_coeff_1"] = opts.sender_entropy_coeff - (opts.sender_entropy_coeff + 0.05) * np.mean(acc_vec_11)
        #game.optim_params["sender_entropy_coeff_2"] = opts.sender_entropy_coeff - (opts.sender_entropy_coeff + 0.05) * np.mean(acc_vec_22)

        # Save models
        if epoch % 20 == 0:
            torch.save(agent_1.state_dict(), f"{opts.dir_save}/models/agent_1_weights_{epoch}.pth")
            torch.save(agent_2.state_dict(), f"{opts.dir_save}/models/agent_2_weights_{epoch}.pth")

        # Save training info
        if epoch % 10 == 0:
            np.save(opts.dir_save + '/training_info/training_loss_{}.npy'.format(epoch), training_losses)
            np.save(opts.dir_save + '/training_info/eval_loss_{}.npy'.format(epoch), eval_losses)
            np.save(opts.dir_save + '/training_info/training_entropy_1_{}.npy'.format(epoch), training_entropy_1)
            np.save(opts.dir_save + '/training_info/training_entropy_2_{}.npy'.format(epoch), training_entropy_2)
            np.save(opts.dir_save + '/training_info/training_loss_12_{}.npy'.format(epoch), training_loss_12)
            np.save(opts.dir_save + '/training_info/eval_loss_12_{}.npy'.format(epoch), eval_loss_12)
            np.save(opts.dir_save + '/training_info/training_loss_21_{}.npy'.format(epoch), training_loss_21)
            np.save(opts.dir_save + '/training_info/eval_loss_21_{}.npy'.format(epoch), eval_loss_21)
            np.save(opts.dir_save + '/training_info/training_loss_self_11_{}.npy'.format(epoch), training_loss_self_11)
            np.save(opts.dir_save + '/training_info/training_loss_cross_12_{}.npy'.format(epoch), training_loss_cross_12)
            np.save(opts.dir_save + '/training_info/training_loss_imitation_12_{}.npy'.format(epoch), training_loss_imitation_12)
            np.save(opts.dir_save + '/training_info/training_loss_self_22_{}.npy'.format(epoch), training_loss_self_22)
            np.save(opts.dir_save + '/training_info/training_loss_cross_21_{}.npy'.format(epoch), training_loss_cross_21)
            np.save(opts.dir_save + '/training_info/training_loss_imitation_21_{}.npy'.format(epoch), training_loss_imitation_21)
            np.save(opts.dir_save + '/training_info/eval_loss_self_11_{}.npy'.format(epoch), eval_loss_self_11)
            np.save(opts.dir_save + '/training_info/eval_loss_cross_12_{}.npy'.format(epoch), eval_loss_cross_12)
            np.save(opts.dir_save + '/training_info/eval_loss_imitation_12_{}.npy'.format(epoch), eval_loss_imitation_12)
            np.save(opts.dir_save + '/training_info/eval_loss_self_22_{}.npy'.format(epoch), eval_loss_self_22)
            np.save(opts.dir_save + '/training_info/eval_loss_cross_21_{}.npy'.format(epoch), eval_loss_cross_21)
            np.save(opts.dir_save + '/training_info/eval_loss_imitation_21_{}.npy'.format(epoch), eval_loss_imitation_21)
            np.save(opts.dir_save + '/training_info/similarity_languages_{}.npy'.format(epoch), similarity_languages)
            np.save(opts.dir_save + '/training_info/similarity_languages_test_{}.npy'.format(epoch), similarity_languages_test)

            # Save accuracy/message results
            np.save(opts.dir_save + '/messages/agent_1_messages_{}.npy'.format(epoch), np_messages_1)
            np.save(opts.dir_save + '/messages/agent_2_messages_{}.npy'.format(epoch), np_messages_2)
            np.save(opts.dir_save + '/accuracy/12_accuracy_{}.npy'.format(epoch), acc_vec_1)
            np.save(opts.dir_save + '/accuracy/21_accuracy_{}.npy'.format(epoch), acc_vec_2)
            np.save(opts.dir_save + '/accuracy/11_accuracy_{}.npy'.format(epoch), acc_vec_11)
            np.save(opts.dir_save + '/accuracy/22_accuracy_{}.npy'.format(epoch), acc_vec_22)

            # Test set
            np.save(opts.dir_save + '/test/agent_1_messages_test_{}.npy'.format(epoch), np_messages_1_test)
            np.save(opts.dir_save + '/test/agent_2_messages_test_{}.npy'.format(epoch), np_messages_2_test)
            np.save(opts.dir_save + '/test/12_accuracy_test_{}.npy'.format(epoch), acc_vec_1_test)
            np.save(opts.dir_save + '/test/21_accuracy_test_{}.npy'.format(epoch), acc_vec_2_test)
            np.save(opts.dir_save + '/test/11_accuracy_test_{}.npy'.format(epoch), acc_vec_11_test)
            np.save(opts.dir_save + '/test/22_accuracy_test_{}.npy'.format(epoch), acc_vec_22_test)

    core.close()
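
# The checkpointing block above repeats the same np.save pattern for every curve. A compact
# helper in the same spirit (hypothetical, not part of the repo) keeps the curves in a dict
# and loops over it, which also makes it harder to save a list under the wrong filename:
def save_curves(dir_save, epoch, curves):
    """Save each named curve as <dir_save>/training_info/<name>_<epoch>.npy."""
    for name, values in curves.items():
        np.save("{}/training_info/{}_{}.npy".format(dir_save, name, epoch), values)

# Usage: save_curves(opts.dir_save, epoch, {"training_loss": training_losses,
#                                           "eval_loss": eval_losses})
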
def main(params):
    opts = get_params(params)
    device = opts.device
    force_eos = opts.force_eos == 1

    if opts.probs == "uniform":
        probs = []
        probs_by_att = np.ones(opts.n_values)
        probs_by_att /= probs_by_att.sum()
        for i in range(opts.n_attributes):
            probs.append(probs_by_att)

    probs_attributes = [1] * opts.n_attributes

    agent_1 = AgentBaseline2(vocab_size=opts.vocab_size, n_features=opts.n_features,
                             max_len=opts.max_len, embed_dim=opts.sender_embedding,
                             sender_hidden_size=opts.sender_hidden,
                             receiver_hidden_size=opts.receiver_hidden,
                             sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
                             sender_num_layers=opts.sender_num_layers,
                             receiver_num_layers=opts.receiver_num_layers,
                             force_eos=force_eos)
    agent_1.load_state_dict(torch.load(opts.agent_1_weights, map_location=torch.device('cpu')))
    agent_1.to(device)

    agent_2 = AgentBaseline2(vocab_size=opts.vocab_size, n_features=opts.n_features,
                             max_len=opts.max_len, embed_dim=opts.sender_embedding,
                             sender_hidden_size=opts.sender_hidden,
                             receiver_hidden_size=opts.receiver_hidden,
                             sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
                             sender_num_layers=opts.sender_num_layers,
                             receiver_num_layers=opts.receiver_num_layers,
                             force_eos=force_eos)
    agent_2.load_state_dict(torch.load(opts.agent_2_weights, map_location=torch.device('cpu')))
    agent_2.to(device)

    policy_1 = estimate_policy(agent_1, opts.n_sampling, opts.n_features,
                               opts.vocab_size, opts.max_len, device)
    policy_2 = estimate_policy(agent_2, opts.n_sampling, opts.n_features,
                               opts.vocab_size, opts.max_len, device)

    #def L2_sim(p, q):
    #    l2 = (p - q) ** 2
    #    l2 = np.sum(l2, axis=2)
    #    return np.mean(l2)
    #l2 = L2_sim(policy_1.cpu().numpy(), policy_2.cpu().numpy())

    np.save(opts.dir_save + '/training_info/policy_1.npy', policy_1.cpu().numpy())
    np.save(opts.dir_save + '/training_info/policy_2.npy', policy_2.cpu().numpy())

    core.close()
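
# The saved policies can be compared offline. A minimal sketch of the L2 similarity hinted
# at by the commented-out helper above, assuming the saved arrays have shape
# (n_inputs, max_len, vocab_size):
import numpy as np

def compare_saved_policies(dir_save):
    """Offline L2 comparison of the two policies saved by the script above."""
    policy_1 = np.load(dir_save + "/training_info/policy_1.npy")
    policy_2 = np.load(dir_save + "/training_info/policy_2.npy")
    # mean over inputs and positions of the squared L2 gap between symbol distributions
    return np.mean(np.sum((policy_1 - policy_2) ** 2, axis=2))
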
def main(params):
    opts = get_params(params)
    device = opts.device
    force_eos = opts.force_eos == 1

    if opts.probs == "uniform":
        probs = []
        probs_by_att = np.ones(opts.n_values)
        probs_by_att /= probs_by_att.sum()
        for i in range(opts.n_attributes):
            probs.append(probs_by_att)

    probs_attributes = [1] * opts.n_attributes

    if opts.compositionality:
        compo_dataset = build_compo_dataset(opts.n_values, opts.n_attributes)
        train_split = np.load(opts.train_split)
        test_split = np.load(opts.test_split)

        train_loader = OneHotLoaderCompositionality(
            dataset=compo_dataset, split=train_split, n_values=opts.n_values,
            n_attributes=opts.n_attributes, batch_size=opts.batch_size,
            batches_per_epoch=opts.batches_per_epoch, probs=probs,
            probs_attributes=probs_attributes)

        # single batches with 1s on the diag
        #test_loader = TestLoaderCompositionality(dataset=compo_dataset,n_values=opts.n_values,n_attributes=opts.n_attributes)
        test_loader = TestLoaderCompositionality(
            dataset=compo_dataset, split=test_split, n_values=opts.n_values,
            n_attributes=opts.n_attributes, batch_size=opts.batch_size,
            batches_per_epoch=opts.batches_per_epoch, probs=probs,
            probs_attributes=probs_attributes)

    agent_1 = AgentBaselineCompositionality(
        vocab_size=opts.vocab_size, n_attributes=opts.n_attributes,
        n_values=opts.n_values, max_len=opts.max_len,
        embed_dim=opts.sender_embedding,
        sender_hidden_size=opts.sender_hidden,
        receiver_hidden_size=opts.receiver_hidden,
        sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
        sender_num_layers=opts.sender_num_layers,
        receiver_num_layers=opts.receiver_num_layers,
        force_eos=force_eos)
    agent_1.load_state_dict(torch.load(opts.agent_1_weights, map_location=torch.device('cpu')))
    agent_1.to(device)

    agent_2 = AgentBaselineCompositionality(
        vocab_size=opts.vocab_size, n_attributes=opts.n_attributes,
        n_values=opts.n_values, max_len=opts.max_len,
        embed_dim=opts.sender_embedding,
        sender_hidden_size=opts.sender_hidden,
        receiver_hidden_size=opts.receiver_hidden,
        sender_cell=opts.sender_cell, receiver_cell=opts.receiver_cell,
        sender_num_layers=opts.sender_num_layers,
        receiver_num_layers=opts.receiver_num_layers,
        force_eos=force_eos)
    agent_2.load_state_dict(torch.load(opts.agent_2_weights, map_location=torch.device('cpu')))
    agent_2.to(device)

    #complexity_train_1 = compute_complexity_compositionality(agent_1, compo_dataset, train_split, opts.n_attributes, opts.n_values, opts.n_sampling, device, meanings_distribution="uniform")
    #complexity_train_2 = compute_complexity_compositionality(agent_2, compo_dataset, train_split, opts.n_attributes, opts.n_values, opts.n_sampling, device, meanings_distribution="uniform")
    #complexity_test_1 = compute_complexity_compositionality(agent_1, compo_dataset, test_split, opts.n_attributes, opts.n_values, opts.n_sampling, device, meanings_distribution="uniform")
    #complexity_test_2 = compute_complexity_compositionality(agent_2, compo_dataset, test_split, opts.n_attributes, opts.n_values, opts.n_sampling, device, meanings_distribution="uniform")
    #print("Complexity train 1={}".format(complexity_train_1), flush=True)
    #print("Complexity train 2={}".format(complexity_train_2), flush=True)
    #print("Complexity test 1={}".format(complexity_test_1), flush=True)
    #print("Complexity test 2={}".format(complexity_test_2), flush=True)
    #np.save(opts.dir_save+'/training_info/complexity_train_1.npy', complexity_train_1)
    #np.save(opts.dir_save+'/training_info/complexity_train_2.npy', complexity_train_2)
    #np.save(opts.dir_save+'/training_info/complexity_test_1.npy', complexity_test_1)
    #np.save(opts.dir_save+'/training_info/complexity_test_2.npy', complexity_test_2)

    average_entropy_1 = compute_average_symbol_entropy(
        agent_1, compo_dataset, train_split, opts.n_attributes, opts.n_values,
        opts.max_len, opts.vocab_size, opts.n_sampling, device,
        meanings_distribution="uniform")
    average_entropy_2 = compute_average_symbol_entropy(
        agent_2, compo_dataset, train_split, opts.n_attributes, opts.n_values,
        opts.max_len, opts.vocab_size, opts.n_sampling, device,
        meanings_distribution="uniform")

    np.save(opts.dir_save + '/training_info/average_entropy_1.npy', average_entropy_1)
    np.save(opts.dir_save + '/training_info/average_entropy_2.npy', average_entropy_2)

    core.close()
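
# A self-contained sketch of an "average symbol entropy" of the kind computed above:
# sample messages for uniformly drawn meanings, estimate a symbol distribution per message
# position, and average the per-position entropies. The `sample_message` argument is
# hypothetical; the repo's compute_average_symbol_entropy may aggregate differently.
import numpy as np
from scipy.stats import entropy

def average_symbol_entropy(sample_message, n_sampling, max_len, vocab_size, seed=0):
    rng = np.random.default_rng(seed)
    counts = np.zeros((max_len, vocab_size))
    for _ in range(n_sampling):
        message = sample_message(rng)                   # integer array of length max_len
        counts[np.arange(max_len), message] += 1        # accumulate per-position symbol counts
    probs = counts / counts.sum(axis=1, keepdims=True)
    # mean entropy (base 10, matching the analyses above) across positions
    return float(np.mean([entropy(probs[t], base=10) for t in range(max_len)]))
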