def pretrain_model():
    """MLM & NSP Pretraining

    You can modify this function by yourself.
    This function does not affect your final score.
    """
    print("======MLM & NSP Pretraining======")

    train_dataset = ParagraphDataset(os.path.join('data', 'imdb_train.csv'))
    train_dataset = PretrainDataset(train_dataset)
    val_dataset = ParagraphDataset(os.path.join('data', 'imdb_val.csv'))
    val_dataset = PretrainDataset(val_dataset)

    model = MLMandNSPmodel(train_dataset.token_num)
    model_name = 'pretrained'

    MLM_train_losses, MLM_val_losses, NSP_train_losses, NSP_val_losses \
        = pretraining(model, model_name, train_dataset, val_dataset)

    torch.save(model.state_dict(), model_name + '_final.pth')

    with open(model_name + '_result.pkl', 'wb') as f:
        pickle.dump((MLM_train_losses, MLM_val_losses,
                     NSP_train_losses, NSP_val_losses), f)

    utils.plot_values(MLM_train_losses, MLM_val_losses,
                      title=model_name + "_mlm")
    utils.plot_values(NSP_train_losses, NSP_val_losses,
                      title=model_name + "_nsp")

    print("Final MLM training loss: {:06.4f}".format(MLM_train_losses[-1]))
    print("Final MLM validation loss: {:06.4f}".format(MLM_val_losses[-1]))
    print("Final NSP training loss: {:06.4f}".format(NSP_train_losses[-1]))
    print("Final NSP validation loss: {:06.4f}".format(NSP_val_losses[-1]))
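# Hypothetical convenience helper (not in the original assignment code):
# reloads the pickled pretraining curves for later inspection. The tuple
# order matches what pretrain_model() dumps above.
def load_pretrain_results(path='pretrained_result.pkl'):
    import pickle
    with open(path, 'rb') as f:
        MLM_train, MLM_val, NSP_train, NSP_val = pickle.load(f)
    return MLM_train, MLM_val, NSP_train, NSP_val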
def run():
    # build the mdp
    start = time.time()
    room_size = 3
    num_rooms = 5
    mdp = maze_mdp.MazeMDP(room_size=room_size, num_rooms=num_rooms)

    # build the agent; weights are shared across processes via a Manager
    m = Manager()
    init_dict = {(s, a): 0 for s in mdp.states for a in mdp.ACTIONS + [None]}
    shared_weights = m.dict(init_dict)
    shared_value_weights = m.dict(init_dict)
    agent = async_actor_critic.AsyncActorCritic(
        actions=mdp.ACTIONS, discount=mdp.DISCOUNT,
        weights=shared_weights, value_weights=shared_value_weights,
        tau=.3, learning_rate=.5)

    # build a single experiment
    rewards = m.list()
    start_state_values = m.list()
    max_steps = (2 * room_size * num_rooms) ** 2
    exp = experiment.Experiment(mdp=mdp, agent=agent, num_episodes=800,
                                max_steps=max_steps, rewards=rewards,
                                start_state_values=start_state_values)

    # run the experiment
    multiexperiment = experiment.MultiProcessExperiment(
        experiment=exp, num_agents=NUM_PROCESSES)
    multiexperiment.run()

    # report results
    end = time.time()
    print('took {} seconds to converge'.format(end - start))
    mdp.print_state_values(shared_value_weights)
    optimal = mdp.EXIT_REWARD + (2 * room_size * num_rooms * mdp.MOVE_REWARD)
    utils.plot_values(rewards, optimal, 'rewards')
    utils.plot_values(start_state_values, optimal, 'start state value')
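# Minimal sketch (not part of this repo) of the sharing mechanism run() relies
# on: a multiprocessing.Manager dict is visible to every worker process, which
# is how the asynchronous agents share one weight table. Call the demo from a
# __main__ guard; the names below are illustrative only.
def _demo_worker(shared, idx):
    # each worker writes under its own key to avoid read-modify-write races
    shared[idx] = idx ** 2

def _manager_dict_demo(num_workers=4):
    from multiprocessing import Manager, Process
    manager = Manager()
    shared = manager.dict()
    workers = [Process(target=_demo_worker, args=(shared, i))
               for i in range(num_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(dict(shared))  # e.g. {0: 0, 1: 1, 2: 4, 3: 9}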
def train_model():
    """IMDB Training

    You can modify this function by yourself.
    This function does not affect your final score.
    """
    print("======IMDB Training======")

    train_dataset = IMDBdataset(os.path.join('data', 'imdb_train.csv'))
    val_dataset = IMDBdataset(os.path.join('data', 'imdb_val.csv'))

    model = IMDBmodel(train_dataset.token_num)
    model_name = 'imdb'

    # You can choose whether to enable fine-tuning
    fine_tuning = True
    if fine_tuning:
        model_name += '_fine_tuned'
        pretrained_model_path = 'pretrained_final.pth'
        # You can use a model which has been pretrained for 200 epochs by the
        # TA. If you use this saved model, you should mention it in the report.
        #
        # pretrained_model_path = 'pretrained_byTA.pth'
    else:
        model_name += '_no_fine_tuned'
        pretrained_model_path = None

    train_losses, val_losses, train_accuracies, val_accuracies \
        = training(model, model_name, train_dataset, val_dataset,
                   pretrained_model_path=pretrained_model_path)

    torch.save(model.state_dict(), model_name + '_final.pth')

    with open(model_name + '_result.pkl', 'wb') as f:
        pickle.dump(
            (train_losses, val_losses, train_accuracies, val_accuracies), f)

    utils.plot_values(train_losses, val_losses, title=model_name + "_losses")
    utils.plot_values(train_accuracies, val_accuracies,
                      title=model_name + "_accuracies")

    print("Final training loss: {:06.4f}".format(train_losses[-1]))
    print("Final validation loss: {:06.4f}".format(val_losses[-1]))
    print("Final training accuracy: {:06.4f}".format(train_accuracies[-1]))
    print("Final validation accuracy: {:06.4f}".format(val_accuracies[-1]))
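# Hedged sketch of how training() might apply pretrained_model_path; the
# actual training() in this repo may differ. strict=False tolerates heads
# (MLM/NSP vs. sentiment classification) that exist in only one of the two
# models, so only the shared encoder weights are restored.
def maybe_load_pretrained(model, pretrained_model_path):
    import torch
    if pretrained_model_path is not None:
        state = torch.load(pretrained_model_path, map_location='cpu')
        model.load_state_dict(state, strict=False)
    return model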
else:
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.to(device)
    print("Running on:", device)

    train_losses, val_losses, train_accuracies, val_accuracies = train(
        model, train_set, val_set, criterion, optimizer, batch_size,
        num_epochs)
    torch.save(model.state_dict(), pretrained_model_path)

    print("Final training loss: {:06.4f}".format(train_losses[-1]))
    print("Final validation loss: {:06.4f}".format(val_losses[-1]))
    print("Final training accuracy: {:06.4f}".format(train_accuracies[-1]))
    print("Final validation accuracy: {:06.4f}".format(val_accuracies[-1]))

    plot_values(
        train_losses, val_losses, title="Losses",
        path="./losses/" + setting +
        f"_loss_hid({hidden_dim})_hid2({hidden_dim2})_dropout5_bn.png")
    plot_values(
        train_accuracies, val_accuracies, title="Accuracies",
        path="./accuracies/" + setting +
        f"_acc_hid({hidden_dim})_hid2({hidden_dim2})_dropout5_bn.png")

    # TODO: Predict test data and save to answer_private.txt
    # predict(model, test_data)
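# Hedged sketch for the TODO above (the batch format, argmax labels, and the
# one-label-per-line output format are assumptions, not part of the original
# script):
def predict(model, test_data, device, out_path='answer_private.txt'):
    import torch
    model.eval()
    preds = []
    with torch.no_grad():
        for x in test_data:
            logits = model(x.to(device))
            preds.extend(logits.argmax(dim=-1).tolist())
    with open(out_path, 'w') as f:
        f.write('\n'.join(str(p) for p in preds) + '\n')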
NUM_EPISODES = 100_000


def monte_carlo_online_control(num_episodes=NUM_EPISODES):
    # init Q(s, a) = 0 and N(s, a) = 0 for every (s, a)
    Q = state_action_map()
    N = state_action_map()
    N_s = state_map()

    for k in range(num_episodes):
        if k % 1000 == 0:
            print(f'{k} / {num_episodes}')

        # act epsilon-greedily with respect to the current Q estimates
        pi = e_greedy(Q, N_s)
        episode, reward = sample_episode(pi)

        # first-visit update: only the first occurrence of each state in the
        # episode contributes to the running-mean estimate of Q
        explored = set()
        for s, a in episode:
            if s not in explored:
                explored.add(s)
                N[s, a] += 1
                N_s[s] += 1
                Q[s, a] += (1 / N[s, a]) * (reward - Q[s, a])
                # print(f'{s}{a} -> {Q[s, a]}')

    return Q


if __name__ == '__main__':
    Q_vals = monte_carlo_online_control()
    plot_values(Q_vals)
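# Hedged sketch of the e_greedy() policy used above, following the common GLIE
# schedule eps(s) = n0 / (n0 + N(s)) so exploration decays as a state is
# visited more often. The extra `actions` argument and the n0 constant are
# assumptions; the repo's real e_greedy(Q, N_s) may be implemented differently.
def e_greedy_sketch(Q, N_s, actions, n0=100):
    import random

    def pi(s):
        eps = n0 / (n0 + N_s[s])
        if random.random() < eps:
            return random.choice(actions)          # explore
        return max(actions, key=lambda a: Q[s, a])  # exploit

    return pi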