def main():
    """Train an encoder / attention-decoder pair and persist both models.

    Reads all dimensions and the learning rate from the shared model config,
    runs training, then saves the two whole models (pickled) into a fresh
    run directory under ./tracks/ tagged with PID and timestamp.

    Side effects: creates ./tracks/track_PID=<pid>_TIME=<iso>/ on disk and
    writes encoder.pkl and decoder.pkl into it.
    """
    model_config = config.get_model_config()
    num_input_words = model_config['dim_lang']
    world_state_size = model_config['dim_world']
    num_output_actions = model_config['dim_action']
    hidden_size = model_config['hidden_size']
    learning_rate = model_config['learning_rate']

    encoder = models.EncoderRNN(num_input_words, hidden_size,
                                bidirectionality=True)
    attn_decoder = models.AttnDecoderRNN(hidden_size, world_state_size,
                                         num_output_actions)
    # NOTE(review): trains for a single iteration — presumably a smoke-test
    # setting; confirm the intended iteration count before a real run.
    trainIters(encoder, attn_decoder, 1, learning_rate)

    # Tag the output directory with PID + timestamp so concurrent or
    # repeated runs never collide.
    id_process = os.getpid()
    time_current = datetime.datetime.now().isoformat()
    tag_model = '_PID=' + str(id_process) + '_TIME=' + time_current
    path_track = './tracks/track' + tag_model + '/'
    # Fixed: create the directory with os.makedirs instead of shelling out
    # to `mkdir -p` via os.system — portable, no shell string-building, and
    # errors surface as exceptions instead of being silently ignored.
    os.makedirs(os.path.abspath(path_track), exist_ok=True)

    ENCODER_PATH = path_track + 'encoder.pkl'
    DECODER_PATH = path_track + 'decoder.pkl'
    # torch.save on the whole module objects (not state_dicts) — loading
    # these files requires the same `models` source to be importable.
    torch.save(encoder, ENCODER_PATH)
    torch.save(attn_decoder, DECODER_PATH)
def main():
    """Build the encoder and attention decoder from the shared model
    config and run training for three iterations.
    """
    cfg = config.get_model_config()

    # Pull out the dimensions and optimizer setting the models need.
    lang_dim = cfg['dim_lang']
    world_dim = cfg['dim_world']
    action_dim = cfg['dim_action']
    n_hidden = cfg['hidden_size']
    lr = cfg['learning_rate']

    enc = models.EncoderRNN(lang_dim, n_hidden, bidirectionality=True)
    dec = models.AttnDecoderRNN(n_hidden, world_dim, action_dim)
    trainIters(enc, dec, 3, lr)
if __name__ == '__main__':
    # Hyper-parameters.
    # Other parameters can be found in configs.py, but better to keep defaults.
    use_model = False       # True: load the trained model, else train from scratch
    hidden_size = 256       # RNN hidden size
    in_embed_dim = 256      # input-language word-embedding dimension
    out_embed_dim = 256     # output-language word-embedding dimension
    lr = 0.01
    n_iters = 80000
    print_every = 1000
    plot_every = 100

    encoder1 = models.EncoderRNN(
        input_lang.n_words, hidden_size, in_embed_dim).to(device)
    attn_decoder1 = models.AttnDecoderRNN(
        hidden_size, output_lang.n_words, out_embed_dim,
        dropout_p=0.1).to(device)

    if use_model:
        # Fixed: pass map_location=device so a checkpoint saved on GPU still
        # loads on a CPU-only machine (and vice versa); without it torch.load
        # tries to restore tensors onto the device they were saved from.
        encoder1.load_state_dict(
            torch.load('data/encoder_25.pt', map_location=device))
        attn_decoder1.load_state_dict(
            torch.load('data/attn_decoder_25.pt', map_location=device))
    else:
        trainIters(pairs, input_lang, output_lang, encoder1, attn_decoder1,
                   n_iters=n_iters, print_every=print_every,
                   plot_every=plot_every, learning_rate=lr)

    evaluateRandomly(pairs, input_lang, output_lang, encoder1, attn_decoder1)
    evaluateInput(input_lang, output_lang, encoder1, attn_decoder1)