def main(_):
    """Evaluate the best saved PPO model for the configured environment.

    Loads the policy/value networks' "best" checkpoint from
    ``<data_dir>/models`` and runs a test rollout. Exits with a non-zero
    status when no checkpoint file is found.
    """
    # Evaluation runs on CPU only; no GPU selection is performed here.
    device = "cpu"
    print("Use device: {}".format(device))

    env = gym.make(FLAGS.env)

    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    p_net.to(device)
    v_net.to(device)
    # No optimizers (None, None): the agent is used for inference only.
    agent = Agent(p_net, v_net, None, None, device)

    # Load the saved intermediate ("best") model state.
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    print("Load best model: {}".format(model_filename_base))
    load_info = agent.load_model(model_filename_base, "best")
    if load_info:
        max_rew = load_info["max_rew"]
        print("Max reward: {0}".format(max_rew))
    else:
        print("Model file not found")
        # BUG FIX: a missing checkpoint is an error — exit non-zero
        # (the original called exit(0), signalling success to the shell).
        raise SystemExit(1)

    test(env, agent, device)
def main(_):
    """Train a PPO agent, optionally with an AIRL discriminator.

    Builds the policy/value networks and their optimizers, optionally loads
    expert trajectories and constructs a discriminator, resumes from the
    "last" checkpoint when requested, then runs training followed by a
    final test rollout.
    """
    device = get_device(FLAGS.use_gpu)
    print("Use device: {}".format(device))

    # Create the directories used for model checkpoints.
    data_dir = FLAGS.data_dir
    create_directory(data_dir)
    create_directory(os.path.join(data_dir, "models"))

    env = gym.make(FLAGS.env)

    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    print(p_net)
    print(v_net)
    p_net.to(device)
    v_net.to(device)

    optim_p = ralamb.Ralamb(p_net.parameters(),
                            lr=FLAGS.lr, weight_decay=FLAGS.weight_decay)
    optim_v = ralamb.Ralamb(v_net.parameters(),
                            lr=FLAGS.lr, weight_decay=FLAGS.weight_decay)
    agent = Agent(p_net, v_net, optim_p, optim_v, device)

    if FLAGS.use_discrim:
        # Load expert demonstrations for the AIRL discriminator.
        # NOTE(review): the expert file name is hard-coded to
        # "taxi_expert.pkl" regardless of FLAGS.env — confirm intended.
        expert_filename = os.path.join(FLAGS.data_dir, "expert_data",
                                       "taxi_expert.pkl")
        print("Load expert data: ", expert_filename)
        with open(expert_filename, "rb") as f:
            expert_traj = Trajectory()
            expert_epis = pickle.load(f)
            for epi in expert_epis:
                # next_obs is obs shifted by one step; the last entry wraps
                # around to the first observation — TODO confirm the wrap
                # is intended rather than a terminal-state placeholder.
                epi["next_obs"] = np.append(epi["obs"][1:], epi["obs"][0])
                expert_traj.append(epi)
            expert_traj.to_tensor(device)

        pseudo_rew_net = VNet(env.observation_space, FLAGS.hid_num)
        shaping_val_net = VNet(env.observation_space, FLAGS.hid_num)
        print(pseudo_rew_net)
        print(shaping_val_net)
        pseudo_rew_net.to(device)
        shaping_val_net.to(device)
        # A single optimizer updates both discriminator sub-networks jointly.
        optim_discrim = ralamb.Ralamb(
            list(pseudo_rew_net.parameters())
            + list(shaping_val_net.parameters()),
            lr=FLAGS.lr,
            weight_decay=FLAGS.weight_decay,
        )
        discrim = Discriminator(pseudo_rew_net, shaping_val_net,
                                optim_discrim, device)
        # BUG FIX: build the discriminator checkpoint path whenever the
        # discriminator is in use. Previously it was only assigned under
        # FLAGS.resume, so a fresh run with use_discrim passed None to
        # train() as the discriminator save path.
        discrim_filename_base = os.path.join(
            FLAGS.data_dir, "models",
            "discrim_" + FLAGS.env + "_AIRL_H" + str(FLAGS.hid_num))
    else:
        discrim = None
        expert_traj = None
        discrim_filename_base = None

    # Resume from the saved intermediate ("last") model state if requested.
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    if FLAGS.resume:
        print("Load last model")
        load_info = agent.load_model(model_filename_base, "last")
        if load_info:
            max_rew = load_info["max_rew"]
            print("Max reward: {0}".format(max_rew))
        else:
            print("Model file not found")
        if FLAGS.use_discrim:
            discrim.load_model(discrim_filename_base, "last")

    train(env, agent, max_rew, model_filename_base, device,
          discrim, discrim_filename_base, expert_traj)
    test(env, agent, device)