Example #1
0
def main(_):
    """Load the best saved PPO checkpoint for FLAGS.env and run evaluation.

    Args:
        _: Unused positional argument (absl.app passes argv here).
    """
    # Evaluation only — CPU is sufficient, no gradients are computed.
    device = "cpu"
    print("Use device: {}".format(device))

    env = gym.make(FLAGS.env)

    # Policy and value networks, hidden size taken from the --hid_num flag.
    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    p_net.to(device)
    v_net.to(device)
    # Optimizers are None: this agent is used for inference only.
    agent = Agent(p_net, v_net, None, None, device)

    # Load the saved "best" model checkpoint.
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    print("Load best model: {}".format(model_filename_base))
    load_info = agent.load_model(model_filename_base, "best")
    if not load_info:
        # FIX: the original called exit(0) here. exit() is the interactive
        # helper injected by the `site` module and is not guaranteed to exist
        # (e.g. frozen apps, `python -S`); returning from main() terminates
        # the program just as cleanly with the same exit status.
        print("Model file not found")
        return

    max_rew = load_info["max_rew"]
    print("Max reward: {0}".format(max_rew))

    test(env, agent, device)
Example #2
0
def main(_):
    """Train a PPO agent on FLAGS.env, optionally shaping rewards with an
    AIRL discriminator trained from expert data, then evaluate the agent.

    Args:
        _: Unused positional argument (absl.app passes argv here).
    """
    device = get_device(FLAGS.use_gpu)
    print("Use device: {}".format(device))

    # Make sure the checkpoint directories exist before training starts.
    data_dir = FLAGS.data_dir
    create_directory(data_dir)
    create_directory(os.path.join(data_dir, "models"))

    env = gym.make(FLAGS.env)

    # Policy / value networks plus one Ralamb optimizer for each.
    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    print(p_net)
    print(v_net)
    p_net.to(device)
    v_net.to(device)
    policy_optimizer = ralamb.Ralamb(p_net.parameters(),
                                     lr=FLAGS.lr,
                                     weight_decay=FLAGS.weight_decay)
    value_optimizer = ralamb.Ralamb(v_net.parameters(),
                                    lr=FLAGS.lr,
                                    weight_decay=FLAGS.weight_decay)
    agent = Agent(p_net, v_net, policy_optimizer, value_optimizer, device)

    # Without --use_discrim the trainer receives no discriminator and no
    # expert trajectories.
    discrim = None
    expert_traj = None
    if FLAGS.use_discrim:
        # Load the expert demonstrations the discriminator is trained on.
        expert_filename = os.path.join(FLAGS.data_dir, "expert_data",
                                       "taxi_expert.pkl")
        print("Load expert data: ", expert_filename)
        with open(expert_filename, "rb") as f:
            expert_epis = pickle.load(f)
            expert_traj = Trajectory()
            for epi in expert_epis:
                # Pair each observation with its successor (wrapping the
                # final step back to the first observation).
                epi["next_obs"] = np.append(epi["obs"][1:], epi["obs"][0])
                expert_traj.append(epi)
            expert_traj.to_tensor(device)

        # AIRL discriminator = pseudo-reward net + shaping value net,
        # optimized jointly by a single Ralamb instance.
        pseudo_rew_net = VNet(env.observation_space, FLAGS.hid_num)
        shaping_val_net = VNet(env.observation_space, FLAGS.hid_num)
        print(pseudo_rew_net)
        print(shaping_val_net)
        pseudo_rew_net.to(device)
        shaping_val_net.to(device)
        joint_params = (list(pseudo_rew_net.parameters()) +
                        list(shaping_val_net.parameters()))
        discrim_optimizer = ralamb.Ralamb(joint_params,
                                          lr=FLAGS.lr,
                                          weight_decay=FLAGS.weight_decay)
        discrim = Discriminator(pseudo_rew_net, shaping_val_net,
                                discrim_optimizer, device)

    # Optionally resume training from the last saved checkpoint.
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    discrim_filename_base = None
    if FLAGS.resume:
        print("Load last model")
        load_info = agent.load_model(model_filename_base, "last")
        if load_info:
            max_rew = load_info["max_rew"]
            print("Max reward: {0}".format(max_rew))
        else:
            print("Model file not found")

        if FLAGS.use_discrim:
            # NOTE(review): this base path is only set on --resume; verify
            # train() does not need it for first-run discriminator saving.
            discrim_filename_base = os.path.join(
                FLAGS.data_dir, "models",
                "discrim_" + FLAGS.env + "_AIRL_H" + str(FLAGS.hid_num))
            discrim.load_model(discrim_filename_base, "last")

    train(env, agent, max_rew, model_filename_base, device, discrim,
          discrim_filename_base, expert_traj)
    test(env, agent, device)