Example #1
def train_agent(train, pickle_file, agent_type, env_kwargs, parms):

    bin_path = "bin/" + pickle_file

    if (path.exists(bin_path)):
        if agent_type == "a2c":
            print("Loading A2C Agent")
            RL_model = A2C.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ddpg":
            print("Loading DDPG Agent")
            RL_model = DDPG.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ppo":
            print("Loading PPO2 Agent")
            RL_model = PPO2.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
    else:
        e_train_gym = ipenv.PortfolioAllocEnv(df=train, **env_kwargs)
        env_train, _ = e_train_gym.get_sb_env()

        agent = ipagent.IPRLAgent(env=env_train)

        model = agent.get_model(model_name=agent_type, model_kwargs=parms)

        RL_model = agent.train_model(model=model,
                                     tb_log_name=agent_type,
                                     total_timesteps=1000000)

        RL_model.save(bin_path)

    return RL_model
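Example #1 above follows a common load-or-train pattern: reuse a saved agent if the pickle exists, otherwise train one and save it. Below is a minimal, self-contained sketch of the same pattern using stock stable-baselines (v2) and a toy Gym environment; the file name, environment, and timestep budget are illustrative placeholders, not taken from the example.

import os

import gym
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy


def load_or_train(model_path="ppo2_cartpole.zip", total_timesteps=10000):
    # Illustrative sketch of the load-or-train pattern; path and budget are placeholders.
    env = gym.make("CartPole-v1")
    if os.path.exists(model_path):
        # Loading restores weights and hyperparameters; the env is attached for further use.
        model = PPO2.load(model_path, env=env)
    else:
        model = PPO2(MlpPolicy, env, verbose=0)
        model.learn(total_timesteps=total_timesteps)
        model.save(model_path)
    return model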
Example #2
File: enjoy_pad.py Project: drwxyh/pad
def test(env_id, seed, policy):
    """
    Run a trained PPO2 model in the Pad environment, for testing purposes

    :param env_id: (str) the environment id string
    :param seed: (int) Used to seed the random generator.
    :param policy: (Object) The policy model to use (MLP, CNN, LSTM, ...)
    """

    # if 'lstm' in policy:
    #     print('LSTM policies not supported for drawing')
    #     return 1
    env = DummyVecEnv([PadEnvRender for _ in range(1)])  # Need for lstm
    # else:
    #     env = PadEnvRender()

    env = VecFrameStack(env, 8)
    model = PPO2.load('./pad_5combo_ppo2.pkl', env)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0

        while not done:
            env.render()
            action, _ = model.predict(obs)
            obs, rew, done, _ = env.step(action)
            done = done.any()
            episode_rew += rew
            time.sleep(1 / 24.)
            if done:
                print('Episode reward:', episode_rew)
Example #3
def do_ppo(args, start_theta, parent_this_run_dir, full_space_save_dir):

    """
    Runs the test
    """

    logger.log(f"#######CMA and then PPO TRAIN: {args}")

    this_conti_ppo_run_dir = get_ppo_part(parent_this_run_dir)
    log_dir = get_log_dir(this_conti_ppo_run_dir)
    conti_ppo_save_dir = get_save_dir(this_conti_ppo_run_dir)
    logger.configure(log_dir)

    full_param_traj_dir_path = get_full_params_dir(this_conti_ppo_run_dir)

    if os.path.exists(full_param_traj_dir_path):
        import shutil
        shutil.rmtree(full_param_traj_dir_path)
    os.makedirs(full_param_traj_dir_path)

    if os.path.exists(conti_ppo_save_dir):
        import shutil
        shutil.rmtree(conti_ppo_save_dir)
    os.makedirs(conti_ppo_save_dir)



    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = True
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out
    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{full_space_save_dir}/ppo2")
    model.set_from_flat(start_theta)

    if args.normalize:
        env.load_running_average(full_space_save_dir)
    model.set_env(env)


    run_info = {"run_num": args.run_num,
                "env_id": args.env,
                "full_param_traj_dir_path": full_param_traj_dir_path}

    # model = PPO2(policy=policy, env=env, n_steps=args.n_steps, nminibatches=args.nminibatches, lam=0.95, gamma=0.99,
    #              noptepochs=10,
    #              ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, optimizer=args.optimizer)

    model.tell_run_info(run_info)
    episode_returns = model.learn(total_timesteps=args.ppo_num_timesteps)

    model.save(f"{conti_ppo_save_dir}/ppo2")

    env.save_running_average(conti_ppo_save_dir)
    return episode_returns, full_param_traj_dir_path
Example #4
 def load(self, path, env):
     if self.trpo():
         return TRPO.load(path, env=env)
     elif self.ppo():
         return PPO2.load(path, env=env)
     else:
         return SAC.load(path, env=env)
Example #5
    def __init__(self,
                 policy_file="../models/controllers/PPO/CartPole-v1.pkl",
                 mass_prior=None,
                 length_prior=None,
                 episodes_per_params=1,
                 seed=1995,
                 params=["length", "masspole"],
                 steps_per_episode=50,
                 sufficient_stats="Cross-Correlation"):

        self.env = CartPoleEnv()
        self.seed = seed

        self.cached_data = None
        self.params_scaler = None
        self.params = params
        self.steps_per_episode = steps_per_episode
        self.sufficient_stats = sufficient_stats

        self.policy = PPO2.load(policy_file)
        if mass_prior is None:
            self.m_low = 0.1
            self.m_high = 2.0
            self.m_prior = self.sample_mass_from_uniform_prior

        if length_prior is None:
            self.l_low = 0.1
            self.l_high = 2.0
            self.l_prior = self.sample_length_from_uniform_prior
Example #6
    def __init__(self,
                 policy_file="../models/controllers/PPO/Pendulum-v0.pkl",
                 mass_prior=None,
                 length_prior=None,
                 episodes_per_params=1,
                 seed=1995,
                 params=["length", "mass"],
                 steps_per_episode=200,
                 sufficient_stats="Cross-Correlation",
                 load_from_file=False,
                 assets_path=".",
                 filename=""):

        self.env = PendulumEnv()
        self.seed = seed

        self.cached_data = None
        self.params_scaler = None
        self.params = params
        self.steps_per_episode = steps_per_episode
        self.sufficient_stats = sufficient_stats
        self.assets_path = assets_path
        self.load_from_file = load_from_file
        self.data_file = os.path.join(assets_path + filename)
        self.policy = PPO2.load(policy_file)
        if mass_prior is None:
            self.m_low = 0.1
            self.m_high = 2.0
            self.m_prior = self.sample_mass_from_uniform_prior

        if length_prior is None:
            self.l_low = 0.1
            self.l_high = 2.0
            self.l_prior = self.sample_length_from_uniform_prior
Example #7
def main():

    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(args)
    plot_dir_alg = get_plot_dir(args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir,
                                                      params_scope="pi")
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)
    if not os.path.exists(plot_dir_alg):
        os.makedirs(plot_dir_alg)

    final_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)

        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)

    obz_tensor = model.act_model.fake_input_tensor

    some_neuron = model.act_model.policy_neurons[2][-1]

    grads = tf.gradients(tf.math.negative(some_neuron), obz_tensor)

    grads = list(zip(grads, obz_tensor))

    trainer = tf.train.AdamOptimizer(learning_rate=0.01, epsilon=1e-5)

    train_op = trainer.apply_gradients(grads)
    for i in range(10000):
        obz, _ = model.sess.run([obz_tensor, train_op])
Example #8
def neuron_values_generator(args, save_dir, pi_theta, eval_timesteps):
    # logger.log(f"#######EVAL: {args}")

    neuron_values_list = []

    def make_env():
        env_out = gym.make(args.env)

        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])

    if args.normalize:
        env = VecNormalize(env)

    # policy = MlpPolicy
    # # model = PPO2.load(f"{save_dir}/ppo2") # this also loads V function
    # model = PPO2(policy=policy, env=env, n_steps=args.n_steps, nminibatches=args.nminibatches, lam=0.95, gamma=0.99, noptepochs=10,
    #              ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, optimizer=args.optimizer)
    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()
    env.render()
    ep_infos = []
    while 1:
        neuron_values, actions, _, _, _ = model.step_with_neurons(obs)
        # neuron_values = model.give_neuron_values(obs)

        # neuron_values_list.append( neuron_values )
        yield neuron_values
        obs, rew, done, infos = env.step(actions)
        env.render()

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # env.render()
        done = done.any()
        if done:

            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()
Example #9
def cont_learn():
    print('Continue learning....')

    env = gym.make('CarRacing-v0')
    env = Monitor(env, log_dir, allow_early_resets=True)
    env = DummyVecEnv([lambda: env])
    trained_model = PPO2.load("CarRacing_model_PPO2.pkl")
    trained_model.set_env(env)
    trained_model.learn(300000)
    print("Saving model to CarRacing_model.pkl")
    trained_model.save("CarRacing_model_PPO2.pkl")
    plot_results(log_dir)
Example #10
def visualize_neurons(args, save_dir, pi_theta, eval_timesteps):
    # logger.log(f"#######EVAL: {args}")

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = True
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()
    ep_infos = []
    for _ in range(eval_timesteps):
        actions = model.step(obs)[0]
        neuron_values = model.give_neuron_values(obs)

        obs, rew, done, infos = env.step(actions)

        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # env.render()
        done = done.any()
        if done:
            if pi_theta is None:
                episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
                print(f'episode_rew={episode_rew}')
            obs = env.reset()

    return safe_mean([ep_info['r'] for ep_info in ep_infos])
Example #11
def test_cnn_lstm_policy(request, policy):
    model_fname = './test_model_{}.zip'.format(request.node.name)

    try:
        env = make_env(0)
        model = PPO2(policy, env, nminibatches=1)
        model.learn(total_timesteps=15)
        env = model.get_env()
        evaluate_policy(model, env, n_eval_episodes=5)
        # saving
        model.save(model_fname)
        del model, env
        # loading
        _ = PPO2.load(model_fname, policy=policy)

    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)
Example #12
def run():
    """
    Run a trained model for the CarRacing problem
    """
    env = gym.make('CarRacing-v0')
    env = DummyVecEnv([lambda: env])

    # model = PPO2.load("CarRacing_model_PPO1_"+ str(5) +".pkl", env)
    model = PPO2.load("CarRacing_model_PPO2_5.pkl", env)
    avg_rew = evaluate(model=model, env=env, num_steps=10000)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            action, _ = model.predict(obs)
            obs, rew, done, _ = env.step(action)

            episode_rew += rew
        print("Episode reward", episode_rew)
Example #13
def main(env, load_path, fig_path):

    # skip over 1-baxter-no-penalty (no log monitor.csv)
    if load_path == "1-baxter-no-penalty":
        plot = False
    else:
        plot = True

    # arguments
    print("env %s; load_path %s; fig_path %s;" % (env, load_path, fig_path))
    log_path = os.getcwd() + "/log/" + load_path
    os.makedirs(os.getcwd() + "/figs/" + "/", exist_ok=True)
    fig_path = os.getcwd() + "/figs/" + "/" + fig_path
    load_path = os.getcwd() + "/models/" + load_path

    # make environment, flattened environment, vectorized environment
    env = gym.make(env)
    env = gym.wrappers.FlattenDictWrapper(env, ['observation', 'achieved_goal', 'desired_goal'])
    env = DummyVecEnv([lambda: env])

    # load model
    model = PPO2.load(load_path, env=env)
    obs_initial = env.reset()
    obs = obs_initial

    # plot results
    if plot:
        plot_results(fig_path, log_path)

    # initializations
    niter = 10
    counter = 0
    timestep = 0
    results = [[[0,0,0] for i in range(100)], [[0,0,0,0] for i in range(100)]]
    current = [[[0,0,0] for i in range(100)], [[0,0,0,0] for i in range(100)]]
    print("==============================")

    # check initial positions and quaternions
    print("grip", env.envs[0].env.env.sim.data.get_site_xpos('grip'))
    print("box", env.envs[0].env.env.sim.data.get_site_xpos('box'))
    print("tool", env.envs[0].env.env.sim.data.get_site_xpos('tool'))
    print("mocap", env.envs[0].env.env.sim.data.mocap_pos)
    print("quat", env.envs[0].env.env.sim.data.mocap_quat)
    print("==============================")

    # mocap quaternion check
    for i in range(5):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        quat = env.envs[0].env.env.sim.data.mocap_quat
        print("obs", obs)
        print("quat", quat)
    print("==============================")

    # start rendering
    dists = []
    box_goal_pos = np.array([0.6, 0.05, -0.17])
    while True:
        if counter == niter:
            break
        action, _states = model.predict(obs)
        obs_old = obs
        obs, rewards, dones, info = env.step(action)
        quaternion = env.envs[0].env.env.sim.data.mocap_quat
        if obs.all() == obs_initial.all():
            if counter % 10 == 0:
                xyzs = current[0]
                quats = current[1]
                print(xyzs)
                print(quats)
                filename = log_path + "/" + "results_" + str(counter) + ".txt"
                os.makedirs(log_path + "/", exist_ok=True)
                file = open(filename, 'w+')
                for xyz, quat in zip(xyzs, quats):
                    for coord in xyz:
                        file.write(str(coord) + " ")
                    for quat_coord in quat:
                        file.write(str(quat_coord) + " ")
                    file.write("\n")
                file.close()

            box_end_pos = np.array(obs_old[0][3:6].tolist())
            print(box_end_pos)
            print(np.shape(box_end_pos))
            print(box_goal_pos)
            print(np.shape(box_goal_pos))
            dists.append(np.linalg.norm(box_goal_pos - box_end_pos))
            current = [[[0,0,0] for i in range(100)], [[0,0,0,0] for i in range(100)]]
            timestep = 0
            counter += 1
        print(timestep)
        print("obs", obs)
        print("quat", quaternion)

        # for average trajectory, smoothed
        for i in range(3):
            results[0][timestep][i] += obs[0][:3].tolist()[i]
        for j in range(4):
            results[1][timestep][j] += quaternion[0].tolist()[j]

        # for current trajectory
        for i in range(3):
            current[0][timestep][i] += obs[0][:3].tolist()[i]
        for j in range(4):
            current[1][timestep][j] += quaternion[0].tolist()[j]

        timestep += 1
        env.render()

    # smooth paths by taking average, and calculate mean distance to goal state
    for timestep in range(100):
        for i in range(3):
            results[0][timestep][i] /= niter
        for j in range(4):
            results[1][timestep][j] /= niter
    dist = np.mean(dists)

    # print and write to file
    xyzs = results[0]
    quats = results[1]
    filename = log_path + "/" + "results_avg.txt"
    os.makedirs(log_path + "/", exist_ok=True)
    file = open(filename, 'w+')
    for xyz, quat in zip(xyzs, quats):
        for coord in xyz:
            file.write(str(coord) + " ")
        for quat_coord in quat:
            file.write(str(quat_coord) + " ")
        file.write("\n")
    file.close()

    # print average distances
    print("average distance of box from end goal: %f" % dist)
Example #14
 def __init__(self):
     self.model = PPO2.load(os.path.expanduser(
         "~/Code/drl_local_planner_ros_stable_baselines/example_agents/ppo2_1_raw_data_cont_0/ppo2_1_raw_data_cont_0.pkl"))  # noqa
Example #15
    #     qvel[i,:] = env_in.sim.data.qvel[[7,8,10,12,14,16,17,19,21,23]]
    qvel[i, :] = env_in.GetMotorVelocities()
    #     torque[i,:] = env_in.sim.data.actuator_force
    torque[i, :] = env_in.GetMotorTorques()
    i = (i + 1) % total_data_length
    return True


###############################################################################
# # Use this code for testing the basic controller
# Create the stoch mujoco environment
# env = stoch2_gym_mjc_env.Stoch2Env()
env = stoch2_gym_env.Stoch2Env(render=True)

model_test = PPO2.load(
    dir_name +
    "/tflow_log/model_trot")  # model_trot_200kiter_0pen_notermination
obs = env.reset()

print("Render mode...")

for _ in range(15):
    action, _states = model_test.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action, callback=render_callback)
#     if done:
#         break

pickle.dump(qpos[0:total_data_length:int(total_data_length /
                                         total_data_length_for_saving)],
            open(input_file, "wb"))  # save it into a file named save.p
pickle.dump(action, open(action_file, "wb"))
Example #16
def visualize_policy_and_collect_COM(
        augment_num_timesteps, top_num_to_include_slice, augment_seed,
        augment_run_num, network_size, policy_env, policy_num_timesteps,
        policy_run_num, policy_seed, eval_seed, eval_run_num, learning_rate,
        additional_note, metric_param):
    result_dir = get_result_dir(policy_env, policy_num_timesteps,
                                policy_run_num, policy_seed, eval_seed,
                                eval_run_num, additional_note, metric_param)
    args = AttributeDict()

    args.normalize = True
    args.num_timesteps = augment_num_timesteps
    args.run_num = augment_run_num
    args.alg = "ppo2"
    args.seed = augment_seed

    logger.log(f"#######VISUALIZE: {args}")
    # non_linear_global_dict
    linear_global_dict, non_linear_global_dict, lagrangian_values, input_values, layers_values, all_weights = read_all_data(
        policy_env,
        policy_num_timesteps,
        policy_run_num,
        policy_seed,
        eval_seed,
        eval_run_num,
        additional_note=additional_note)
    timestamp = get_time_stamp('%Y_%m_%d_%H_%M_%S')
    experiment_label = f"learning_rate_{learning_rate}timestamp_{timestamp}_augment_num_timesteps{augment_num_timesteps}" \
                       f"_top_num_to_include{top_num_to_include_slice.start}_{top_num_to_include_slice.stop}" \
                       f"_augment_seed{augment_seed}_augment_run_num{augment_run_num}_network_size{network_size}" \
                       f"_policy_num_timesteps{policy_num_timesteps}_policy_run_num{policy_run_num}_policy_seed{policy_seed}" \
                       f"_eval_seed{eval_seed}_eval_run_num{eval_run_num}_additional_note_{additional_note}"

    entry_point = 'gym.envs.dart:DartWalker2dEnv_aug_input'

    this_run_dir = get_experiment_path_for_this_run(
        entry_point,
        args.num_timesteps,
        args.run_num,
        args.seed,
        learning_rate=learning_rate,
        top_num_to_include=top_num_to_include_slice,
        result_dir=result_dir,
        network_size=network_size,
        metric_param=metric_param)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    aug_plot_dir = get_aug_plot_dir(this_run_dir) + "_vis"

    final_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    args.env = f'{experiment_label}_{entry_point}-v1'
    register(id=args.env,
             entry_point=entry_point,
             max_episode_steps=1000,
             kwargs={
                 'linear_global_dict': linear_global_dict,
                 'non_linear_global_dict': non_linear_global_dict,
                 'top_to_include_slice': top_num_to_include_slice,
                 'aug_plot_dir': aug_plot_dir,
                 "lagrangian_values": lagrangian_values,
                 "layers_values": layers_values
             })

    def make_env():
        env_out = gym.make(args.env)

        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    walker_env = env.envs[0].env.env

    walker_env.disableViewer = False

    if args.normalize:
        env = VecNormalize(env)

    set_global_seeds(args.seed)
    walker_env.seed(args.seed)

    model = PPO2.load(f"{save_dir}/ppo2", seed=augment_seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)

    obs[:] = env.reset()

    env = VecVideoRecorder(env,
                           aug_plot_dir,
                           record_video_trigger=lambda x: x == 0,
                           video_length=3000,
                           name_prefix="vis_this_policy")

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))]
                             for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    # epi_rew = 0
    for _ in range(3000):
        actions = model.step(obs)[0]

        # yield neuron_values
        obs, rew, done, infos = env.step(actions)
        # epi_rew+= rew[0]
        if done and not first_done:
            first_done = True

        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)

        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        # env.render()

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            # print(f'episode_rew={epi_rew}')
            # epi_rew = 0
            obs = env.reset()

    #Hstack into a big matrix
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])
    input_values = np.hstack(raw_layer_values_list[0])

    layers_values = [
        np.hstack(layer_list) for layer_list in raw_layer_values_list
    ][1:-2]  # drop variance and inputs

    for i, com in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com)), com)
        plt.xlabel("time")
        plt.ylabel(f"COM{i}")

        plt.savefig(f"{aug_plot_dir}/COM{i}.jpg")
        plt.close()
Example #17
        return env

    set_global_seeds(seed)
    return _init


# Make Environments
envs = [make_env(rank=0, seed=0, framerange=(start_frame, start_frame))]
env = SubprocVecEnv(envs)
'''
env = PyBulletEnvironment(max_steps, GUI, data_path = data_path, reward_func = reward_func)
env = DummyVecEnv(env)
'''

# Load Training Agent
agent = PPO2.load(model_path, env=env)

# Run Agent on Environment
reward_log = []
for i in range(repeats):
    state = env.reset()
    deterministic = False
    sum_reward = 0
    for _ in range(max_steps):
        action, _ = agent.predict(state, deterministic=deterministic)
        #print(action)
        state, reward, done, info = env.step(action)
        time.sleep(sleep_time)
        sum_reward += reward[0]
    reward_log.append(sum_reward / max_steps)
print('Reward Fraction Achieved', np.mean(reward_log))
Example #18
def train_agent_ppo2(config,
                     agent_name,
                     total_timesteps,
                     policy,
                     gamma=0.99,
                     n_steps=128,
                     ent_coef=0.01,
                     learning_rate=0.00025,
                     vf_coef=0.5,
                     max_grad_norm=0.5,
                     lam=0.95,
                     nminibatches=4,
                     noptepochs=4,
                     cliprange=0.2,
                     num_envs=1,
                     robot_radius=0.46,
                     rew_fnc=3,
                     num_stacks=1,
                     stack_offset=15,
                     disc_action_space=False,
                     debug=False,
                     normalize=False,
                     stage=0,
                     pretrained_model_name="",
                     task_mode="static"):

    # Setting seed
    seed = random.randint(0, 1000)
    np.random.seed(seed)
    tf.random.set_random_seed(seed)
    random.seed(seed)

    # Define paths to store things
    path_to_tensorboard_log = config['PATHES']['path_to_tensorboard_log']
    global path_to_models
    path_to_models = config['PATHES']['path_to_models']

    agent_dir = '%s/%s' % (path_to_models, agent_name)
    if not os.path.exists(agent_dir):
        os.makedirs(agent_dir)

    # Loading simulation environment
    env = load_train_env(num_envs, robot_radius, rew_fnc, num_stacks,
                         stack_offset, debug, task_mode, policy,
                         disc_action_space, normalize)

    if stage == 0:
        model = PPO2(eval(policy),
                     env,
                     gamma=gamma,
                     n_steps=n_steps,
                     ent_coef=ent_coef,
                     learning_rate=learning_rate,
                     vf_coef=vf_coef,
                     max_grad_norm=max_grad_norm,
                     lam=lam,
                     nminibatches=nminibatches,
                     noptepochs=noptepochs,
                     cliprange=cliprange,
                     verbose=1,
                     tensorboard_log='%s' % (path_to_tensorboard_log))
    else:
        # Pretrained model is loaded to continue training.
        model = PPO2.load(
            "%s/%s/%s.pkl" %
            (path_to_models, pretrained_model_name, pretrained_model_name),
            env,
            tensorboard_log='%s' % (path_to_tensorboard_log))

    # Document agent
    print("Starting PPO2 Training of agent: %s" % (agent_name))
    print("------------------------------------------------------")
    print("gamma \t\t\t\t %f" % model.gamma)
    print("n_steps \t\t\t %d" % model.n_steps)
    print("ent_coef \t\t\t %f" % model.ent_coef)
    print("learning_rate \t\t\t %f" % learning_rate)
    print("vf_coef \t\t\t %f" % model.vf_coef)
    print("max_grad_norm \t\t\t %f" % model.max_grad_norm)
    print("lam \t\t\t\t %f" % model.lam)
    print("nminibatches \t\t\t %d" % model.nminibatches)
    print("noptepochs \t\t\t %d" % model.noptepochs)
    print("cliprange \t\t\t %f" % cliprange)
    print("total_timesteps \t\t %d" % total_timesteps)
    print("Policy \t\t\t\t %s" % policy)
    print("reward_fnc \t\t\t %d" % rew_fnc)
    print("Normalized state: %d" % normalize)
    print("discrete action space %d" % disc_action_space)
    print("Number of stacks: %d, stack offset: %d" %
          (num_stacks, stack_offset))
    print("\n")

    # Starting training
    reset_num_timesteps = False
    if stage == 0:
        reset_num_timesteps = True

    model.learn(total_timesteps=total_timesteps,
                log_interval=100,
                callback=train_callback,
                tb_log_name=agent_name,
                reset_num_timesteps=reset_num_timesteps)

    # Saving final model
    model.save("%s/%s/%s" % (path_to_models, agent_name, "%s_stage_%d" %
                             (agent_name, stage)))
    print("Training finished.")
    env.close()
Example #19
def visualize_policy_and_collect_COM(seed, run_num, policy_env,
                                     policy_num_timesteps, policy_seed,
                                     policy_run_num):

    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num
    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    final_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = False

        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        env_out.seed(seed)
        return env_out

    env = DummyVecEnv([make_env])

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)

    obs[:] = env.reset()
    plot_dir = get_plot_dir(policy_env=args.env,
                            policy_num_timesteps=policy_num_timesteps,
                            policy_run_num=policy_run_num,
                            policy_seed=policy_seed,
                            eval_seed=seed,
                            eval_run_num=run_num,
                            additional_note="")
    if os.path.exists(plot_dir):
        shutil.rmtree(plot_dir)
    os.makedirs(plot_dir)
    env = VecVideoRecorder(env,
                           plot_dir,
                           record_video_trigger=lambda x: x == 0,
                           video_length=3000,
                           name_prefix="3000000agent-{}".format(args.env))

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))]
                             for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    # epi_rew = 0
    for _ in range(3000):
        actions = model.step(obs)[0]

        # yield neuron_values
        obs, rew, done, infos = env.step(actions)
        # epi_rew+= rew[0]
        if done and not first_done:
            first_done = True

        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)

        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        # env.render()

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            # print(f'episode_rew={epi_rew}')
            # epi_rew = 0
            obs = env.reset()

    #Hstack into a big matrix
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])
    input_values = np.hstack(raw_layer_values_list[0])

    layers_values = [
        np.hstack(layer_list) for layer_list in raw_layer_values_list
    ][1:-2]  # drop variance and inputs

    for i, com in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com)), com)
        plt.xlabel("time")
        plt.ylabel(f"COM{i}")

        plt.savefig(f"{plot_dir}/COM{i}.jpg")
        plt.close()
Example #20
model = PPO2(CustomPolicy,
             env,
             n_steps=int(2048 / 128),
             nminibatches=64,
             noptepochs=10,
             lam=0.98,
             verbose=1,
             tensorboard_log='/home/xi/model/log')
# model = PPO2.load("ppo2_ipadgame")
# model.set_env(env)
# model.tensorboard_log='/home/xi/model/log'
# env.load_running_average("/home/xi/model/")

model.learn(total_timesteps=50000)

# model.save("ppo2_ipadgame")
# env.save_running_average("/home/xi/model/")
# print ('done')

env = gym.make(env_id)
env = DummyVecEnv([lambda: env])
env = VecNormalize(env)
obs = env.reset()
model = PPO2.load("ppo2_ipadgame")
env.load_running_average("/home/xi/model/")

for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
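Example #20 (and several snippets above) keep the VecNormalize statistics separate from the model and persist them with save_running_average/load_running_average, an older stable-baselines 2 API. A minimal sketch of that round trip, with illustrative paths and timestep budget:

import os

import gym
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

stats_dir = "./normalize_stats/"  # illustrative path
model_path = "ppo2_pendulum"      # illustrative path
os.makedirs(stats_dir, exist_ok=True)

# Train with normalized observations/rewards, then save the model and the running averages.
env = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
env = VecNormalize(env)
model = PPO2(MlpPolicy, env, verbose=0)
model.learn(total_timesteps=10000)
model.save(model_path)
env.save_running_average(stats_dir)

# Reload: the running averages must be restored alongside the model,
# otherwise the policy sees unnormalized observations.
env = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
env = VecNormalize(env, training=False)
env.load_running_average(stats_dir)
model = PPO2.load(model_path, env=env)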
Example #21
def run_experiment_with_trained(augment_num_timesteps, linear_co_threshold, augment_seed, augment_run_num, network_size,
                                policy_env, policy_num_timesteps, policy_run_num, policy_seed, eval_seed, eval_run_num, learning_rate,
                                additional_note, result_dir, keys_to_include, metric_param, linear_top_vars_list=None,
                                linear_correlation_neuron_list=None, visualize=False, lagrangian_inds_to_include=None,
                                neurons_inds_to_include=None, use_lagrangian=True):
    trained_model = None
    if not use_lagrangian:
        with tf.variable_scope("trained_model"):
            common_arg_parser = get_common_parser()
            trained_args, cma_unknown_args = common_arg_parser.parse_known_args()
            trained_args.env = policy_env
            trained_args.seed = policy_seed
            trained_args.num_timesteps = policy_num_timesteps
            trained_args.run_num = policy_run_num
            trained_this_run_dir = get_dir_path_for_this_run(trained_args)
            trained_traj_params_dir_name = get_full_params_dir(trained_this_run_dir)
            trained_save_dir = get_save_dir(trained_this_run_dir)

            trained_final_file = get_full_param_traj_file_path(trained_traj_params_dir_name, "pi_final")
            trained_final_params = pd.read_csv(trained_final_file, header=None).values[0]

            trained_model = PPO2.load(f"{trained_save_dir}/ppo2", seed=augment_seed)
            trained_model.set_pi_from_flat(trained_final_params)

    args = AttributeDict()

    args.normalize = True
    args.num_timesteps = augment_num_timesteps
    args.run_num = augment_run_num
    args.alg = "ppo2"
    args.seed = augment_seed

    logger.log(f"#######TRAIN: {args}")
    # non_linear_global_dict
    timestamp = get_time_stamp('%Y_%m_%d_%H_%M_%S')
    experiment_label = f"learning_rate_{learning_rate}timestamp_{timestamp}_augment_num_timesteps{augment_num_timesteps}" \
                       f"_top_num_to_include{linear_co_threshold.start}_{linear_co_threshold.stop}" \
                       f"_augment_seed{augment_seed}_augment_run_num{augment_run_num}_network_size{network_size}" \
                       f"_policy_num_timesteps{policy_num_timesteps}_policy_run_num{policy_run_num}_policy_seed{policy_seed}" \
                       f"_eval_seed{eval_seed}_eval_run_num{eval_run_num}_additional_note_{additional_note}"

    if policy_env == "DartWalker2d-v1":
        entry_point = 'gym.envs.dart:DartWalker2dEnv_aug_input'
    elif policy_env == "DartHopper-v1":
        entry_point = 'gym.envs.dart:DartHopperEnv_aug_input'
    elif policy_env == "DartHalfCheetah-v1":
        entry_point = 'gym.envs.dart:DartHalfCheetahEnv_aug_input'
    elif policy_env == "DartSnake7Link-v1":
        entry_point = 'gym.envs.dart:DartSnake7LinkEnv_aug_input'
    else:
        raise NotImplementedError()


    this_run_dir = get_experiment_path_for_this_run(entry_point, args.num_timesteps, args.run_num,
                                                    args.seed, learning_rate=learning_rate, top_num_to_include=linear_co_threshold,
                                                    result_dir=result_dir, network_size=network_size)
    full_param_traj_dir_path = get_full_params_dir(this_run_dir)
    log_dir = get_log_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)


    create_dir_remove(this_run_dir)
    create_dir_remove(full_param_traj_dir_path)
    create_dir_remove(save_dir)
    create_dir_remove(log_dir)
    logger.configure(log_dir)

    linear_top_vars_list_wanted_to_print = []
    if (use_lagrangian and lagrangian_inds_to_include is None) or (not use_lagrangian and neurons_inds_to_include is None):
        # note this is only linear
        if linear_top_vars_list is None or linear_correlation_neuron_list is None:

            linear_top_vars_list, linear_correlation_neuron_list = read_linear_top_var(policy_env, policy_num_timesteps, policy_run_num, policy_seed, eval_seed,
                                               eval_run_num, additional_note, metric_param=metric_param)

        lagrangian_inds_to_include, neurons_inds_to_include, linear_top_vars_list_wanted_to_print = \
            get_wanted_lagrangians_and_neurons(keys_to_include, linear_top_vars_list, linear_correlation_neuron_list, linear_co_threshold)



    with open(f"{log_dir}/lagrangian_inds_to_include.json", 'w') as fp:
        json.dump(lagrangian_inds_to_include, fp)
    with open(f"{log_dir}/linear_top_vars_list_wanted_to_print.json", 'w') as fp:
        json.dump(linear_top_vars_list_wanted_to_print, fp)
    with open(f"{log_dir}/neurons_inds_to_include.json", 'w') as fp:
        json.dump(neurons_inds_to_include, fp)


    args.env = f'{experiment_label}_{entry_point}-v1'

    if not use_lagrangian:
        register(
            id=args.env,
            entry_point=entry_point,
            max_episode_steps=1000,
            kwargs={"lagrangian_inds_to_include": None, "trained_model": trained_model,
                    "neurons_inds_to_include": neurons_inds_to_include}
        )
    else:
        register(
            id=args.env,
            entry_point=entry_point,
            max_episode_steps=1000,
            kwargs={"lagrangian_inds_to_include": lagrangian_inds_to_include, "trained_model": None,
                    "neurons_inds_to_include": None}
        )

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.visualize = visualize
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    walker_env = env.envs[0].env.env
    walker_env.disableViewer = not visualize


    if args.normalize:
        env = VecNormalize(env)
    policy = MlpPolicy



    set_global_seeds(args.seed)
    walker_env.seed(args.seed)

    num_dof = walker_env.robot_skeleton.ndofs
    show_M_matrix(num_dof, lagrangian_inds_to_include, linear_co_threshold, log_dir)




    # extra run info I added for my purposes
    run_info = {"run_num": args.run_num,
                "env_id": args.env,
                "full_param_traj_dir_path": full_param_traj_dir_path}

    layers = [network_size, network_size]
    policy_kwargs = {"net_arch" : [dict(vf=layers, pi=layers)]}
    model = PPO2(policy=policy, env=env, n_steps=4096, nminibatches=64, lam=0.95, gamma=0.99,
                 noptepochs=10,
                 ent_coef=0.0, learning_rate=learning_rate, cliprange=0.2, optimizer='adam', policy_kwargs=policy_kwargs,
                 seed=args.seed)
    model.tell_run_info(run_info)
    model.learn(total_timesteps=args.num_timesteps, seed=args.seed)

    model.save(f"{save_dir}/ppo2")

    if args.normalize:
        env.save_running_average(save_dir)

    return log_dir
Example #22
    qpos[i, :] = env_in.GetMotorAngles()
    #     qvel[i,:] = env_in.sim.data.qvel[[7,8,10,12,14,16,17,19,21,23]]
    qvel[i, :] = env_in.GetMotorVelocities()
    #     torque[i,:] = env_in.sim.data.actuator_force
    torque[i, :] = env_in.GetMotorTorques()
    i = (i + 1) % total_data_length
    return True


###############################################################################
# # Use this code for testing the basic controller
# Create the stoch mujoco environment
# env = stoch2_gym_mjc_env.Stoch2Env()
env = vision60_gym_bullet_env.Vision60BulletEnv(render=True)

model_test = PPO2.load(dir_name + "/model_trot")
obs = env.reset()

print("Render mode...")

for _ in range(10):
    action, _states = model_test.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action, callback=render_callback)
#     if done:
#         break

pickle.dump(qpos[0:total_data_length:int(total_data_length / 100)],
            open("save.p", "wb"))  # save it into a file named save.p
# print(np.shape(qpos[0:total_data_length:int(total_data_length/100)]))
# print(np.shape(qpos))
Example #23
def main(env,
         load,
         save_path,
         load_path=None,
         train_timesteps=1.25e6,
         eval_timesteps=5e3):

    # arguments
    print(
        "env %s; load %s; save_path %s; load_path %s; train_timesteps %s; eval_timesteps %s;"
        % (env, load, save_path, load_path, train_timesteps, eval_timesteps))
    train_timesteps = int(float(train_timesteps))
    eval_timesteps = int(float(eval_timesteps))

    # models path
    model_dir = os.getcwd() + "/models/"
    os.makedirs(model_dir, exist_ok=True)

    # logging path
    log_dir = os.getcwd() + "/log/" + save_path
    os.makedirs(log_dir, exist_ok=True)

    # absolute save path and models path
    save_path = model_dir + save_path
    if load and not load_path:
        print("no load path given, exiting...")
        sys.exit()
    elif load:
        load_path = model_dir + load_path

    # make environment, flattened environment, monitor, vectorized environment
    env = gym.make(env)
    env = gym.wrappers.FlattenDictWrapper(
        env, ['observation', 'achieved_goal', 'desired_goal'])
    env = Monitor(env, log_dir, allow_early_resets=True)
    env = DummyVecEnv([lambda: env])

    # load model, or start from scratch
    if load:
        print("loading model from: " + load_path)
        model = PPO2.load(load_path, env=env)
    else:
        print("training model from scratch")
        model = PPO2(MlpPolicy, env, verbose=1)

    # evaluate current model
    mean_reward_before_train = evaluate(model, env, num_steps=eval_timesteps)

    # train model
    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0
    model.learn(total_timesteps=train_timesteps, callback=None)

    # save model
    print("saving model to:" + save_path)
    model.save(save_path)

    # evaluate post training model
    mean_reward_after_train = evaluate(model, env, num_steps=eval_timesteps)

    # results
    print("reward before training:" + str(mean_reward_before_train))
    print("reward after training:" + str(mean_reward_after_train))
    print("done")
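Examples #12 and #23 call an evaluate(model, env, num_steps=...) helper that is not shown in the snippets. A plausible minimal version, assuming a single-environment DummyVecEnv and a mean per-episode reward metric (the helpers used in those projects may differ):

import numpy as np


def evaluate(model, env, num_steps=1000):
    # Rough mean per-episode reward of `model` on a single-env VecEnv over `num_steps` steps.
    episode_rewards = [0.0]
    obs = env.reset()
    for _ in range(num_steps):
        action, _states = model.predict(obs)
        obs, rewards, dones, _infos = env.step(action)
        episode_rewards[-1] += rewards[0]
        if dones[0]:
            episode_rewards.append(0.0)
    mean_reward = float(np.mean(episode_rewards))
    print("Mean reward:", round(mean_reward, 2), "over", len(episode_rewards), "episodes")
    return mean_reward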
Example #24
env.close()

# Curriculum implemented is extremely basic one. (Need to improvise later on)
# Curriculum is a newly made folder. In google drive, read the note
all_arenas = sorted(glob.glob('configs/Curriculum/*.yaml'))
print(all_arenas)

model_name = 'ppo_model_after_bc'
all_frames_vec = [200000, 200000, 100000, 200000, 200000, 200000, 200000, 200000] #no of tsteps per curriculum

for i in range(len(all_arenas)):
    # create arena 
    env = create_env_fn(num_actors = 1, inference=False, config=all_arenas[i], seed=0)
    env = make_vec_env(env, n_envs=4)
    print('####################')
    print("##  Curriculum {} ##".format(i))
    print('####################')

    model = PPO2.load(model_name, env)

    frames_idx = all_frames_vec[i]
    print('{} arena is used for training for {} timesteps'.format(all_arenas[i], frames_idx))
    model.learn(total_timesteps=frames_idx)
    model_name = "ppo_model_after_training_arena_{}".format(i)
    model.save(model_name)
    env.close()

    del model
    del env

print('Training complete!!')
Example #25
def eval_trained_policy_and_collect_data(eval_seed, eval_run_num, policy_env, policy_num_timesteps, policy_seed, policy_run_num, additional_note):


    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num
    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir( this_run_dir)



    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]


    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        env_out.seed(eval_seed)
        return env_out
    env = DummyVecEnv([make_env])
    running_env = env.envs[0].env.env


    set_global_seeds(eval_seed)
    running_env.seed(eval_seed)

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=eval_seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    # is it necessary?
    running_env = env.venv.envs[0].env.env


    lagrangian_values = {}

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)

    obs[:] = env.reset()

    # env = VecVideoRecorder(env, "./",
    #                            record_video_trigger=lambda x: x == 0, video_length=3000,
    #                            name_prefix="3000000agent-{}".format(args.env))

    #init lagrangian values
    for lagrangian_key in lagrangian_keys:
        flat_array = running_env.get_lagrangian_flat_array(lagrangian_key)
        lagrangian_values[lagrangian_key] = [flat_array]


    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1,1))] for neuron_value in neuron_values]

    # env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False
    for _ in range(30000):
        actions = model.step(obs)[0]

        # yield neuron_values
        obs, rew, done, infos = env.step(actions)
        if done and not first_done:
            first_done = True

        if not first_done:
            steps_to_first_done += 1


        neuron_values = model.give_neuron_values(obs)


        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1,1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        # filling lagrangian values
        for lagrangian_key in lagrangian_keys:
            flat_array = running_env.get_lagrangian_flat_array(lagrangian_key)
            lagrangian_values[lagrangian_key].append(flat_array)

        # env.render()

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()


    #Hstack into a big matrix
    for lagrangian_key in lagrangian_keys:
        lagrangian_values[lagrangian_key] = np.hstack(lagrangian_values[lagrangian_key])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])
    input_values = np.hstack(raw_layer_values_list[0])

    layers_values = [np.hstack(layer_list) for layer_list in raw_layer_values_list][1:-2]# drop variance and inputs


    data_dir = get_data_dir(policy_env=args.env, policy_num_timesteps=policy_num_timesteps, policy_run_num=policy_run_num
                            , policy_seed=policy_seed, eval_seed=eval_seed, eval_run_num=eval_run_num, additional_note=additional_note)
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)


    lagrangian_values_fn = f"{data_dir}/lagrangian.pickle"

    with open(lagrangian_values_fn, 'wb') as handle:
        pickle.dump(lagrangian_values, handle, protocol=pickle.HIGHEST_PROTOCOL)

    input_values_fn = f"{data_dir}/input_values.npy"
    layers_values_fn = f"{data_dir}/layer_values.npy"

    np.save(input_values_fn, input_values)
    np.save(layers_values_fn, layers_values)


    all_weights = model.get_all_weight_values()

    for ind, weights in enumerate(all_weights):
        fname = f"{data_dir}/weights_layer_{ind}.txt"
        np.savetxt(fname, weights)
Example #26
def main():

    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(args)
    plot_dir_alg = get_plot_dir(args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(plot_dir_alg):
        os.makedirs(plot_dir_alg)

    final_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)

        return env_out

    env = DummyVecEnv([make_env])

    # env_out = gym.make(args.env)
    # env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
    if args.normalize:
        env = VecNormalize(env)
    # policy = MlpPolicy
    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    # model = PPO2(policy=policy, env=env, n_steps=args.n_steps, nminibatches=args.nminibatches, lam=0.95, gamma=0.99, noptepochs=10,
    #              ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, optimizer=args.optimizer)
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)

    obs[:] = env.reset()

    # env = VecVideoRecorder(env, "./",
    #                            record_video_trigger=lambda x: x == 0, video_length=3000,
    #                            name_prefix="3000000agent-{}".format(args.env))

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    layer_values_list = [[neuron_value.reshape((-1, 1))]
                         for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False
    for _ in range(3000):
        actions = model.step(obs)[0]

        # yield neuron_values
        obs, rew, done, infos = env.step(actions)
        if done and not first_done:
            first_done = True

        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)

        for i, layer in enumerate(neuron_values):
            layer_values_list[i].append(layer.reshape((-1, 1)))

        fill_contacts_jac_dict(infos[0]["contacts"],
                               contact_dict=contact_values,
                               neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        env.render()

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # hstack the per-step columns into one (n_components, T) matrix per quantity
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    for body_contact_dict in contact_values.values():
        for name, columns in body_contact_dict.items():
            body_contact_dict[name] = np.hstack(columns)

    layer_values_list = [
        np.hstack(layer_list) for layer_list in layer_values_list
    ][1:-2]  # drop variance

    # plt.scatter(lagrangian_values["M"][15], layer_values_list[1][2])
    # plt.scatter(lagrangian_values["M"][11], layer_values_list[0][63])
    out_dir = f"/home/panda-linux/PycharmProjects/low_dim_update_dart/low_dim_update_stable/neuron_vis/plots_{args.env}_{args.num_timesteps}"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    all_weights = model.get_all_weight_values()

    for ind, weights in enumerate(all_weights):
        fname = f"{out_dir}/weights_layer_{ind}.txt"
        np.savetxt(fname, weights)

    PLOT_CUTOFF = steps_to_first_done
    plot_everything(lagrangian_values, layer_values_list, out_dir, PLOT_CUTOFF)
    scatter_the_linear_significant_ones(lagrangian_values,
                                        layer_values_list,
                                        threshold=0.6,
                                        out_dir=out_dir)
    scatter_the_nonlinear_significant_but_not_linear_ones(
        lagrangian_values,
        layer_values_list,
        linear_threshold=0.3,
        nonlinear_threshold=0.6,
        out_dir=out_dir)
    #
    # contact_dicts = {}
    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #
    #
    #     contact_dicts[contact_body_name] = {}
    #
    #     build_dict = contact_dicts[contact_body_name]
    #
    #     build_dict["body"] = {}
    #     build_dict["layer"] = {}
    #     for name, l in body_contact_dict.items():
    #         for i in range(len(l)):
    #
    #             if name == contact_body_name:
    #                 build_dict["body"][f"{contact_body_name}_{i}"] = l[i]
    #             else:
    #                 build_dict["layer"][f"layer_{name}_neuron_{i}"] = l[i]
    #
    #     body_contact_df = pd.DataFrame.from_dict(build_dict["body"], "index")
    #     layer_contact_df = pd.DataFrame.from_dict(build_dict["layer"], "index")

    # body_contact_df.to_csv(f"{data_dir}/{contact_body_name}_contact.txt", sep='\t')
    # layer_contact_df.to_csv(f"{data_dir}/{contact_body_name}_layers.txt", sep='\t')

    # #TO CSV format
    # data_dir = f"/home/panda-linux/PycharmProjects/low_dim_update_dart/mictools/examples/neuron_vis_data{args.env}_time_steps_{args.num_timesteps}"
    # if os.path.exists(data_dir):
    #     shutil.rmtree(data_dir)
    #
    # os.makedirs(data_dir)
    #
    # for contact_body_name, d in contact_dicts.items():
    #
    #     build_dict = d
    #
    #     body_contact_df = pd.DataFrame.from_dict(build_dict["body"], "index")
    #     layer_contact_df = pd.DataFrame.from_dict(build_dict["layer"], "index")
    #
    #     body_contact_df.to_csv(f"{data_dir}/{contact_body_name}_contact.txt", sep='\t')
    #     layer_contact_df.to_csv(f"{data_dir}/{contact_body_name}_layers.txt", sep='\t')
    #
    #
    #
    # neurons_dict = {}
    # for layer_index in range(len(layer_values_list)):
    #     for neuron_index in range(len(layer_values_list[layer_index])):
    #         neurons_dict[f"layer_{layer_index}_neuron_{neuron_index}"] = layer_values_list[layer_index][neuron_index]
    #
    # for i in range(len(lagrangian_values["COM"])):
    #     neurons_dict[f"COM_index_{i}"] = lagrangian_values["COM"][i]
    #
    # neuron_df = pd.DataFrame.from_dict(neurons_dict, "index")
    #
    #
    #
    # lagrangian_dict = {}
    # for k,v in lagrangian_values.items():
    #     for i in range(len(v)):
    #         lagrangian_dict[f"{k}_index_{i}"] = v[i]
    #
    # lagrangian_df = pd.DataFrame.from_dict(lagrangian_dict, "index")
    #
    #
    # neuron_df.to_csv(f"{data_dir}/neurons.txt", sep='\t')
    # lagrangian_df.to_csv(f"{data_dir}/lagrangian.txt", sep='\t')

    # cor = {}
    # best_cor = {}
    # cor["M"] = get_correlations(lagrangian_values["M"], layer_values_list)
    # best_cor["M"] = [np.max(np.abs(cor_m)) for cor_m in cor["M"]]
    #
    #
    # cor["COM"] = get_correlations(lagrangian_values["COM"], layer_values_list)
    # best_cor["COM"] = [np.max(np.abs(cor_m)) for cor_m in cor["COM"]]
    #
    # cor["Coriolis"] = get_correlations(lagrangian_values["Coriolis"], layer_values_list)
    # best_cor["Coriolis"] = [np.max(np.abs(cor_m)) for cor_m in cor["Coriolis"]]
    # best_cor["Coriolis_argmax"] = [np.argmax(np.abs(cor_m)) for cor_m in cor["Coriolis"]]
    #
    #
    #
    #
    # ncor = {}
    # nbest_cor = {}
    # ncor["M"] = get_normalized_correlations(lagrangian_values["M"], layer_values_list)
    # nbest_cor["M"] = [np.max(np.abs(cor_m)) for cor_m in ncor["M"]]
    #
    #
    # ncor["COM"] = get_normalized_correlations(lagrangian_values["COM"], layer_values_list)
    # nbest_cor["COM"] = [np.max(np.abs(cor_m)) for cor_m in ncor["COM"]]
    #
    # ncor["Coriolis"] = get_normalized_correlations(lagrangian_values["Coriolis"], layer_values_list)
    # nbest_cor["Coriolis"] = [np.max(np.abs(cor_m)) for cor_m in ncor["Coriolis"]]
    # nbest_cor["Coriolis_argmax"] = [np.argmax(np.abs(cor_m)) for cor_m in ncor["Coriolis"]]
    #
    #
    #
    #
    #
    # lin_reg = {"perm_1":{}, "perm_2":{}}
    # best_lin_reg = {"perm_1":{}, "perm_2":{}}
    # lin_reg["perm_1"]["M"], best_lin_reg["perm_1"]["M"] = get_results("M", lagrangian_values, layer_values_list, perm_num=1)
    # lin_reg["perm_2"]["M"], best_lin_reg["perm_2"]["M"] = get_results("M", lagrangian_values, layer_values_list, perm_num=2)
    # lin_reg["perm_1"]["COM"], best_lin_reg["perm_1"]["COM"] = get_results("COM", lagrangian_values, layer_values_list, perm_num=1)
    # lin_reg["perm_2"]["COM"], best_lin_reg["perm_2"]["COM"] = get_results("COM", lagrangian_values, layer_values_list, perm_num=2)

    #
    #
    # lin_reg_1["M"] = get_linear_regressions_1_perm(lagrangian_values["M"], layer_values_list)
    # lin_reg_2["M"] = get_linear_regressions_2_perm(lagrangian_values["M"], layer_values_list)
    # best_lin_reg_2["M"] = []
    # for lin_l in lin_reg_2["M"]:
    #     if lin_l == []:
    #         best_lin_reg_2["M"].append([])
    #     else:
    #         best_lin_reg_2["M"].append(lin_l[np.argmin(lin_l[:,0])])
    #
    # best_lin_reg_1["M"] = []
    # for lin_l in lin_reg_1["M"]:
    #     if lin_l == []:
    #         best_lin_reg_1["M"].append([])
    #     else:
    #         best_lin_reg_1["M"].append(lin_l[np.argmin(lin_l[:,0])])
    # best_lin_reg_1["M"] = np.array(best_lin_reg_1["M"])
    # best_lin_reg_2["M"] = np.array(best_lin_reg_2["M"])
    #
    #
    # lin_reg_1["M"].dump("lin_reg_1_M.txt")
    # lin_reg_2["M"].dump("lin_reg_2_M.txt")
    # best_lin_reg_1["M"].dump("best_lin_reg_1_M.txt")
    # best_lin_reg_2["M"].dump("best_lin_reg_2_M.txt")
    #
    # lin_reg_1["COM"] = get_linear_regressions_1_perm(lagrangian_values["COM"], layer_values_list)
    # lin_reg_2["COM"] = get_linear_regressions_2_perm(lagrangian_values["COM"], layer_values_list)
    # best_lin_reg_2["COM"] = []
    # for lin_l in lin_reg_2["COM"]:
    #     if lin_l == []:
    #         best_lin_reg_2["COM"].append([])
    #     else:
    #         best_lin_reg_2["COM"].append(lin_l[np.argmin(lin_l[:, 0])])
    #
    # best_lin_reg_1["COM"] = []
    # for lin_l in lin_reg_1["COM"]:
    #     if lin_l == []:
    #         best_lin_reg_1["COM"].append([])
    #     else:
    #         best_lin_reg_1["COM"].append(lin_l[np.argmin(lin_l[:, 0])])
    #
    #
    # best_lin_reg_1["COM"] = np.array(best_lin_reg_1["M"])
    # best_lin_reg_2["COM"] = np.array(best_lin_reg_2["M"])
    # lin_reg_1["COM"].dump("lin_reg_1_COM.txt")
    # lin_reg_2["COM"].dump("lin_reg_2_COM.txt")
    # best_lin_reg_1["COM"].dump("best_lin_reg_1_COM.txt")
    # best_lin_reg_2["COM"].dump("best_lin_reg_2_COM.txt")

    pass
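
# --- Illustrative helper (not from the original project) --------------------
# The commented-out analysis above refers to a `get_correlations` utility.
# As a rough sketch of what such a routine could compute, the function below
# returns, for each layer, the Pearson correlations between every row of a
# Lagrangian quantity matrix (shape (n_components, T), as hstack'ed in main())
# and every neuron's activation trace (shape (n_neurons, T)). It assumes
# numpy is imported as np, as elsewhere in this module.
def correlate_quantity_with_layers(quantity, layer_matrices):
    correlations = []
    for layer in layer_matrices:
        # np.corrcoef treats rows as variables; the off-diagonal block of the
        # stacked correlation matrix holds quantity-vs-neuron correlations.
        full = np.corrcoef(np.vstack([quantity, layer]))
        correlations.append(full[:quantity.shape[0], quantity.shape[0]:])
    return correlations
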
def load_model(load_dir=LOAD_DIR):
    return PPO2.load(load_dir)
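
# A minimal usage sketch (not part of the original snippet), assuming `LOAD_DIR`
# points to a saved PPO2 model and that `env_id` names a Gym environment
# compatible with the saved policy. `run_loaded_model` is a hypothetical name.
def run_loaded_model(env_id, n_steps=1000):
    import gym
    env = gym.make(env_id)
    model = load_model()  # PPO2.load(LOAD_DIR)
    obs = env.reset()
    for _ in range(n_steps):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()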