def do_ppo(args, start_theta, parent_this_run_dir, full_space_save_dir):
    """Continue PPO2 training from a given flat parameter vector.

    Loads the PPO2 model saved under ``full_space_save_dir``, overwrites its
    parameters with ``start_theta``, trains it for ``args.ppo_num_timesteps``
    steps and saves the result in the PPO sub-directory derived from
    ``parent_this_run_dir``.

    Args:
        args: Parsed options; this function reads ``env``, ``normalize``,
            ``run_num`` and ``ppo_num_timesteps``.
        start_theta: Flat parameters handed to ``model.set_from_flat``.
        parent_this_run_dir: Run directory from which the PPO log, save and
            parameter-trajectory directories are derived.
        full_space_save_dir: Directory containing the saved ``ppo2`` model
            (and the running averages when ``args.normalize`` is set).

    Returns:
        ``(episode_returns, full_param_traj_dir_path)`` where
        ``episode_returns`` is whatever ``model.learn`` returns.
    """
    import shutil

    logger.log(f"#######CMA and then PPO TRAIN: {args}")

    this_conti_ppo_run_dir = get_ppo_part(parent_this_run_dir)
    log_dir = get_log_dir(this_conti_ppo_run_dir)
    conti_ppo_save_dir = get_save_dir(this_conti_ppo_run_dir)
    logger.configure(log_dir)

    full_param_traj_dir_path = get_full_params_dir(this_conti_ppo_run_dir)

    # Recreate both output directories empty so files from a previous run
    # are not mixed with this one.
    for fresh_dir in (full_param_traj_dir_path, conti_ppo_save_dir):
        if os.path.exists(fresh_dir):
            shutil.rmtree(fresh_dir)
        os.makedirs(fresh_dir)

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = True
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{full_space_save_dir}/ppo2")
    model.set_from_flat(start_theta)

    if args.normalize:
        env.load_running_average(full_space_save_dir)
    model.set_env(env)

    run_info = {"run_num": args.run_num,
                "env_id": args.env,
                "full_param_traj_dir_path": full_param_traj_dir_path}

    model.tell_run_info(run_info)
    episode_returns = model.learn(total_timesteps=args.ppo_num_timesteps)

    model.save(f"{conti_ppo_save_dir}/ppo2")

    # Only the VecNormalize wrapper has running averages to save; without
    # normalization ``env`` is the bare DummyVecEnv and the call would fail.
    if args.normalize:
        env.save_running_average(conti_ppo_save_dir)
    return episode_returns, full_param_traj_dir_path
# Example #2
# 0
def main():
    """Run Adam updates on the policy's input tensor for one output neuron.

    Loads the saved PPO2 model for the run described by the command-line
    arguments, installs the final policy parameters, and then applies
    10000 Adam steps using the gradient of the negated value of
    ``policy_neurons[2][-1]`` with respect to ``fake_input_tensor``.
    """
    import sys

    logger.log(sys.argv)
    parser = get_common_parser()
    args, _unused_cma_args = parser.parse_known_args()

    run_dir = get_dir_path_for_this_run(args)
    plot_dir = get_plot_dir(args)

    params_dir = get_full_params_dir(run_dir)
    data_dir = get_intermediate_data_dir(run_dir, params_scope="pi")
    save_dir = get_save_dir(run_dir)

    for needed_dir in (data_dir, plot_dir):
        if not os.path.exists(needed_dir):
            os.makedirs(needed_dir)

    final_params_file = get_full_param_traj_file_path(params_dir, "pi_final")
    # The first row of the CSV is the flat policy parameter vector.
    final_params = pd.read_csv(final_params_file, header=None).values[0]

    def make_env():
        return bench.Monitor(gym.make(args.env),
                             logger.get_dir(),
                             allow_early_resets=True)

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # PPO2.load restores the value function too; only the policy part is
    # then replaced by the final parameters.
    model = PPO2.load(f"{save_dir}/ppo2")
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)

    obz_tensor = model.act_model.fake_input_tensor
    target_neuron = model.act_model.policy_neurons[2][-1]

    # Negating the neuron turns Adam's minimisation into maximising it.
    neg_neuron_grads = tf.gradients(tf.math.negative(target_neuron),
                                    obz_tensor)
    # NOTE(review): zip iterates over obz_tensor itself, so this presumably
    # expects fake_input_tensor to be a sequence of variables -- confirm.
    grads_and_vars = list(zip(neg_neuron_grads, obz_tensor))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01, epsilon=1e-5)
    train_op = optimizer.apply_gradients(grads_and_vars)

    for _ in range(10000):
        model.sess.run([obz_tensor, train_op])
def neuron_values_generator(args, save_dir, pi_theta, eval_timesteps):
    """Yield the policy's neuron values for every step of an endless rollout.

    Args:
        args: Options; reads ``env`` and ``normalize``.
        save_dir: Directory holding the saved ``ppo2`` model (and running
            averages when ``args.normalize`` is set).
        pi_theta: Optional flat policy parameters; when not ``None`` they
            replace the loaded policy parameters.
        eval_timesteps: Not used by this generator, which never stops on
            its own.

    Yields:
        The first element returned by ``model.step_with_neurons`` for the
        current observation.
    """
    def make_env():
        return bench.Monitor(gym.make(args.env),
                             logger.get_dir(),
                             allow_early_resets=True)

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # Loading the model also restores the value function.
    model = PPO2.load(f"{save_dir}/ppo2")
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()
    env.render()

    finished_episodes = []
    while True:
        neuron_values, actions, _, _, _ = model.step_with_neurons(obs)
        yield neuron_values

        obs, _rew, done, infos = env.step(actions)
        env.render()

        # The Monitor wrapper adds an 'episode' entry when an episode ends.
        finished_episodes.extend(
            ep_info
            for ep_info in (info.get('episode') for info in infos)
            if ep_info is not None)

        if done.any():
            episode_rew = safe_mean(
                [ep_info['r'] for ep_info in finished_episodes])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()
# Example #4
# 0
def visualize_neurons(args, save_dir, pi_theta, eval_timesteps):
    """Roll out the saved policy for ``eval_timesteps`` steps without a viewer.

    Args:
        args: Options; reads ``env`` and ``normalize``.
        save_dir: Directory holding the saved ``ppo2`` model (and running
            averages when ``args.normalize`` is set).
        pi_theta: Optional flat policy parameters; when not ``None`` they
            replace the loaded policy parameters.
        eval_timesteps: Number of environment steps to run.

    Returns:
        ``safe_mean`` of the rewards of all episodes finished during the
        rollout.
    """
    def make_env():
        raw_env = gym.make(args.env)
        # Switch off the viewer on the wrapped env.
        raw_env.env.disableViewer = True
        raw_env.env.visualize = False
        return bench.Monitor(raw_env,
                             logger.get_dir(),
                             allow_early_resets=True)

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # Loading the model also restores the value function.
    model = PPO2.load(f"{save_dir}/ppo2")
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()

    finished_episodes = []
    for _ in range(eval_timesteps):
        actions = model.step(obs)[0]
        # The result is not used here; the call is kept as in the original.
        model.give_neuron_values(obs)

        obs, _rew, done, infos = env.step(actions)

        for info in infos:
            ep_info = info.get('episode')
            if ep_info is not None:
                finished_episodes.append(ep_info)

        if done.any():
            # The running mean is only printed for the unmodified policy.
            if pi_theta is None:
                episode_rew = safe_mean(
                    [ep_info['r'] for ep_info in finished_episodes])
                print(f'episode_rew={episode_rew}')
            obs = env.reset()

    return safe_mean([ep_info['r'] for ep_info in finished_episodes])
def visualize_policy_and_collect_COM(
        augment_num_timesteps, top_num_to_include_slice, augment_seed,
        augment_run_num, network_size, policy_env, policy_num_timesteps,
        policy_run_num, policy_seed, eval_seed, eval_run_num, learning_rate,
        additional_note, metric_param):
    """Record a rollout of the augmented-input policy and plot ``COM`` rows.

    Registers a gym env backed by ``gym.envs.dart:DartWalker2dEnv_aug_input``
    using data from ``read_all_data``, loads the saved PPO2 model for the
    augmented run, rolls it out for 3000 steps under a video recorder, and
    saves one ``COM{i}.jpg`` plot per row of the stacked ``sk.C`` samples
    into the ``_vis`` plot directory. Returns nothing.

    Args:
        augment_num_timesteps, augment_run_num, augment_seed: Identify the
            augmented run; the seed is also used for global, env and model
            seeding.
        top_num_to_include_slice: Slice passed to the env and used (via
            ``.start`` / ``.stop``) in the experiment label.
        network_size, learning_rate, metric_param: Forwarded to the
            run-directory lookup.
        policy_env, policy_num_timesteps, policy_run_num, policy_seed,
        eval_seed, eval_run_num, additional_note: Identify the original
            policy and evaluation whose data is read.
    """
    result_dir = get_result_dir(policy_env, policy_num_timesteps,
                                policy_run_num, policy_seed, eval_seed,
                                eval_run_num, additional_note, metric_param)

    args = AttributeDict()
    args.normalize = True
    args.num_timesteps = augment_num_timesteps
    args.run_num = augment_run_num
    args.alg = "ppo2"
    args.seed = augment_seed

    logger.log(f"#######VISUALIZE: {args}")

    (linear_global_dict, non_linear_global_dict, prior_lagrangian_values,
     input_values, prior_layers_values, all_weights) = read_all_data(
         policy_env,
         policy_num_timesteps,
         policy_run_num,
         policy_seed,
         eval_seed,
         eval_run_num,
         additional_note=additional_note)

    timestamp = get_time_stamp('%Y_%m_%d_%H_%M_%S')
    experiment_label = (
        f"learning_rate_{learning_rate}timestamp_{timestamp}_augment_num_timesteps{augment_num_timesteps}"
        f"_top_num_to_include{top_num_to_include_slice.start}_{top_num_to_include_slice.stop}"
        f"_augment_seed{augment_seed}_augment_run_num{augment_run_num}_network_size{network_size}"
        f"_policy_num_timesteps{policy_num_timesteps}_policy_run_num{policy_run_num}_policy_seed{policy_seed}"
        f"_eval_seed{eval_seed}_eval_run_num{eval_run_num}_additional_note_{additional_note}"
    )

    entry_point = 'gym.envs.dart:DartWalker2dEnv_aug_input'

    this_run_dir = get_experiment_path_for_this_run(
        entry_point,
        args.num_timesteps,
        args.run_num,
        args.seed,
        learning_rate=learning_rate,
        top_num_to_include=top_num_to_include_slice,
        result_dir=result_dir,
        network_size=network_size,
        metric_param=metric_param)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)
    aug_plot_dir = get_aug_plot_dir(this_run_dir) + "_vis"

    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "pi_final"),
        header=None).values[0]

    # The timestamped label makes the registered env id unique per call.
    args.env = f'{experiment_label}_{entry_point}-v1'
    env_kwargs = {
        'linear_global_dict': linear_global_dict,
        'non_linear_global_dict': non_linear_global_dict,
        'top_to_include_slice': top_num_to_include_slice,
        'aug_plot_dir': aug_plot_dir,
        "lagrangian_values": prior_lagrangian_values,
        "layers_values": prior_layers_values
    }
    register(id=args.env,
             entry_point=entry_point,
             max_episode_steps=1000,
             kwargs=env_kwargs)

    def make_env():
        return bench.Monitor(gym.make(args.env),
                             logger.get_dir(),
                             allow_early_resets=True)

    env = DummyVecEnv([make_env])
    walker_env = env.envs[0].env.env
    walker_env.disableViewer = False

    if args.normalize:
        env = VecNormalize(env)

    set_global_seeds(args.seed)
    walker_env.seed(args.seed)

    model = PPO2.load(f"{save_dir}/ppo2", seed=augment_seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()

    env = VecVideoRecorder(env,
                           aug_plot_dir,
                           record_video_trigger=lambda x: x == 0,
                           video_length=3000,
                           name_prefix="vis_this_policy")

    # Readers for the skeleton attributes sampled at every step; each sample
    # is stored as a column vector so they can be hstack-ed later.
    skeleton_readers = {
        "M": lambda: sk.M,
        "COM": lambda: sk.C,
        "Coriolis": lambda: sk.c,
        "q": lambda: sk.q,
        "dq": lambda: sk.dq,
    }
    lagrangian_values = {
        key: [read().reshape((-1, 1))]
        for key, read in skeleton_readers.items()
    }

    raw_layer_values_list = [[layer.reshape((-1, 1))]
                             for layer in model.give_neuron_values(obs)]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    for _ in range(3000):
        actions = model.step(obs)[0]
        obs, _rew, done, infos = env.step(actions)

        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        for layer_index, layer in enumerate(model.give_neuron_values(obs)):
            raw_layer_values_list[layer_index].append(layer.reshape((-1, 1)))

        for key in ("M", "q", "dq", "COM", "Coriolis"):
            lagrangian_values[key].append(
                skeleton_readers[key]().reshape((-1, 1)))

        # The Monitor wrapper adds an 'episode' entry when an episode ends.
        for info in infos:
            ep_info = info.get('episode')
            if ep_info is not None:
                ep_infos.append(ep_info)

        env.render()
        if done.any():
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # Stack the per-step column vectors into one matrix per quantity.
    for key in ("M", "COM", "Coriolis", "q", "dq"):
        lagrangian_values[key] = np.hstack(lagrangian_values[key])

    # These two are computed but not used further in this function.
    input_values = np.hstack(raw_layer_values_list[0])
    layers_values = [
        np.hstack(layer_samples) for layer_samples in raw_layer_values_list
    ][1:-2]  # drop variance and inputs

    for row_index, com_row in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com_row)), com_row)
        plt.xlabel("time")
        plt.ylabel(f"COM{row_index}")

        plt.savefig(f"{aug_plot_dir}/COM{row_index}.jpg")
        plt.close()
# Example #6
# 0
def visualize_policy_and_collect_COM(seed, run_num, policy_env,
                                     policy_num_timesteps, policy_seed,
                                     policy_run_num):
    """Record a 3000-step rollout of a saved policy and plot ``COM`` rows.

    Loads the PPO2 model for the run identified by the ``policy_*``
    arguments, rolls it out under a video recorder, and saves one
    ``COM{i}.jpg`` plot per row of the stacked ``sk.C`` samples into a
    freshly recreated plot directory. Returns nothing.

    Args:
        seed: Seed for the env and the loaded model; also passed as
            ``eval_seed`` when building the plot directory.
        run_num: Passed as ``eval_run_num`` when building the plot directory.
        policy_env: Gym env id; overrides ``args.env``.
        policy_num_timesteps: Overrides ``args.num_timesteps``.
        policy_seed: Overrides ``args.seed``.
        policy_run_num: Overrides ``args.run_num``.
    """
    logger.log(sys.argv)
    parser = get_common_parser()
    args, _unused_cma_args = parser.parse_known_args()

    # The saved run is located through the parsed args, so override them.
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num

    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "pi_final"),
        header=None).values[0]

    def make_env():
        raw_env = gym.make(args.env)
        raw_env.env.disableViewer = False

        monitored = bench.Monitor(raw_env,
                                  logger.get_dir(),
                                  allow_early_resets=True)
        monitored.seed(seed)
        return monitored

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()

    plot_dir = get_plot_dir(policy_env=args.env,
                            policy_num_timesteps=policy_num_timesteps,
                            policy_run_num=policy_run_num,
                            policy_seed=policy_seed,
                            eval_seed=seed,
                            eval_run_num=run_num,
                            additional_note="")
    # Start from an empty plot directory.
    if os.path.exists(plot_dir):
        shutil.rmtree(plot_dir)
    os.makedirs(plot_dir)

    env = VecVideoRecorder(env,
                           plot_dir,
                           record_video_trigger=lambda x: x == 0,
                           video_length=3000,
                           name_prefix="3000000agent-{}".format(args.env))

    # Readers for the skeleton attributes sampled at every step; each sample
    # is stored as a column vector so they can be hstack-ed later.
    skeleton_readers = {
        "M": lambda: sk.M,
        "COM": lambda: sk.C,
        "Coriolis": lambda: sk.c,
        "q": lambda: sk.q,
        "dq": lambda: sk.dq,
    }
    lagrangian_values = {
        key: [read().reshape((-1, 1))]
        for key, read in skeleton_readers.items()
    }

    raw_layer_values_list = [[layer.reshape((-1, 1))]
                             for layer in model.give_neuron_values(obs)]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    for _ in range(3000):
        actions = model.step(obs)[0]
        obs, _rew, done, infos = env.step(actions)

        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        for layer_index, layer in enumerate(model.give_neuron_values(obs)):
            raw_layer_values_list[layer_index].append(layer.reshape((-1, 1)))

        for key in ("M", "q", "dq", "COM", "Coriolis"):
            lagrangian_values[key].append(
                skeleton_readers[key]().reshape((-1, 1)))

        # The Monitor wrapper adds an 'episode' entry when an episode ends.
        for info in infos:
            ep_info = info.get('episode')
            if ep_info is not None:
                ep_infos.append(ep_info)

        env.render()
        if done.any():
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # Stack the per-step column vectors into one matrix per quantity.
    for key in ("M", "COM", "Coriolis", "q", "dq"):
        lagrangian_values[key] = np.hstack(lagrangian_values[key])

    # These two are computed but not used further in this function.
    input_values = np.hstack(raw_layer_values_list[0])
    layers_values = [
        np.hstack(layer_samples) for layer_samples in raw_layer_values_list
    ][1:-2]  # drop variance and inputs

    for row_index, com_row in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com_row)), com_row)
        plt.xlabel("time")
        plt.ylabel(f"COM{row_index}")

        plt.savefig(f"{plot_dir}/COM{row_index}.jpg")
        plt.close()
# Example #7
# 0
def main():
    """Roll out the final saved policy and relate neurons to skeleton data.

    Loads the PPO2 model for the run described by the command-line
    arguments, steps the env 3000 times while recording skeleton attributes,
    neuron values and contact data, writes each layer's weights to a text
    file, and calls the plotting helpers on the collected data.
    """
    import sys

    logger.log(sys.argv)
    parser = get_common_parser()
    args, _unused_cma_args = parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(args)
    plot_dir_alg = get_plot_dir(args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(plot_dir_alg):
        os.makedirs(plot_dir_alg)

    final_params = pd.read_csv(
        get_full_param_traj_file_path(traj_params_dir_name, "pi_final"),
        header=None).values[0]

    def make_env():
        return bench.Monitor(gym.make(args.env),
                             logger.get_dir(),
                             allow_early_resets=True)

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # Loading the model also restores the value function.
    model = PPO2.load(f"{save_dir}/ppo2")
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton

    obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
    obs[:] = env.reset()

    # Readers for the skeleton attributes sampled at every step; each sample
    # is stored as a column vector so they can be hstack-ed later.
    skeleton_readers = {
        "M": lambda: sk.M,
        "COM": lambda: sk.C,
        "Coriolis": lambda: sk.c,
        "q": lambda: sk.q,
        "dq": lambda: sk.dq,
    }
    lagrangian_values = {
        key: [read().reshape((-1, 1))]
        for key, read in skeleton_readers.items()
    }

    contact_values = {}

    layer_values_list = [[layer.reshape((-1, 1))]
                         for layer in model.give_neuron_values(obs)]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    for _ in range(3000):
        actions = model.step(obs)[0]
        obs, _rew, done, infos = env.step(actions)

        # Count the steps taken before the first episode termination.
        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)
        for layer_index, layer in enumerate(neuron_values):
            layer_values_list[layer_index].append(layer.reshape((-1, 1)))

        fill_contacts_jac_dict(infos[0]["contacts"],
                               contact_dict=contact_values,
                               neuron_values=neuron_values)

        for key in ("M", "q", "dq", "COM", "Coriolis"):
            lagrangian_values[key].append(
                skeleton_readers[key]().reshape((-1, 1)))

        env.render()

        # The Monitor wrapper adds an 'episode' entry when an episode ends.
        for info in infos:
            ep_info = info.get('episode')
            if ep_info is not None:
                ep_infos.append(ep_info)

        if done.any():
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # Stack the per-step column vectors into one matrix per quantity.
    for key in ("M", "COM", "Coriolis", "q", "dq"):
        lagrangian_values[key] = np.hstack(lagrangian_values[key])

    # Only values of existing keys are replaced, so iterating is safe.
    for body_contact_dict in contact_values.values():
        for name in body_contact_dict:
            body_contact_dict[name] = np.hstack(body_contact_dict[name])

    layer_values_list = [
        np.hstack(layer_samples) for layer_samples in layer_values_list
    ][1:-2]  # drop variance

    out_dir = f"/home/panda-linux/PycharmProjects/low_dim_update_dart/low_dim_update_stable/neuron_vis/plots_{args.env}_{args.num_timesteps}"
    # Start from an empty output directory.
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    for layer_index, weights in enumerate(model.get_all_weight_values()):
        np.savetxt(f"{out_dir}/weights_layer_{layer_index}.txt", weights)

    # Plots are cut off at the number of steps before the first termination.
    PLOT_CUTOFF = steps_to_first_done
    plot_everything(lagrangian_values, layer_values_list, out_dir, PLOT_CUTOFF)
    scatter_the_linear_significant_ones(lagrangian_values,
                                        layer_values_list,
                                        threshold=0.6,
                                        out_dir=out_dir)
    scatter_the_nonlinear_significant_but_not_linear_ones(
        lagrangian_values,
        layer_values_list,
        linear_threshold=0.3,
        nonlinear_threshold=0.6,
        out_dir=out_dir)

    # NOTE: earlier revisions continued here with commented-out experiments
    # (CSV export of contact/neuron/lagrangian data, correlation tables and
    # one-/two-permutation linear regressions); none of it was executed.
# Example #8
# 0
def eval_trained_policy_and_collect_data(eval_seed, eval_run_num, policy_env, policy_num_timesteps, policy_seed, policy_run_num, additional_note):
    """
    Roll out a previously trained PPO2 policy and dump the collected data.

    The run directory of the trained policy is located from the common
    command-line arguments, overridden with the ``policy_*`` parameters. The
    final policy parameters ("pi_final") are loaded into the saved PPO2
    model, which is then stepped for 30000 environment steps. At every step
    the model's neuron values and the environment's flat Lagrangian arrays
    (one per entry of the module-level ``lagrangian_keys``) are recorded.

    Files written into the directory returned by ``get_data_dir`` (the
    directory is deleted first if it already exists):
      * ``lagrangian.pickle``   - dict: lagrangian key -> hstacked samples
      * ``input_values.npy``    - hstacked samples of the first neuron group
      * ``layer_values.npy``    - remaining groups, minus the last two
      * ``weights_layer_<i>.txt`` - one file per entry of
        ``model.get_all_weight_values()``

    :param eval_seed: seed for the evaluation env, the global RNGs and the
        loaded model
    :param eval_run_num: evaluation run number (only used for the data dir)
    :param policy_env: gym env id the policy was trained on
    :param policy_num_timesteps: timesteps the policy was trained for
    :param policy_seed: seed the policy was trained with
    :param policy_run_num: run number of the policy training run
    :param additional_note: free-form note (only used for the data dir)
    """
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, _ = common_arg_parser.parse_known_args()
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num
    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        env_out.seed(eval_seed)
        return env_out
    env = DummyVecEnv([make_env])
    # Handle on the innermost env (below the Monitor and one more wrapper);
    # it is the object that exposes get_lagrangian_flat_array().
    running_env = env.envs[0].env.env

    set_global_seeds(eval_seed)
    running_env.seed(eval_seed)

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=eval_seed)
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    # NOTE: running_env is deliberately NOT re-fetched through `env.venv`
    # here. VecNormalize only wraps the DummyVecEnv, so the handle taken above
    # is still the same object, and `env.venv` does not exist when
    # args.normalize is false (that access used to raise AttributeError).

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()

    # env = VecVideoRecorder(env, "./",
    #                            record_video_trigger=lambda x: x == 0, video_length=3000,
    #                            name_prefix="3000000agent-{}".format(args.env))

    # Seed every series with the sample taken right after the initial reset.
    lagrangian_values = {
        lagrangian_key: [running_env.get_lagrangian_flat_array(lagrangian_key)]
        for lagrangian_key in lagrangian_keys
    }

    neuron_values = model.give_neuron_values(obs)
    # One list of column vectors per neuron group; hstacked after the rollout.
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))] for neuron_value in neuron_values]

    ep_infos = []
    for _ in range(30000):
        actions = model.step(obs)[0]

        obs, rew, done, infos = env.step(actions)

        neuron_values = model.give_neuron_values(obs)
        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        # filling lagrangian values
        for lagrangian_key in lagrangian_keys:
            flat_array = running_env.get_lagrangian_flat_array(lagrangian_key)
            lagrangian_values[lagrangian_key].append(flat_array)

        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # `done` is an array with one flag per env; reduce it explicitly
        # instead of relying on the truth value of an array.
        if done.any():
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # Hstack into a big matrix
    for lagrangian_key in lagrangian_keys:
        lagrangian_values[lagrangian_key] = np.hstack(lagrangian_values[lagrangian_key])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])
    input_values = np.hstack(raw_layer_values_list[0])

    layers_values = [np.hstack(layer_list) for layer_list in raw_layer_values_list][1:-2]  # drop variance and inputs

    data_dir = get_data_dir(policy_env=args.env, policy_num_timesteps=policy_num_timesteps,
                            policy_run_num=policy_run_num, policy_seed=policy_seed,
                            eval_seed=eval_seed, eval_run_num=eval_run_num,
                            additional_note=additional_note)
    # Start from an empty data directory so stale files never mix with new ones.
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    lagrangian_values_fn = f"{data_dir}/lagrangian.pickle"

    with open(lagrangian_values_fn, 'wb') as handle:
        pickle.dump(lagrangian_values, handle, protocol=pickle.HIGHEST_PROTOCOL)

    input_values_fn = f"{data_dir}/input_values.npy"
    layers_values_fn = f"{data_dir}/layer_values.npy"

    np.save(input_values_fn, input_values)
    # NOTE(review): layers_values is a list of arrays; np.save turns it into a
    # single array, which presumably relies on all kept layers having the same
    # width - confirm for non-uniform network architectures.
    np.save(layers_values_fn, layers_values)

    all_weights = model.get_all_weight_values()

    for ind, weights in enumerate(all_weights):
        fname = f"{data_dir}/weights_layer_{ind}.txt"
        np.savetxt(fname, weights)
def run_experiment(augment_num_timesteps,
                   top_num_to_include_slice,
                   augment_seed,
                   augment_run_num,
                   network_size,
                   policy_env,
                   policy_num_timesteps,
                   policy_run_num,
                   policy_seed,
                   eval_seed,
                   eval_run_num,
                   learning_rate,
                   additional_note,
                   result_dir,
                   keys_to_include,
                   metric_param,
                   linear_top_vars_list=None,
                   linear_correlation_neuron_list=None,
                   visualize=False):
    """
    Train (or replay) a PPO2 agent on an "augmented input" DART environment.

    The augmented environment matching ``policy_env`` is registered under a
    unique id (the id embeds a timestamp) and receives the Lagrangian indices
    selected by ``get_wanted_lagrangians`` as a constructor kwarg.

    With ``visualize=False`` the run, parameter-trajectory, save and log
    directories are re-created via ``create_dir_remove``, a fresh PPO2 model
    is trained for ``augment_num_timesteps`` and then saved, together with the
    VecNormalize running averages. With ``visualize=True`` the previously
    saved model and its "pi_final" parameters are loaded and handed to
    ``run_model``; nothing is trained.

    :param augment_num_timesteps: total timesteps to train the augmented agent
    :param top_num_to_include_slice: slice selecting which top-ranked variables
        to include; its start/stop also appear in the experiment label
    :param augment_seed: seed for the global RNGs, the env and the model
    :param augment_run_num: run number of this augmented experiment
    :param network_size: width of each of the two hidden layers used for both
        the policy and the value network
    :param policy_env: env id of the original policy; selects the augmented env
    :param policy_num_timesteps: identifies the original policy run
    :param policy_run_num: identifies the original policy run
    :param policy_seed: identifies the original policy run
    :param eval_seed: identifies the evaluation run of the original policy
    :param eval_run_num: identifies the evaluation run of the original policy
    :param learning_rate: PPO2 learning rate (also part of the run path)
    :param additional_note: free-form note identifying the evaluation data
    :param result_dir: forwarded to ``get_experiment_path_for_this_run``
    :param keys_to_include: forwarded to ``get_wanted_lagrangians``
    :param metric_param: forwarded to ``read_linear_top_var``
    :param linear_top_vars_list: precomputed top variables; read from disk via
        ``read_linear_top_var`` when None
    :param linear_correlation_neuron_list: accepted but not used here
    :param visualize: replay a saved model instead of training
    :return: the log directory of this run
    :raises NotImplementedError: if ``policy_env`` has no augmented-input env
    """
    # Validate the environment up front so an unsupported id fails before any
    # other work is done.
    aug_entry_points = {
        "DartWalker2d-v1": 'gym.envs.dart:DartWalker2dEnv_aug_input',
        "DartHopper-v1": 'gym.envs.dart:DartHopperEnv_aug_input',
        "DartHalfCheetah-v1": 'gym.envs.dart:DartHalfCheetahEnv_aug_input',
        "DartSnake7Link-v1": 'gym.envs.dart:DartSnake7LinkEnv_aug_input',
    }
    try:
        entry_point = aug_entry_points[policy_env]
    except KeyError:
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # NotImplementedError is the exception to raise here.
        raise NotImplementedError(
            f"no augmented-input environment is known for policy_env={policy_env!r}"
        ) from None

    args = AttributeDict()

    args.normalize = True
    args.num_timesteps = augment_num_timesteps
    args.run_num = augment_run_num
    args.alg = "ppo2"
    args.seed = augment_seed

    logger.log(f"#######TRAIN: {args}")
    # non_linear_global_dict
    timestamp = get_time_stamp('%Y_%m_%d_%H_%M_%S')
    experiment_label = f"learning_rate_{learning_rate}timestamp_{timestamp}_augment_num_timesteps{augment_num_timesteps}" \
                       f"_top_num_to_include{top_num_to_include_slice.start}_{top_num_to_include_slice.stop}" \
                       f"_augment_seed{augment_seed}_augment_run_num{augment_run_num}_network_size{network_size}" \
                       f"_policy_num_timesteps{policy_num_timesteps}_policy_run_num{policy_run_num}_policy_seed{policy_seed}" \
                       f"_eval_seed{eval_seed}_eval_run_num{eval_run_num}_additional_note_{additional_note}"

    this_run_dir = get_experiment_path_for_this_run(
        entry_point,
        args.num_timesteps,
        args.run_num,
        args.seed,
        learning_rate=learning_rate,
        top_num_to_include=top_num_to_include_slice,
        result_dir=result_dir,
        network_size=network_size)
    full_param_traj_dir_path = get_full_params_dir(this_run_dir)
    log_dir = get_log_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    if visualize:
        # Replay mode: reuse the artifacts of an earlier training run.
        vis_dir = get_aug_plot_dir(this_run_dir) + "_vis"
        final_file = get_full_param_traj_file_path(full_param_traj_dir_path,
                                                   "pi_final")
        final_params = pd.read_csv(final_file, header=None).values[0]
    else:
        # Training mode: start from fresh output directories.
        create_dir_remove(this_run_dir)
        create_dir_remove(full_param_traj_dir_path)
        create_dir_remove(save_dir)
        create_dir_remove(log_dir)
        logger.configure(log_dir)

    # note this is only linear
    if linear_top_vars_list is None:

        linear_top_vars_list = read_linear_top_var(policy_env,
                                                   policy_num_timesteps,
                                                   policy_run_num,
                                                   policy_seed,
                                                   eval_seed,
                                                   eval_run_num,
                                                   additional_note,
                                                   metric_param=metric_param)

    lagrangian_inds_to_include = get_wanted_lagrangians(
        keys_to_include, linear_top_vars_list, top_num_to_include_slice)

    with open(f"{log_dir}/lagrangian_inds_to_include.json", 'w') as fp:
        json.dump(lagrangian_inds_to_include, fp)

    # The label contains a timestamp, so every call registers a distinct id.
    args.env = f'{experiment_label}_{entry_point}-v1'
    register(id=args.env,
             entry_point=entry_point,
             max_episode_steps=1000,
             kwargs={"lagrangian_inds_to_include": lagrangian_inds_to_include})

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.visualize = visualize
        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    walker_env = env.envs[0].env.env
    walker_env.disableViewer = not visualize

    if args.normalize:
        env = VecNormalize(env)
    policy = MlpPolicy

    set_global_seeds(args.seed)
    walker_env.seed(args.seed)

    num_dof = walker_env.robot_skeleton.ndofs
    show_M_matrix(num_dof, lagrangian_inds_to_include,
                  top_num_to_include_slice, log_dir)

    if visualize:
        model = PPO2.load(f"{save_dir}/ppo2", seed=augment_seed)
        model.set_pi_from_flat(final_params)
        if args.normalize:
            env.load_running_average(save_dir)

        # "vedio_dir" is the keyword spelling used at this call site; it is
        # kept as-is because run_model is defined elsewhere.
        run_model(model=model, env=env, vedio_dir=vis_dir)
    else:
        # extra run info I added for my purposes
        run_info = {
            "run_num": args.run_num,
            "env_id": args.env,
            "full_param_traj_dir_path": full_param_traj_dir_path
        }

        layers = [network_size, network_size]
        policy_kwargs = {"net_arch": [dict(vf=layers, pi=layers)]}
        model = PPO2(policy=policy,
                     env=env,
                     n_steps=4096,
                     nminibatches=64,
                     lam=0.95,
                     gamma=0.99,
                     noptepochs=10,
                     ent_coef=0.0,
                     learning_rate=learning_rate,
                     cliprange=0.2,
                     optimizer='adam',
                     policy_kwargs=policy_kwargs,
                     seed=args.seed)
        model.tell_run_info(run_info)
        model.learn(total_timesteps=args.num_timesteps, seed=args.seed)

        model.save(f"{save_dir}/ppo2")

        if args.normalize:
            env.save_running_average(save_dir)

    return log_dir