Example #1
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    if ARGS.Seed:
        seed = ARGS.Seed

    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    if ARGS.DontRandomize:
        env_randomizer = None
    else:
        env_randomizer = SpotEnvRandomizer()

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts,
                        env_randomizer=env_randomizer)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = np.inf
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        episode_reward, episode_timesteps = agent.deployTG()

        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, episode_timesteps, episode_reward))
        episode_num += 1

        # Plot Policy Output
        if ARGS.PlotPolicy or ARGS.TrueAction or ARGS.SaveData:
            if ARGS.TrueAction:
                action_name = "robot_act"
                action = np.array(agent.true_action_history)
            else:
                action_name = "agent_act"
                action = np.array(agent.action_history)

            if ARGS.SaveData:
                if height_field:
                    terrain_name = "rough_"
                else:
                    terrain_name = "flat_"
                np.save(
                    results_path + "/" + "policy_out_" + terrain_name +
                    action_name, action)

                print("SAVED DATA")

            ClearHeight_act = action[:, 0]
            BodyHeight_act = action[:, 1]
            Residuals_act = action[:, 2:]

            plt.plot(ClearHeight_act,
                     label='Clearance Height Mod',
                     color='black')
            plt.plot(BodyHeight_act,
                     label='Body Height Mod',
                     color='darkviolet')

            # FL
            plt.plot(Residuals_act[:, 0],
                     label='Residual: FL (x)',
                     color='limegreen')
            plt.plot(Residuals_act[:, 1],
                     label='Residual: FL (y)',
                     color='lime')
            plt.plot(Residuals_act[:, 2],
                     label='Residual: FL (z)',
                     color='green')

            # FR
            plt.plot(Residuals_act[:, 3],
                     label='Residual: FR (x)',
                     color='lightskyblue')
            plt.plot(Residuals_act[:, 4],
                     label='Residual: FR (y)',
                     color='dodgerblue')
            plt.plot(Residuals_act[:, 5],
                     label='Residual: FR (z)',
                     color='blue')

            # BL
            plt.plot(Residuals_act[:, 6],
                     label='Residual: BL (x)',
                     color='firebrick')
            plt.plot(Residuals_act[:, 7],
                     label='Residual: BL (y)',
                     color='crimson')
            plt.plot(Residuals_act[:, 8],
                     label='Residual: BL (z)',
                     color='red')

            # BR
            plt.plot(Residuals_act[:, 9],
                     label='Residual: BR (x)',
                     color='gold')
            plt.plot(Residuals_act[:, 10],
                     label='Residual: BR (y)',
                     color='orange')
            plt.plot(Residuals_act[:, 11],
                     label='Residual: BR (z)',
                     color='coral')

            plt.xlabel("Epoch Iteration")
            plt.ylabel("Action Value")
            plt.title("Policy Output")
            plt.legend()
            plt.show()

    env.close()
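The ARGS object referenced throughout these examples is assumed to be a module-level argparse namespace; a minimal sketch of a parser consistent with the flags used above (flag names are taken from the script, all other parser details are assumptions):

import argparse

# Hypothetical parser matching the flags referenced in Example #1;
# the actual script may define these arguments differently.
parser = argparse.ArgumentParser(description="Spot ARS evaluation options")
parser.add_argument("--Seed", type=int, help="Random seed override")
parser.add_argument("--AgentNum", type=int, help="Policy number to load")
for flag in ("DebugRack", "DebugPath", "HeightField", "NoContactSensing",
             "DontRender", "DontRandomize", "GUI", "PlotPolicy",
             "TrueAction", "SaveData"):
    parser.add_argument("--" + flag, action="store_true")
ARGS = parser.parse_args()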
Example #2
def main():
    """ The main() function. """

    # Hold mp pipes
    mp.freeze_support()

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    eval_freq = 1e1
    save_model = True
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=False)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.6,
                  amplitude_lift=0.3)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, alpha_tg, h_tg, Beta, Intensity
    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    agent_num = 0
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset(agent.desired_velocity, agent.desired_rate)
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    # MULTIPROCESSING

    # Create mp pipes
    num_processes = policy.num_deltas
    processes = []
    childPipes = []
    parentPipes = []

    # Store mp pipes
    for pr in range(num_processes):
        parentPipe, childPipe = Pipe()
        parentPipes.append(parentPipe)
        childPipes.append(childPipe)

    # Start multiprocessing
    for proc_num in range(num_processes):
        p = mp.Process(target=ParallelWorker,
                       args=(childPipes[proc_num], env, state_dim))
        p.start()
        processes.append(p)

    print("STARTED MINITAUR ARS")

    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.train_parallel(parentPipes)
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}, >400: {}".
              format(t, episode_num, policy.episode_steps, episode_reward,
                     agent.successes))
        # Reset environment
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0

        # Evaluate episode
        if (episode_num + 1) % eval_freq == 0:
            # evaluate_agent(agent, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                agent.save(models_path + "/" + str(file_name) +
                           str(episode_num))
                # replay_buffer.save(t)

        episode_num += 1

    # Close pipes and hence envs
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])

    for p in processes:
        p.join()
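ParallelWorker and _CLOSE come from the repo's ARS library and are not shown here. As a rough, self-contained sketch of the Pipe shutdown pattern implied by parentPipe.send([_CLOSE, "pay2"]) and p.join() above (an illustration of the pattern only, not the repo's actual worker):

import multiprocessing as mp
from multiprocessing import Pipe

_CLOSE = "close"  # stand-in for the sentinel the ARS library defines

def pipe_worker(child_pipe):
    # Echo payloads back until the close sentinel arrives, then exit.
    while True:
        message, payload = child_pipe.recv()
        if message == _CLOSE:
            child_pipe.close()
            break
        child_pipe.send(payload)

if __name__ == "__main__":
    parent_pipe, child_pipe = Pipe()
    p = mp.Process(target=pipe_worker, args=(child_pipe,))
    p.start()
    parent_pipe.send(["rollout", 42])
    print(parent_pipe.recv())          # -> 42
    parent_pipe.send([_CLOSE, "pay2"])
    p.join()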
Example #3
def main():
    """ The main() function. """
    # Hold mp pipes
    mp.freeze_support()

    print("STARTING SPOT TRAINING ENV")
    seed = 0
    max_timesteps = 4e6
    eval_freq = 1e1
    save_model = True
    file_name = "spot_ars_"

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    g_u_i = GUI(env.spot.quadruped)

    spot = SpotModel()
    T_bf = spot.WorldToFoot

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot)
    agent_num = 0
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))

    env.reset(agent.desired_velocity, agent.desired_rate)

    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    # Create mp pipes
    num_processes = policy.num_deltas
    processes = []
    childPipes = []
    parentPipes = []

    # Store mp pipes
    for pr in range(num_processes):
        parentPipe, childPipe = Pipe()
        parentPipes.append(parentPipe)
        childPipes.append(childPipe)

    # Start multiprocessing
    for proc_num in range(num_processes):
        p = mp.Process(target=ParallelWorker,
                       args=(childPipes[proc_num], env, state_dim))
        p.start()
        processes.append(p)

    print("STARTED SPOT TRAINING ENV")
    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout

        episode_reward, episode_timesteps = agent.train_parallel(parentPipes)
        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print(
            "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}"
            .format(t + 1, episode_num, episode_timesteps, episode_reward,
                    episode_reward / float(episode_timesteps)))

        # Evaluate episode
        if (episode_num + 1) % eval_freq == 0:
            if save_model:
                agent.save(models_path + "/" + str(file_name) +
                           str(episode_num))
                # replay_buffer.save(t)

        episode_num += 1

    # Close pipes and hence envs
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])

    for p in processes:
        p.join()
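The exists-check-then-makedirs pattern repeated in every example can be collapsed with os.makedirs's exist_ok flag (Python 3.2+); a compact equivalent sketch:

import os

my_path = os.path.abspath(os.path.dirname(__file__))
results_path = os.path.join(my_path, "../results")
models_path = os.path.join(my_path, "../models")

# exist_ok=True creates the directories only if they are missing,
# without a separate os.path.exists() check.
os.makedirs(results_path, exist_ok=True)
os.makedirs(models_path, exist_ok=True)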
Example #4
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = np.inf
        policy = agent.policy

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        episode_reward, episode_timesteps = agent.deployTG()

        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, episode_timesteps, episode_reward))
        episode_num += 1

    env.close()
Example #5
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 9

    still_going = True

    print("Loading and Saving")

    while still_going:
        if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                          "_policy"):
            print("Loading Existing agent: {}".format(agent_num))
            # Load Class
            agent.load(models_path + "/" + file_name + str(agent_num))
            # Save np array
            agent.save(models_path + "/" + file_name + str(agent_num))
        else:
            still_going = False

        agent_num += 10
Example #6
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_episodes = 1000
    if ARGS.NumberOfEpisodes:
        max_episodes = ARGS.NumberOfEpisodes
    file_name = "spot_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim, episode_steps=np.inf)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False)
    use_agent = False
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
        use_agent = True
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 50000
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Used to create gaussian distribution of survival
    surv_dt = []

    while episode_num < (int(max_episodes)):

        episode_reward, episode_timesteps = agent.deployTG()
        episode_num += 1

        # Store dt and frequency for prob distribution
        surv_dt.append(episode_timesteps)

        print("Episode Num: {} Episode T: {} Reward: {}".format(
            episode_num, episode_timesteps, episode_reward))

    env.close()
    print("---------------------------------------")

    # Store results
    if use_agent:
        # Store _agent
        agt = "agent"
    else:
        # Store _vanilla
        agt = "vanilla"

    with open(
            results_path + "/" + str(file_name) + agt + '_survival_' +
            str(max_episodes), 'wb') as filehandle:
        pickle.dump(surv_dt, filehandle)
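The pickled survival list can be read back for analysis with pickle.load; a hypothetical read-back sketch, assuming the defaults above (file_name = "spot_ars_", agt = "vanilla", max_episodes = 1000):

import pickle

# File name follows the script's pattern:
# file_name + agt + '_survival_' + str(max_episodes)
with open("../results/spot_ars_vanilla_survival_1000", "rb") as filehandle:
    surv_dt = pickle.load(filehandle)

print("Episodes recorded: {}".format(len(surv_dt)))
print("Mean survival timesteps: {}".format(sum(surv_dt) / float(len(surv_dt))))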
Example #7
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "mini_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env)
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 3000
        policy = agent.policy

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.deploy()
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))
        # Reset environment
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    env.close()
Example #8
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_episodes = 1000
    if ARGS.NumberOfEpisodes:
        max_episodes = ARGS.NumberOfEpisodes
    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRandomize:
        env_randomizer = None
    else:
        env_randomizer = SpotEnvRandomizer()

    file_name = "spot_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False,
                        contacts=contacts,
                        env_randomizer=env_randomizer)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim, episode_steps=np.inf)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False)
    use_agent = False
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
        use_agent = True
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 50000
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Used to create gaussian distribution of survival distance
    surv_pos = []

    # Reset every 200 episodes (pb client doesn't like running for long)
    reset_ep = 200

    while episode_num < (int(max_episodes)):

        episode_reward, episode_timesteps = agent.deployTG()
        # We only care about x/y pos
        travelled_pos = list(agent.returnPose())
        # NOTE: format is [X, Y, TIMESTEPS] -
        # the timestep count tells us whether the robot was just stuck
        # forever rather than actually falling.
        travelled_pos[-1] = episode_timesteps
        episode_num += 1

        # Store dt and frequency for prob distribution
        surv_pos.append(travelled_pos)

        print("Episode Num: {} Episode T: {} Reward: {}".format(
            episode_num, episode_timesteps, episode_reward))
        print("Survival Pos: {}".format(surv_pos[-1]))

        # Reset every X episodes (pb client doesn't like running for long)
        if episode_num % reset_ep == 0:
            env.close()
            env = spotBezierEnv(render=False,
                                on_rack=False,
                                height_field=height_field,
                                draw_foot_path=False,
                                contacts=contacts,
                                env_randomizer=env_randomizer)

            # Set seeds
            env.seed(seed)
            agent.env = env

    env.close()
    print("---------------------------------------")

    # Store results
    if use_agent:
        # Store _agent
        agt = "agent_" + str(agent_num)
    else:
        # Store _vanilla
        agt = "vanilla"

    with open(
            results_path + "/" + str(file_name) + agt + '_survival_' +
            str(max_episodes), 'wb') as filehandle:
        pickle.dump(surv_pos, filehandle)
Example #9
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 1e6
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True, on_rack=False)

    dt = env._time_step

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.2,
                  amplitude_lift=0.4)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, Beta, alpha_tg, h_tg, intensity
    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 1000
        policy = agent.policy

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Just to store correct action space
    action = env.action_space.sample()

    # Record extends for plot
    # LF_ext = []
    # LB_ext = []
    # RF_ext = []
    # RB_ext = []

    LF_tp = []
    LB_tp = []
    RF_tp = []
    RB_tp = []

    t = 0
    while t < (int(max_timesteps)):
        action[:] = 0.0

        # # Get Action from TG [no policies here]
        # action = TG.get_utg(action, alpha_tg, h_tg, intensity,
        #                     env.minitaur.num_motors)

        # LF_ext.append(action[env.minitaur.num_motors / 2])
        # LB_ext.append(action[1 + env.minitaur.num_motors / 2])
        # RF_ext.append(action[2 + env.minitaur.num_motors / 2])
        # RB_ext.append(action[3 + env.minitaur.num_motors / 2])
        # # Perform action
        # next_state, reward, done, _ = env.step(action)

        obs = agent.TGP.get_TG_state()
        # Record each leg's TG phase so the plot below has data
        LF_tp.append(obs[0])
        LB_tp.append(obs[1])
        RF_tp.append(obs[2])
        RB_tp.append(obs[3])

        # # Increment phase
        # TG.increment(dt, f_tg, Beta)

        # # time.sleep(1.0)

        # t += 1

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.deployTG()
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))
        # Reset environment
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    plt.plot(LF_tp, label="LF")
    plt.plot(LB_tp, label="LB")
    plt.plot(RF_tp, label="RF")
    plt.plot(RB_tp, label="RB")
    plt.xlabel("t")
    plt.ylabel("EXT")
    plt.title("Leg Extensions")
    plt.legend()
    plt.show()

    env.close()