def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 if ARGS.Seed: seed = ARGS.Seed max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. 
# +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 # Plot Policy Output if ARGS.PlotPolicy or ARGS.TrueAction or ARGS.SaveData: if ARGS.TrueAction: action_name = "robot_act" action = np.array(agent.true_action_history) else: action_name = "agent_act" action = np.array(agent.action_history) if ARGS.SaveData: if height_field: terrain_name = "rough_" else: terrain_name = "flat_" np.save( results_path + "/" + "policy_out_" + terrain_name + action_name, action) print("SAVED DATA") ClearHeight_act = action[:, 0] BodyHeight_act = action[:, 1] Residuals_act = action[:, 2:] plt.plot(ClearHeight_act, label='Clearance Height Mod', color='black') plt.plot(BodyHeight_act, label='Body Height Mod', color='darkviolet') # FL plt.plot(Residuals_act[:, 0], label='Residual: FL (x)', color='limegreen') plt.plot(Residuals_act[:, 1], label='Residual: FL (y)', color='lime') plt.plot(Residuals_act[:, 2], label='Residual: FL (z)', color='green') # FR plt.plot(Residuals_act[:, 3], label='Residual: FR (x)', color='lightskyblue') plt.plot(Residuals_act[:, 4], label='Residual: FR (y)', color='dodgerblue') plt.plot(Residuals_act[:, 5], label='Residual: FR (z)', color='blue') # BL plt.plot(Residuals_act[:, 6], label='Residual: BL (x)', color='firebrick') plt.plot(Residuals_act[:, 7], label='Residual: BL (y)', color='crimson') plt.plot(Residuals_act[:, 8], label='Residual: BL (z)', color='red') # BR plt.plot(Residuals_act[:, 9], label='Residual: BR (x)', color='gold') plt.plot(Residuals_act[:, 10], label='Residual: BR (y)', color='orange') plt.plot(Residuals_act[:, 11], label='Residual: BR (z)', color='coral') plt.xlabel("Epoch Iteration") plt.ylabel("Action Value") plt.title("Policy Output") plt.legend() plt.show() env.close()
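# The scripts above and below read their switches from a module-level ARGS
# object. A minimal argparse sketch that would satisfy every flag referenced
# in these files (DebugRack, DebugPath, HeightField, NoContactSensing,
# DontRender, DontRandomize, GUI, Seed, AgentNum, NumberOfEpisodes,
# PlotPolicy, TrueAction, SaveData) is shown here; only the flag names come
# from the code, the short options, help strings, and defaults are
# assumptions.
import argparse

parser = argparse.ArgumentParser(description="Spot/Minitaur ARS scripts")
parser.add_argument("-dr", "--DebugRack", action="store_true",
                    help="suspend the robot on a rack for debugging")
parser.add_argument("-dp", "--DebugPath", action="store_true",
                    help="draw the foot path in the GUI")
parser.add_argument("-hf", "--HeightField", action="store_true",
                    help="use a rough-terrain height field")
parser.add_argument("-nc", "--NoContactSensing", action="store_true",
                    help="disable foot contact sensing")
parser.add_argument("-ndr", "--DontRender", action="store_true",
                    help="disable rendering")
parser.add_argument("-nrdm", "--DontRandomize", action="store_true",
                    help="disable environment randomization")
parser.add_argument("-g", "--GUI", action="store_true",
                    help="drive the agent from the slider GUI")
parser.add_argument("-s", "--Seed", type=int, help="random seed")
parser.add_argument("-a", "--AgentNum", type=int,
                    help="policy number to load, e.g. 149")
parser.add_argument("-nep", "--NumberOfEpisodes", type=int,
                    help="number of episodes for the survival test")
parser.add_argument("-pp", "--PlotPolicy", action="store_true",
                    help="plot the policy output after each episode")
parser.add_argument("-ta", "--TrueAction", action="store_true",
                    help="plot the robot's executed action instead of the agent's")
parser.add_argument("-sd", "--SaveData", action="store_true",
                    help="save the policy output to ../results")
ARGS = parser.parse_args()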
def main(): """ The main() function. """ # Hold mp pipes mp.freeze_support() print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 eval_freq = 1e1 save_model = True file_name = "mini_tg_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = MinitaurBulletEnv(render=False) # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) # TRAJECTORY GENERATOR movetype = "walk" # movetype = "trot" # movetype = "bound" # movetype = "pace" # movetype = "pronk" TG = TGPolicy(movetype=movetype, center_swing=0.0, amplitude_extension=0.6, amplitude_lift=0.3) TG_state_dim = len(TG.get_TG_state()) TG_action_dim = 5 # f_tg, alpha_tg, h_tg, Beta, Intensity state_dim = env.observation_space.shape[0] + TG_state_dim print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] + TG_action_dim print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) print("RECORDED MAX ACTION: {}".format(max_action)) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, TGP=TG) agent_num = 0 if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) # Evaluate untrained agent and init list for storage evaluations = [] env.reset(agent.desired_velocity, agent.desired_rate) episode_reward = 0 episode_timesteps = 0 episode_num = 0 # MULTIPROCESSING # Create mp pipes num_processes = policy.num_deltas processes = [] childPipes = [] parentPipes = [] # Store mp pipes for pr in range(num_processes): parentPipe, childPipe = Pipe() parentPipes.append(parentPipe) childPipes.append(childPipe) # Start multiprocessing for proc_num in range(num_processes): p = mp.Process(target=ParallelWorker, args=(childPipes[proc_num], env, state_dim)) p.start() processes.append(p) print("STARTED MINITAUR ARS") t = 0 while t < (int(max_timesteps)): # Maximum timesteps per rollout t += policy.episode_steps episode_timesteps += 1 episode_reward = agent.train_parallel(parentPipes) # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}, >400: {}". format(t, episode_num, policy.episode_steps, episode_reward, agent.successes)) # Reset environment evaluations.append(episode_reward) episode_reward = 0 episode_timesteps = 0 # Evaluate episode if (episode_num + 1) % eval_freq == 0: # evaluate_agent(agent, env_name, seed, np.save(results_path + "/" + str(file_name), evaluations) if save_model: agent.save(models_path + "/" + str(file_name) + str(episode_num)) # replay_buffer.save(t) episode_num += 1 # Close pipes and hence envs for parentPipe in parentPipes: parentPipe.send([_CLOSE, "pay2"]) for p in processes: p.join()
def main(): """ The main() function. """ # Hold mp pipes mp.freeze_support() print("STARTING SPOT TRAINING ENV") seed = 0 max_timesteps = 4e6 eval_freq = 1e1 save_model = True file_name = "spot_ars_" if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() g_u_i = GUI(env.spot.quadruped) spot = SpotModel() T_bf = spot.WorldToFoot bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot) agent_num = 0 if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) env.reset(agent.desired_velocity, agent.desired_rate) episode_reward = 0 episode_timesteps = 0 episode_num = 0 # Create mp pipes num_processes = policy.num_deltas processes = [] childPipes = [] parentPipes = [] # Store mp pipes for pr in range(num_processes): parentPipe, childPipe = Pipe() parentPipes.append(parentPipe) childPipes.append(childPipe) # Start multiprocessing # Start multiprocessing for proc_num in range(num_processes): p = mp.Process(target=ParallelWorker, args=(childPipes[proc_num], env, state_dim)) p.start() processes.append(p) print("STARTED SPOT TRAINING ENV") t = 0 while t < (int(max_timesteps)): # Maximum timesteps per rollout episode_reward, episode_timesteps = agent.train_parallel(parentPipes) t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print( "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}" .format(t + 1, episode_num, episode_timesteps, episode_reward, episode_reward / float(episode_timesteps))) # Evaluate episode if (episode_num + 1) % eval_freq == 0: if save_model: agent.save(models_path + "/" + str(file_name) + str(episode_num)) # replay_buffer.save(t) episode_num += 1 # Close pipes and hence envs for parentPipe in parentPipes: parentPipe.send([_CLOSE, "pay2"]) for p in processes: p.join()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy # Evaluate untrained agent and init list for storage evaluations = [] env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 env.close()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 9 still_going = True print("Loading and Saving") while still_going: if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent: {}".format(agent_num)) # Load Class agent.load(models_path + "/" + file_name + str(agent_num)) # Save np array agent.save(models_path + "/" + file_name + str(agent_num)) else: still_going = False agent_num += 10
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_episodes = 1000 if ARGS.NumberOfEpisodes: max_episodes = ARGS.NumberOfEpisodes file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim, episode_steps=np.inf) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False) use_agent = False agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum use_agent = True if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 50000 policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Used to create gaussian distribution of survival surv_dt = [] while episode_num < (int(max_episodes)): episode_reward, episode_timesteps = agent.deployTG() episode_num += 1 # Store dt and frequency for prob distribution surv_dt.append(episode_timesteps) print("Episode Num: {} Episode T: {} Reward: {}".format( episode_num, episode_timesteps, episode_reward)) env.close() print("---------------------------------------") # Store results if use_agent: # Store _agent agt = "agent" else: # Store _vanilla agt = "vanilla" with open( results_path + "/" + str(file_name) + agt + '_survival_' + str(max_episodes), 'wb') as filehandle: pickle.dump(surv_dt, filehandle)
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "mini_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = MinitaurBulletEnv(render=True) # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) print("RECORDED MAX ACTION: {}".format(max_action)) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env) agent_num = raw_input("Policy Number: ") if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 3000 policy = agent.policy # Evaluate untrained agent and init list for storage evaluations = [] env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): # Maximum timesteps per rollout t += policy.episode_steps episode_timesteps += 1 episode_reward = agent.deploy() # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, policy.episode_steps, episode_reward)) # Reset environment evaluations.append(episode_reward) episode_reward = 0 episode_timesteps = 0 episode_num += 1 env.close()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_episodes = 1000 if ARGS.NumberOfEpisodes: max_episodes = ARGS.NumberOfEpisodes if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim, episode_steps=np.inf) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False) use_agent = False agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum use_agent = True if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 50000 policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Used to create gaussian distribution of survival distance surv_pos = [] # Reset every 200 episodes (pb client doesn't like running for long) reset_ep = 200 while episode_num < (int(max_episodes)): episode_reward, episode_timesteps = agent.deployTG() # We only care about x/y pos travelled_pos = list(agent.returnPose()) # NOTE: FORMAT: X, Y, TIMESTEPS - # tells us if robobt was just stuck forever. didn't actually fall. travelled_pos[-1] = episode_timesteps episode_num += 1 # Store dt and frequency for prob distribution surv_pos.append(travelled_pos) print("Episode Num: {} Episode T: {} Reward: {}".format( episode_num, episode_timesteps, episode_reward)) print("Survival Pos: {}".format(surv_pos[-1])) # Reset every X episodes (pb client doesn't like running for long) if episode_num % reset_ep == 0: env.close() env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) agent.env = env env.close() print("---------------------------------------") # Store results if use_agent: # Store _agent agt = "agent_" + str(agent_num) else: # Store _vanilla agt = "vanilla" with open( results_path + "/" + str(file_name) + agt + '_survival_' + str(max_episodes), 'wb') as filehandle: pickle.dump(surv_pos, filehandle)
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 1e6 file_name = "mini_tg_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = MinitaurBulletEnv(render=True, on_rack=False) dt = env._time_step # TRAJECTORY GENERATOR movetype = "walk" # movetype = "trot" # movetype = "bound" # movetype = "pace" # movetype = "pronk" TG = TGPolicy(movetype=movetype, center_swing=0.0, amplitude_extension=0.2, amplitude_lift=0.4) TG_state_dim = len(TG.get_TG_state()) TG_action_dim = 5 # f_tg, Beta, alpha_tg, h_tg, intensity state_dim = env.observation_space.shape[0] + TG_state_dim print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] + TG_action_dim print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) print("RECORDED MAX ACTION: {}".format(max_action)) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, TGP=TG) agent_num = raw_input("Policy Number: ") if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 1000 policy = agent.policy # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Just to store correct action space action = env.action_space.sample() # Record extends for plot # LF_ext = [] # LB_ext = [] # RF_ext = [] # RB_ext = [] LF_tp = [] LB_tp = [] RF_tp = [] RB_tp = [] t = 0 while t < (int(max_timesteps)): action[:] = 0.0 # # Get Action from TG [no policies here] # action = TG.get_utg(action, alpha_tg, h_tg, intensity, # env.minitaur.num_motors) # LF_ext.append(action[env.minitaur.num_motors / 2]) # LB_ext.append(action[1 + env.minitaur.num_motors / 2]) # RF_ext.append(action[2 + env.minitaur.num_motors / 2]) # RB_ext.append(action[3 + env.minitaur.num_motors / 2]) # # Perform action # next_state, reward, done, _ = env.step(action) obs = agent.TGP.get_TG_state() # LF_tp.append(obs[0]) # LB_tp.append(obs[1]) # RF_tp.append(obs[2]) # RB_tp.append(obs[3]) # # Increment phase # TG.increment(dt, f_tg, Beta) # # time.sleep(1.0) # t += 1 # Maximum timesteps per rollout t += policy.episode_steps episode_timesteps += 1 episode_reward = agent.deployTG() # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, policy.episode_steps, episode_reward)) # Reset environment episode_reward = 0 episode_timesteps = 0 episode_num += 1 plt.plot(0) plt.plot(LF_tp, label="LF") plt.plot(LB_tp, label="LB") plt.plot(RF_tp, label="RF") plt.plot(RB_tp, label="RB") plt.xlabel("t") plt.ylabel("EXT") plt.title("Leg Extensions") plt.legend() plt.show() env.close()