gamma=0.99, minibatch_size=64, max_episodes=50000, max_episode_len=1000, # Exploration Strategies exploration_action_noise_type='ou_0.2', exploration_epsilon_greedy_type='none', # Save Summaries save_dir='../ROM_Experiment_results/LASAgentActorCritic/', experiment_runs='run3', # Save and Restore Actor-Critic Model restore_actor_model_flag=False, restore_critic_model_flag=False) # Step counter i = 1 done = False reward_for_LAS = 0 while not done: # LAS interacts with environment. actionLAS = LASAgent1.perceive_and_act(observation_For_LAS, reward_for_LAS, done) # delay the observing of consequence of LASAgent's action observation_For_LAS, reward_for_LAS, done, info = envLAS.step( actionLAS) print("LAS Step: {}, reward: {}".format(i, reward_for_LAS)) i += 1 envLAS.destroy()
# Episode training loop: for each episode, reset the environment and let the
# agent act for at most `max_episode_len` steps, then record and plot the
# episode's cumulative reward.
# NOTE(review): original indentation was lost; nesting below is
# reconstructed from syntax -- confirm against the upstream script.
reward = 0
done = False
start_time = time.time()
try:
    for i in range(max_episodes):
        observation = env.reset()
        ep_reward = 0

        for j in range(max_episode_len):
            if render_env:
                env.render()

            # Per the original comment, exploration noise is added on the
            # agent side. `reward` and `done` are the values produced by
            # the previous step (they carry over across episode resets).
            action = LASAgent.perceive_and_act(observation, reward, done)
            observation, reward, done, info = env.step(action[0])
            ep_reward += reward

            # The episode ends on a terminal signal or on the step budget.
            if done or j == (max_episode_len - 1):
                print('| Reward: {:d} | Episode: {:d} '.format(
                    int(ep_reward), i))
                episod_reward_memory.append(ep_reward)
                plot_cumulative_reward(episod_reward_memory)
                break
            # time.sleep(0.5)  # optional throttle, disabled in the original

    # NOTE(review): assumed to report total wall time once all episodes
    # finish; confirm it was not meant to print per episode.
    print("Time elapsed:{}".format(time.time() - start_time))
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop a long run early.
    print("Shut Down.")
finally:
    # Release the session and the environment on every exit path; before,
    # this happened only when the run was interrupted from the keyboard.
    sess.close()
    env.destroy()
"""
Created on Wed May 9 08:52:36 2018

@author: jack.lingheng.meng

Run a RandomLASAgent against a LASEnv instance, stepping until the
environment reports that it is done, and print the reward of every step.
"""
from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent

if __name__ == '__main__':
    # Instantiate environment object
    # (arguments are presumably host and port -- confirm against LASEnv).
    envLAS = LASEnv('127.0.0.1', 19997)
    try:
        observationForLAS = envLAS.reset()

        # Instantiate LAS-agent
        LASAgent1 = RandomLASAgent(envLAS.observation_space,
                                   envLAS.action_space)

        # Step counter
        i = 1
        done = False
        rewardLAS = 0
        while not done:
            # The agent receives the outcome of the previous step and
            # returns the next action to apply.
            actionLAS = LASAgent1.interact(observationForLAS, rewardLAS, done)
            observationForLAS, rewardLAS, done, info = envLAS.step(actionLAS)
            print("LAS Step: {}, reward: {}".format(i, rewardLAS))
            i += 1
    finally:
        # Tear the environment down on every exit path (normal completion,
        # Ctrl-C, or an exception raised while interacting).
        envLAS.destroy()