# ROM experiment setup: build the LAS virtual environment and the
# configuration values for a single learning agent.
import logging                      # fix: logging was used below but never imported
from datetime import datetime       # fix: datetime was used below but never imported

from Environment.LASEnv import LASEnv
from LASAgent.InternalEnvOfAgent import InternalEnvOfAgent
from LASAgent.InternalEnvOfCommunity import InternalEnvOfCommunity

# Logging: one timestamped log file per experiment run.
logging.basicConfig(
    filename='../ROM_Experiment_results/ROM_experiment_'
             + datetime.now().strftime("%Y%m%d-%H%M%S") + '.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s: %(message)s')

#######################################################################
#                Instantiate LAS virtual environment                  #
#######################################################################
# Connect to the simulator's remote API on localhost and reset once to
# obtain the initial observation.
envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy')
observation = envLAS.reset()

#######################################################################
#                       Instantiate LAS-Agent                         #
#######################################################################
# Note: 1. Set load_pretrained_agent_flag to "True" only when you have
#          and want to load a pretrained agent.
#       2. Keep observation unchanged if using a pretrained agent.
agent_name = 'LAS_Single_Agent'
observation_space = envLAS.observation_space
action_space = envLAS.action_space
# Fix: the original read "observation_space_name = [], action_space_name = []"
# — the stray comma made Python treat ([], action_space_name) as an unpacking
# target for [], which raises ValueError at runtime.
observation_space_name = []
action_space_name = []
x_order_MDP = 5
x_order_MDP_observation_type = 'concatenate_observation'
occupancy_reward_type = 'IR_distance'
restore_critic_model_flag=False) # Learning records episod_reward_memory = deque(maxlen=10000) # Train parameters max_episodes = 50000 max_episode_len = 1000 render_env = False reward = 0 done = False start_time = time.time() try: for i in range(max_episodes): observation = env.reset() ep_reward = 0 for j in range(max_episode_len): if render_env == True: env.render() # Added exploration noise action = LASAgent.perceive_and_act(observation, reward, done) observation, reward, done, info = env.step(action[0]) ep_reward += reward if done or j == (max_episode_len - 1): print('| Reward: {:d} | Episode: {:d} '.format( int(ep_reward), i))
import numpy as np import time from Environment.LASEnv import LASEnv from LASAgent.RandomLASAgent import RandomLASAgent from LASAgent.LASAgent_Actor_Critic import LASAgent_Actor_Critic from Environment.VisitorEnv import VisitorEnv from VisitorAgent.RedLightExcitedVisitorAgent import RedLightExcitedVisitorAgent if __name__ == '__main__': with tf.Session() as sess: # Instantiate LAS environment object envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy') observation_For_LAS = envLAS.reset() # Iinstantiate LAS-agent # LASAgent1 = RandomLASAgent(envLAS) LASAgent1 = LASAgent_Actor_Critic( sess, envLAS, actor_lr=0.0001, actor_tau=0.001, critic_lr=0.0001, critic_tau=0.001, gamma=0.99, minibatch_size=64, max_episodes=50000, max_episode_len=1000, # Exploration Strategies exploration_action_noise_type='ou_0.2',
# Run two agents back-to-back in the LAS environment: an extrinsically
# motivated agent, then a random-action baseline, 10000 steps each.
# NOTE(review): this fragment uses `tf`, `K`, `LASEnv`,
# `ExtrinsicallyMotivatedLASAgent` and `RandomLASAgent` without visible
# imports — confirm they are imported earlier in the full file.
sess = tf.Session()
K.set_session(sess)

# Instantiate LASEnv (remote API server on localhost).
envLAS = LASEnv('127.0.0.1', 19997)

# Instantiate Extrinsically Motivated LAS-agent
Ext_Mot_LASAgent = ExtrinsicallyMotivatedLASAgent(envLAS,
                                                  sess,
                                                  learnFromScratch=True)
# Instantiate Random Action LAS-agent
Random_LASAgent = RandomLASAgent(envLAS)


def _run_agent(agent, label, steps, step_counter):
    """Reset the env and drive `agent` for `steps` steps, printing rewards.

    Returns the updated global step counter so consecutive runs keep a
    continuous step numbering, matching the original script's output.
    """
    # envLAS.reset() returns a full (obs, reward, done, info) tuple here —
    # that is this project's API (see step_LAS below).
    observationForLAS, rewardLAS, done, info = envLAS.reset()
    for _ in range(steps):
        actionLAS = agent.perceive_and_act(observationForLAS, rewardLAS, done)
        observationForLAS, rewardLAS, done, info = envLAS.step_LAS(actionLAS)
        print("{} Step: {}, reward: {}".format(label, step_counter, rewardLAS))
        step_counter += 1
    return step_counter


# Global step counter shared by both runs (continuous across agents, as
# in the original duplicated loops).
i = 1
i = _run_agent(Ext_Mot_LASAgent, "Ext_Mot_LASAgent", 10000, i)
i = _run_agent(Random_LASAgent, "Random_LASAgent", 10000, i)
# -*- coding: utf-8 -*-
"""
Created on Wed May 9 08:52:36 2018

@author: jack.lingheng.meng
"""
from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent

if __name__ == '__main__':
    # Connect to the simulated LAS and grab the initial observation.
    environment = LASEnv('127.0.0.1', 19997)
    obs = environment.reset()

    # A random-action agent built from the env's observation/action spaces.
    random_agent = RandomLASAgent(environment.observation_space,
                                  environment.action_space)

    reward, finished = 0, False
    step = 1
    # Keep interacting until the environment reports the episode is over.
    while not finished:
        act = random_agent.interact(obs, reward, finished)
        obs, reward, finished, _info = environment.step(act)
        print("LAS Step: {}, reward: {}".format(step, reward))
        step += 1