# Example #1
import logging
from datetime import datetime

from Environment.LASEnv import LASEnv
from LASAgent.InternalEnvOfAgent import InternalEnvOfAgent
from LASAgent.InternalEnvOfCommunity import InternalEnvOfCommunity

# Logging
# Route experiment logs to a timestamped file under ROM_Experiment_results;
# INFO and above are recorded with timestamp and severity.
log_filename = ('../ROM_Experiment_results/ROM_experiment_'
                + datetime.now().strftime("%Y%m%d-%H%M%S") + '.log')
logging.basicConfig(filename=log_filename,
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s: %(message)s')

#######################################################################
#                 Instatiate LAS virtual environment                  #
#######################################################################
# Instantiate LAS environment object
# Connect to the LAS virtual environment (remote API on localhost) and
# grab the initial observation.
vrep_host, vrep_port = '127.0.0.1', 19997
envLAS = LASEnv(vrep_host, vrep_port, reward_function_type='occupancy')
observation = envLAS.reset()

#######################################################################
#                          Instatiate LAS-Agent                       #
#######################################################################
# Note: 1. Set load_pretrained_agent_flag to "True" only when you have
#           and want to load pretrained agent.
#       2. Keep observation unchanged if using pretrained agent.
# Configuration for a single LAS agent over the environment's spaces.
agent_name = 'LAS_Single_Agent'
observation_space = envLAS.observation_space
action_space = envLAS.action_space
# BUG FIX: a trailing comma previously made this a one-element tuple
# `([],)` instead of an empty list (inconsistent with action_space_name).
observation_space_name = []
action_space_name = []
x_order_MDP = 5  # presumably the number of stacked past observations — confirm in agent code
x_order_MDP_observation_type = 'concatenate_observation'
occupancy_reward_type = 'IR_distance'
# Example #2
            restore_critic_model_flag=False)

        # Learning records: bounded buffer of recent episode returns.
        episod_reward_memory = deque(maxlen=10000)

        # Train parameters
        max_episodes = 50000     # number of training episodes
        max_episode_len = 1000   # step cap per episode
        render_env = False       # set True to render the environment each step
        reward = 0               # reward fed to the agent on the very first step
        done = False
        start_time = time.time()  # wall-clock start of training

        try:
            # One iteration per training episode.
            for i in range(max_episodes):
                observation = env.reset()
                ep_reward = 0   # accumulated reward over this episode
                for j in range(max_episode_len):

                    # NOTE(review): idiomatic form is `if render_env:`.
                    if render_env == True:
                        env.render()

                    # Added exploration noise
                    # Agent consumes the previous transition's reward/done
                    # and returns the next action.
                    action = LASAgent.perceive_and_act(observation, reward,
                                                       done)

                    observation, reward, done, info = env.step(action[0])
                    ep_reward += reward
                    # Report at episode end (terminal state or step cap).
                    if done or j == (max_episode_len - 1):
                        print('| Reward: {:d} | Episode: {:d} '.format(
                            int(ep_reward), i))
import numpy as np
import time

from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent
from LASAgent.LASAgent_Actor_Critic import LASAgent_Actor_Critic

from Environment.VisitorEnv import VisitorEnv
from VisitorAgent.RedLightExcitedVisitorAgent import RedLightExcitedVisitorAgent

if __name__ == '__main__':

    # NOTE(review): `tf` is used here but no tensorflow import is visible in
    # this file — confirm the import exists in the full source.
    with tf.Session() as sess:
        # Instantiate LAS environment object (remote API on localhost).
        envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy')
        observation_For_LAS = envLAS.reset()
        # Instantiate the learning LAS-agent; a random-action baseline is
        # kept as a commented-out alternative below.
        #        LASAgent1 = RandomLASAgent(envLAS)
        # Hyper-parameters are passed through to LASAgent_Actor_Critic;
        # see its definition for exact semantics of lr/tau values.
        LASAgent1 = LASAgent_Actor_Critic(
            sess,
            envLAS,
            actor_lr=0.0001,
            actor_tau=0.001,
            critic_lr=0.0001,
            critic_tau=0.001,
            gamma=0.99,           # discount factor
            minibatch_size=64,
            max_episodes=50000,
            max_episode_len=1000,
            # Exploration Strategies
            exploration_action_noise_type='ou_0.2',
    # NOTE(review): the code below re-creates a TF session and new agents —
    # it appears to be a separate script variant spliced into this file;
    # confirm the intended structure. `tf`, `K`, and
    # ExtrinsicallyMotivatedLASAgent have no visible imports here.
    sess = tf.Session()
    K.set_session(sess)  # presumably binds the TF session to the Keras backend — confirm
    
    # Instantiate LASEnv
    envLAS = LASEnv('127.0.0.1', 19997)
    
    # Instantiate Extrinsically Motivated LAS-agent
    Ext_Mot_LASAgent = ExtrinsicallyMotivatedLASAgent(envLAS, 
                                                      sess, 
                                                      learnFromScratch = True)
    # Instantiate Random Action LAS-agent
    Random_LASAgent = RandomLASAgent(envLAS)
    
    # Step counter
    i = 1
    # NOTE(review): this env's reset() returns a 4-tuple, unlike the
    # single-observation reset() used elsewhere in this file — confirm API.
    observationForLAS, rewardLAS, done, info = envLAS.reset()
    #while not done:
    # Fixed-length rollout with the extrinsically motivated agent.
    for temp in range(10000):

        actionLAS = Ext_Mot_LASAgent.perceive_and_act(observationForLAS, rewardLAS, done)
        observationForLAS, rewardLAS, done, info = envLAS.step_LAS(actionLAS)
        print("Ext_Mot_LASAgent Step: {}, reward: {}".format(i, rewardLAS))
        i += 1
    
    # Reset, then repeat with the random baseline agent. The step counter
    # `i` continues from the previous loop — confirm this is intended.
    observationForLAS, rewardLAS, done, info = envLAS.reset()
    for temp in range(10000):

        actionLAS = Random_LASAgent.perceive_and_act(observationForLAS, rewardLAS, done)
        observationForLAS, rewardLAS, done, info = envLAS.step_LAS(actionLAS)
        print("Random_LASAgent Step: {}, reward: {}".format(i, rewardLAS))
        i += 1
# -*- coding: utf-8 -*-
"""
Created on Wed May  9 08:52:36 2018

@author: jack.lingheng.meng
"""

from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent

if __name__ == '__main__':

    # Connect to the LAS virtual environment (remote API on localhost).
    envLAS = LASEnv('127.0.0.1', 19997)

    observationForLAS = envLAS.reset()

    # Random-action agent driven by the environment's spaces.
    LASAgent1 = RandomLASAgent(envLAS.observation_space, envLAS.action_space)

    # Interaction loop: step until the environment reports termination.
    step_count = 1
    done = False
    rewardLAS = 0
    while not done:
        actionLAS = LASAgent1.interact(observationForLAS, rewardLAS, done)
        observationForLAS, rewardLAS, done, info = envLAS.step(actionLAS)
        print("LAS Step: {}, reward: {}".format(step_count, rewardLAS))
        step_count += 1