gamma=0.99,
            minibatch_size=64,
            max_episodes=50000,
            max_episode_len=1000,
            # Exploration Strategies
            exploration_action_noise_type='ou_0.2',
            exploration_epsilon_greedy_type='none',
            # Save Summaries
            save_dir='../ROM_Experiment_results/LASAgentActorCritic/',
            experiment_runs='run3',
            # Save and Restore Actor-Critic Model
            restore_actor_model_flag=False,
            restore_critic_model_flag=False)

        # Step counter
        i = 1
        done = False
        reward_for_LAS = 0
        while not done:
            # LAS interacts with environment.
            actionLAS = LASAgent1.perceive_and_act(observation_For_LAS,
                                                   reward_for_LAS, done)
            # Observe the delayed consequence of the LASAgent's action
            observation_For_LAS, reward_for_LAS, done, info = envLAS.step(
                actionLAS)
            print("LAS Step: {}, reward: {}".format(i, reward_for_LAS))

            i += 1

        envLAS.destroy()
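
The agent above is configured with exploration_action_noise_type='ou_0.2', i.e. Ornstein-Uhlenbeck action noise. The agent's internal noise implementation is not shown here; the following is only a minimal sketch of a standard OU process, assuming (as an illustration) that the '0.2' refers to the noise scale sigma.

import numpy as np

class OUNoiseSketch:
    """Minimal Ornstein-Uhlenbeck noise process (illustrative sketch only)."""
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        # sigma=0.2 is assumed to correspond to the 'ou_0.2' setting above
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.state = np.copy(self.mu)

    def reset(self):
        self.state = np.copy(self.mu)

    def sample(self):
        # Mean-reverting random walk: x += theta*(mu - x) + sigma*N(0, 1)
        self.state += self.theta * (self.mu - self.state) \
                      + self.sigma * np.random.randn(len(self.state))
        return self.state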
Example #2
        reward = 0
        done = False
        start_time = time.time()

        try:
            for i in range(max_episodes):
                observation = env.reset()
                ep_reward = 0
                for j in range(max_episode_len):

                    if render_env:
                        env.render()

                    # Agent selects an action; exploration noise is added inside the agent
                    action = LASAgent.perceive_and_act(observation, reward,
                                                       done)

                    observation, reward, done, info = env.step(action[0])
                    ep_reward += reward
                    if done or j == (max_episode_len - 1):
                        print('| Reward: {:d} | Episode: {:d} '.format(
                            int(ep_reward), i))
                        episod_reward_memory.append(ep_reward)
                        plot_cumulative_reward(episod_reward_memory)
                        break
                    #time.sleep(0.5)
                print("Time elapsed:{}".format(time.time() - start_time))
        except KeyboardInterrupt:
            sess.close()
            env.destroy()
            print("Shut Down.")
"""
Created on Wed May  9 08:52:36 2018

@author: jack.lingheng.meng
"""

from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent

if __name__ == '__main__':

    # Instantiate environment object
    envLAS = LASEnv('127.0.0.1', 19997)

    observationForLAS = envLAS.reset()

    # Instantiate the LAS agent
    LASAgent1 = RandomLASAgent(envLAS.observation_space, envLAS.action_space)

    # Step counter
    i = 1
    done = False
    rewardLAS = 0
    while not done:

        actionLAS = LASAgent1.interact(observationForLAS, rewardLAS, done)
        observationForLAS, rewardLAS, done, info = envLAS.step(actionLAS)
        print("LAS Step: {}, reward: {}".format(i, rewardLAS))
        i += 1

    envLAS.destroy()
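
The RandomLASAgent implementation is imported but not shown here; the script only relies on it exposing an interact(observation, reward, done) method that returns an action. A hypothetical minimal agent with that interface, assuming a Gym-style action_space with a sample() method, might look like this:

class RandomLASAgentSketch:
    """Hypothetical minimal agent with the interact() interface used above."""
    def __init__(self, observation_space, action_space):
        self.observation_space = observation_space
        self.action_space = action_space

    def interact(self, observation, reward, done):
        # Ignore observation and reward; return a uniformly random action.
        return self.action_space.sample()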