Example #1
    def test_load_performance(self):
        self.start_tests(name='load-performance')

        environment = Environment.create(environment='CartPole-v1')

        agent = Agent.load(
            directory='test/data', filename='ppo-checkpoint', format='checkpoint',
            environment=environment
        )
        runner = Runner(agent=agent, environment=environment)
        runner.run(num_episodes=10, use_tqdm=False, evaluation=True)
        self.assertTrue(all(episode_reward == 500.0 for episode_reward in runner.episode_rewards))
        runner.close()
        agent.close()
        self.finished_test()

        agent = Agent.load(
            directory='test/data', filename='ppo-checkpoint', format='numpy',
            environment=environment
        )
        runner = Runner(agent=agent, environment=environment)
        runner.run(num_episodes=10, use_tqdm=False, evaluation=True)
        self.assertTrue(all(episode_reward == 500.0 for episode_reward in runner.episode_rewards))
        runner.close()
        agent.close()
        self.finished_test()

        agent = Agent.load(
            directory='test/data', filename='ppo-checkpoint', format='hdf5',
            environment=environment
        )
        runner = Runner(agent=agent, environment=environment)
        runner.run(num_episodes=10, use_tqdm=False, evaluation=True)
        self.assertTrue(all(episode_reward == 500.0 for episode_reward in runner.episode_rewards))
        runner.close()
        agent.close()
        self.finished_test()

        agent = tf.saved_model.load(export_dir='test/data/ppo-checkpoint')

        # 10 episodes
        for _ in range(10):
            states = environment.reset()
            terminal = False
            episode_reward = 0.0
            while not terminal:
                states = np.expand_dims(states, axis=0)
                auxiliaries = dict(mask=np.ones(shape=(1, 2), dtype=bool))
                actions = agent.act(states, auxiliaries, True)
                actions = actions.numpy().item()
                states, terminal, reward = environment.execute(actions=actions)
                episode_reward += reward
            self.assertEqual(episode_reward, 500.0)

        environment.close()
        self.finished_test()
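The SavedModel loop above repeats the same batching and unbatching bookkeeping at every step. Below is a small optional helper (the name saved_model_act and the num_actions parameter are purely illustrative, not part of Tensorforce) that wraps the exact calls shown above, assuming the same act(states, auxiliaries, deterministic) SavedModel signature and CartPole-v1's two discrete actions:

import numpy as np


def saved_model_act(agent, states, num_actions=2, deterministic=True):
    # Batch the single observation, as in the loop above
    states = np.expand_dims(states, axis=0)
    # All-true action mask for the discrete action space
    auxiliaries = dict(mask=np.ones(shape=(1, num_actions), dtype=bool))
    # SavedModel act signature: (states, auxiliaries, deterministic)
    actions = agent.act(states, auxiliaries, deterministic)
    # Unbatch the returned tensor to a plain Python int
    return actions.numpy().item()

With this helper, each step of the evaluation loop reduces to actions = saved_model_act(agent, states) followed by environment.execute(actions=actions).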
Example #2
    def test_dpg(self):
        self.start_tests(name='DPG')
        actions = dict(gaussian_action1=dict(type='float',
                                             shape=(1, 2),
                                             min_value=1.0,
                                             max_value=2.0),
                       gaussian_action2=dict(type='float',
                                             shape=(1, ),
                                             min_value=-2.0,
                                             max_value=1.0))
        agent, environment = self.prepare(
            actions=actions,
            agent='dpg',
            memory=100,
            batch_size=4,
            # TODO: no-RNN restriction can be removed
            network=dict(type='auto', size=8, depth=1, rnn=False),
            # TODO: cannot use RNN since value function takes states and actions
            critic=dict(type='auto', size=7, depth=1, rnn=False))

        self.execute(agent=agent, environment=environment)

        with TemporaryDirectory() as directory:
            agent.save(directory=directory, format='numpy')
            agent = Agent.load(directory=directory)
            states = environment.reset()
            agent.act(states=states)
            agent.close()
            environment.close()
Example #3
    def test_vpg(self):
        self.start_tests(name='VPG')
        agent, environment = self.prepare(agent='vpg',
                                          batch_size=2,
                                          network=dict(type='auto',
                                                       size=8,
                                                       depth=1,
                                                       rnn=2),
                                          baseline=dict(type='auto',
                                                        size=7,
                                                        depth=1,
                                                        rnn=1),
                                          baseline_optimizer=dict(
                                              optimizer='adam',
                                              learning_rate=1e-3))

        self.execute(agent=agent, environment=environment)

        with TemporaryDirectory() as directory:
            agent.save(directory=directory, format='numpy')
            agent = Agent.load(directory=directory)
            states = environment.reset()
            agent.act(states=states)
            agent.close()
            environment.close()
Example #4
def main():
    # OpenAI-Gym environment initialization
    environment = Environment.create(environment='benchmarks/configs/cartpole.json')

    # PPO agent initialization
    agent = Agent.create(
        agent='benchmarks/configs/ppo.json', environment=environment,
        # Option 1: Saver - save agent periodically every 10 updates
        # and keep the 5 most recent checkpoints
        saver=dict(directory='model-checkpoint', frequency=10, max_checkpoints=5),
    )

    # Runner initialization
    runner = Runner(agent=agent, environment=environment)

    # Training
    runner.run(num_episodes=100)
    runner.close()

    # Option 2: Explicit save
    # (formats 'numpy' and 'hdf5' store only the weights, 'checkpoint' stores the full
    # TensorFlow model; the saver argument specified above uses the 'checkpoint' format)
    agent.save(directory='model-numpy', format='numpy', append='episodes')

    # Close agent separately, since created separately
    agent.close()

    # Load agent TensorFlow checkpoint
    agent = Agent.load(directory='model-checkpoint', format='checkpoint', environment=environment)
    runner = Runner(agent=agent, environment=environment)
    runner.run(num_episodes=100, evaluation=True)
    runner.close()
    agent.close()

    # Load agent NumPy weights
    agent = Agent.load(directory='model-numpy', format='numpy', environment=environment)
    runner = Runner(agent=agent, environment=environment)
    runner.run(num_episodes=100, evaluation=True)
    runner.close()
    agent.close()

    # Close environment separately, since created separately
    environment.close()
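The comments above note that 'numpy' and 'hdf5' store only the weights while 'checkpoint' stores the full TensorFlow model. As a minimal sketch of the same save/load round trip using the 'hdf5' format instead (the directory name 'model-hdf5' is illustrative; the config files are the same ones used above):

from tensorforce import Agent, Environment, Runner

environment = Environment.create(environment='benchmarks/configs/cartpole.json')
agent = Agent.create(agent='benchmarks/configs/ppo.json', environment=environment)

# Train briefly, then save the weights explicitly in HDF5 format
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=100)
runner.close()
agent.save(directory='model-hdf5', format='hdf5')
agent.close()

# Reload: the weights come from the .hdf5 file, the remaining agent
# arguments from the agent specification saved alongside it
agent = Agent.load(directory='model-hdf5', format='hdf5', environment=environment)
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=100, evaluation=True)
runner.close()
agent.close()
environment.close()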
Example #5
    def create_agent(
        self,
        env,
        n_episodes,
        save_frequency,
        load=False,
    ):
        ########### WORK NEEDED ###########
        ### You need to tweak the Agent ###
        ###################################
        """
        Agent definition. Tweak the Agent's parameters to your convenience

        Use any agent from tensorforce and refer to the documentation for the available hyperparameters :
        -Vanilla Policy Gradient : https://tensorforce.readthedocs.io/en/latest/agents/vpg.html
        -Proximal Policy Optimization : https://tensorforce.readthedocs.io/en/latest/agents/ppo.html
        -Trust-Region Policy Optimization : https://tensorforce.readthedocs.io/en/latest/agents/trpo.html
        -Deterministic Policy Gradient : https://tensorforce.readthedocs.io/en/latest/agents/dpg.html
        -Deep Q-Network : https://tensorforce.readthedocs.io/en/latest/agents/dqn.html
        -Double DQN : https://tensorforce.readthedocs.io/en/latest/agents/double_dqn.html
        -Dueling DQN : https://tensorforce.readthedocs.io/en/latest/agents/dueling_dqn.html
        -Actor-Critic : https://tensorforce.readthedocs.io/en/latest/agents/ac.html
        -Advantage Actor-Critic : https://tensorforce.readthedocs.io/en/latest/agents/a2c.html

        For the network parameters :
        https://tensorforce.readthedocs.io/en/latest/modules/networks.html


        """
        ##### Agent definition ########
        if not load:
            agent = Agent.create(
                agent="ppo",
                batch_size=10,
                exploration=0.01,
                learning_rate=0.00001,
                likelihood_ratio_clipping=0.1,
                # etc...,
                saver=dict(
                    directory="data/checkpoints",
                    frequency=10,  # save checkpoint every 10 updates
                ),  # don't change this
                environment=env,
            )

        else:
            agent = Agent.load(directory="data/checkpoints")
        return agent
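The docstring above lists the other agent types that can be swapped in. As a hedged sketch (hyperparameter values are placeholders, not tuned; only the agent string and the memory/network arguments change, mirroring the DQN usage elsewhere in these examples), the same branch could build a DQN agent instead:

            agent = Agent.create(
                agent="dqn",
                environment=env,
                memory=10000,  # replay memory capacity
                batch_size=32,
                exploration=0.01,
                learning_rate=1e-4,
                network=dict(type="auto", size=64, depth=2),
                saver=dict(
                    directory="data/checkpoints",
                    frequency=10,  # save checkpoint every 10 updates
                ),
            )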
Example #6
    def initialize_agent(self):
        # Set up information about the boost pads now that the game is active and the info is available
        self.boost_pad_tracker.initialize_boosts(self.get_field_info())
        if MODEL is not None:
            max_time = 10
            frames_per_sec = 20
            max_timesteps = RLEnvironment.get_max_timesteps(max_time, frames_per_sec)
            self.env = Environment.create(
                environment=KickoffEnvironment,
                max_episode_timesteps=max_timesteps,
                max_time=max_time,
                message_throttle=20,
                frames_per_sec=frames_per_sec,
                input_exclude=[
                    InputOptions.BALL_POSITION_REL,
                    InputOptions.BALL_DIRECTION,
                    InputOptions.CAR_POSITION_REL,
                    InputOptions.CAR_VELOCITY_MAG,
                ],
                output_exclude=[
                    OutputOptions.BOOST,
                    OutputOptions.STEER,
                    OutputOptions.E_BRAKE,
                    OutputOptions.THROTTLE,
                    OutputOptions.ROLL,
                ]
            )

            directory = '../learning/training/{0}'.format(MODEL)
            filename = 'agent'
            agent = os.path.join(directory, os.path.splitext(filename)[0] + '.json')

            if not os.path.isfile(agent):
                logging_utils.log_warn(os.getcwd(), {})
                raise Exception("Model file doesn't exist")

            self.agent = Agent.load(
                directory=os.path.abspath(directory),
                environment=self.env,
                format='checkpoint',
            )
            self.env.reset()
Example #7
def main():
    agent_type = 'dqn'
    agent_dir = f'data/{agent_type}'
    agent_name = 'counting'
    model_format = 'tensorflow'
    num_episodes = 1000000
    debug = False

    environment = TFBlackjackEnvironment(CountDeck(),
                                         SimpleDealer(),
                                         Player(PassPlayerHandAgent(),
                                                ConstantBettingAgent()),
                                         debug=debug)

    agent = Agent.load(
        directory=agent_dir,
        name=agent_name,
        format=model_format,
        environment=environment,
    )

    for _ in range(num_episodes):
        if debug:
            print()

        states = environment.reset()
        terminal = False

        while not terminal:
            actions = agent.act(states=states, evaluation=True)

            if debug:
                print(f"ACTION TAKEN: {actions}")

            states, terminal, _ = environment.execute(actions=actions)
            environment.get_stats()

    environment.print_stats()
    agent.close()
    environment.close()
Example #8
    def test_dueling_dqn(self):
        self.start_tests(name='DuelingDQN')
        agent, environment = self.prepare(actions=dict(type='int',
                                                       shape=(2, ),
                                                       num_values=4),
                                          agent='dueling_dqn',
                                          memory=100,
                                          batch_size=4,
                                          network=dict(type='auto',
                                                       size=8,
                                                       depth=1,
                                                       rnn=2))

        self.execute(agent=agent, environment=environment)

        with TemporaryDirectory() as directory:
            agent.save(directory=directory, format='numpy')
            agent = Agent.load(directory=directory)
            states = environment.reset()
            agent.act(states=states)
            agent.close()
            environment.close()
Example #9
    def test_ac(self):
        self.start_tests(name='AC')
        # TODO: baseline horizon has to be equal to policy horizon
        agent, environment = self.prepare(agent='ac',
                                          batch_size=4,
                                          network=dict(type='auto',
                                                       size=8,
                                                       depth=1,
                                                       rnn=2),
                                          critic=dict(type='auto',
                                                      size=7,
                                                      depth=1,
                                                      rnn=2))

        self.execute(agent=agent, environment=environment)

        with TemporaryDirectory() as directory:
            agent.save(directory=directory, format='numpy')
            agent = Agent.load(directory=directory)
            states = environment.reset()
            agent.act(states=states)
            agent.close()
            environment.close()
Example #10
    def test_tensorforce(self):
        self.start_tests(name='Tensorforce')

        # Explicit, singleton state/action
        self.unittest(states=dict(type='float',
                                  shape=(),
                                  min_value=1.0,
                                  max_value=2.0),
                      actions=dict(type='int', shape=(), num_values=4),
                      agent='tensorforce',
                      **UnittestBase.agent)

        # Implicit
        agent, environment = self.prepare(**UnittestBase.agent)

        self.execute(agent=agent, environment=environment)

        with TemporaryDirectory() as directory:
            agent.save(directory=directory, format='numpy')
            agent = Agent.load(directory=directory)
            states = environment.reset()
            agent.act(states=states)
            agent.close()
            environment.close()
Example #11
    def test_config(self):
        # FEATURES.MD
        self.start_tests(name='config')

        with TemporaryDirectory() as directory:
            # save: before first timestep
            update = dict(unit='episodes', batch_size=1)
            saver = dict(directory=directory, frequency=1)
            agent, environment = self.prepare(update=update,
                                              saver=saver,
                                              config=dict(
                                                  eager_mode=False,
                                                  create_debug_assertions=True,
                                                  tf_log_level=20))
            weights0 = agent.model.policy.network.layers[1].weights.numpy()
            states = environment.reset()
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            updated = agent.observe(terminal=terminal, reward=reward)
            agent.close()
            self.finished_test()

            # load: from given directory
            agent = Agent.load(directory=directory, environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 0)
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                updated = agent.observe(terminal=terminal, reward=reward)
            self.assertTrue(updated)
            weights1 = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(not np.allclose(weights1, weights0))
            timesteps = agent.timesteps
            agent.close()
            self.finished_test()

            # load: from given directory
            agent = Agent.load(directory=directory, environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights1))
            self.assertEqual(agent.timesteps, timesteps)
            agent.close()
            environment.close()
            self.finished_test()

            # create, not load
            agent, environment = self.prepare(update=update,
                                              saver=saver,
                                              config=dict(
                                                  eager_mode=False,
                                                  create_debug_assertions=True,
                                                  tf_log_level=20))
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(not np.allclose(x, weights0))
            self.assertTrue(not np.allclose(x, weights1))
            self.assertEqual(agent.timesteps, 0)
            states = environment.reset()
            terminal = False
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                updated = agent.observe(terminal=terminal, reward=reward)
            self.assertTrue(updated)
            weights2 = agent.model.policy.network.layers[1].weights.numpy()
            agent.close()
            self.finished_test()

            # load: from given directory
            agent = Agent.load(directory=directory, environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights2))
            agent.close()
            environment.close()
            self.finished_test()

            files = set(os.listdir(path=directory))
            self.assertTrue(
                files == {
                    'agent.json', 'agent-0.data-00000-of-00001',
                    'agent-0.index', 'agent-1.data-00000-of-00001',
                    'agent-1.index', 'checkpoint'
                })

        self.finished_test()
Example #12
def main():
    agent_type = 'dqn'
    agent_dir = f'data/{agent_type}'
    agent_name = 'counting'
    model_format = 'tensorflow'
    tensorboard_dir = f'data/summaries/{agent_type}'
    tensorboard_labels = [
        'graph', 'entropy', 'kl-divergence', 'losses', 'rewards'
    ]
    tensorboard_freq = 20
    batch_size = 20
    memory = 10000
    num_episodes = 50000
    learning_rate = 3e-4
    exploration = 0.0
    summarizer = dict(
        directory=tensorboard_dir,
        labels=tensorboard_labels,
        frequency=tensorboard_freq,
    )
    should_load = True
    debug = False

    environment = TFBlackjackEnvironment(CountDeck(),
                                         SimpleDealer(),
                                         Player(PassPlayerHandAgent(),
                                                ConstantBettingAgent()),
                                         debug=debug)

    if should_load:
        agent = Agent.load(
            name=agent_name,
            directory=agent_dir,
            format=model_format,
            batch_size=batch_size,
            environment=environment,
            exploration=exploration,
            summarizer=summarizer,
            memory=memory,
            learning_rate=learning_rate,
        )
        print("Loading existing agent for training")
    else:
        agent = Agent.create(
            name=agent_name,
            agent=agent_type,
            environment=environment,
            batch_size=batch_size,
            exploration=exploration,
            summarizer=summarizer,
            memory=memory,
            learning_rate=learning_rate,
        )
        print("Creating new agent")

    # Train the agent on the number of episodes specified
    for _ in range(num_episodes):
        if debug:
            print()

        states = environment.reset()
        terminal = False

        while not terminal:
            # Episode timestep
            actions = agent.act(states=states)

            if debug:
                print(f"ACTION TAKEN: {actions}")

            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

    # Save agent after training is done; appends the number of episodes trained to the agent name
    agent.save(directory=agent_dir, append='episodes')

    agent.close()
    environment.close()
Example #13
    def __init__(self,
                 agent,
                 environment=None,
                 max_episode_timesteps=None,
                 num_parallel=None,
                 environments=None,
                 evaluation=False,
                 remote=None,
                 blocking=False,
                 host=None,
                 port=None):
        if environment is None and environments is None:
            if remote != 'socket-client':
                raise TensorforceError.required(
                    name='Runner', argument='environment or environments')
            if num_parallel is None:
                raise TensorforceError.required(
                    name='Runner',
                    argument='num_parallel',
                    condition='socket-client remote mode')
            environments = [None for _ in range(num_parallel)]

        elif environment is None:
            if environments is None:
                raise TensorforceError.required(
                    name='Runner', argument='environment or environments')
            if not util.is_iterable(x=environments):
                raise TensorforceError.type(name='Runner',
                                            argument='environments',
                                            value=environments)
            if len(environments) == 0:
                raise TensorforceError.value(name='Runner',
                                             argument='len(environments)',
                                             value=len(environments))
            if num_parallel is not None and num_parallel != len(environments):
                raise TensorforceError.value(name='Runner',
                                             argument='num_parallel',
                                             value=num_parallel,
                                             hint='!= len(environments)')
            num_parallel = len(environments)
            environments = list(environments)

        elif num_parallel is None:
            if environments is not None:
                raise TensorforceError.invalid(
                    name='Runner',
                    argument='environments',
                    condition='environment is specified')
            if evaluation:
                raise TensorforceError.invalid(name='Runner',
                                               argument='evaluation',
                                               condition='single environment')
            num_parallel = 1
            environments = [environment]

        else:
            if not isinstance(num_parallel, int):
                raise TensorforceError.value(name='Runner',
                                             argument='num_parallel',
                                             dtype=type(num_parallel))
            elif num_parallel < 2:
                raise TensorforceError.value(name='Runner',
                                             argument='num_parallel',
                                             value=num_parallel,
                                             hint='< 2')
            if environments is not None:
                raise TensorforceError.invalid(
                    name='Runner',
                    argument='environments',
                    condition='environment is specified')
            if isinstance(environment, Environment):
                raise TensorforceError.value(
                    name='Runner',
                    argument='environment',
                    value=environment,
                    condition='num_parallel',
                    hint=
                    'is Environment instance, but specification dict is required'
                )
            environments = [environment for _ in range(num_parallel)]

        if port is None or isinstance(port, int):
            if isinstance(host, str):
                port = [port + n for n in range(num_parallel)]
            else:
                port = [port for _ in range(num_parallel)]
        else:
            if len(port) != num_parallel:
                raise TensorforceError.value(name='Runner',
                                             argument='len(port)',
                                             value=len(port),
                                             hint='!= num_parallel')
        if host is None or isinstance(host, str):
            host = [host for _ in range(num_parallel)]
        else:
            if len(host) != num_parallel:
                raise TensorforceError.value(name='Runner',
                                             argument='len(host)',
                                             value=len(host),
                                             hint='!= num_parallel')

        self.environments = list()
        self.is_environment_external = isinstance(environments[0], Environment)
        environment = Environment.create(
            environment=environments[0],
            max_episode_timesteps=max_episode_timesteps,
            remote=remote,
            blocking=blocking,
            host=host[0],
            port=port[0])
        self.is_environment_remote = isinstance(environment, RemoteEnvironment)
        states = environment.states()
        actions = environment.actions()
        self.environments.append(environment)
        if remote is None and num_parallel > 1 and environment.is_vectorizable(
        ):
            self.num_vectorized = num_parallel
            environments = environments[:1]
            if evaluation:
                raise TensorforceError.invalid(
                    name='Runner',
                    argument='evaluation',
                    condition='vectorized environment')
        elif environment.num_actors() > 1:
            assert num_parallel == 1
            num_parallel = environment.num_actors()
            self.num_vectorized = environment.num_actors()
        else:
            self.num_vectorized = None

        for n, environment in enumerate(environments[1:], start=1):
            assert isinstance(environment,
                              Environment) == self.is_environment_external
            environment = Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                remote=remote,
                blocking=blocking,
                host=host[n],
                port=port[n])
            assert isinstance(environment,
                              RemoteEnvironment) == self.is_environment_remote
            assert util.is_equal(x=environment.states(), y=states)
            assert util.is_equal(x=environment.actions(), y=actions)
            self.environments.append(environment)

        self.evaluation = evaluation

        self.is_agent_external = isinstance(agent, Agent)
        if not self.is_agent_external and 'directory' in agent:
            self.agent = Agent.load(**agent,
                                    environment=environment,
                                    parallel_interactions=(
                                        num_parallel - int(self.evaluation)))
        elif num_parallel - int(self.evaluation) > 1:
            self.agent = Agent.create(
                agent=agent,
                environment=environment,
                parallel_interactions=(num_parallel - int(self.evaluation)))
        else:
            self.agent = Agent.create(agent=agent, environment=environment)
    pass

Example #14
# Test the agent with RandomAgent opponents

test_agents = []
for agent_id in range(3):
    test_agents.append(
        SimpleAgent(config["agent"](agent_id, config["game_type"])))

# Add TensorforceAgent
agent_id += 1
test_agents.append(
    TensorforceAgent(config["agent"](agent_id, config["game_type"])))
env.set_agents(test_agents)

test_agent = Agent.load(directory="C:\\Users\\ali_k\\Desktop\\my_model",
                        format='checkpoint')

wrapped_env = WrappedEnv(env, env.observation_space, env.action_space, True,
                         3000)
test_runner = Runner(agent=test_agent,
                     environment=wrapped_env,
                     max_episode_timesteps=2000)

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                      histogram_freq=1)

test_runner.run(num_episodes=100,
                evaluation=True,
                evaluation_callback=tensorboard_callback)
Example #15
    def test_explicit_extended(self):
        self.start_tests(name='explicit extended')

        # filename
        agent, environment = self.prepare()

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory, filename='test')
        agent.close()

        agent = Agent.load(directory=self.__class__.directory,
                           filename='test',
                           environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no timestep
        agent, environment = self.prepare()

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory, append_timestep=False)
        agent.close()

        agent = Agent.load(directory=self.__class__.directory,
                           environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #16
    def test_explicit(self):
        self.start_tests(name='explicit')

        # default
        agent, environment = self.prepare()

        states = environment.reset()
        agent.save(directory=self.__class__.directory)
        agent.close()

        agent = Agent.load(directory=self.__class__.directory,
                           environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # single then parallel and different episode length
        agent, environment = self.prepare(memory=50,
                                          update=dict(unit='episodes',
                                                      batch_size=1))

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory)
        agent.close()
        environment.close()

        agent, environment = self.prepare(update=dict(unit='episodes',
                                                      batch_size=1),
                                          max_episode_timesteps=7,
                                          parallel_interactions=2)

        agent.restore(directory=self.__class__.directory)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #17
from tensorforce import Agent, Environment
import matplotlib.pyplot as plt
import numpy as np
import math
import pickle
from tqdm import tqdm
import gym
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

environment = Environment.create(environment='gym',
                                 level='InvertedDoublePendulum-v2')

#polynomial regression
coach = Agent.load(directory='Double_Model', format='numpy')
internals = coach.initial_internals()
actions_record = []
theta_states = []

theta1_record = []
theta2_record = []

theta1_integral_record = []
theta2_integral_record = []

for k in range(20):
    states = environment.reset()
    terminal = False
    theta1_integral = 0
    theta2_integral = 0
Example #18
    def test_explicit(self):
        # FEATURES.MD
        self.start_tests(name='explicit')

        with TemporaryDirectory() as directory:
            policy = dict(
                network=dict(type='auto', size=8, depth=1, rnn=False))
            update = dict(unit='episodes', batch_size=1)
            # TODO: no
            agent, environment = self.prepare(
                policy=policy,
                memory=50,
                update=update,
                config=dict(eager_mode=False, create_debug_assertions=True))
            states = environment.reset()

            # save: default checkpoint format
            weights0 = agent.model.policy.network.layers[1].weights.numpy()
            agent.save(directory=directory)
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)
            self.assertEqual(agent.timesteps, 1)
            agent.close()
            self.finished_test()

            # load: only directory
            agent = Agent.load(directory=directory, environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights0).all())
            self.assertEqual(agent.timesteps, 0)
            self.finished_test()

            # one timestep
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

            # save: numpy format, append timesteps
            agent.save(directory=directory, format='numpy', append='timesteps')
            agent.close()
            self.finished_test()

            # load: numpy format and directory
            agent = Agent.load(directory=directory,
                               format='numpy',
                               environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights0).all())
            self.assertEqual(agent.timesteps, 1)
            self.finished_test()

            # one timestep
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

            # save: numpy format, append timesteps
            agent.save(directory=directory, format='numpy', append='timesteps')
            agent.close()
            self.finished_test()

            # load: numpy format and directory
            agent = Agent.load(directory=directory,
                               format='numpy',
                               environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights0).all())
            self.assertEqual(agent.timesteps, 2)
            self.finished_test()

            # one episode
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)

            # save: hdf5 format, filename, append episodes
            weights1 = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((weights1 != weights0).any())
            self.assertEqual(agent.episodes, 1)
            agent.save(directory=directory,
                       filename='agent2',
                       format='hdf5',
                       append='episodes')
            agent.close()
            self.finished_test()

            # env close
            environment.close()

            # differing agent config: update, parallel_interactions
            # TODO: episode length, others?
            environment = Environment.create(
                environment=self.environment_spec())

            # load: filename (hdf5 format implicit)
            update['batch_size'] = 2
            agent = Agent.load(directory=directory,
                               filename='agent2',
                               environment=environment,
                               policy=policy,
                               update=update,
                               parallel_interactions=2)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights1).all())
            self.assertEqual(agent.episodes, 1)
            agent.close()
            self.finished_test()

            # load: tensorflow format (filename explicit)
            # TODO: parallel_interactions=2 should be possible, but problematic if all variables are
            # saved in checkpoint format
            agent = Agent.load(directory=directory,
                               format='checkpoint',
                               environment=environment,
                               policy=policy,
                               update=update,
                               parallel_interactions=1)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights0).all())
            self.assertEqual(agent.timesteps, 0)
            self.assertEqual(agent.episodes, 0)
            agent.close()
            self.finished_test()

            # load: numpy format, full filename including timesteps suffix
            agent = Agent.load(directory=directory,
                               filename='agent-1',
                               format='numpy',
                               environment=environment,
                               policy=policy,
                               update=update,
                               parallel_interactions=2)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue((x == weights0).all())
            self.assertEqual(agent.timesteps, 1)
            self.assertEqual(agent.episodes, 0)
            self.finished_test()

            # three episodes (due to batch_size change, mismatch with loaded internal last_update)
            for _ in range(3):
                states = environment.reset()
                terminal = False
                while not terminal:
                    actions = agent.act(states=states)
                    states, terminal, reward = environment.execute(
                        actions=actions)
                    agent.observe(terminal=terminal, reward=reward)
            self.assertEqual(agent.updates, 1)

            # save: saved-model format, append updates
            agent.save(directory=directory,
                       format='saved-model',
                       append='updates')
            agent.close()

            # load: saved-model format
            import tensorflow as tf
            agent = tf.saved_model.load(
                export_dir=os.path.join(directory, 'agent-1'))
            act = next(iter(agent._independent_act_graphs.values()))

            # one episode
            states = environment.reset()
            terminal = False
            while not terminal:
                # Turn dicts into lists and batch inputs
                auxiliaries = [[
                    np.expand_dims(states.pop('int_action_mask'), axis=0)
                ]]
                states = [
                    np.expand_dims(state, axis=0) for state in states.values()
                ]
                actions = act(states, auxiliaries)
                # Split result dict and unbatch values
                actions = {
                    name: value.numpy().item()
                    if value.shape == (1, ) else value.numpy()[0]
                    for name, value in actions.items()
                }
                states, terminal, _ = environment.execute(actions=actions)

            # agent.close()
            environment.close()

            files = set(os.listdir(path=directory))
            self.assertTrue(
                files == {
                    'agent.json', 'agent-1', 'agent-1.data-00000-of-00001',
                    'agent-1.index', 'agent-1.npz', 'agent2.json',
                    'agent-2.npz', 'agent2-1.hdf5', 'checkpoint'
                })
            files = set(os.listdir(path=os.path.join(directory, 'agent-1')))
            self.assertTrue(files == {'assets', 'saved_model.pb', 'variables'})
            files = set(
                os.listdir(
                    path=os.path.join(directory, 'agent-1', 'variables')))
            self.assertTrue(
                files == {'variables.data-00000-of-00001', 'variables.index'})

        self.finished_test()
Example #19
    def test_save_load_agent(self):
        self.start_tests(name='save-load-agent')

        with TemporaryDirectory() as checkpoint_directory, TemporaryDirectory(
        ) as numpy_directory:

            # ====================

            # OpenAI-Gym environment initialization
            environment = Environment.create(
                environment='benchmarks/configs/cartpole.json')

            # PPO agent initialization
            agent = Agent.create(
                agent='benchmarks/configs/ppo.json',
                environment=environment,
                # Option 1: Saver - save agent periodically every update
                # and keep the 5 most recent checkpoints
                saver=dict(directory=checkpoint_directory,
                           frequency=1,
                           max_checkpoints=5),
            )

            # Runner initialization
            runner = Runner(agent=agent, environment=environment)

            # Training
            runner.run(num_episodes=10)
            runner.close()

            # Option 2: Explicit save
            # (formats 'numpy' and 'hdf5' store only the weights, 'checkpoint' stores the full
            # TensorFlow model; the saver argument specified above uses the 'checkpoint' format)
            agent.save(directory=numpy_directory,
                       format='numpy',
                       append='episodes')

            # Close agent separately, since created separately
            agent.close()

            # Load agent TensorFlow checkpoint
            agent = Agent.load(directory=checkpoint_directory,
                               format='checkpoint',
                               environment=environment)
            runner = Runner(agent=agent, environment=environment)
            runner.run(num_episodes=10, evaluation=True)
            runner.close()
            agent.close()

            # Load agent NumPy weights
            agent = Agent.load(directory=numpy_directory,
                               format='numpy',
                               environment=environment)
            runner = Runner(agent=agent, environment=environment)
            runner.run(num_episodes=10, evaluation=True)
            runner.close()
            agent.close()

            # Close environment separately, since created separately
            environment.close()

        # ====================

        self.finished_test()
Example #20
from tensorforce import Agent, Environment
import matplotlib.pyplot as plt
import numpy as np
import math
import pickle
from tqdm import tqdm
import gym
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

environment = Environment.create(environment='gym', level='Hopper-v3')
# polynomial regression
coach = Agent.load(directory='Hopper_RL', format='numpy')
internals = coach.initial_internals()
actions_record = []
theta_states = []
for k in range(1):
    states = environment.reset()
    terminal = False
    while not terminal:
        print('y_position %s y_velocity %s' % (states[1], states[7]))

        theta_states.append(states)
        actions, internals = coach.act(states=states,
                                       internals=internals,
                                       independent=True,
                                       deterministic=True)
        states, terminal, reward = environment.execute(actions=actions)
        actions_record.append(actions)
Example #21
LEVEL = 'PandaReach-v1'
EPISODES = 1
EPISODE_MAX_LENGTH = 500

MODEL_DICT = f'{LEVEL}/model'
VISUALIZE_DICT = f'{LEVEL}/visualize/{random.randint(0, 1000)}'

gym_environment = gym.make(LEVEL)
environment = Environment.create(
    environment=gym_environment, 
    max_episode_timesteps=EPISODE_MAX_LENGTH,
    visualize=True,
    visualize_directory=VISUALIZE_DICT,
)

agent = Agent.load(directory=MODEL_DICT, environment=environment)

runner = Runner(agent=agent, environment=environment, max_episode_timesteps=EPISODE_MAX_LENGTH)
runner.run(num_episodes=EPISODES, evaluation=True)
runner.close()

# sum_rewards = 0.0
# for _ in range(EPISODES):
#     states = environment.reset()
#     internals = agent.initial_internals()
#     terminal = False
#     while not terminal:
#         actions, internals = agent.act(
#             states=states, internals=internals, independent=True, deterministic=True
#         )
#         states, terminal, reward = environment.execute(actions=actions)
Example #22
    def test_explicit(self):
        # FEATURES.MD
        self.start_tests(name='explicit')

        with TemporaryDirectory() as directory:
            update = dict(unit='episodes', batch_size=1)
            agent, environment = self.prepare(memory=50,
                                              update=update,
                                              config=dict(
                                                  eager_mode=False,
                                                  create_debug_assertions=True,
                                                  tf_log_level=20))
            states = environment.reset()

            # save: default checkpoint format
            weights0 = agent.model.policy.network.layers[1].weights.numpy()
            agent.save(directory=directory)
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)
            self.assertEqual(agent.timesteps, 1)
            agent.close()
            self.finished_test()

            # load: only directory
            agent = Agent.load(directory=directory, environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 0)
            self.finished_test()

            # one timestep
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

            # save: numpy format, append timesteps
            agent.save(directory=directory, format='numpy', append='timesteps')
            agent.close()
            self.finished_test()

            # load: numpy format and directory
            agent = Agent.load(directory=directory,
                               format='numpy',
                               environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 1)
            self.finished_test()

            # one timestep
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

            # save: numpy format, append timesteps
            agent.save(directory=directory, format='numpy', append='timesteps')
            agent.close()
            self.finished_test()

            # load: numpy format and directory
            agent = Agent.load(directory=directory,
                               format='numpy',
                               environment=environment)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 2)
            self.finished_test()

            # one episode
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)

            # save: hdf5 format, filename, append episodes
            weights1 = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(not np.allclose(weights1, weights0))
            self.assertEqual(agent.episodes, 1)
            agent.save(directory=directory,
                       filename='agent2',
                       format='hdf5',
                       append='episodes')
            agent.close()
            self.finished_test()

            # env close
            environment.close()

            # differing agent config: update, parallel_interactions
            # TODO: episode length, others?
            environment = Environment.create(
                environment=self.environment_spec())

            # load: filename (hdf5 format implicit)
            update['batch_size'] = 2
            agent = Agent.load(directory=directory,
                               filename='agent2',
                               environment=environment,
                               update=update,
                               parallel_interactions=2)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights1))
            self.assertEqual(agent.episodes, 1)
            agent.close()
            self.finished_test()

            # load: tensorflow format (filename explicit)
            # TODO: parallel_interactions=2 should be possible, but problematic if all variables are
            # saved in checkpoint format
            agent = Agent.load(directory=directory,
                               format='checkpoint',
                               environment=environment,
                               update=update,
                               parallel_interactions=1)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 0)
            self.assertEqual(agent.episodes, 0)
            agent.close()
            self.finished_test()

            # load: numpy format, full filename including timesteps suffix
            agent = Agent.load(directory=directory,
                               filename='agent-1',
                               format='numpy',
                               environment=environment,
                               update=update,
                               parallel_interactions=2)
            x = agent.model.policy.network.layers[1].weights.numpy()
            self.assertTrue(np.allclose(x, weights0))
            self.assertEqual(agent.timesteps, 1)
            self.assertEqual(agent.episodes, 0)
            self.finished_test()

            # three episodes (due to batch_size change, mismatch with loaded internal last_update)
            for _ in range(3):
                states = environment.reset()
                terminal = False
                while not terminal:
                    actions = agent.act(states=states)
                    states, terminal, reward = environment.execute(
                        actions=actions)
                    agent.observe(terminal=terminal, reward=reward)
            self.assertEqual(agent.updates, 1)

            # save: saved-model format, append updates
            agent.save(directory=directory,
                       format='saved-model',
                       append='updates')
            agent.close()

            # saved-model functions
            def batch(x):
                return np.expand_dims(x, axis=0)

            def unbatch(x):
                if isinstance(x, tf.Tensor):
                    x = x.numpy()
                if x.shape == (1, ):
                    return x.item()
                else:
                    return np.squeeze(x, axis=0)

            def recursive_map(function, dictionary):
                mapped = dict()
                for key, value in dictionary.items():
                    if isinstance(value, dict):
                        mapped[key] = recursive_map(function, value)
                    else:
                        mapped[key] = function(value)
                return mapped

            # load: saved-model format
            agent = tf.saved_model.load(
                export_dir=os.path.join(directory, 'agent-1'))

            # one episode
            states = environment.reset()
            internals = agent.initial_internals()
            internals = recursive_map(batch, internals)
            terminal = False
            while not terminal:
                auxiliaries = dict(int_action=dict(
                    mask=batch(states.pop('int_action_mask'))))
                states = recursive_map(batch, states)
                actions_internals = agent.act(states, internals, auxiliaries,
                                              False)
                actions = actions_internals['actions']
                internals = actions_internals['internals']
                actions = recursive_map(unbatch, actions)
                states, terminal, _ = environment.execute(actions=actions)

            environment.close()

            # saved-model format with singleton state/action, no internals, no masking
            policy = dict(
                network=dict(type='auto', size=8, depth=1, rnn=False))
            update = dict(unit='episodes', batch_size=1)
            baseline = dict(
                network=dict(type='auto', size=7, depth=1, rnn=False))
            agent, environment = self.prepare(states=dict(type='float',
                                                          shape=(),
                                                          min_value=1.0,
                                                          max_value=2.0),
                                              actions=dict(type='float',
                                                           shape=(),
                                                           min_value=1.0,
                                                           max_value=2.0),
                                              policy=policy,
                                              update=update,
                                              baseline=baseline,
                                              config=dict(
                                                  eager_mode=False,
                                                  create_debug_assertions=True,
                                                  tf_log_level=20))

            # one episode
            states = environment.reset()
            terminal = False
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)
            self.assertEqual(agent.updates, 1)

            # save: saved-model format, append updates
            agent.save(directory=directory,
                       format='saved-model',
                       append='updates')
            agent.close()

            # load: saved-model format
            agent = tf.saved_model.load(
                export_dir=os.path.join(directory, 'agent-1'))

            # one episode
            states = environment.reset()
            terminal = False
            while not terminal:
                states = batch(states)
                actions = agent.act(states, True)
                actions = unbatch(actions)
                states, terminal, _ = environment.execute(actions=actions)

            environment.close()

            files = set(os.listdir(path=directory))
            self.assertTrue(
                files == {
                    'agent.json', 'agent-1', 'agent-1.data-00000-of-00001',
                    'agent-1.index', 'agent-1.npz', 'agent2.json',
                    'agent-2.npz', 'agent2-1.hdf5', 'checkpoint'
                })
            files = set(os.listdir(path=os.path.join(directory, 'agent-1')))
            self.assertTrue(files == {'assets', 'saved_model.pb', 'variables'})
            files = set(
                os.listdir(
                    path=os.path.join(directory, 'agent-1', 'variables')))
            self.assertTrue(
                files == {'variables.data-00000-of-00001', 'variables.index'})

        self.finished_test()
Example #23
    def test_config_extended(self):
        self.start_tests(name='config extended')

        # filename
        saver = dict(directory=self.__class__.directory, filename='test')
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        agent = Agent.load(directory=self.__class__.directory,
                           filename='test',
                           environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # frequency
        saver = dict(directory=self.__class__.directory, frequency=1)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
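        # in this tensorforce version the saver frequency is a time interval in seconds,
        # so the sleeps below let each subsequent act() call trigger a fresh checkpoint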
        time.sleep(1)
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        time.sleep(1)
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.'), filename
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # load filename
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        saver = dict(directory=self.__class__.directory, load='agent-0')
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #24
    def create_agent(
        self,
        env,
        n_episodes,
        save_frequency,
        load=False,
    ):
        ########### WORK NEEDED ###########
        ### You need to tweak the Agent ###
        ###################################
        """
        Agent definition. Tweak the Agent's parameters to your convenience

        Use any agent from tensorforce and refer to the documentation for the available hyperparameters :
        -Vanilla Policy Gradient : https://tensorforce.readthedocs.io/en/latest/agents/vpg.html
        -Proximal Policy Optimization : https://tensorforce.readthedocs.io/en/latest/agents/ppo.html
        -Trust-Region Policy Optimization : https://tensorforce.readthedocs.io/en/latest/agents/trpo.html
        -Deterministic Policy Gradient : https://tensorforce.readthedocs.io/en/latest/agents/dpg.html
        -Deep Q-Network : https://tensorforce.readthedocs.io/en/latest/agents/dqn.html
        -Double DQN : https://tensorforce.readthedocs.io/en/latest/agents/double_dqn.html
        -Dueling DQN : https://tensorforce.readthedocs.io/en/latest/agents/dueling_dqn.html
        -Actor-Critic : https://tensorforce.readthedocs.io/en/latest/agents/ac.html
        -Advantage Actor-Critic : https://tensorforce.readthedocs.io/en/latest/agents/a2c.html

        For the network parameters :
        https://tensorforce.readthedocs.io/en/latest/modules/networks.html


        """
        ##### Agent definition ########
        if not load:
            print("INIT AGENT.")
            agent = Agent.create(
                agent="ppo",
                states={
                    'type': 'float',
                    'shape': (10,),
                    'min_value': [
                        -1.000e+00, -2.000e+00, -1.000e+00, -1.000e+00,
                        -1.000e+00, -1.280e+00, -3.400e+00, -9.999e+03,
                        -9.999e+03, -9.999e+03
                    ],
                    'max_value': [
                        1.000e+00, 2.000e+00, 1.000e+00, 1.000e+00, 1.000e+00,
                        1.000e+00, 3.600e+00, 9.999e+03, 9.999e+03, 9.999e+03
                    ]
                },
                actions=dict(
                    gimbal=dict(type='int', shape=1, num_values=5),
                    throttle=dict(type='int', shape=1, num_values=5),
                    side_booster=dict(type='int', shape=1, num_values=5),
                ),
                max_episode_timesteps=100000,
                batch_size=8,
                discount=0.99,
                exploration=0.01,
                #entropy_regularization=1e-3,
                #l2_regularization=1e-3,
                learning_rate=5e-4,
                config=dict(name="ppo_agent_V3"),
                saver=dict(
                    directory="data/checkpoints",
                    frequency=10  # save checkpoint every 10 updates
                ),  # don't change this
                # environment=env,
            )

        else:
            print(f"RELOADING AGENT.")
            agent = Agent.load(directory="data/checkpoints",
                               filename="ppo_agent_V3")
        return agent
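
A minimal usage sketch (not part of the original example), assuming `env` follows tensorforce's Environment interface (reset/execute); the `run_training` name, the episode count, and the `resume` flag are hypothetical and only illustrate how create_agent and the saver configuration above fit into a training loop:

    def run_training(self, env, n_episodes=500, resume=False):
        # Hypothetical driver (sketch only): build or reload the agent defined above.
        agent = self.create_agent(env=env, n_episodes=n_episodes,
                                  save_frequency=10, load=resume)
        for _ in range(n_episodes):
            states = env.reset()
            terminal = False
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = env.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)
        # The saver passed to Agent.create writes a checkpoint every 10 updates, so the
        # trained agent can later be restored by calling create_agent with load=True.
        agent.close()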
Example #25
    def test_config(self):
        # FEATURES.MD
        self.start_tests(name='config')

        # default
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        agent.close()

        agent = Agent.load(directory=self.__class__.directory,
                           environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # single then parallel and different episode length
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(memory=50,
                                          update=dict(unit='episodes',
                                                      batch_size=1),
                                          saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        agent, environment = self.prepare(update=dict(unit='episodes',
                                                      batch_size=1),
                                          saver=saver,
                                          max_episode_timesteps=7,
                                          parallel_interactions=2)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no load
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        saver = dict(directory=self.__class__.directory, load=False)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #26
    def test_explicit(self):
        # FEATURES.MD
        self.start_tests(name='explicit')

        # Remove directory if exists
        if os.path.exists(path=self.__class__.directory):
            for filename in os.listdir(path=self.__class__.directory):
                os.remove(path=os.path.join(self.__class__.directory, filename))
            os.rmdir(path=self.__class__.directory)

        agent, environment = self.prepare(memory=50, update=dict(unit='episodes', batch_size=1))
        states = environment.reset()

        # save: default tensorflow format
        weights0 = agent.get_variable(variable='policy/policy-network/dense0/weights')
        agent.save(directory=self.__class__.directory)
        agent.close()
        self.finished_test()

        # load: only directory
        agent = Agent.load(directory=self.__class__.directory, environment=environment)
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights0).all())
        self.assertEqual(agent.timesteps, 0)
        self.finished_test()

        # one timestep
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        # save: numpy format, append timesteps
        weights1 = agent.get_variable(variable='policy/policy-network/dense0/weights')
        agent.save(directory=self.__class__.directory, format='numpy', append='timesteps')
        agent.close()
        self.finished_test()

        # load: numpy format and directory
        agent = Agent.load(
            directory=self.__class__.directory,  format='numpy', environment=environment
        )
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights1).all())
        self.assertEqual(agent.timesteps, 1)
        self.finished_test()

        # one timestep
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        # save: numpy format, append timesteps
        weights2 = agent.get_variable(variable='policy/policy-network/dense0/weights')
        agent.save(directory=self.__class__.directory, format='numpy', append='timesteps')
        agent.close()
        self.finished_test()

        # load: numpy format and directory
        agent = Agent.load(
            directory=self.__class__.directory, format='numpy', environment=environment
        )
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights2).all())
        self.assertEqual(agent.timesteps, 2)
        self.finished_test()

        # one episode
        while not terminal:
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

        # save: hdf5 format, filename, append episodes
        weights3 = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertFalse((weights3 == weights2).all())
        agent.save(
            directory=self.__class__.directory, filename='agent2', format='hdf5', append='episodes'
        )
        agent.close()
        self.finished_test()

        # env close
        environment.close()

        # differing agent config: episode length, update, parallel_interactions
        environment = Environment.create(environment=self.environment_spec(max_episode_timesteps=7))

        # load: filename (hdf5 format implicit)
        agent = Agent.load(
            directory=self.__class__.directory, filename='agent2', environment=environment,
            update=dict(unit='episodes', batch_size=2), parallel_interactions=2
        )
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights3).all())
        self.assertEqual(agent.episodes, 1)
        agent.close()
        self.finished_test()

        # load: tensorflow format and directory
        agent = Agent.load(
            directory=self.__class__.directory, format='tensorflow', environment=environment,
            update=dict(unit='episodes', batch_size=2), parallel_interactions=2
        )
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights0).all())
        self.assertEqual(agent.timesteps, 0)
        self.assertEqual(agent.episodes, 0)
        agent.close()
        self.finished_test()

        # load: numpy format, full filename including timesteps suffix
        agent = Agent.load(
            directory=self.__class__.directory, filename='agent-1', format='numpy',
            environment=environment, update=dict(unit='episodes', batch_size=2),
            parallel_interactions=2
        )
        x = agent.get_variable(variable='policy/policy-network/dense0/weights')
        self.assertTrue((x == weights1).all())
        self.assertEqual(agent.timesteps, 1)
        self.assertEqual(agent.episodes, 0)
        agent.close()
        self.finished_test()

        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.meta'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.npz'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.npz'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent2.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent2-1.hdf5'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #27
# NOTE: besides the Normal imports, this fragment uses numpy, tqdm and tensorforce.Agent,
# plus the constants episode_number, measure_length and evaluation_episode_number; the
# imports below are added for completeness and assume those constants also live in Normal.
import numpy as np
from tqdm import tqdm

from tensorforce import Agent

from Normal import moving_average
from Normal import prohibition_parameter
from Normal import prohibition_position
from Normal import environment
from Normal import episode_number, measure_length, evaluation_episode_number

#training and evaluation with boundary
reward_record_average = np.zeros(
    (len(prohibition_position), len(prohibition_parameter),
     len(measure_length)))
reward_record = np.zeros(
    (len(prohibition_position), len(prohibition_parameter), episode_number))
evaluation_reward_record = np.zeros(
    (len(prohibition_position), len(prohibition_parameter),
     evaluation_episode_number))

coach = Agent.load(directory='Walker_RL', format='numpy')
internals = coach.initial_internals()

for k in range(len(prohibition_position)):
    #training
    for i in range(len(prohibition_parameter)):
        record = []
        agent = Agent.create(agent='agent.json', environment=environment)
        print(
            'training agent with boundary position at %s and prohibitive parameter %s'
            % (prohibition_position[k], prohibition_parameter[i]))
        for _ in tqdm(range(episode_number)):
            episode_reward = 0
            states = environment.reset()
            terminal = False
            while not terminal:
Example #28
    def test_config(self):
        # FEATURES.MD
        self.start_tests(name='config')

        # Remove directory if exists
        if os.path.exists(path=self.__class__.directory):
            for filename in os.listdir(path=self.__class__.directory):
                os.remove(path=os.path.join(self.__class__.directory, filename))
            os.rmdir(path=self.__class__.directory)

        # default
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        agent.close()

        agent = Agent.load(directory=self.__class__.directory, environment=environment)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no load
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        saver = dict(directory=self.__class__.directory, load=False)
        agent, environment = self.prepare(saver=saver)

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #29
from tensorforce import Agent, Environment
import matplotlib.pyplot as plt
import numpy as np
import math
import pickle
from tqdm import tqdm
import gym
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from gym.wrappers import Monitor

#environment = Environment.create(environment='gym', level='InvertedPendulum-v2')
environment = gym.make('InvertedPendulum-v2')
RL = Agent.load(directory='model5', format='numpy')
internals = RL.initial_internals()
actions_record = []
theta_states = []
for k in range(1):
    states = environment.reset()
    terminal = False
    integrals = 0
    while not terminal:
        #environment.render()
        integrals += states[1]
        temp = [states[1], integrals, states[3]]
        theta_states.append(temp)
        actions, internals = RL.act(states=states,
                                    internals=internals,
                                    independent=True,
                                    deterministic=True)
Example #30
from tensorforce import Agent, Environment
import matplotlib.pyplot as plt
import numpy as np
import math
import pickle
from tqdm import tqdm
import gym

test_episodes = 10

ip_pid_episode_record = []
ip_rl_episode_record = []
ip_rl = Agent.load(directory='Inverted_Pendulum_RL', format='numpy')
internals = ip_rl.initial_internals()
environment = gym.make('InvertedPendulum-v2')
environment_rl = Environment.create(environment='gym', level='InvertedPendulum-v2')

kp = 25
kd = 2.3

for i in range(test_episodes):

    episode_reward = 0
    states = environment.reset()
    terminal = False
    while not terminal:
        actions = kp * states[1] + kd * states[3]
        states, reward, terminal, info = environment.step(actions)
        episode_reward += reward
    ip_pid_episode_record.append(episode_reward)
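
The fragment stops after the PID rollout; a hedged sketch of the matching RL evaluation loop, mirroring the act() pattern from example #29 and assuming environment_rl.execute() returns (states, terminal, reward), might look like:

# Sketch only (not in the original fragment): evaluate the loaded tensorforce agent on the
# same task and record per-episode rewards for comparison against the PID baseline above.
for i in range(test_episodes):
    episode_reward = 0
    states = environment_rl.reset()
    internals = ip_rl.initial_internals()
    terminal = False
    while not terminal:
        actions, internals = ip_rl.act(states=states, internals=internals,
                                       independent=True, deterministic=True)
        states, terminal, reward = environment_rl.execute(actions=actions)
        episode_reward += reward
    ip_rl_episode_record.append(episode_reward)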