Example #1
    def load_implementation(self, directory: str):
        """Loads a previously trained and saved actor policy from directory.

        The loaded policy may afterwards be used by calling play().

        Args:
            directory: the directory containing the trained policy.
        """
        assert directory
        assert os.path.isdir(directory)
        self._agent = Agent.load(directory)
Example #2
    def restore_agent(self, directory: str, filename: str = None):
        """Deserialize the strategy's learning agent from a file.

        Arguments:
            directory: The `str` path of the directory the agent checkpoint is stored in.
            filename (optional): The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
        """
        self._agent = Agent.load(directory, filename=filename)

        self._runner = Runner(agent=self._agent, environment=self._environment)
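Both helpers above are thin wrappers around Agent.load. For reference, here is a minimal save/restore round trip with the plain Tensorforce API — a sketch assuming Tensorforce 0.6.x; the CartPole environment, directory name, and hyperparameters are illustrative choices, not taken from the examples on this page.

from tensorforce import Agent, Environment
from tensorforce.execution import Runner

# Train briefly, write a checkpoint, then restore it the same way the
# examples on this page do.
environment = Environment.create(environment='gym', level='CartPole-v1',
                                 max_episode_timesteps=500)
agent = Agent.create(agent='ppo', environment=environment, batch_size=10)

runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=10)
runner.close()

agent.save(directory='./checkpoints', filename='agent')
agent.close()
environment.close()

# The saved agent.json spec makes the environment argument optional here.
agent = Agent.load(directory='./checkpoints', filename='agent')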
Example #3
    def test_explicit(self):
        self.start_tests(name='explicit')

        # default
        agent, environment = self.prepare()

        agent.initialize()
        states = environment.reset()

        agent.save(directory=self.__class__.directory)
        agent.close()

        agent = Agent.load(directory=self.__class__.directory)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # single then parallel
        agent, environment = self.prepare(memory=50,
                                          update=dict(unit='episodes',
                                                      batch_size=1))

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory)
        agent.close()
        environment.close()

        agent, environment = self.prepare(timestep_range=(6, 10),
                                          update=dict(unit='episodes',
                                                      batch_size=1),
                                          parallel_interactions=2)

        agent.restore(directory=self.__class__.directory)
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #4
    def test_config(self):
        # FEATURES.MD
        self.start_tests(name='config')

        # default
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        agent.close()

        agent = Agent.load(directory=self.__class__.directory)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # single then parallel
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(memory=50,
                                          update=dict(unit='episodes',
                                                      batch_size=1),
                                          saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        agent, environment = self.prepare(timestep_range=(6, 10),
                                          update=dict(unit='episodes',
                                                      batch_size=1),
                                          saver=saver,
                                          parallel_interactions=2)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no load
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        saver = dict(directory=self.__class__.directory, load=False)
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #5
    def test_config_extended(self):
        self.start_tests(name='config extended')

        # filename
        saver = dict(directory=self.__class__.directory, filename='test')
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        agent = Agent.load(directory=self.__class__.directory, filename='test')

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # frequency
        saver = dict(directory=self.__class__.directory, frequency=1)
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        time.sleep(1)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        time.sleep(1)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.'), filename
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # load filename
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        saver = dict(directory=self.__class__.directory, load='agent-0')
        agent, environment = self.prepare(saver=saver)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #6
    def load(self, model_name):
        print("Loading model...")
        self.ppo_agent = Agent.load(directory=model_name, format='hdf5')
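Loading with format='hdf5' presupposes that the checkpoint was written in the same format. A hedged sketch of the matching save method, mirroring the loader above with the same assumed attribute names (Agent.save with format='hdf5' requires h5py):

    def save(self, model_name):
        print("Saving model...")
        # Writes a single HDF5 file that Agent.load(..., format='hdf5')
        # can restore, as shown above.
        self.ppo_agent.save(directory=model_name, format='hdf5')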
Example #7
    def test_config(self):
        # FEATURES.MD
        self.start_tests(name='config')

        # default
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)
        agent.initialize()
        states = environment.reset()

        agent.close()

        agent = Agent.load(directory=self.__class__.directory)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # parallel then single
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(update=dict(unit='episodes',
                                                      batch_size=1),
                                          saver=saver,
                                          parallel_interactions=2)

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        agent, _ = self.prepare(saver=saver)
        agent.initialize()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # filename
        saver = dict(directory=self.__class__.directory, filename='test')
        agent, environment = self.prepare(saver=saver)
        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        agent = Agent.load(directory=self.__class__.directory, filename='test')

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # frequency
        saver = dict(directory=self.__class__.directory, frequency=1)
        agent, environment = self.prepare(saver=saver)
        agent.initialize()
        states = environment.reset()

        time.sleep(1)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        time.sleep(1)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-2.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.'), filename
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no load
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)
        restored_agent = copy.deepcopy(agent)
        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        restored_agent.model.saver_spec['load'] = False
        restored_agent.initialize()

        actions = restored_agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        # restored_agent.observe(terminal=terminal, reward=reward)

        restored_agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # load filename
        saver = dict(directory=self.__class__.directory)
        agent, environment = self.prepare(saver=saver)
        restored_agent = copy.deepcopy(agent)
        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()

        restored_agent.model.saver_spec['load'] = 'agent-0'
        restored_agent.initialize()

        actions = restored_agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        # restored_agent.observe(terminal=terminal, reward=reward)

        restored_agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-0.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
        for filename in os.listdir(path=self.__class__.directory):
            os.remove(path=os.path.join(self.__class__.directory, filename))
            assert filename.startswith('events.out.tfevents.')
            break
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #8
    "b": -5.0e2,
    "D": 0.0e-2,
    "x0": None
}

# Path to save the figure
#fig_path = 'figures/RLControl_De_0_Dt_0_dta_0p05_ep50_dense_12_gre_1_wne_1_grt_1_wnt_1_r_ma1em1.png'
fig_path = None

# Create instance of complex Stuart-Landau equation environment
environment = Langevin2D_Env()
environment.env_params = env_params

# Load agent
agent = Agent.load(
    directory=
    "./agents/saver_data_D_0_dta_0p05_maxa_1_ep100_lstm2_6_gr_1_wn_1_r_ma1em2",
    filename='agent')

###############################################################################
#       EVALUATIONS
###############################################################################

# Set up control time with reference to simulation time
dt_action = 0.05
dt = environment.env_params["dt"]
T = environment.env_params["T"]
n_env_steps = int(dt_action / dt)
n_actions = int(T / dt / n_env_steps)

# Initialize environment to its initial state
time = np.zeros((environment.max_episode_timesteps()))
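The evaluation loop itself is cut off in this excerpt. A hedged sketch of how the loaded agent might be stepped with the timing variables above; only the variable names come from the snippet, the loop structure follows the standard Tensorforce act-evaluate pattern and is an assumption:

states = environment.reset()
internals = agent.initial_internals()
for n in range(n_actions):
    # Evaluation-style acting: independent=True, so no experience is stored
    # and no observe() call is required.
    actions, internals = agent.act(states=states, internals=internals,
                                   independent=True, deterministic=True)
    states, terminal, reward = environment.execute(actions=actions)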
Example #9
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--theory", help="select an agent type [ppo, vpg, dqn]")
args = parser.parse_args()

# Create an OpenAI-Gym environment
environment = Environment.create(environment='cenv.CustomEnvironment',
                                 max_episode_timesteps=100)

agent_directory = './' + args.theory + '_agent'

#Load the previously saved agents
util_agent = Agent.load(directory='./util_agent',
                        format='numpy',
                        environment=environment)

deon_agent = Agent.load(directory='./deon_agent',
                        format='numpy',
                        environment=environment)

agent = util_agent

#Set up the Environment
environment.reset()

while True:
    #Get the Current Dilemma for Output and wait for human input
    dilemma = environment.getCurrentDilemma()
    input("\n ***Press Enter For Next Dilemma*** \n")
Example #10
def test(flags):
    rospy.init_node("rl_algorithm", anonymous=True)
    main_state = State()
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Publishers
    drive_announce = rospy.Publisher(main_state.configs['CONTROL_TOPIC'],
                                     AckermannDriveStamped,
                                     queue_size=1)
    reset_announce = rospy.Publisher(main_state.configs['RESET_TOPIC'],
                                     Bool,
                                     queue_size=1)
    main_state.st_display_pub = rospy.Publisher(
        main_state.configs['ST_DISPLAY_TOPIC'], Marker, queue_size=10)
    laser_listen = rospy.Subscriber(main_state.configs['LASER_TOPIC'],
                                    LaserScan,
                                    parser.laser_parser,
                                    main_state,
                                    queue_size=1)
    odom_listen = rospy.Subscriber(main_state.configs['ODOM_TOPIC'],
                                   Odometry,
                                   parser.odom_parser,
                                   main_state,
                                   queue_size=1)

    #Flags for testing
    if (flags.steps):
        test_steps = flags.steps
    else:
        print("The number of steps must be specified")
        return
    if (flags.verbose):
        main_state.verbose = True

    environment = PD_Environment(reset_announce, drive_announce, main_state)
    environment.publish_markers()

    #Initialize agent
    #TODO: Consolidate into configs
    agent = Agent.create(agent="ppo",
                         network=custom_network(),
                         batch_size=10,
                         parallel_interactions=8,
                         environment=environment,
                         max_episode_timesteps=2000,
                         tracking="all")
    if (flags.load):
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory=files[0],
                               filename=files[1],
                               environment=environment,
                               agent=agent)
        else:
            agent = Agent.load(directory=flags.load,
                               environment=environment,
                               agent=agent)
    else:
        print("A load file must be specified")
        return

    #Define the tracking tensor names
    ST_TENSOR = 'agent/policy/turning_angle_distribution/probabilities'
    for i in range(test_steps):
        num_steps = 0
        environment.reset()
        states = parser.assemble_state(main_state)
        done = False
        main_state.crash_det = False
        main_state.lap_finish = False

        while not done and num_steps < 2000:
            num_steps += 1
            actions = agent.act(states, independent=True)
            all_probs = agent.tracked_tensors()
            parser.publish_steering_prob(all_probs[ST_TENSOR],
                                         main_state.st_display_pub,
                                         main_state.cur_steer)
            states, done, reward = environment.execute(actions=actions)
            col_detect(main_state)
            if (num_steps < 10):
                done = False
            if (main_state.crash_det):
                print("Crashed")
            if (main_state.lap_finish):
                print("Lap finished")
        print("Episode {} done after {}".format(i, num_steps))
Example #11
def train(flags):
    main_state = State()
    #Load config
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Accept flag params
    if (flags.steps):
        train_steps = flags.steps
    else:
        train_steps = main_state.configs["NUM_RUNS_TOT"]
    if (flags.save):
        save_file = flags.save
    else:
        save_file = main_state.configs["MODEL_DIR"]
    if (flags.verbose):
        main_state.verbose = True
    if (not flags.lap_time):
        main_state.default_reward = 0.01
    if (flags.entropy):
        main_state.entropy_reg = flags.entropy
    else:
        main_state.entropy_reg = main_state.configs["DEF_ENTROPY"]
    if (flags.ds_reward):
        main_state.ds_reward = True
    else:
        main_state.ds_reward = False
    environments = list()
    for i in range(8):
        environments.append(Gym_Environment(main_state))
    #Initialize the agent
    agent = Agent.create(agent="ppo",
                         network=custom_network(),
                         environment=environments[0],
                         max_episode_timesteps=2000,
                         parallel_interactions=8,
                         learning_rate=0.002,
                         summarizer=main_state.configs["SUM_DIR"],
                         batch_size=10,
                         entropy_regularization=main_state.entropy_reg)
    if (flags.load):
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory=files[0],
                               filename=files[1],
                               max_episode_timesteps=2000,
                               learning_rate=0.002,
                               summarizer=main_state.configs["SUM_DIR"],
                               batch_size=10,
                               environment=environments[0],
                               agent=agent)
        else:
            agent = Agent.load(directory=files[0],
                               environment=environments[0],
                               max_episode_timesteps=2000,
                               learning_rate=0.002,
                               summarizer=main_state.configs["SUM_DIR"],
                               batch_size=10,
                               agent=agent)
    runner = Runner(agent=agent,
                    environments=environments,
                    num_parallel=8,
                    remote='multiprocessing')
    if (train_steps <= main_state.configs["SAVE_RUNS"]):
        runner.run(num_episodes=train_steps, batch_agent_calls=True)
        agent.save(save_file, format="checkpoint", append="episodes")
    else:
        for i in range(
                int((train_steps - 1) / main_state.configs["SAVE_RUNS"]) + 1):
            runner.run(num_episodes=main_state.configs["SAVE_RUNS"],
                       batch_agent_calls=True)
            agent.save(save_file, format="checkpoint", append="episodes")
Example #12
def train_GUI(flags):
    #Initialize node
    rospy.init_node("rl_algorithm", anonymous=True)
    #Initialize subscribers for laser and odom
    main_state = State()
    #TODO: Put config file into flags

    #Load config
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Subscribers and Publishers
    laser_listen = rospy.Subscriber(main_state.configs['LASER_TOPIC'],
                                    LaserScan,
                                    parser.laser_parser,
                                    main_state,
                                    queue_size=1)
    odom_listen = rospy.Subscriber(main_state.configs['ODOM_TOPIC'],
                                   Odometry,
                                   parser.odom_parser,
                                   main_state,
                                   queue_size=1)
    info_listen = rospy.Subscriber(main_state.configs['INFO_TOPIC'],
                                   RaceInfo,
                                   parser.info_parser,
                                   main_state,
                                   queue_size=1)
    drive_announce = rospy.Publisher(main_state.configs['CONTROL_TOPIC'],
                                     AckermannDriveStamped,
                                     queue_size=1)
    reset_announce = rospy.Publisher(main_state.configs['RESET_TOPIC'],
                                     Bool,
                                     queue_size=1)
    main_state.st_display_pub = rospy.Publisher(
        main_state.configs['ST_DISPLAY_TOPIC'], Marker, queue_size=10)
    #Publish True to reset_announce to reset the simulator

    #Accept flag params
    if (flags.steps):
        train_steps = flags.steps
    else:
        train_steps = main_state.configs["NUM_RUNS_TOT"]
    if (flags.save):
        save_file = flags.save
    else:
        save_file = main_state.configs["MODEL_DIR"]
    if (flags.verbose):
        main_state.verbose = True
    if (not flags.lap_time):
        main_state.default_reward = 0.01
    if (flags.entropy):
        main_state.entropy_reg = flags.entropy
    else:
        main_state.entropy_reg = main_state.configs["DEF_ENTROPY"]
    if (flags.ds_reward):
        main_state.ds_reward = True
    else:
        main_state.ds_reward = False

    # Initialize environment
    # TODO: Define max_episode_timesteps from CONFIG file
    #environment = Environment.create(
    #    environment=PD_Environment, max_episode_timesteps=100
    #)
    environment = PD_Environment(reset_announce, drive_announce, main_state)
    environment.publish_markers()

    # Initialize Agent
    agent = Agent.create(
        agent="ppo",
        network=custom_network(),
        batch_size=5,
        entropy_regularization=main_state.entropy_reg,
        environment=environment,
        max_episode_timesteps=2000,
        learning_rate=0.002,
        #tracking="all")
        tracking="all",
        summarizer=main_state.configs["SUM_DIR"])
    if (flags.load):
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory=files[0],
                               filename=files[1],
                               environment=environment,
                               agent=agent)
        else:
            agent = Agent.load(directory=flags.load,
                               environment=environment,
                               agent=agent)
        print("Agent loaded from " + flags.load)
    #The agent network configuration could be printed with agent.get_architecture()

    # Run the save loop
    for i in range(
            int((train_steps - 1) / main_state.configs["SAVE_RUNS"]) + 1):
        run(environment, agent, main_state, main_state.configs["SAVE_RUNS"],
            10000, False)
        agent.save(save_file, format="checkpoint", append="episodes")
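As the comment inside train_GUI() states, the network configuration can be printed with agent.get_architecture(). A minimal sketch of doing so, intended to sit right after the load block inside train_GUI():

    # Print a textual summary of the agent's network configuration.
    print(agent.get_architecture())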
Example #13
import os
import socket
import numpy as np
import csv
from collections import deque

from tensorforce.agents import Agent

saver_restore = os.getcwd() + "/saver_data/"

agent = Agent.load(directory=saver_restore)

# If folder does not exist, create it
if not os.path.exists("frequency_response"):
    os.mkdir("frequency_response")

### System parameters ###

# Vortex shedding cycle
t_vs = 6.860
# Forcing sampling time
t_s = 1.0/100.0
# Action time of controller
t_a = 0.5

### Analysis parameters ###

length = 100
start_freq = 1/length
stop_freq = 10.0*(1/t_vs)
num_freqs = 50
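The parameters above presumably define a frequency sweep for the response analysis. A hedged sketch of one way the grid could be built; linear spacing is an assumption, a logarithmic grid would be equally plausible:

# Hypothetical frequency grid derived from the parameters above.
freqs = np.linspace(start_freq, stop_freq, num_freqs)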
Example #14
    def __init__(self, server_address, name, chosen_team_index=0, output_path=None, rounds_to_play=1, log=False,
                 mode=Mode.TRAIN):
        super(Bot, self).__init__(server_address, name, chosen_team_index, rounds_to_play)

        self.mode = mode
        self.episode = 1
        self.stich_number = 0
        self.played_cards_in_game = []
        self.rejected_cards = []
        self.chose_trumpf = False

        self.out_of_color = np.zeros((3, 4), dtype=float)

        self.avg_stich_reward = 0
        self.rejected_per_session = 0
        self.avg_rejected_per_session = 0
        self.avg_trumpf_selection = np.ones(5, dtype=float) / 5
        self.avg_trumpf_points = np.zeros(5, dtype=float)
        self.avg_game_points = np.zeros(2, dtype=float)

        if log:
            self.log_game(output_path)
        model_path = os.path.join(output_path, 'checkpoints')
        trumpf_path = os.path.join(output_path, 'trumpf-checkpoints')

        if mode is Mode.TRAIN:
            os.makedirs(output_path, exist_ok=True)
            if os.path.exists(model_path):
                self.agent = Agent.load(model_path)
                self.trumpf_agent = Agent.load(trumpf_path)
            else:
                self.agent = Agent.create(agent='dqn',
                                          states=get_states(),
                                          actions=get_actions(),
                                          max_episode_timesteps=50,
                                          memory=50000,
                                          batch_size=32,
                                          target_sync_frequency=10,
                                          start_updating=10000,
                                          exploration=dict(
                                            type='decaying', decay='exponential', unit='episodes',
                                            num_steps=100000, initial_value=0.2, decay_rate=0.5),
                                          learning_rate=dict(
                                            type='decaying', decay='exponential', unit='episodes',
                                            num_steps=100000, initial_value=0.001, decay_rate=0.75),
                                          variable_noise=dict(
                                            type='decaying', decay='exponential', unit='episodes',
                                            num_steps=100000, initial_value=0.1, decay_rate=0.75),
                                          network=[
                                              [dict(type='retrieve', tensors=['cards']),
                                               dict(type='conv1d', size=512, window=9, stride=9, padding='valid'),
                                               dict(type='conv1d', size=256, window=1, stride=1, padding='valid'),
                                               dict(type='flatten'),
                                               dict(type='register', tensor='cards-embedding')],
                                              [dict(type='retrieve', tensors=['features']),
                                               dict(type='dense', size=64, activation='relu'),
                                               dict(type='register', tensor='features-embedding')],
                                              [dict(type='retrieve', aggregation='concat',
                                                    tensors=['cards-embedding', 'features-embedding']
                                               ),
                                               dict(type='dense', size=512, activation='relu'),
                                               dict(type='dense', size=256, activation='relu'),
                                               dict(type='dense', size=256, activation='relu')]
                                          ],
                                          discount=1.0,
                                          summarizer=dict(
                                            directory=os.path.join(output_path, "summary"),
                                            labels=['entropy', 'kl-divergence', 'loss', 'reward', 'update-norm']
                                          ),
                                          saver=dict(
                                            directory=model_path,
                                            frequency=SAVE_EPISODES  # save checkpoint every 100 updates
                                          )
                )
                self.trumpf_agent = Agent.create(agent='dqn',
                                      states=get_trumpf_states(),
                                      actions=get_trumpf_actions(),
                                      max_episode_timesteps=2,
                                      memory=2000,
                                      batch_size=32,
                                      target_sync_frequency=10,
                                      start_updating=200,
                                      exploration=dict(
                                          type='decaying', decay='exponential', unit='episodes',
                                          num_steps=30000, initial_value=0.2, decay_rate=0.5),
                                      learning_rate=dict(
                                          type='decaying', decay='exponential', unit='episodes',
                                          num_steps=30000, initial_value=0.001, decay_rate=0.75),
                                      variable_noise=dict(
                                          type='decaying', decay='exponential', unit='episodes',
                                          num_steps=100000, initial_value=0.1, decay_rate=0.75),
                                      network=[
                                          [dict(type='retrieve', tensors=['cards']),
                                           dict(type='conv1d', size=128, window=1, stride=1, padding='valid'),
                                           dict(type='conv1d', size=64, window=1, stride=1, padding='valid'),
                                           dict(type='flatten'),
                                           dict(type='dense', size=128, activation='relu'),
                                           dict(type='dense', size=64, activation='relu')]
                                      ],
                                      discount=1.0,
                                      summarizer=dict(
                                          directory=os.path.join(output_path, "summary/trumpf"),
                                          labels=['entropy', 'kl-divergence', 'loss', 'reward', 'update-norm']
                                      ),
                                      saver=dict(
                                          directory=trumpf_path,
                                          frequency=2
                                      )
                )
        else:
            self.agent = Agent.load(model_path)
            self.trumpf_agent = Agent.load(trumpf_path)
Example #15
    def test_explicit_extended(self):
        self.start_tests(name='explicit extended')

        # filename
        agent, environment = self.prepare()

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory, filename='test')
        agent.close()

        agent = Agent.load(directory=self.__class__.directory, filename='test')

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'test-1.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()

        # no timestep
        agent, environment = self.prepare()

        agent.initialize()
        states = environment.reset()

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.save(directory=self.__class__.directory, append_timestep=False)
        agent.close()

        agent = Agent.load(directory=self.__class__.directory)

        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        agent.close()
        environment.close()

        os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
        os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
        os.remove(path=os.path.join(self.__class__.directory,
                                    'agent.data-00000-of-00001'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.index'))
        os.remove(path=os.path.join(self.__class__.directory, 'agent.meta'))
        os.rmdir(path=self.__class__.directory)

        self.finished_test()
Example #16
 check_dir = directory + '/checkpoints/'
 print('\nTesting {dir}'.format(dir=directory))
 old_epochs = 27
 agent = Agent.load(
     directory=check_dir,
     filename='agent-{oe}'.format(oe=old_epochs - 1),
     format='hdf5',
     environment=environment,
     agent='ppo',
     network=[
         dict(type='lstm',
              size=lstm_units,
              horizon=lstm_horizon,
              activation='relu'),
     ],
     baseline=[
         dict(type='lstm',
              size=lstm_units,
              horizon=lstm_horizon,
              activation='relu')
     ],
     baseline_optimizer=dict(optimizer='adam',
                             learning_rate=baseline_lr),
     learning_rate=policy_lr,
     batch_size=batch_size,
     tracking=['distribution'],
     discount=discount,
     states=dict(features=dict(type=float, shape=(147, )), ),
     actions=dict(type=int, num_values=num_actions + num_classes))
 # Parameters for test loop
 episode = 0
 correct = 0
Example #17
                         update_frequency=steps_per_episode)

    runner = Runner(agent=agent, environment=env)

    runner.run(num_episodes=num_episodes, evaluation=False)
    agent.save(directory='minigrid_checkpoints/{env}/'.format(env=env_name),
               filename='model-{ep}-{env}'.format(ep=num_episodes,
                                                  env=env_name))

    ########### TEST with visualization #############

    print('Testing agent')
    if env_name == 'MiniGrid-DistShift1-v0':
        env_name = 'MiniGrid-DistShift2-v0'
    env = wrappers.gym.make(env_name)
    env = wrappers.ImgObsWrapper(env)
    num_actions = env.action_space.n
    env = Environment.create(environment=env,
                             max_episode_timesteps=steps_per_episode,
                             states=dict(type='float', shape=obs_shape),
                             actions=dict(type='int', num_values=num_actions),
                             visualize=True)
    # Agent creation
    agent = Agent.load(
        directory='minigrid_checkpoints/{env}/'.format(env=env_name),
        filename='model-{ep}-{env}-1.data-00000-of-00001'.format(
            ep=num_episodes, env=env_name),
        environment=env)
    runner = Runner(agent=agent, environment=env)
    runner.run(num_episodes=50, evaluation=True)
Example #18
 agent = Agent.load(
     directory=directory,
     filename='agent-{x}'.format(x=old_episodes),
     format='hdf5',
     environment=environment,
     agent='ppo',
     network=[
         # First module: shared dense block
         [
             dict(type='dense', size=64, activation='relu'),
             dict(type='dense', size=64, activation='relu'),
             dict(type='dense', size=64, activation='relu'),
             dict(type='lstm',
                  size=64,
                  horizon=steps_per_episode,
                  activation='relu'),
         ],
     ],
     baseline=[
         dict(type='dense', size=64, activation='relu'),
         dict(type='dense', size=64, activation='relu')
     ],
     baseline_optimizer=dict(optimizer='adam',
                             learning_rate=baseline_lr),
     learning_rate=policy_lr,
     batch_size=10,
     tracking=['distribution'],
     discount=0.99,
     states=dict(
         # 64 features + 3 positional coding
         features=dict(type=float, shape=(67, )), ),
     actions=dict(movement=dict(type=int, num_values=num_actions),
                  classification=dict(type=int,
                                      num_values=len(class_names))),
     entropy_regularization=0.01,
     # exploration=0.1
 )
Example #19
    #    max_episode_timesteps=1e6
)
runner.run(num_episodes=num_episodes)

t_ag.save(directory='model-numpy', format='numpy', append='episodes')

# Close agent separately, since created separately

#runner.run(num_episodes=100, evaluation=True)
t_ag.close()
t_env.close()
runner.close()

#%% plot results
df = dl_utils.create_df_of_outputs(test_env.swmm_env, route_step)
fig_name = '3_using_runner_' + str(num_episodes) + '_episodes'
dl_utils.plt_key_states(fig_name, df, test_env.swmm_env)

#%% testing

#load trained model
train_env = dl_utils.custom_tensorflow_env(model_name='theta_test',
                                           threshold=threshold,
                                           scaling=scaling)
route_step = test_env.swmm_env.env.sim._model.getSimAnalysisSetting(
    tkai.SimulationParameters.RouteStep.value)
tr_env = Environment.create(environment=train_env)
t_ag = Agent.load(directory='model-numpy',
                  format='numpy',
                  environment=tr_env)  # environment created just above