Exemplo n.º 1
0
 def test_evolution(self):
     """Smoke-test a single generation: run, score, then evolve.

     Every individual reported by ``get_status()`` is run once on the fixed
     input ``[1, 2, 3]`` and given a score of 1, after which ``evolve()``
     is called once. The test makes no assertions; it passes as long as
     nothing raises.
     """
     p = Population(10, 3, 1)
     # NOTE(review): assumes get_status() returns a dict-like mapping of
     # species key -> number of individuals -- confirm against Population.
     for species, size in p.get_status().items():
         for i in range(size):
             # The run result is not inspected here, so it is not collected.
             p.run(species, i, [1, 2, 3])
             p.set_score(species, i, 1)
     p.evolve()
Exemplo n.º 2
0
    def test_flow(self):
        """Exercise 100 generations with random scores, then print a sample.

        In each generation every individual reported by ``get_status()`` is
        run on the fixed input ``[1, 2, 3]``; the first element of each run
        result is collected and printed per species, and the individual is
        given a random integer score from ``random.randrange(1, 10)``. The
        population is evolved at the end of each generation. The test makes
        no assertions.
        """
        population = Population(10, 3, 1)
        for _generation in range(100):
            # NOTE(review): assumes get_status() returns a dict-like mapping
            # of species key -> number of individuals -- confirm.
            for species, size in population.get_status().items():
                first_outputs = []
                for index in range(size):
                    # Run and score stay interleaved per individual so the
                    # call order seen by the population is unchanged.
                    result = population.run(species, index, [1, 2, 3])
                    first_outputs.append(result[0])
                    population.set_score(species, index,
                                         random.randrange(1, 10))
                print(species, first_outputs)
            population.evolve()

        # Print a sample: the first entry stored under the first key of
        # population.population.
        sample_key = next(iter(population.population))
        sample = population.population[sample_key][0]
        Printer(sample).print()
Exemplo n.º 3
0
def train():
    """Train a population on ``LunarLanderContinuous-v2`` until interrupted.

    State is loaded from ``FULLNAME``; if that raises ``FileNotFoundError``
    a new ``Population`` is created from the environment's observation and
    action space sizes.

    Each pass of the main loop plays one episode per individual, stores the
    episode's summed reward as that individual's score, prints the
    generation, the best episode reward and the population keys, saves to
    ``AUTOSAVE`` (a ``RuntimeError`` while saving is reported, not fatal)
    and evolves the population.

    On ``KeyboardInterrupt`` the state is saved to ``FULLNAME`` and the
    process exits through ``sys.exit('Bye!')``. If that save raises
    ``RuntimeError`` the error is printed and execution carries on.
    """
    env = gym.make('LunarLanderContinuous-v2')

    try:
        p = Population.load(FULLNAME)
    except FileNotFoundError as e:
        print(str(e) + '. Creating new state')
        p = Population(10000, env.observation_space.shape[0],
                       env.action_space.shape[0])
    else:
        print('Existing state loaded')

    while True:
        try:
            best_reward = -99999
            # NOTE(review): assumes get_status() returns a dict-like mapping
            # of species key -> number of individuals -- confirm.
            for species, size in p.get_status().items():
                for index in range(size):
                    ob = env.reset()
                    episode_reward = 0
                    done = False
                    # One full episode for this individual.
                    while not done:
                        raw_action = p.run(species, index, ob)
                        action = action_final_activation(raw_action)
                        ob, reward, done, _info = env.step(action)
                        episode_reward += reward
                    p.set_score(species, index, episode_reward)
                    best_reward = np.max([episode_reward, best_reward])
            print(p.generation, best_reward, p.population.keys())

            # Autosave every generation; a failed save must not stop training.
            try:
                p.save(AUTOSAVE)
            except RuntimeError as e:
                print('error saving: {}'.format(str(e)))

            p.evolve()

        except KeyboardInterrupt:

            try:
                print('\nsaving before exit')
                p.save(FULLNAME)
                sys.exit('Bye!')
            except RuntimeError as e:
                print('error saving: {}'.format(str(e)))
                min_position = np.min([min_position, ob[0]])
                reward_sum = reward_sum + reward
                if done:
                    break

            reward_sum = reward_sum + ((max_position + 1.2) - (min_position + 1.2))
            max_reward = np.max([reward_sum, max_reward])
            p.set_score(s, i, reward_sum)

            if max_position >= target_reward:
                winner = (s, i)
                break

        if max_position >= target_reward:
            break
    print('Generation: {} Score: {} Max position: {} Population: {}'.format(p.generation, reward_sum, max_position, p.population.keys()))
    if max_position >= target_reward:
        break
    p.evolve()

# Announce the winning species, then replay the winning individual.
print('Species {} is the winner'.format(winner[0]))

ob = env.reset()

# Endless demo loop: there is no exit condition here; a finished episode
# only triggers an environment reset.
while True:
    action = p.run(winner[0], winner[1], ob)
    # The index of the largest output is what gets passed to env.step().
    ob, reward, done, info = env.step(np.argmax(action))
    env.render()
    if done:
        ob = env.reset()