def train(self):
        MiscUtils.rm_hist()

        for epi in range(20000):
            s = self.sim.reset()
            if self.best_score > Config.max_fitness():
                break

            while True:
                a = self.renet.choose_action(s)
                # step returns (next_state, done, reward)
                s_, done, r = self.sim.step(a)

                self.renet.store_transition(s, a, r, s_)

                # start training only once the replay memory has filled up
                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()
                    if done:
                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                s = s_

        MiscUtils.finish_info()
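The loop above assumes a DQN-style agent object (self.renet) with an action-selection method, a replay memory written to by store_transition, and a learn step that only fires once memory_counter exceeds MEM_CAP. The stub below is a minimal sketch of that assumed interface; the names mirror the snippet, but ReNetStub, the MEM_CAP value, and all bodies are placeholders rather than the project's actual implementation.

MEM_CAP = 2000  # assumed replay-buffer capacity; the real constant is defined elsewhere

class ReNetStub:
    """Placeholder for the agent interface the training loop relies on."""

    def __init__(self):
        self.memory_counter = 0          # number of transitions stored so far
        self.memory = []                 # replay buffer of (s, a, r, s_) tuples

    def choose_action(self, state):
        """Pick an action for the current state (e.g. epsilon-greedy over Q-values)."""
        return 0                         # placeholder action

    def store_transition(self, s, a, r, s_):
        """Append one transition to the replay buffer, overwriting the oldest when full."""
        if len(self.memory) >= MEM_CAP:
            self.memory.pop(0)
        self.memory.append((s, a, r, s_))
        self.memory_counter += 1

    def learn(self):
        """One training step on a minibatch sampled from the replay buffer."""
        pass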
    def train(self):
        def plot():
            plt.cla()
            if len(self.range_hist) > 2000:
                self.range_hist.pop(0)
            self.range_hist.append(self.sim.travel_range)

            plt.plot(range(len(self.range_hist)),
                     self.range_hist,
                     'b^-',
                     linewidth=0.5)
            plt.xlabel('Testing number')
            plt.ylabel('Fitness')
            plt.pause(0.01)
            if self.sim.travel_range > Config.max_fitness():
                plt.savefig('res/rl_statistics.png')

        MiscUtils.rm_hist()

        print('*' * 50)
        print('Gathering experience...')
        print('*' * 50)

        for epi in range(20000):
            s = self.sim.reset()
            if self.best_score > Config.max_fitness():
                break

            while True:
                a = self.renet.choose_action(s)
                s_, done, r = self.sim.step(a)

                self.renet.store_transition(s, a, r, s_)

                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()
                    if done:
                        plot()
                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                s = s_

        MiscUtils.finish_info()
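The nested plot() keeps a sliding window of the last 2000 episode scores, refreshes a live figure, and saves a snapshot once the fitness target is passed. Below is a self-contained sketch of that pattern driven by dummy scores so it can be tried without the simulator; the window length matches the snippet, while the target value and output file name are assumptions.

import random
import matplotlib.pyplot as plt

range_hist = []
WINDOW = 2000              # same sliding-window length as the snippet
FITNESS_TARGET = 100.0     # stand-in for Config.max_fitness()

plt.ion()                  # keep the figure responsive while the loop runs
for episode in range(200):
    score = random.uniform(0.0, 120.0)    # dummy stand-in for sim.travel_range
    if len(range_hist) >= WINDOW:
        range_hist.pop(0)
    range_hist.append(score)

    plt.cla()
    plt.plot(range(len(range_hist)), range_hist, 'b^-', linewidth=0.5)
    plt.xlabel('Testing number')
    plt.ylabel('Fitness')
    plt.pause(0.01)

    if score > FITNESS_TARGET:
        plt.savefig('rl_statistics.png')   # keep a snapshot once the target is exceeded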
    def train(self):
        MiscUtils.rm_hist()

        local_dir = os.path.dirname(__file__)
        config_path = os.path.join(local_dir, 'config-feedforward')
        config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation,
                             config_path)
        p = neat.Population(config)
        p.add_reporter(neat.StdOutReporter(True))
        stats = neat.StatisticsReporter()
        p.add_reporter(stats)
        p.add_reporter(neat.Checkpointer(50))
        winner = p.run(self.eval_genomes, 5000)

        MiscUtils.finish_info()
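Population.run expects a fitness callback taking (genomes, config); in the snippet this is the bound method self.eval_genomes. The sketch below shows that neat-python contract with a placeholder score; the real project would run each network in its simulator and assign the resulting travel range as the fitness.

import neat

def eval_genomes(genomes, config):
    """Fitness callback with the signature neat-python's Population.run expects."""
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        # Feed a dummy observation; the project would use its radar data instead.
        output = net.activate([0.0] * config.genome_config.num_inputs)
        genome.fitness = float(sum(output))   # placeholder fitness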
    def train(self):

        MiscUtils.rm_hist()

        print('*' * 50)
        print('Starting Reinforcement Learning')
        print('*' * 50)

        for epi in range(2000000):

            radar_data = self.sim.reset()

            if self.best_score > self.max_fitness:
                break

            while True:
                action = self.renet.choose_action(radar_data)

                radar_data_, done, r = self.sim.step(action)

                self.renet.store_transition(radar_data, action, r, radar_data_)

                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()

                    if done:

                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                radar_data = radar_data_
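All four loops rely on the same simulator contract: reset() returns the initial observation, step(action) returns (next_state, done, reward) rather than the Gym-style (obs, reward, done, info) ordering, travel_range holds the running score, and save_gif() dumps a replay of the episode. SimStub below only documents that interface; every body is a placeholder, not the project's simulator.

class SimStub:
    """Placeholder mirroring the simulator interface used by the training loops."""

    def __init__(self):
        self.travel_range = 0.0      # running score read for logging and best-score checks

    def reset(self):
        """Start a new episode and return the initial (radar) observation."""
        self.travel_range = 0.0
        return [0.0]                 # placeholder observation

    def step(self, action):
        """Advance one tick; note the (next_state, done, reward) return order."""
        return [0.0], True, 0.0

    def save_gif(self):
        """Save a replay of the current episode (a no-op in this stub)."""
        pass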