Пример #1
0
def get_agent(name):
    """Return a ``TpFinalPlayer`` called ``name`` after 3000 practice games.

    The player is paired with a ``RandomPlayer`` named 'PC' inside a
    ``TpFinalGame``. A notice is printed before the games start and
    ``PerformanceCounter.show_statistics()`` is called once they are over.
    """
    learner = TpFinalPlayer(name)
    opponent = RandomPlayer('PC')
    game = TpFinalGame([learner, opponent])
    print('Training with a random player, please wait...')

    # The agent list is assigned again after construction, as the original
    # code does, so the counter below receives exactly this list.
    game.agents = [learner, opponent]
    counter = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerB'])

    for _ in range(3000):
        game.run()
    counter.show_statistics()

    return learner
Пример #2
0
                else:
                    return -1
        return 0


if __name__ == '__main__':
    # Script entry point: train a TicTacToePlayer against a random opponent,
    # save and reload it, let the two copies play each other, then offer one
    # game against a human.
    a = TicTacToePlayer('X')
    b = RandomPlayer('O')
    c = HumanPlayer('O')
    game = TicTacToeGame([a, b])
    print('Training with a random player, please wait...')
    game.agents = [a, b]
    for i in range(3000):
        game.run()

    # Persist the trained agent and load it back as a second player.
    a.dump('qlearner_agent')

    d = TicTacToePlayer.load('qlearner_agent')
    # The reloaded copy is switched to the 'O' side so it can face `a`.
    d.play_with = 'O'

    # 3000 games between the trained agent and its reloaded copy, with
    # statistics collected by the PerformanceCounter.
    game.agents = [a, d]
    per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerD'])
    for i in range(3000):
        game.run()
    per.show_statistics()

    # One final game against the human player.
    game.agents = [a, c]
    print('Do you like to play?')
    game.run()
    # Parenthesised form: prints the same on Python 2 and is valid on
    # Python 3, matching the other print calls in this block.
    print(game.state)
Пример #3
0
        self.last_agent = None
        random.shuffle(self.agents)
        self.run()

    def reward(self, state, agent):
        """Score ``state`` for ``agent``.

        Returns 0 while ``self.is_completed(state)`` is falsy. Once it is
        truthy, returns -1 when ``agent`` is the very object stored in
        ``self.last_agent`` and 1 for every other agent.
        """
        if not self.is_completed(state):
            return 0
        # Identity comparison, not equality: only the exact stored agent
        # object gets the negative score.
        return -1 if self.last_agent is agent else 1


def get_agent(name):
    """Return a ``Player`` called ``name`` after 3000 training rounds.

    The player shares a ``TrainingEnvironment`` with a ``RandomPlayer``
    named 'random'; ``play()`` is called on that environment 3000 times
    before the player is handed back.
    """
    learner = Player(name)
    arena = TrainingEnvironment([learner, RandomPlayer('random')])
    rounds_left = 3000
    while rounds_left > 0:
        arena.play()
        rounds_left -= 1
    return learner


if __name__ == '__main__':
    # Demo: 3000 rounds between a Player and a RandomPlayer, followed by the
    # statistics gathered by the PerformanceCounter.
    learner = Player('name')
    opponent = RandomPlayer('random')
    arena = TrainingEnvironment([learner, opponent])

    # The counter is created before any round is played, as in the
    # original statement order.
    stats = PerformanceCounter(arena.agents, ['q', 'random'])
    for _ in range(3000):
        arena.play()

    stats.show_statistics()
Пример #4
0
    def is_completed(self, state):
        """Return True when ``state`` has an entry in ``self.rewards``.

        ``self.rewards`` is used as a mapping keyed by state; any state
        without an entry is reported as not completed.
        """
        # Test membership on the mapping itself. This gives the same answer
        # as going through ``keys()`` but avoids building and scanning a key
        # list on Python 2.
        return state in self.rewards

    def reward(self, state, agent):
        """Look up the reward stored for ``state`` in ``self.rewards``.

        ``agent`` is accepted but not consulted. States with no entry
        yield -0.08.
        """
        fallback = -0.08  # value returned for every state without an entry
        return self.rewards.get(state, fallback)


class WumpusProblem(RLProblem):
    """``RLProblem`` that offers the same four moves in every state."""

    def actions(self, state):
        """Return the list of move names; ``state`` is not consulted.

        A fresh list is built on every call. The spelling 'rigth' is
        reproduced exactly.
        NOTE(review): presumably other code matches on this exact string;
        confirm before correcting the spelling.
        """
        return ['up', 'down', 'left', 'rigth']


if __name__ == '__main__':
    # Script entry point: run a TDQLearner through 10000 games in the Wumpus
    # environment, show the collected statistics, then run one more game
    # with a viewer attached.
    agent = TDQLearner(WumpusProblem(),
                       temperature_function=make_exponential_temperature(1000, 0.01),
                       discount_factor=0.8)
    game = WumpusEnvironment(agent)

    # Created before the training loop, as in the original statement order.
    p = PerformanceCounter([agent], ['Q-learner Agent'])

    # Parenthesised form: prints the same on Python 2 and is valid on
    # Python 3.
    print('Training...')
    for i in range(10000):
        game.run()

    p.show_statistics()
    game.run(viewer=WumpusViewer(game))


Пример #5
0
    def is_completed(self, state):
        """Return True when ``state`` has an entry in ``self.rewards``.

        ``self.rewards`` is used as a mapping keyed by state; any state
        without an entry is reported as not completed.
        """
        # Test membership on the mapping itself. This gives the same answer
        # as going through ``keys()`` but avoids building and scanning a key
        # list on Python 2.
        return state in self.rewards

    def make_reward(self, state, agent):
        """Look up the reward stored for ``state`` in ``self.rewards``.

        ``agent`` is accepted but not consulted. States with no entry
        yield -0.08.
        """
        fallback = -0.08  # value returned for every state without an entry
        return self.rewards.get(state, fallback)


class GridProblem(RLProblem):
    """``RLProblem`` that offers the same four moves in every state."""

    def actions(self, state):
        """Return the list of move names; ``state`` is not consulted.

        A fresh list is built on every call. The spelling 'rigth' is
        reproduced exactly.
        NOTE(review): presumably other code matches on this exact string;
        confirm before correcting the spelling.
        """
        return ['up', 'down', 'left', 'rigth']

if __name__ == '__main__':
    # Demo: run a TDQLearner through 3000 games of GridGame, show the
    # collected statistics, then run one more game with a Viewer attached.
    problem = GridProblem()
    temperature = make_exponential_temperature(1000, 0.005)
    learner = TDQLearner(problem,
                         temperature_function=temperature,
                         discount_factor=0.9)
    grid_game = GridGame(learner)

    # Created before the training loop, as in the original statement order.
    stats = PerformanceCounter([learner], ['Q-learner Agent'])

    for _ in range(3000):
        grid_game.run()

    stats.show_statistics()

    grid_game.run(viewer=Viewer())