Example #1
import numpy as np  # frame() converts observations via np.float32

# Game and Direction come from the project's own modules.


class PacMan:
    """Environment wrapper: exposes the game through a reset/step
    interface and stacks the last num_frames cropped frames."""

    def __init__(self, num_frames, radius, level):
        self.game = Game(level, radius)
        self.direction_dict = dict(enumerate(Direction))

        self.num_channels, height, width = self.game.array.shape
        self.state_shape = (self.num_channels * num_frames, height, width)
        self.num_frames = num_frames
        self.num_actions = len(Direction)
        self.frames = []
        self.radius = radius

    @property
    def score(self):
        return self.game.score

    @property
    def won(self):
        return self.game.state is Game.State.WON

    def frame(self):
        # Crop a (2 * radius + 1)-square window centred on Pac-Man.
        y, x = self.game.pacman

        frame = self.game.array
        frame = frame[:, y - self.radius:y + self.radius + 1]
        frame = frame[:, :, x - self.radius:x + self.radius + 1]
        frame = frame.astype(np.float32).tolist()

        return frame

    def render(self):
        print(self.game)

    def reset(self):
        self.game.reset(self.radius)

        # Seed the frame stack: take the first frame, then step with a
        # fixed direction until num_frames frames are collected.
        self.frames = self.frame()
        for _ in range(self.num_frames - 1):
            self.game.step(self.direction_dict[0])
            self.frames += self.frame()

        return self.frames

    def reward(self, rewards):
        # Shaped reward: food short-circuits to a flat bonus; otherwise
        # combine a step penalty, a survival bonus, and power-up,
        # ghost, and loss terms.
        if rewards.food:
            return 10.0
        return (-2.25 +
                2.5 * (self.game.state is Game.State.ACTIVE) +
                22.5 * rewards.powerup +
                75.0 * rewards.ghost -
                65.0 * (self.game.state is Game.State.LOST))

    def step(self, action):
        direction = self.direction_dict[action]

        rewards = self.game.step(direction)

        end = self.game.state in (Game.State.WON, Game.State.LOST)
        # Drop the oldest frame from the stack, then append the newest.
        self.frames[:self.num_channels] = []
        self.frames += self.frame()
        rewards = self.reward(rewards)

        return end, self.frames, rewards
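
The frame-stacking trick above (the state is the channel-wise concatenation of the last num_frames observations) can be shown in isolation. A minimal, self-contained NumPy sketch; the shapes and names below are illustrative assumptions, not taken from the project:

import numpy as np

NUM_CHANNELS, HEIGHT, WIDTH = 2, 9, 9   # assumed per-frame layout
NUM_FRAMES = 4                          # frames kept in the state

# Seed the stack with copies of an initial frame (the wrapper above
# instead steps the game to collect real frames).
first = np.zeros((NUM_CHANNELS, HEIGHT, WIDTH), dtype=np.float32)
stack = [first] * NUM_FRAMES

def push(stack, frame):
    # Drop the oldest frame and append the newest, as step() does.
    return stack[1:] + [frame]

stack = push(stack, np.ones((NUM_CHANNELS, HEIGHT, WIDTH), dtype=np.float32))

# The network input is the concatenation along the channel axis.
state = np.concatenate(stack, axis=0)
print(state.shape)  # (NUM_CHANNELS * NUM_FRAMES, HEIGHT, WIDTH) == (8, 9, 9)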
Example #2
# Assumes Keras' load_model; DeepQ and Game come from the project's modules.
from keras.models import load_model


def play_deep_q_model(level='level-0', model_path='./nn_model_level_0_2k_iter.h5'):
    # Restore the trained deep Q-network from its .h5 checkpoint.
    dq_model = DeepQ(level)
    dq_model.model = load_model(model_path)

    def ai_func(current_game_state):
        return dq_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
Example #3
def play_q_learning_model(level='level-0', model_path='./q_table.pkl'):
    # Restore the pickled Q-table (QLearn and load_pickle are project modules).
    q_model = QLearn()
    q_model.q_table = load_pickle(model_path)

    def ai_func(current_game_state):
        return q_model.pick_optimal_action(current_game_state, printing=False)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
Example #4
def run_with_game_loop(level='level-2', model_path='./nn_model4500.h5'):
    # Same pattern as play_deep_q_model above, with a different level
    # and checkpoint.
    dq_model = DeepQ(level)
    dq_model.model = load_model(model_path)

    def ai_func(current_game_state):
        return dq_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
Example #5
def run_with_game_loop(level='level-0', model_path='./q_table.pkl'):
    q_model = QLearn()
    q_model.q_table = load_pickle(model_path)

    def ai_func(current_game_state):
        return q_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
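
Examples #2 through #5 all follow the same pattern: the trained policy is injected into the game loop as a callback mapping the current game state to an action, so the loop never needs to know which model produced the move. A minimal self-contained sketch of that callback pattern; GameStub is illustrative and not the project's Game class:

class GameStub:
    def __init__(self, ai_function):
        # The loop only knows it holds a callable: state -> action.
        self.ai_function = ai_function
        self.state = 0

    def run(self, steps=3):
        for _ in range(steps):
            action = self.ai_function(self.state)
            print('state', self.state, '->', action)
            self.state += 1

# Any policy with the right signature plugs in unchanged.
GameStub(ai_function=lambda state: 'LEFT' if state % 2 else 'RIGHT').run()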
Example #6
    def train(self, level='level-0', num_episodes=10):
        game = Game(level)
        discount = 0.8  # gamma: weight on future rewards
        alpha = 0.2     # learning rate

        for i in range(num_episodes):
            # Log progress once per 50 episodes, not on every move.
            if i % 50 == 0:
                print("Iteration number", i)

            current_game_state = deepcopy(game.initial_game_state)

            episode_done = False
            while not episode_done:
                action = self.pick_action(current_game_state)
                new_game_state, action_event = get_next_game_state_from_action(
                    current_game_state, action.name)

                if action_event in (ActionEvent.WON, ActionEvent.LOST):
                    episode_done = True
                    if action_event == ActionEvent.WON:
                        print("Won!!")

                reward = calculate_reward_for_move(action_event)

                # Zero-initialise Q-values the first time a state is seen.
                if current_game_state not in self.q_table:
                    self.q_table[current_game_state] = {
                        key: 0.0 for key in Action.get_all_actions()}

                # Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
                old_value = self.q_table[current_game_state][action]
                self.q_table[current_game_state][action] = old_value + alpha * (
                    reward
                    + discount * self.compute_max_q_value(new_game_state)
                    - old_value)

                current_game_state = new_game_state

        save_pickle('./q_table', self.q_table, True)
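
The update inside the loop is the standard tabular Q-learning rule, Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)). A runnable, self-contained sketch with a dict-based Q-table; the string states and actions are stand-ins for the project's hashable game states and Action enum:

from collections import defaultdict

ALPHA, GAMMA = 0.2, 0.8
ACTIONS = ('UP', 'DOWN', 'LEFT', 'RIGHT')

# Q-table: state -> {action: value}, zero-initialised on first visit.
q_table = defaultdict(lambda: {a: 0.0 for a in ACTIONS})

def q_update(state, action, reward, next_state):
    # One tabular Q-learning step, matching the update in train().
    best_next = max(q_table[next_state].values())
    q_table[state][action] += ALPHA * (
        reward + GAMMA * best_next - q_table[state][action])

q_update('s0', 'RIGHT', reward=5.0, next_state='s1')
print(q_table['s0']['RIGHT'])  # 0.2 * (5.0 + 0.8 * 0.0 - 0.0) == 1.0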
Example #7
File: main.py Project: Tesloxi/Pac-Man
def menu():
    # event, QUIT, KEYDOWN and quit come from pygame; WIN, Game and
    # main are defined elsewhere in the project.
    run = True

    game = Game(WIN)
    game.update()

    # Sit on the menu screen until a key press starts the game.
    while run:
        for e in event.get():
            if e.type == QUIT:
                run = False
            elif e.type == KEYDOWN:
                game.menu_active = False
                main(game)
                run = False

    quit()
Example #8
    def newGame(
        self,
        layout,
        pacmanAgent,
        ghostAgents,
        display,
        quiet=False,
        catchExceptions=False,
    ):
        # Keep only as many ghost agents as the layout has ghost slots.
        agents = [pacmanAgent] + ghostAgents[: layout.getNumGhosts()]
        initState = GameState()
        initState.initialize(layout, len(ghostAgents))
        game = Game(agents, display, self, catchExceptions=catchExceptions)
        game.state = initState
        self.initialState = initState.deepCopy()
        self.quiet = quiet
        return game
Example #9
    def newGame(self, layout, agents, display, length, muteAgents,
                catchExceptions):
        initState = GameState()
        initState.initialize(layout, len(agents))
        # Randomly choose which team moves first.
        starter = random.randint(0, 1)
        print('%s' % ['Red', 'Blue'][starter])
        game = Game(agents,
                    display,
                    self,
                    startingIndex=starter,
                    muteAgents=muteAgents,
                    catchExceptions=catchExceptions)
        game.state = initState
        game.length = length
        game.state.data.timeleft = length
        # Capture-the-flag extras: draw the centre line if the display
        # supports it, and record each side's starting food count.
        if 'drawCenterLine' in dir(display):
            display.drawCenterLine()
        self._initBlueFood = initState.getBlueFood().count()
        self._initRedFood = initState.getRedFood().count()
        return game
Example #10
def test_setup():
    game = Game('level-0', True)
    game.run()
Example #11
File: main.py Project: mahoyen/ml-pacman
from pacman.game import Game
from deepq.ai_example import get_suggested_move

if __name__ == '__main__':
    game = Game('level-2')
    game.run()
Example #12
    def train(self):
        # Init game
        game = Game('level-2')
        current_game_state = deepcopy(game.game_state)

        # Init Memory
        memory = Memory(max_size=10)

        # Q-network and discount factor.
        model = DeepQ().model
        gamma = 0.95

        done = False

        count = 0

        # Pre-train: fill memory using a scripted policy (move right
        # for the first 8 steps, then left until the episode is lost).
        while not done:
            pygame.event.get()
            # action = pick_action(current_game_state)
            action = Action.RIGHT
            if count > 8:
                action = Action.LEFT
            next_game_state, action_event = get_next_game_state_from_action(
                current_game_state, action.value)

            reward = calculate_reward_for_move(action_event)

            print(count, action.value, action_event)
            game.game_state = next_game_state
            # print(game.game_state)
            game.animate()

            if action_event == ActionEvent.LOST:
                done = True

            experience = Experience(current_state=current_game_state,
                                    action=action,
                                    reward=reward,
                                    next_state=next_game_state,
                                    done=done)
            memory.add(experience)

            # nparray = np.asarray(next_game_state.get_text_representation_of_gamestate())
            # print(nparray.shape)

            current_game_state = deepcopy(next_game_state)

            count += 1
            # if count == 10:
            #     print(count)
            #     break

        y_train = []  # Target Q-value
        batch = memory.get_mini_batch(batch_size=20)

        sample: Experience
        for sample in batch:
            y_target = model.predict(
                sample.current_state)  # TODO: wrap in list?
            # Terminal state: Q-target = reward
            if sample.done:
                y_target[0][sample.action] = sample.reward
            else:
                # Non-terminal: Q-target = reward + gamma * max Q(s', a')
                y_target[0][sample.action] = (
                    sample.reward +
                    gamma * np.max(model.predict(sample.next_state)))
            y_train.append(y_target[0])
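
The per-sample target computation this loop builds toward can also be written batch-wise. A self-contained NumPy sketch of the Bellman targets, with integer-encoded actions and fixed arrays standing in for model.predict:

import numpy as np

GAMMA = 0.95

# Stand-ins for model.predict on a batch of 3 transitions, 4 actions.
q_current = np.zeros((3, 4), dtype=np.float32)
q_next = np.array([[1.0, 2.0, 0.5, 0.0],
                   [0.0, 0.0, 0.0, 0.0],
                   [3.0, 1.0, 2.0, 0.5]], dtype=np.float32)

actions = np.array([1, 0, 2])           # action taken in each transition
rewards = np.array([5.0, -10.0, 1.0])
done = np.array([False, True, False])   # did the episode end here?

# Bellman target: r if terminal, else r + gamma * max_a' Q(s', a').
targets = q_current.copy()
targets[np.arange(3), actions] = rewards + (~done) * GAMMA * q_next.max(axis=1)
print(targets)  # 6.9, -10.0 and 3.85 in the acted columns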
Example #13
from pacman.game import Game


if __name__ == '__main__':
    game = Game('level-2', True)
    game.run()