Example No. 1
    def __init__(self, **kwargs):
        self.game = FlappyBird()
        self.p = PLE(self.game, **kwargs)
        self.action_set = self.p.getActionSet()

        # The observation has 3 features: see self._get_obs
        self.observation_space = spaces.Discrete(3)
        # Two possible actions: flap or do nothing
        self.action_space = spaces.Discrete(2)
Example No. 2
    def __init__(self, reward_values=None, reward_discount=0.99, pip_gap=100,
                 display_screen=True, fps=30, force_fps=True):
        if reward_values is None:
            reward_values = {}
        self.game = PLE(FlappyBird(pipe_gap=pip_gap), reward_values=reward_values,
                        fps=fps, force_fps=force_fps, display_screen=display_screen)
        self.game.init()
        self.actions = self.game.getActionSet()
        self.reward_discount = reward_discount
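The reward_values dict built here is passed straight through to PLE's constructor, which overlays it on its default reward table. A minimal sketch of the kind of shaping dict one might pass; the concrete values are illustrative assumptions, not taken from this example:

from ple import PLE
from ple.games.flappybird import FlappyBird

# Illustrative reward shaping; PLE merges these keys over its defaults.
reward_shaping = {
    "positive": 1.0,   # reward for passing a pipe
    "tick": 0.1,       # small reward for every surviving frame
    "loss": -5.0,      # penalty when the bird crashes
}
game = PLE(FlappyBird(pipe_gap=100), reward_values=reward_shaping,
           fps=30, force_fps=True, display_screen=False)
game.init()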
Example No. 3
class FlappyBirdWrapper(Env):
    # Pass display_screen=True if you want the game window rendered
    def __init__(self, **kwargs):
        self.game = FlappyBird()
        self.p = PLE(self.game, **kwargs)
        self.action_set = self.p.getActionSet()

        # The observation has 3 features: see self._get_obs
        self.observation_space = spaces.Discrete(3)
        # Two possible actions: flap or do nothing
        self.action_space = spaces.Discrete(2)

    def _get_obs(self):
        # Read the raw game state from PLE
        state = self.game.getGameState()
        # Horizontal distance from the bird to the next pipe pair
        dist_to_pipe_horz = state["next_pipe_dist_to_player"]
        # Vertical offset between the bird and the top of the next pipe gap
        dist_to_pipe_bottom = state["player_y"] - state["next_pipe_top_y"]
        # The bird's current (vertical) velocity
        velocity = state['player_vel']
        # Pack the three values into a single observation array
        return np.array([dist_to_pipe_horz, dist_to_pipe_bottom, velocity])

    def reset(self):
        self.p.reset_game()
        return self._get_obs()

    def step(self, action):
        reward = self.p.act(self.action_set[action])
        obs = self._get_obs()
        done = self.p.game_over()
        return obs, reward, done, dict()

    def seed(self, *args, **kwargs):
        pass

    def render(self, *args, **kwargs):
        pass
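A minimal way to exercise a wrapper like this with a random policy; this is a sketch that assumes gym's spaces and the PLE imports the snippet relies on, and the explicit p.init() call is added because some PLE versions require it before the first frame:

env = FlappyBirdWrapper(display_screen=False)
env.p.init()                                   # set up PLE before the first act()
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()         # 0 or 1: flap / do nothing
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("episode reward:", total_reward)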
Example No. 4
            # Overwrite only the Q-value of the action that was taken
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        # Decay the exploration rate after each replay pass
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":
    game = FlappyBird()
    game.allowed_fps = 30
    p = PLE(game, fps=30, display_screen=True)
    agent = DQNAgent(8, 2)
    if os.path.exists('memory.h5py'):
        agent.load('memory.h5py')
    atexit.register(agent.save, 'memory.h5py')

    p.init()
    reward = 0.0
    max_score = 0
    current_score = 0
    while True:
        state = parse_game_state(game.getGameState())
        for frame in range(1000):
            action = np.argmax(agent.model.predict(state)[0])
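The listing is cut off right after the action is chosen. A hedged sketch of how the inner loop of this style of Keras DQN trainer commonly continues; remember, replay, and memory are typical methods of such an agent but are assumptions here and do not appear in the excerpt:

            # Hypothetical continuation of the loop above, not the original code
            reward = p.act(p.getActionSet()[action])                 # send the action to PLE
            next_state = parse_game_state(game.getGameState())
            done = p.game_over()
            agent.remember(state, action, reward, next_state, done)  # assumed replay memory
            state = next_state
            if done:
                p.reset_game()
                break
            if len(agent.memory) > 32:
                agent.replay(32)                                     # assumed mini-batch update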
Example No. 5
    # Convert the absolute pipe y coordinates into offsets relative to the bird
    pipe_1_top_y = list[3]
    pipe_1_bottom_y = list[4]
    list[3] = pipe_1_top_y - y_position
    list[4] = pipe_1_bottom_y - y_position

    pipe_2_top_y = list[6]
    pipe_2_bottom_y = list[7]
    list[6] = pipe_2_top_y - y_position
    list[7] = pipe_2_bottom_y - y_position

    # Only return an observation once all 8 state values are present
    if len(list) == 8:
        out = np.array([list])
        return out


game = FlappyBird()
p = PLE(game, fps=30, display_screen=False, force_fps=False)
p.init()
scores = []
length = []

agent = DQL.deep_learner(8, 2, p.getActionSet())

nb_games = 0
nb_max = 1000

for layer in agent.model.layers:
    print(layer.get_config())

step = 0
reward = 0
Example No. 6
        self.is_test = True

        self.p.reset_game()
        total_reward = 0

        while not self.p.game_over():
            action = self.select_action(self.state())
            total_reward += self.step(action)
        logger.info("Total-Reward: %s" % total_reward)


def seed_torch(seed_):
    torch.manual_seed(seed_)
    if torch.backends.cudnn.enabled:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True


if __name__ == '__main__':
    hyper = HYPER_PARAMS.copy()
    hyper['obs_dim'] = 8
    hyper['action_dim'] = 2
    hyper['epoch_log'] = 1000
    hyper['epochs'] = 1000000

    game = FlappyBird()

    agent = Agent(hyper, game)
    agent.train()
    agent.test()
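seed_torch only pins PyTorch's RNG; for a fully reproducible run one typically also seeds Python's random module and NumPy, for example:

import random

import numpy as np

SEED = 777             # illustrative value
random.seed(SEED)
np.random.seed(SEED)
seed_torch(SEED)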
Example No. 7
    def act(self, p, action):
        """
        执行动作
        :param p: 通过p来向游戏发出动作命令
        :param action: 动作
        :return: 奖励
        """
        r = p.act(self.action_set[action])
        return r


if __name__ == "__main__":
    # number of training episodes
    episodes = 20000
    # instantiate the game object
    game = FlappyBird()
    # PLE wraps the game and provides the interaction interface
    p = PLE(game, fps=30, display_screen=True)
    # initialize PLE
    p.init()
    # instantiate the Agent and hand it the action set
    agent = Agent(p.getActionSet())

    for episode in range(episodes):
        # reset the game
        p.reset_game()
        # get the current state
        state = agent.get_state(game.getGameState())

        while True:
            # pick the best action
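The excerpt stops at the action-selection comment. A sketch of how the body of such a loop commonly proceeds for a tabular Q-learning agent; get_best_action and update are hypothetical method names (only get_state and act appear in the excerpt):

            # Hypothetical loop body; get_best_action and update are assumed names
            action = agent.get_best_action(state)              # greedy / epsilon-greedy pick
            reward = agent.act(p, action)                      # send the action through PLE
            next_state = agent.get_state(game.getGameState())
            agent.update(state, action, reward, next_state)    # Q-value update
            state = next_state
            if p.game_over():
                break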
Example No. 8
import matplotlib.pyplot as plt
import numpy as np
import rl.deep_Q_learning as DQL
import h5py
from ple import PLE
from ple.games.flappybird import FlappyBird


def process_obs(obs):
    # Flatten the game-state dict into a fixed-order list of 8 values
    values = []
    for item in obs:
        values.append(obs[item])
    if len(values) == 8:
        out = np.array(values)
        return out


game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

scores = []

agent = DQL.deep_learner(1, 2, p.getActionSet())
agent.learning_rate = 0.001
agent.epsilon = 0.001
agent.epsilon_decay = 1.0
agent.epsilon_min = 0.00001
#agent.load_model_json()
agent.model.load_weights('my_model.h5')

nb_games = 0
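The listing ends with the counter initialised. A rough evaluation loop assembled from the pieces shown above might look like this; the 100-game cap and the reshape are assumptions, and the exact input shape depends on how DQL.deep_learner builds its model:

while nb_games < 100:                          # assumed number of evaluation games
    obs = process_obs(game.getGameState())
    state = obs.reshape(1, -1)                 # add a batch dimension for predict()
    action_idx = np.argmax(agent.model.predict(state)[0])
    p.act(p.getActionSet()[action_idx])
    if p.game_over():
        scores.append(p.score())
        p.reset_game()
        nb_games += 1

plt.plot(scores)                               # matplotlib is imported above
plt.show()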