Example #1
def main():
    # Create the environment
    game = Snake(width=256, height=256, init_length=10)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    # Reset the game and inspect the action set
    p.reset_game()
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    obs_dim = 256 * 256

    logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # Build the agent with the PARL framework
    model = Model(act_dim=act_dim)
    alg = PolicyGradient(model, lr=LEARNING_RATE)
    agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim)

    # # Load a saved model
    # if os.path.exists('model_dir/pg_pong_episode_19.ckpt'):
    #     agent.restore('model_dir/pg_pong_episode_19.ckpt')

    best_total_reward = -float('inf')
    for i in range(50000):
        obs_list, action_list, reward_list = run_episode(p, agent)
        if i % 10 == 0:
            logger.info("Episode {}, Reward Sum {}.".format(
                i, sum(reward_list)))

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list)
        agent.learn(batch_obs, batch_action, batch_reward)
        if (i + 1) % 50 == 0:
            total_reward = evaluate(p, agent, render=True)
            if total_reward > best_total_reward:
                best_total_reward = total_reward
                agent.save(
                    'model_dir/pg_pong_episode_{}_reward_{}.ckpt'.format(
                        i, total_reward))
            logger.info('Test reward: {}'.format(total_reward))
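
The training loop above depends on two helpers that are not shown, run_episode and calc_reward_to_go. A minimal sketch of the latter, assuming a discount factor GAMMA and the mean/std normalization commonly used in PARL's policy-gradient examples:

import numpy as np

GAMMA = 0.99  # assumed discount factor; not defined in the snippet above

def calc_reward_to_go(reward_list, gamma=GAMMA):
    reward_arr = np.array(reward_list, dtype=np.float32)
    # accumulate discounted returns backwards: G_t = r_t + gamma * G_{t+1}
    for i in range(len(reward_arr) - 2, -1, -1):
        reward_arr[i] += gamma * reward_arr[i + 1]
    # normalize to zero mean / unit variance to stabilize the gradient
    return (reward_arr - reward_arr.mean()) / (reward_arr.std() + 1e-8)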
Example #2
def run_game(nb_episodes, agent):
    """ Runs nb_episodes episodes of the game with agent picking the moves.
        An episode of FlappyBird ends with the bird crashing into a pipe or going off screen.
    """

    reward_values = {"positive": 1.0, "negative": 0.0, "tick": 0.0, "loss": 0.0, "win": 0.0}
    # TODO: when training use the following instead:
    # reward_values = agent.reward_values
    
    env = PLE(FlappyBird(), fps=30, display_screen=False, force_fps=True, rng=None,
              reward_values=reward_values)
    # TODO: to speed up training change parameters of PLE as follows:
    # display_screen=False, force_fps=True 
    env.init()
    totalscore = 0
    count = nb_episodes
    score = 0
    
    while nb_episodes > 0:
        # pick an action
        # TODO: for training, use agent.training_policy instead
        action = agent.policy(agent.state_binner(env.game.getGameState()))

        # step the environment
        reward = env.act(env.getActionSet()[action])
        #print("reward=%d" % reward)

        # TODO: for training, let the agent observe the current state transition

        score += reward
    
        # reset the environment if the game is over
        if env.game_over():
            totalscore += score
            print(count)
            print("score for this episode: %d" % score)
            env.reset_game()
            nb_episodes -= 1
            score = 0
    print("average for this run is :%d" % (totalscore/count))
Example #3
def main():
    # Create the environment
    game = Snake(width=96, height=96, init_length=6)
    p = PLE(game, fps=30, display_screen=False, force_fps=True)
    # Build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())

    rpm = ReplayMemory(MEMORY_SIZE)  # replay buffer for DQN

    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(alg, act_dim=act_dim, e_greed_decrement=0, e_greed=0.1)  # e_greed: probability of picking a random action, for exploration

    # Load a saved model
    if os.path.exists('./dqn_snake_60.ckpt'):
        agent.restore('./dqn_snake_60.ckpt')

    # Pre-fill the replay buffer so early training has sufficiently varied samples
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 20000
    # Start training
    episode = 0
    best_reward = -float('inf')
    while episode < max_episode:  # train for max_episode episodes; test episodes are not counted
        # train part
        for i in range(0, 20):
            total_reward = run_episode(p, agent, rpm)
            if i % 5 == 0:
                logger.info('episode:{}  train_reward:{}'.format(episode, total_reward))
            episode += 1
        # test part
        eval_reward = evaluate(p, agent, render=True)  # render=True to watch the game
        if eval_reward > best_reward:
            best_reward = eval_reward
            agent.save('model_dir/dqn_snake_{}.ckpt'.format(episode))
        logger.info('episode:{}    e_greed:{}   test_reward:{}'.format(
            episode, agent.e_greed, eval_reward))
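
ReplayMemory is imported from elsewhere in this project. A minimal sketch with the interface the loop relies on (len() for the warm-up check, plus append/sample used inside run_episode and the learn step); the (obs, action, reward, next_obs, done) layout is an assumption:

import random
from collections import deque

import numpy as np

class ReplayMemory:
    def __init__(self, max_size):
        self.buffer = deque(maxlen=max_size)

    def append(self, exp):
        # exp = (obs, action, reward, next_obs, done)
        self.buffer.append(exp)

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        obs, action, reward, next_obs, done = zip(*batch)
        return (np.array(obs), np.array(action), np.array(reward),
                np.array(next_obs), np.array(done))

    def __len__(self):
        return len(self.buffer)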
Example #4
    def __init__(self, config=None):
        EzPickle.__init__(self)

        # Aid options
        self.pre_play = True
        self.force_calm = False
        self.positive_counts = 0

        self.display_screen = False
        if config:
            self.display_screen = config['display_screen']

        self.observation_space = spaces.Box(0,
                                            1,
                                            shape=(8, ),
                                            dtype=np.float32)
        self.action_space = weightedDiscrete(2)  #spaces.Discrete(2)

        self.vel_max = 15
        self.vel_min = -15
        self.dist_max = 500
        self.dist_min = 0
        self.y_max = 500
        self.y_min = 0

        self.game = FlappyBird(graphics="fancy")
        self.p = PLE(self.game,
                     fps=30,
                     frame_skip=1,
                     num_steps=1,
                     force_fps=True,
                     display_screen=self.display_screen,
                     rng=0)
        self.p.rng = self.game.rng
        self.game.player.rng = self.game.rng

        self.p.init()

        self.current_t = 0
        self.max_t = 1000
Example #5
    def play(self):
        print('Playing {} agent after training for {} episodes or {} frames'.
              format(self.name, self.num_of_episodes, self.num_of_frames))
        reward_values = {
            'positive': 1.0,
            'negative': 0.0,
            'tick': 0.0,
            'loss': 0.0,
            'win': 0.0
        }

        env = PLE(FlappyBird(),
                  fps=30,
                  display_screen=True,
                  force_fps=False,
                  rng=None,
                  reward_values=reward_values)
        env.init()

        score = 0
        last_print = 0

        nb_episodes = 50
        while nb_episodes > 0:
            # pick an action
            state = env.game.getGameState()
            action = self.policy(state)

            # step the environment
            reward = env.act(env.getActionSet()[action])

            score += reward

            # reset the environment if the game is over
            if env.game_over():
                print('Score: {}'.format(score))
                env.reset_game()
                nb_episodes -= 1
                score = 0
Example #6
 def __init__(self, game_name, display_screen=True):
     # set headless mode
     os.environ['SDL_VIDEODRIVER'] = 'dummy'
     # open up a game state to communicate with emulator
     import importlib
     game_module_name = ('ple.games.%s' % game_name).lower()
     game_module = importlib.import_module(game_module_name)
     game = getattr(game_module, game_name)()
     self.game_state = PLE(game,
                           fps=30,
                           frame_skip=2,
                           display_screen=display_screen)
     self.game_state.init()
     self._action_set = self.game_state.getActionSet()
     self.action_space = spaces.Discrete(len(self._action_set))
     self.screen_width, self.screen_height = self.game_state.getScreenDims()
     self.observation_space = spaces.Box(low=0,
                                         high=255,
                                         shape=(self.screen_width,
                                                self.screen_height, 3))
     self.viewer = None
     self.count = 0
Example #7
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()
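
The if/elif chain maps a name to a game class; the same dispatch can be written as a lookup table. A sketch using the real ple.games module paths (only a subset listed):

import importlib

GAME_CLASSES = {
    'catcher': ('ple.games.catcher', 'Catcher'),
    'flappybird': ('ple.games.flappybird', 'FlappyBird'),
    'pong': ('ple.games.pong', 'Pong'),
    'snake': ('ple.games.snake', 'Snake'),
    # the remaining games follow the same module/class pattern
}

def make_game(name):
    module_path, class_name = GAME_CLASSES[name]
    module = importlib.import_module(module_path)
    return getattr(module, class_name)()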
Example #8
    def play(self, fast=True):
        """Use athlete to play.
        Args:
            fast <bool>: set to True if the screen should be hidden and speed
            enhanced
        """
        game = FlappyBird()
        env = PLE(game,
                  fps=30,
                  frame_skip=1,
                  num_steps=1,
                  force_fps=fast,
                  display_screen=not fast)
        env.init()
        pipes = []
        i = 0
        while i < 100:
            env.reset_game()
            pipes.append(0)
            while not env.game_over():
                A = self.act(game.getGameState())
                r = env.act(ACTIONS[A])
                if r == 1.:
                    pipes[-1] += 1
            if not fast:
                print('\n- Score: {} pipes'.format(pipes[-1]))
                print('- Played {} games'.format(len(pipes)))
                print('- Average score: {} pipes'.format(np.round(np.mean(pipes), decimals=1)))
            # count the game in both modes; the original incremented i only in
            # fast mode, so the loop never terminated when fast was False
            i += 1

        print('\n- Max score: {} pipes'.format(np.max(pipes)))
        print('- Games < 15 pipes: {}'.format(
            len(tuple(filter(lambda x: x < 15, pipes)))
        ))
        print('- Played {} games'.format(100))
        print('- Average score: {} pipes'.format(
            np.round(np.mean(pipes), decimals=1))
        )
Example #9
    def __init__(self, game_name='FlappyBird', display_screen=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        #* converts non-visual state representation to numpy array
        def process_state(state):
            return np.array([list(state.values())])

        self.game_state = PLE(game, fps=30, display_screen=display_screen,
                              state_preprocessor=process_state)  #* added state_preprocessor
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        #self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        #self.observation_space = spaces.Box(self.low, self.high)
        self.viewer = None
Example #10
def play_with_saved_agent(agent_file_path, agent_file_name, test_rounds=20):
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = load_agent(env, agent_file_path, agent_file_name)
    env.init()

    print "Testing model:", agent_file_name

    total_reward = 0.0
    for _ in range(test_rounds):
        my_agent.start_episode()
        episode_reward = 0.0
        while not env.game_over():
            state = env.getGameState()
            reward, action = my_agent.act(state, epsilon=0.05)
            episode_reward += reward

        print "Agent score {:0.1f} reward for episode.".format(episode_reward)
        total_reward += episode_reward
        my_agent.end_episode()

    return total_reward/test_rounds
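
process_state is passed to PLE as state_preprocessor here (and in several other examples) but never shown. A common minimal implementation, offered as an assumption, flattens the game-state dict into a float vector:

import numpy as np

def process_state(state):
    # PLE calls this on every getGameState() result
    return np.array(list(state.values()), dtype=np.float32)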
Example #11
def play_flappy_bird(play_game=True, train_agent=True, agent_model_path='model.h5'):
    game = FlappyBird()
    environment = PLE(game, fps=30, display_screen=True)
    action_len = 2
    states = []
    for key, value in game.getGameState().items():
        states.append(value)
    print(states)
    state_len = len(states)

    agent_explored_states = FlappyBirdAgent(state_len, action_len)

    if os.path.exists(agent_model_path):
        agent_explored_states.load_agent_experience(agent_model_path)
    # environment.init()
    if train_agent:
        agent_explored_states.train(environment, game)
        print("Trained")
    if play_game:
        agent_explored_states.play(environment, game)
        print("Played")
    agent_explored_states.save_agent_experience(agent_model_path)
Example #12
    def __init__(self, task={}):
        self._task = task
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        import importlib
        game_module = importlib.import_module('ple.games.customgame')
        game = getattr(game_module, 'customgame')()

        self.game_state = PLE(game, fps=30, display_screen=False)
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3))

        self.num_actions = len(self._action_set)
        self.viewer = None

        # env tracking variables
        self.done_counter = 0
        self.curr_task = None
        self.t = 0
        self.reward_mult = 1.0
Example #13
def main(w, seed=SEED, headless=False):
    """
    Let an agent play flappy bird
    """
    if headless:
        display_screen = False
        force_fps = True
    else:
        display_screen = True
        force_fps = False

    game = PLE(FLAPPYBIRD,
               display_screen=display_screen,
               force_fps=force_fps,
               rng=seed)
    game.init()
    game.reset_game()
    FLAPPYBIRD.rng.seed(seed)

    agent_score = 0
    num_frames = 0

    while True:
        if game.game_over():
            break

        obs = game.getGameState()
        x = normalize(obs)
        action = agent(x, w)

        reward = game.act(ACTION_MAP[action])

        if reward > 0:
            agent_score += 1

        num_frames += 1

    print('Frames  :', num_frames)
    print('Score   :', agent_score)
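
normalize, agent, and ACTION_MAP are defined elsewhere in that script. Hypothetical stand-ins consistent with how they are called above, assuming a linear policy over the state vector (the scaling constant is arbitrary); PLE reports FlappyBird's action set as flap (key code 119) and no-op (None):

import numpy as np

ACTION_MAP = {0: None, 1: 119}  # assumed: 0 = no-op, 1 = flap

def normalize(obs):
    # scale the raw game-state dict to a small numeric range (assumed divisor)
    return np.array(list(obs.values()), dtype=np.float32) / 512.0

def agent(x, w):
    # flap when the linear score is positive
    return int(np.dot(x, w) > 0)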
Example #14
 def __init__(self, screen=False, forcefps=True):
     self.game = FlappyBird(pipe_gap=125)
     self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
     self.env.init()
     self.env.getGameState = self.game.getGameState
     self.X = tf.placeholder(tf.float32, (None, None, self.INPUT_SIZE))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     cell = tf.nn.rnn_cell.LSTMCell(512, state_is_tuple=False)
     self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * 512))
     self.rnn, self.last_state = tf.nn.dynamic_rnn(
         inputs=self.X, cell=cell, dtype=tf.float32,
         initial_state=self.hidden_layer)
     self.tensor_action, self.tensor_validation = tf.split(self.rnn[:, -1, :], 2, 1)
     self.feed_action = tf.matmul(self.tensor_action, action_layer)
     self.feed_validation = tf.matmul(self.tensor_validation, action_layer)
     self.logits = self.feed_validation + tf.subtract(
         self.feed_action,
         tf.reduce_mean(self.feed_action, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
     self.saver = tf.train.Saver(tf.global_variables())
     self.rewards = []
Example #15
def test_model_G(nb_games, model):
    # use "fancy" for full background, random bird color and random pipe color;
    # use "fixed" (default) for black background and constant bird and pipe colors.
    game = FlappyBird(graphics="fixed")
    p = PLE(game,
            fps=30,
            frame_skip=1,
            num_steps=1,
            force_fps=True,
            display_screen=False)
    p.init()
    reward = 0.0

    cumulated = np.zeros((nb_games))
    list_actions = [0, 119]

    for i in range(nb_games):
        p.reset_game()

        while not p.game_over():
            state = game.getGameState()

            screen_x = process_screen(p.getScreenRGB())
            stacked_x = deque([screen_x, screen_x, screen_x, screen_x],
                              maxlen=4)
            x = np.stack(stacked_x, axis=-1)
            action = list_actions[np.argmax(
                model.predict(np.expand_dims(x, axis=0)))]

            reward = p.act(action)

            cumulated[i] = cumulated[i] + reward

    avg_score = np.mean(cumulated)
    print('Average : ' + str(avg_score))
    mx_score = np.max(cumulated)
    print('Max : ' + str(mx_score))
    return avg_score, mx_score
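
process_screen is not shown in this snippet. A typical preprocessing step for DQN-style pipelines, given here as an assumption, grayscales and downsamples the RGB frame:

import numpy as np
from skimage.color import rgb2gray
from skimage.transform import resize

def process_screen(screen_rgb):
    gray = rgb2gray(screen_rgb)      # (W, H) floats in [0, 1]
    small = resize(gray, (80, 80))   # downsample for the network input
    return (small * 255).astype(np.uint8)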
Example #16
    def __init__(self, map_config):
        self.map_config = map_config
        self.game = MonsterKong(self.map_config)

        self.fps = 30
        self.frame_skip = 1
        self.num_steps = 1
        self.force_fps = True
        self.display_screen = True
        self.nb_frames = 500
        self.reward = 0.0
        self.episode_end_sleep = 0.2

        if 'fps' in map_config:
            self.fps = map_config['fps']
        if 'frame_skip' in map_config:
            self.frame_skip = map_config['frame_skip']
        if 'force_fps' in map_config:
            self.force_fps = map_config['force_fps']
        if 'display_screen' in map_config:
            self.display_screen = map_config['display_screen']
        if 'episode_length' in map_config:
            self.nb_frames = map_config['episode_length']
        if 'episode_end_sleep' in map_config:
            self.episode_end_sleep = map_config['episode_end_sleep']
        self.current_step = 0

        self._seed()

        self.p = PLE(self.game, fps=self.fps, frame_skip=self.frame_skip, num_steps=self.num_steps,
                     force_fps=self.force_fps, display_screen=self.display_screen, rng=self.rng)

        self.p.init()

        self._action_set = self.p.getActionSet()[1:]
        self.action_space = spaces.Discrete(len(self._action_set))
        (screen_width, screen_height) = self.p.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
Example #17
def run_game(nb_episodes, agent):
    """ Runs nb_episodes episodes of the game with agent picking the moves.
        An episode of FlappyBird ends with the bird crashing into a pipe or going off screen.
    """
    reward_values = {
        "positive": 1.0,
        "negative": 0.0,
        "tick": 0.0,
        "loss": 0.0,
        "win": 0.0
    }

    env = PLE(FlappyBird(),
              fps=30,
              display_screen=True,
              force_fps=False,
              rng=None,
              reward_values=reward_values)
    env.init()

    score = 0
    while nb_episodes > 0:
        # pick an action
        action = agent.policy(env.game.getGameState())

        # step the environment
        reward = env.act(env.getActionSet()[action])

        score += reward
        # reset the environment if the game is over
        if env.game_over():
            print(score, nb_episodes)
            env.reset_game()
            nb_episodes -= 1
            if score > agent.highestScore:
                agent.highestScore = score
            agent.totalScore += score
            score = 0
Example #18
    def __init__(self, rng, game=None, frame_skip=4,
                 ple_options={"display_screen": True, "force_fps": True, "fps": 30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        self._hist_size = 1

        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        self._actions = self._ple.getActionSet()
        self._state_size = self._ple.getGameStateDims()[0]
        self._state_saved = np.zeros(self._state_size, dtype=np.float32)
        self.previous_score = 0.
        self.episode_scores = []
Example #19
    def __init__(self, duration, size=(48, 48)):
        """
        Create a new PuckWorld Environment
        :param size: Game window dimensions
        """
        super(PuckWorld, self).__init__()
        self.width, self.height = size

        self.game = ExtPuckWorld(width=self.width,
                                 height=self.height,
                                 duration=duration,
                                 r_m=self._r_m)
        self.game.screen = pygame.display.set_mode(self.game.getScreenDims(),
                                                   0, 32)
        self.game.clock = pygame.time.Clock()
        self.game.rng = np.random.RandomState(24)

        self.ple = PLE(self.game)
        self.ple.init()
        self.epsilon = 2 * self.game.good_creep.radius  # Size of epsilon-region around goal state

        self.terminal = False
        self.reset()
Example #20
 def __init__(self, screen=False, forcefps=True):
     self.game = FlappyBird(pipe_gap=125)
     self.env = PLE(self.game,
                    fps=30,
                    display_screen=screen,
                    force_fps=forcefps)
     self.env.init()
     self.env.getGameState = self.game.getGameState
     self.X = tf.placeholder(tf.float32, (None, self.INPUT_SIZE))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     input_layer = tf.Variable(
         tf.random_normal([self.INPUT_SIZE, self.LAYER_SIZE]))
     output_layer = tf.Variable(
         tf.random_normal([self.LAYER_SIZE, self.OUTPUT_SIZE]))
     feed_forward = tf.nn.relu(tf.matmul(self.X, input_layer))
     self.logits = tf.matmul(feed_forward, output_layer)
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
     self.saver = tf.train.Saver(tf.global_variables())
     self.rewards = []
Example #21
    def __init__(self, game_name='FlappyBird', display_screen=True):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game = game
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()

        # increase gap for checking
        #self.game.pipe_gap = 115
        #self.game.player.height = 14

        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        #print(self.screen_width, self.screen_height)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.viewer = None
Example #22
 def __init__(self,
              game_name='FlappyBird',
              display_screen=True,
              observe_state=False):
     # open up a game state to communicate with emulator
     import importlib
     game_module_name = ('ple.games.%s' % game_name).lower()
     game_module = importlib.import_module(game_module_name)
     game = getattr(game_module, game_name)()
     self.game_state = PLE(game,
                           fps=30,
                           display_screen=display_screen,
                           state_preprocessor=state_preprocessor)
     self.game_state.init()
     self._action_set = self.game_state.getActionSet()
     self.action_space = spaces.Discrete(len(self._action_set))
     self.screen_width, self.screen_height = self.game_state.getScreenDims()
     if self.screen_height + self.screen_width > 500:
         img_scale = 0.25
     else:
         img_scale = 1.0
     self.screen_width = int(self.screen_width * img_scale)
     self.screen_height = int(self.screen_height * img_scale)
     self.observe_state = observe_state
     if self.observe_state:
         # the bounds are typically not infinity
         self.observation_space = spaces.Box(
             low=-float('inf'),
             high=float('inf'),
             shape=self.game_state.state_dim)
     else:
         self.observation_space = spaces.Box(low=0,
                                             high=255,
                                             shape=(self.screen_height,
                                                    self.screen_width, 3))
     self.viewer = None
Example #23
def test():
    game2 = FlappyBird()
    p2 = PLE(game2,
             fps=30,
             frame_skip=1,
             num_steps=1,
             force_fps=True,
             display_screen=False)
    p2.init()
    reward = 0.0

    nb_games = 10
    cumulated = np.zeros((nb_games))
    for i in range(nb_games):
        p2.reset_game()

        while not p2.game_over():
            state = game2.getGameState()
            screen = p2.getScreenRGB()
            action = FlappyPolicy(state, screen)

            reward = p2.act(action)
            cumulated[i] = cumulated[i] + reward
    return np.mean(cumulated)
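
FlappyPolicy is external to this snippet. A hypothetical rule-based baseline with the same signature, using the documented FlappyBird state keys: flap (key code 119) when the bird is below the center of the next gap, otherwise no-op:

def FlappyPolicy(state, screen):
    gap_center = (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) / 2.0
    return 119 if state['player_y'] > gap_center else None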
Example #24
    def __init__(self,
                 game_name='FlappyBird',
                 display_screen=True,
                 ple_game=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None
Example #25
    def __init__(self,
                 game_name='FlappyBird',
                 display_screen=True,
                 ple_game=True,
                 root_game_name=None,
                 reward_type='sparse',
                 obs_type=None,
                 **kwargs):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        os.environ['SDL_AUDIODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = F"{root_game_name.lower()}.envs"
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)(**kwargs)
        self.ple_wrapper = PLE(game, fps=30, display_screen=display_screen)
        self.ple_wrapper.init()
        game.reward_type = reward_type
        self._action_set = self.ple_wrapper.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.ple_wrapper.getScreenDims()
        # Assume observation space to be (64, 64, 3) due to procgen
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(64, 64, 3),
                                            dtype=np.uint8)
        self.viewer = None
        assert obs_type is not None, obs_type
        self.obs_type = obs_type

        self.reward_range = game.rewards['win']
Example #26
def watch():

    import torch

    game = FlappyBird()
    env = PLE(game,
              fps=30,
              frame_skip=4,
              display_screen=True,
              force_fps=False,
              reward_values={"tick": 0.00},
              state_preprocessor=None)
    env.init()

    model = Model(obs_dim=OBS_DIM, act_dim=ACT_DIM)
    if torch.cuda.is_available():
        model = model.cuda()

    model.load_state_dict(torch.load('checkpoint.pt'))

    from parl.algorithms.torch import PolicyGradient
    alg = PolicyGradient(model, LEARNING_RATE)
    agent = Agent(alg)

    for i in range(10000):  # 10000 episodes
        obs_list, action_list, reward_list = run_episode(env, agent)

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list)

        # agent.learn(batch_obs, batch_action, batch_reward)

        _, _, reward_list = run_episode(env, agent, train_or_test='test')
        total_reward = np.sum(reward_list)
        logger.info('Test reward: {}'.format(total_reward))
Example #27
def random_play(episodes=100):
    # Initialize game and agent
    game = FlappyBird()
    p = PLE(game, display_screen=True, state_preprocessor=process_state)
    p.init()
    agent = Agent(p)
    total_reward = []

    # Run given number of episodes
    for _ in range(episodes):
        # Initialize episode
        p.reset_game()
        total_episode_reward = 0

        # Episode loop
        while not p.game_over():
            action = agent.choose_action()
            reward = p.act(action)
            total_episode_reward += reward

        # Save episode reward and return
        total_reward.append(total_episode_reward)

    return total_reward
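
A minimal Agent matching the two calls above (the constructor receives the PLE instance; choose_action samples uniformly from its action set):

import random

class Agent:
    def __init__(self, p):
        self.actions = p.getActionSet()

    def choose_action(self):
        return random.choice(self.actions)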
Example #28
def main():
    # Create the environment
    game = Pong(width=200, height=200, MAX_SCORE=11)
    p = PLE(game, fps=30, display_screen=True, force_fps=False)
    p.reset_game()
    # Build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    print("act_dim:", act_dim)

    obs_dim = 200 * 200
    # Build the agent with PARL: PongModel, DDPG, and PongAgent nested together
    model = PongModel(act_dim)
    algorithm = DDPG(model,
                     gamma=GAMMA,
                     tau=TAU,
                     actor_lr=ACTOR_LR,
                     critic_lr=CRITIC_LR)
    agent = PongAgent(algorithm, obs_dim, act_dim)
    rpm = ReplayMemory(int(MEMORY_SIZE), obs_dim, act_dim)

    max_episode = 20000
    # Start training
    episode = 0
    best_reward = -float('inf')
    while episode < max_episode:  # train for max_episode episodes; test episodes are not counted
        # train part
        for i in range(0, 50):
            total_reward = run_episode(p, agent, rpm)
            episode += 1
        # test part
        eval_reward = evaluate(p, agent, render=True)  # render=True to watch the game
        if eval_reward > best_reward:
            best_reward = eval_reward
            agent.save('model_dir/ddpg_pong_{}.ckpt'.format(episode))
        logger.info('episode:{}   test_reward:{}'.format(episode, eval_reward))
Example #29
def init_flappy_bird(mode, graphics="fixed"):

    # use "Fancy" for full background, random bird color and random pipe color,
    # use "Fixed" (default) for black background and constant bird and pipe colors.
    game = FlappyBird(graphics=graphics)

    # Set parameters, depending on the mode specified
    force_fps = (mode == Mode.TRAIN)
    display_screen = (mode == Mode.PLAY)

    # Note: if you want to see you agent act in real time, set force_fps to False.
    # But don't use this setting for learning, just for display purposes.
    env = PLE(game,
              fps=30,
              frame_skip=1,
              num_steps=1,
              force_fps=force_fps,
              display_screen=display_screen)

    # Init the environment (settings, display...) and reset the game
    env.init()
    env.reset_game()

    return game, env
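
Mode is assumed to be a small enum defined alongside this helper; a usage sketch under that assumption:

from enum import Enum

class Mode(Enum):
    TRAIN = 0
    PLAY = 1

game, env = init_flappy_bird(Mode.TRAIN)  # fast, headless training setup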
Example #30
        states = np.array(states)
        targets = np.array(targets)
        self.model.fit(states, targets, nb_epoch=1, verbose=0)  # train on the batch
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):  # load trained network weights
        self.model.load_weights(name)

    def save(self, name):  # save network weights
        self.model.save_weights(name)


if __name__ == "__main__":
    game = Catcher(width=320, height=320)
    env = PLE(game, display_screen=True, state_preprocessor=process_state)
    agent = DQNAgent(env)
    agent.load("./save/catcher.h5")

    # initialization
    #pylab.title("reward")
    #pylab.xlabel("episodes")
    #pylab.ylabel("rewards")
    env.init()
    scores, time = [], []
    for e in range(EPISODES):

        env.reset_game()
        state = env.getGameState()
        state = np.array([list(state[0])])
        score = 0