Example #1
    def __init__(self, env_config):
        game = Catcher(width=screen_wh, height=screen_wh)

        fps = 30  # fps we want to run at
        frame_skip = 2
        num_steps = 2
        force_fps = False  # False for slower speed
        display_screen = True
        # make a PLE instance.
        self.env = PLE(game,
                       fps=fps,
                       frame_skip=frame_skip,
                       num_steps=num_steps,
                       force_fps=force_fps,
                       display_screen=display_screen)
        self.env.init()
        self.action_dict = {0: None, 1: 97, 2: 100}
        #PLE env starts with black screen
        self.env.act(self.env.NOOP)

        self.action_space = Discrete(3)
        self.k = 4
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(screen_wh, screen_wh,
                                                   1 * self.k))
        self.frames = deque([], maxlen=self.k)
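Example #1 only shows the constructor of the wrapper; the frame deque suggests gym-style reset/step methods that stack the last k grayscale screens. A minimal sketch of what they might look like (the method bodies and the choice of getScreenGrayscale are assumptions, not part of the original):

import numpy as np

# Hypothetical methods for the wrapper class above.
def reset(self):
    self.env.reset_game()
    frame = self.env.getScreenGrayscale()[..., np.newaxis]  # (screen_wh, screen_wh, 1)
    for _ in range(self.k):
        self.frames.append(frame)                # fill the deque with the first frame
    return np.concatenate(self.frames, axis=-1)

def step(self, action):
    reward = self.env.act(self.action_dict[action])
    self.frames.append(self.env.getScreenGrayscale()[..., np.newaxis])
    obs = np.concatenate(self.frames, axis=-1)   # (screen_wh, screen_wh, k)
    return obs, reward, self.env.game_over(), {}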
Example #2
    def __init__(self,
                 random_seed=0,
                 init_lives=3,
                 normalise=True,
                 display=False):

        self._random_seed = random_seed
        self._game = Catcher(init_lives=init_lives)
        self._normalise = normalise
        self._display = display

        if self._display == False:
            os.putenv('SDL_VIDEODRIVER', 'fbcon')
            os.environ["SDL_VIDEODRIVER"] = "dummy"

        if self._normalise:
            self._env = PLE(self._game,
                            fps=30,
                            state_preprocessor=self._normalise_ob,
                            display_screen=display)
        else:
            self._env = PLE(self._game,
                            fps=30,
                            state_preprocessor=self._ob,
                            display_screen=display)

        self._env.init()
        self._actions = self._env.getActionSet()
        self._env.rng.seed(random_seed)

        # Tracker
        self._cum_reward = 0
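The constructor in Example #2 passes `self._normalise_ob` or `self._ob` as PLE's state_preprocessor, but neither is shown. A plausible sketch, assuming Catcher's usual state keys and its default 64x64 screen (the normalisation constant is a guess):

import numpy as np

def _ob(self, state):
    # Flatten Catcher's state dict into a fixed-order vector.
    return np.array([state['player_x'], state['player_vel'],
                     state['fruit_x'], state['fruit_y']], dtype=np.float32)

def _normalise_ob(self, state):
    # Rough normalisation by the screen size (assumed 64x64 here).
    return self._ob(state) / 64.0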
Example #3
    def __init__(self, width, lives=1):
        '''
            @width : width of game window
            @lives : number of deaths before the episode terminates (death = pallet does not catch ball)
        '''
        self.width = width
        self.game = None
        self.actions = None
        self.max_game_len = 150
        self.visitation_map = {}
        self.timer = 0
        self.coordinates = (0, 0)

        # Create game env
        catcher = Catcher(width=width, height=width, init_lives=lives)
        self.game = self.set_catcher_game_setup(catcher)
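`set_catcher_game_setup` is not included in Example #3. Given that its return value is stored in `self.game` and that `self.actions` starts out as None, it plausibly wraps the raw Catcher game in a PLE instance; a hedged sketch:

from ple import PLE

def set_catcher_game_setup(self, catcher):
    # Hypothetical helper: wrap the Catcher game in PLE, initialise it,
    # and cache the legal action set.
    env = PLE(catcher, fps=30, display_screen=False)
    env.init()
    self.actions = env.getActionSet()
    return env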
Example #4
def get_envs():
    envs = [
        EnvWrapper('cartpole', gym.make('CartPole-v1'), 500, 550, 4000),
        EnvWrapper('catcher', PLE(Catcher(init_lives=1), display_screen=False, reward_values={
            "positive": 1,
            "negative": -1,
            "loss": -1,
        }), 100, 110, 3000),
        EnvWrapper('snake', PLE(Snake(height=256, width=256), display_screen=False, reward_values={
            "tick": -0.01,
            "positive": 5,
            "loss": -1,
        }), 100, 110, 3000),
        EnvWrapper('flappybird', PLE(FlappyBird(), display_screen=False, reward_values={
            "positive": 1,
            "tick": 0.1,
            "loss": -1,
        }), 100, 110, 3000),
    ]
    return envs
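`EnvWrapper` is not defined in Example #4. Judging from the calls, it simply bundles a name, an environment and three numeric limits; a minimal container sketch (the meaning of the three numbers is an assumption):

from dataclasses import dataclass
from typing import Any

@dataclass
class EnvWrapper:
    name: str            # short identifier, e.g. 'catcher'
    env: Any             # gym env or PLE instance
    solved_score: float  # assumed: average score that counts as solved
    max_score: float     # assumed: upper bound used for normalisation
    max_episodes: int    # assumed: training budget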
Example #5
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()
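Example #5 also only shows the constructor. A possible reset/step pair built from the PLE calls already used above (the method names and return convention are assumptions):

def reset(self):
    self.p.reset_game()
    return self.p.getScreenRGB()

def step(self, action_index):
    # Map the discrete index back to a PLE key code before acting.
    reward = self.p.act(self.action_set[action_index])
    return self.p.getScreenRGB(), reward, self.p.game_over()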
Example #6
def a3c_main(save_path, shared_model,
             model,
             select_action,
             perform_action,
             save_model,
             optimizer=None,
             train=True,
             display=False,
             gamma=0.99,
             tau=1.0):

    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed

    game = Catcher(width=256, height=256)

    p = PLE(game,
            fps=fps,
            frame_skip=frame_skip,
            num_steps=num_steps,
            force_fps=force_fps,
            display_screen=display)

    p.init()

    def p_action(action):
        # reward, action
        return p.act(action)

    def main(lstm_shape, steps):

        values = []
        log_probs = []
        rewards = []
        entropies = []

        x_t = extract_image(p.getScreenRGB(), (80, 80))

        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)
        model.load_state_dict(shared_model.state_dict())

        cx = Variable(torch.zeros(1, lstm_shape[-1]))
        hx = Variable(torch.zeros(1, lstm_shape[-1]))

        try:
            while p.game_over() == False and steps > 0:
                steps -= 1

                x_t = extract_image(p.getScreenRGB(), (80, 80))

                x_t = np.reshape(x_t, (1, 80, 80))

                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    # print()
                    reward, action, hx, cx, info_dict = train_and_play(p_action, st,\
                                                        select_action, perform_action,\
                                                        possible_actions, opt_nothing, \
                                                        model, {"isTrain":True, "hx":hx,"cx":cx})
                    rewards.append(reward)
                    # reward += r

                    entropies.append(info_dict["entropies"])
                    values.append(info_dict["values"])
                    log_probs.append(info_dict["log_probs"])

                else:
                    _, _, hx, cx, _ = play(p_action, st, select_action,\
                        perform_action, possible_actions, model, {"hx":hx,"cx":cx, "isTrain":False})

                stack_x = st

            if train:
                state = torch.from_numpy(stack_x)
                R = torch.zeros(1, 1)
                if steps > 0:
                    value, _, _ = model(
                        (Variable(state.unsqueeze(0).float()), (hx, cx)))

                values.append(Variable(R))
                policy_loss = 0
                value_loss = 0
                R = Variable(R)
                gae = torch.zeros(1, 1)

                for i in reversed(range(len(rewards))):
                    R = gamma * R + rewards[i]
                    advantage = R - values[i]
                    value_loss = value_loss + 0.5 * advantage.pow(2)

                    # Generalized Advantage Estimation
                    delta_t = rewards[i] + gamma * \
                        values[i + 1].data - values[i].data
                    gae = gae * gamma * tau + delta_t

                    policy_loss = policy_loss - \
                        log_probs[i] * Variable(gae) - 0.01 * entropies[i]

                optimizer.zero_grad()

                (policy_loss + 0.5 * value_loss).backward()
                torch.nn.utils.clip_grad_norm(model.parameters(), 40)

                ensure_shared_grads(model, shared_model)
                optimizer.step()

        except Exception as e:
            print("Exception >>", e)
            print("Saving model")
            if train: save_model(shared_model, save_path)

        score = p.score()
        p.reset_game()
        if train: save_model(shared_model, save_path)
        return score

    return main
Example #7
        episode_reward = 0
        while True:
            action = agent.predict(obs)  # predict the action, always picking the greedy one
            action = env.getActionSet()[action]
            reward = env.act(action)
            obs = list(env.getGameState().values())
            episode_reward += reward
            if render:
                env.getScreenRGB()
            if env.game_over():
                break
        eval_reward.append(episode_reward)
    return np.mean(eval_reward)


env = Catcher(500, 500)
env = PLE(env, fps=10, display_screen=True, force_fps=False)
act_dim = len(env.getActionSet())
obs_dim = len(env.getGameState())

rpm = ReplayMemory(MEMORY_SIZE)
model = Model(act_dim=act_dim)
alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)

agent = Agent(alg,
              obs_dim=obs_dim,
              act_dim=act_dim,
              e_greed_decrement=1e-6,
              e_greed=0.1)
"""
# populate the replay memory
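The snippet stops right before the replay memory is filled. A plausible warm-up loop with random actions, assuming `ReplayMemory` supports `len()` and an `append()` that takes a transition tuple (both assumptions; they are not shown in the example):

import numpy as np

def warm_up_memory(env, rpm, act_dim, min_size=200):
    while len(rpm) < min_size:
        env.reset_game()
        obs = list(env.getGameState().values())
        while not env.game_over():
            action_index = np.random.randint(act_dim)
            reward = env.act(env.getActionSet()[action_index])
            next_obs = list(env.getGameState().values())
            rpm.append((obs, action_index, reward, next_obs, env.game_over()))
            obs = next_obs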
Example #8
        obs = obs / norm_coeff

        arr = np.hstack((np.identity(3), np.tile(obs, (3, 1))))
        inputs = torch.FloatTensor(arr)
        outputs = self.model(inputs)
        _, action_index = outputs.max(0)
        action_index = int(action_index)
        action = self.actions[action_index]
        return action


# load trained neural network
model = torch.load('../model/neural_network.pt')

# initialize game
game = Catcher(width=100, height=100, init_lives=1)
p = PLE(game,
        fps=30,
        frame_skip=3,
        num_steps=1,
        force_fps=False,
        display_screen=True)
p.init()

# initialize agent
agent = PlayingAgent(p.getActionSet(), model)

# run playing episodes
episodes = 10
max_timestamps = 300
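The episode loop itself is cut off in Example #8. A plausible version using the objects defined above (the agent's method name `pick_action` is an assumption):

import numpy as np

for episode in range(episodes):
    p.reset_game()
    score = 0.0
    for _ in range(max_timestamps):
        obs = np.array(list(game.getGameState().values()), dtype=np.float32)
        action = agent.pick_action(obs)   # hypothetical PlayingAgent method
        score += p.act(action)
        if p.game_over():
            break
    print("episode", episode, "score", score)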
Example #9
catcher_dict['pygame']=True
catcher_dict['state_means'] = [29.88745927,0.15930137,22.5392288,24.73781436] 
catcher_dict['state_stds'] = [13.89457683,2.04087944,17.41686248,23.38546788]

game_params = {'cartpole': cartpole_dict, 'catcher': catcher_dict}

# save all params
if args.save:
    all_params = {'sim': sim_params, 'train':train_params,'arch':arch_params,'bf':bf_params,'game':game_params}
    pickle.dump(all_params, open(save_file+"_params.pkl", "wb"))

if __name__=="__main__":
    # Initiate cartpole envs
    cartpole_env = gym.make('CartPole-v1')
    # Initiate catcher envs
    catcher_env = PLE(Catcher(init_lives=1), state_preprocessor=process_state, display_screen=False)
    catcher_env.init()

    game_params['catcher']['actions'] = catcher_env.getActionSet()

    envs = {'cartpole': cartpole_env, 'catcher': catcher_env}

    # Initialise the first task: cartpole
    curr_task = sim_params['first_task']

    env = envs[curr_task]

    # Multiple replay databases maintained if multitasking
    if train_params['multitask']:
        mem_length = train_params['replay_sizes']
    else:
Example #10
    def test_catcher(self):
        from ple.games.catcher import Catcher
        game = Catcher()
        self.run_a_game(game)
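`run_a_game` is not part of the test snippet. A hedged sketch of what such a helper might do: wrap the game in PLE and play a handful of random steps to make sure nothing crashes.

from ple import PLE
import numpy as np

def run_a_game(self, game, n_steps=200):
    rng = np.random.RandomState(0)
    p = PLE(game, display_screen=False)
    p.init()
    actions = p.getActionSet()
    for _ in range(n_steps):
        p.act(actions[rng.randint(len(actions))])
        if p.game_over():
            p.reset_game()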
Example #11
from ple.games.catcher import Catcher
from ple import PLE

import pygame
import numpy as np
from NaiveAgent import NaiveAgent  # assumes NaiveAgent.py defines a NaiveAgent class

if __name__ == '__main__':
    pygame.init()
    game = Catcher(width=256, height=256)
    game.rng = np.random.RandomState(24)
    game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)
    game.clock = pygame.time.Clock()
    game.init()
    
    ''' create learning environment '''
    p = PLE(game, fps=30, display_screen=True, force_fps=False)
    p.init()


    ''' set my agent actions and rewards '''
    myAgent = NaiveAgent(p.getActionSet())
    reward = 0.0

    while True:
        dt = game.clock.tick_busy_loop(30)
        if game.game_over():
            game.reset()

        game.step(dt)
        pygame.display.update()
Example #12
# Catcher

from ple import PLE
from ple.games.catcher import Catcher
import pygame
import time, sys
from pygame.locals import *
import random

game = Catcher(256, 256, 1)

p = PLE(game, display_screen=True)
p.init()

print(p.getActionSet())
action_set = p.getActionSet()

nb_frames = 1000

for f in range(nb_frames):
    p.act(random.choice(action_set))
    time.sleep(.01)
    if p.game_over():
        sys.exit()
Example #13
    MOMENTUM = 0
    CLIP_DELTA = 1.0
    EPSILON_START = 1.0
    EPSILON_MIN = .1
    EPSILON_DECAY = 10000
    UPDATE_FREQUENCY = 1
    REPLAY_MEMORY_SIZE = 1000000
    BATCH_SIZE = 32
    FREEZE_INTERVAL = 1000
    DETERMINISTIC = True




if __name__ == "__main__":
    game = Catcher(width=64, height=64) 
    logging.basicConfig(level=logging.INFO)
    
    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()
    
    # --- Instantiate environment ---
    env = PLE_env(rng, game=game, frame_skip=parameters.frame_skip,
            ple_options={"display_screen": True, "force_fps":True, "fps":30})
    
    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
Example #14
def init_main(save_path, model, train=True, display=False):
    push_to_memory, select_action, perform_action, optimize, save_model = model

    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed

    game = Catcher(width=256, height=256)

    p = PLE(game,
            fps=fps,
            frame_skip=frame_skip,
            num_steps=num_steps,
            force_fps=force_fps,
            display_screen=display)

    p.init()

    def p_action(action):
        # reward, action
        return p.act(action)

    def main(steps):
        x_t = extract_image(p.getScreenRGB(), (80, 80))

        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)

        while p.game_over() == False and steps > 0:
            try:
                steps -= 1

                x_t = extract_image(p.getScreenRGB(), (80, 80))

                x_t = np.reshape(x_t, (1, 80, 80))

                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    reward, action, _, _, _ = train_and_play(
                        p_action, st, select_action, perform_action,
                        possible_actions, optimize, None, {})
                    push_to_memory(stack_x, action, st, reward)

                else:
                    play(p_action, st, select_action, perform_action,
                         possible_actions, None, {})

                stack_x = st

            except Exception as e:
                print("Exception >>", e)
                print("Saving model")
                if train: save_model(save_path)
                break

        score = p.score()
        p.reset_game()
        if train: save_model(save_path)
        return score

    return main
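A short usage sketch for the closure returned by `init_main` (hypothetical: `model_fns` stands for the five callables the surrounding project supplies as `model`):

model_fns = (push_to_memory, select_action, perform_action, optimize, save_model)
episode = init_main("catcher_dqn.pt", model_fns, train=True, display=False)
for i in range(100):
    score = episode(steps=500)   # play/train for up to 500 environment steps
    print("episode", i, "score", score)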
Example #15
# save all params
if args.save:
    all_params = {
        'sim': sim_params,
        'train': train_params,
        'arch': arch_params,
        'bf': bf_params,
        'game': game_params
    }
    pickle.dump(all_params, open(save_file + "_params.pkl", "wb"))

if __name__ == "__main__":
    # Initiate cartpole envs
    cartpole_env = gym.make('CartPole-v1')
    # Initiate catcher envs
    catcher_env = PLE(Catcher(init_lives=1),
                      state_preprocessor=process_state,
                      display_screen=False)
    catcher_env.init()

    game_params['catcher']['actions'] = catcher_env.getActionSet()

    envs = {'cartpole': cartpole_env, 'catcher': catcher_env}

    # Initialise the first task: cartpole
    curr_task = sim_params['first_task']

    env = envs[curr_task]

    # Multiple replay databases maintained if multitasking
    if train_params['multitask']:
Example #16
        return random.choice(self.actions)


'''
State Format:
{
    'player_x': int,
    'player_vel': float,
    'fruit_x': int,
    'fruit_y': int
}
Actions:
[97, 100, None]
'''

game = Catcher(width=256, height=256, init_lives=3)

p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

agent = RandomAgent(p.getActionSet())
nb_frames = 1000
reward = 0.0

print(game.getGameState())
print(p.getActionSet())

for f in range(nb_frames):
    if p.game_over():  #check if the game is over
        p.reset_game()
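The loop body is truncated above. Filled out, it might look like this (the `pickAction` name and its `(reward, state)` signature are assumptions):

for f in range(nb_frames):
    if p.game_over():  # check if the game is over
        p.reset_game()
    action = agent.pickAction(reward, game.getGameState())
    reward = p.act(action)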
Example #17
    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            game_name: name of the PLE game to load
            rewards: dictionary of reward values passed on to PLE
            state_as_image: if True, the state is the game screen; otherwise the game-state vector
            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            width, height: dimensions of the game screen
            visualize: if True, display the game screen while training (slows training down)
            lives: number of lives in the game; the game resets on game over (i.e. lives = 0).
                   Only used by Catcher and Pong (as the score limit)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30  # frames per second
        self.force_fps = force_fps #True  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to continue action for
        #self.isRGB = isRGB  # always returns color; let tensorforce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
Example #18
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             500000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        display                     bool            False           Whether to display the screen of the game
        init_lives                  int             3               Number of lives at the start of the game
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute(config,
                                                  'max_episode_length',
                                                  default_value=500000)
        self.norm_state = check_attribute(config,
                                          'norm_state',
                                          default_value=True)
        self.display = False
        self.init_lives = 3
        # self.display = check_attribute(config, 'display', default_value=False)
        # self.init_lives = check_attribute(config, 'init_lives', default_value=3)

        # summary parameters
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute(config, 'number_of_steps',
                                               500000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # setting up original catcher environment with the specified parameters
        self.catcherOb = Catcher(init_lives=self.init_lives)
        if not self.display:
            # do not open a pygame window
            os.putenv('SDL_VIDEODRIVER', 'fbcon')
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        if self.norm_state:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob_normalize,
                           display_screen=self.display)
        else:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob,
                           display_screen=self.display)
        self.pOb.init()

        # environment internal state
        self.actions = [
            97, None, 100
        ]  # self.pOb.getActionSet() (left = 97, do nothing = None, right = 100)
        self.num_action = 3
        self.num_state = 4
        self.episode_step_count = 0
        self.pOb.reset_game()
        self.current_state = self.pOb.getGameState()
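Only the constructor of Example #18 is shown. A possible step method that stays consistent with the attributes set up above (everything outside the constructor is an assumption):

def step(self, action_index):
    self.config.current_step += 1
    self.episode_step_count += 1
    reward = self.pOb.act(self.actions[action_index])
    self.current_state = self.pOb.getGameState()
    terminal = self.pOb.game_over() or (self.episode_step_count >= self.max_episode_length)
    if self.store_summary and self.config.current_step <= self.number_of_steps:
        self.reward_per_step[self.config.current_step - 1] = reward
    if terminal:
        if self.store_summary:
            self.summary["steps_per_episode"].append(self.episode_step_count)
        self.episode_step_count = 0
        self.pOb.reset_game()
    return self.current_state, reward, terminal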
Example #19
    epsilon = 0.15
    epsilon_steps = 30000  # decay steps
    epsilon_min = 0.1
    lr = 0.01
    discount = 0.95  # discount factor
    rng = np.random.RandomState(24)

    # memory settings
    max_memory_size = 100000
    min_memory_size = 1000  # number needed before model training starts

    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # PLE takes our game and the state_preprocessor. It will process the state
    # for our agent.
    game = Catcher(width=128, height=128)
    env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

    agent = Agent(env,
                  batch_size,
                  num_frames,
                  frame_skip,
                  lr,
                  discount,
                  rng,
                  optimizer="sgd_nesterov")
    agent.build_model()

    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()
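`nv_state_preprocessor` is defined elsewhere in that project. A hedged sketch of what such a preprocessor typically does for Catcher (the scaling constant matches the 128x128 screen used above but is otherwise a guess):

import numpy as np

def nv_state_preprocessor(state):
    # Flatten the Catcher state dict into a float vector and scale it roughly into [0, 1].
    obs = np.array(list(state.values()), dtype=np.float32)
    return obs / 128.0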
Example #20
            targets.append(target_f[0])
        states = np.array(states)
        targets = np.array(targets)
        self.model.fit(states, targets, nb_epoch=1, verbose=0)  # train the network
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):  # load the trained network weights
        self.model.load_weights(name)

    def save(self, name):  # save the network weights
        self.model.save_weights(name)


if __name__ == "__main__":
    game = Catcher(width=320, height=320)
    env = PLE(game, display_screen=True, state_preprocessor=process_state)
    agent = DQNAgent(env)
    agent.load("./save/catcher.h5")

    # initialization
    #pylab.title("reward")
    #pylab.xlabel("episodes")
    #pylab.ylabel("rewards")
    env.init()
    scores, time = [], []
    for e in range(EPISODES):

        env.reset_game()
        state = env.getGameState()
        state = np.array([list(state[0])])
Example #21
    max_memory_size = 100000
    min_memory_size = 1000  # number needed before model training starts

    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    rewardsVals = {
        "positive": 1.0,
        "negative": -0.01,
        "tick": -0.0,
        "loss": -5.0,
        "win": 5.0
    }

    # PLE takes our game and the state_preprocessor. It will process the state
    # for our agent.
    game = Catcher(128, 128)

    #game = FlappyBird()
    #game = RaycastMaze()

    env = PLE(game,
              fps=60,
              state_preprocessor=nv_state_preprocessor,
              reward_values=rewardsVals)

    agent = Agent(env,
                  batch_size,
                  num_frames,
                  frame_skip,
                  lr,
                  discount,