Example #1
def main():
    force_fps = True  # run the game unthrottled (set False for real-time speed)
    reward = 0.0
    game = WaterWorld()
    p = PLE(game,
            force_fps=force_fps,
            display_screen=False,
            state_preprocessor=nv_state_preprocessor)
    agent = Agent_DQN(p)
    agent.train()
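This example assumes two names it does not define, nv_state_preprocessor and Agent_DQN. A minimal sketch of what the state preprocessor can look like for WaterWorld (the exact feature layout here is an assumption, not the original author's code):

import numpy as np

def nv_state_preprocessor(state):
    # Flatten PLE's WaterWorld state dict into a float vector:
    # player position/velocity followed by every creep's (x, y) position.
    features = [
        state['player_x'], state['player_y'],
        state['player_velocity_x'], state['player_velocity_y'],
    ]
    for creep in state['creep_pos']['GOOD'] + state['creep_pos']['BAD']:
        features.extend(creep)
    return np.array(features, dtype=np.float32)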
Example #2
def main():
    # Create the environment
    game = WaterWorld(width=200, height=200, num_creeps=5)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    p.reset_game()
    # Build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())

    obs = get_obs(p)
    obs_dim = 200 * 200  # flattened 200x200 screen

    rpm = ReplayMemory(MEMORY_SIZE)  # experience replay buffer for DQN

    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim, e_greed_decrement=1e-6, e_greed=0.1)  # e_greed: probability of taking a random action (exploration)

    # # Load a saved model
    # if os.path.exists('./water_world_dqn.ckpt'):
    #     agent.restore('./water_world_dqn.ckpt')

    # Pre-fill the replay buffer so the first training batches have enough sample diversity
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 200000
    # Start training
    episode = 0
    best_reward = -float('inf')
    while episode < max_episode:  # train for max_episode episodes; test episodes do not count toward the total
        # train part
        for i in range(0, 10):
            total_reward = run_episode(p, agent, rpm)
            episode += 1
        # test part
        eval_reward = evaluate(p, agent, render=True)  # render=True shows the game during evaluation
        if eval_reward > best_reward:
            best_reward = eval_reward
            agent.save('model_dir/dqn_waterworld_{}_reward_{}.ckpt'.format(episode, best_reward))
        logger.info('episode:{}    e_greed:{}   test_reward:{}'.format(
            episode, agent.e_greed, eval_reward))
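The example calls a get_obs helper it does not show. A plausible sketch, assuming obs_dim = 200*200 refers to the flattened grayscale screen:

import numpy as np

def get_obs(p):
    # Grab the current screen as grayscale and flatten it to obs_dim,
    # normalized to [0, 1] for the network.
    screen = p.getScreenGrayscale()
    return screen.astype(np.float32).ravel() / 255.0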
Example #3
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()
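A short usage sketch for this wrapper (the class name PLEEnv is assumed here; the snippet only shows its __init__):

import numpy as np

env = PLEEnv('waterworld', display_screen=False)
env.p.reset_game()
# pick a random legal action and step the underlying PLE instance one frame
action = env.action_set[np.random.randint(env.action_size)]
reward = env.p.act(action)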
Example #4
        act_values = self.model.predict(state)

        return np.argmax(act_values[0])


rewards = {
    "tick":
    -0.1,  # each time the game steps forward in time the agent gets -0.1
    "positive": 1,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use lower fps so we can see what's happening a little easier
game = WaterWorld(width=256, height=256, num_creeps=15)

# p = PLE(game, reward_values=rewards)
p = PLE(game,
        fps=30,
        force_fps=False,
        display_screen=True,
        reward_values=rewards)


def process_state(current_state):
    processed_state = list()
    processed_state.append(current_state['player_x'])
    processed_state.append(current_state['player_y'])

    # append the (x, y) position of each remaining good creep
    # (the original snippet is truncated here; the loop body and the
    # return are a minimal completion)
    for creep in current_state['creep_pos']['GOOD']:
        processed_state.extend(creep)

    return processed_state
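A hypothetical usage of process_state (the truncated original does not show one): pull the raw state dict from PLE and reshape the flattened features for model.predict:

import numpy as np

state = np.array(process_state(p.getGameState()), dtype=np.float32)
state = state.reshape(1, -1)  # add the batch dimension model.predict expects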
Example #5
    def test_waterworld(self):
        from ple.games.waterworld import WaterWorld
        game = WaterWorld()
        self.run_a_game(game)
Example #6
from ple import PLE
from ple.games.waterworld import WaterWorld

game_rewards = {
    "tick": -0.1,  # each time the game steps forward in time the agent gets -0.1
    "positive": 100000.0,  # each time the agent collects a green circle
    "negative": -100000.0,  # each time the agent bumps into a red circle
}  # passed to PLE below via reward_values

# make a PLE instance.
# use lower fps so we can see what's happening a little easier
max_creeps = 10
game_global_size = 356
game = WaterWorld(width=game_global_size,
                  height=game_global_size,
                  num_creeps=max_creeps)
p = PLE(game,
        fps=30,
        force_fps=True,
        display_screen=False,
        reward_values=game_rewards)

nr_episodes = 20
nr_steps_per_episode = 1000
min_nr_training_states = 2000
nr_states_for_training = 200
current_NN_states = list()  # tuples of (state, action (0-4), next_state, reward_of_next_state)
max_speed = game_global_size * 8.3  # constants used to normalize the NN inputs
max_distance = (game_global_size * game_global_size * 2)**0.5  # screen diagonal
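A hedged sketch of how these normalization constants might be applied (the feature selection below is an assumption; only the constants come from the snippet):

def normalize_inputs(raw_state):
    # Scale raw WaterWorld state values by the maxima defined above so
    # every network input lands roughly in [-1, 1].
    return [
        raw_state['player_x'] / game_global_size,
        raw_state['player_y'] / game_global_size,
        raw_state['player_velocity_x'] / max_speed,
        raw_state['player_velocity_y'] / max_speed,
        min(raw_state['creep_dist']['GOOD'], default=max_distance) / max_distance,
    ]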
Example #7
    epsilon = 0.15
    epsilon_steps = 30000  # decay steps
    epsilon_min = 0.1
    lr = 0.01
    discount = 0.95  # discount factor
    rng = np.random.RandomState(24)

    # memory settings
    max_memory_size = 100000
    min_memory_size = 1000  # number needed before model training starts

    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # PLE takes our game and the state_preprocessor. It will process the state
    # for our agent.
    game = WaterWorld()
    env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

    agent = Agent(env, batch_size, num_frames, frame_skip, lr, discount, rng)
    agent.build_model2()

    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = True

        # training loop
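        # (The original example is truncated here. A minimal sketch of the
        # loop body follows; agent.predict is an assumed method name, the
        # rest only uses names defined above.)
        while num_episodes < 10:
            if env.game_over():
                env.reset_game()
                num_episodes += 1

            state = env.getGameState()  # already run through nv_state_preprocessor
            if rng.rand() < epsilon:  # epsilon-greedy with linear decay
                action_idx = rng.randint(len(env.getActionSet()))
            else:
                action_idx = agent.predict(state)
            reward = env.act(env.getActionSet()[action_idx])
            rewards.append(reward)
            epsilon = max(epsilon_min, epsilon - epsilon_rate)
            steps += 1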
Example #8
                # print "down"
                return 115
            elif gamestate["player_y"] > state[1]:
                # print "up"
                return 119


# Don't display window.
os.putenv('SDL_VIDEODRIVER', 'fbcon')
os.environ["SDL_VIDEODRIVER"] = "dummy"

# create our game
force_fps = True  # run the game unthrottled (set False for real-time speed)
display_screen = False
reward = 0.0
game = WaterWorld()

# make a PLE instance.
p = PLE(game, force_fps=force_fps, display_screen=display_screen)

# our Naive agent!
agent = WaitedAgent(p.getActionSet())

# init agent and game.
p.init()
p.display_screen = True

# start our loop
score = 0.0
for i in range(10):
    # if the game is over, reset it and keep going
    if p.game_over():
        p.reset_game()

    # (the original snippet is truncated here; this body mirrors PLE's
    # standard agent loop, with pickAction as the assumed WaitedAgent method)
    obs = p.getGameState()
    action = agent.pickAction(reward, obs)
    reward = p.act(action)
    score = p.score()
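The WaitedAgent class is not shown anywhere in this example. A plausible skeleton, modeled on the NaiveAgent from PLE's own examples (the body is an assumption):

import numpy as np

class WaitedAgent(object):
    """Naive agent: picks an action at random from the legal action set."""

    def __init__(self, actions):
        self.actions = actions

    def pickAction(self, reward, obs):
        return self.actions[np.random.randint(0, len(self.actions))]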
Example #9
import numpy as np
from ple import PLE
from ple.games.waterworld import WaterWorld

# let's adjust the rewards our agent receives
rewards = {
    "tick":
    -0.01,  # each time the game steps forward in time the agent gets -0.1
    "positive": 1.0,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use lower fps so we can see what's happening a little easier
game = WaterWorld(width=256, height=256, num_creeps=8)
p = PLE(game,
        fps=15,
        force_fps=False,
        display_screen=True,
        reward_values=rewards)
# we pass in the rewards and PLE will adjust the game for us

p.init()
actions = p.getActionSet()
for i in range(1000):
    if p.game_over():
        p.reset_game()

    action = actions[np.random.randint(0, len(actions))]  # random actions
    reward = p.act(action)
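An optional extension (not part of the original snippet): accumulate the shaped per-step rewards into an episode total and report it whenever the game resets:

total = 0.0
for i in range(1000):
    if p.game_over():
        print('episode reward: {:.2f}'.format(total))
        total = 0.0
        p.reset_game()

    action = actions[np.random.randint(0, len(actions))]
    total += p.act(action)  # act() returns the shaped reward for this step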
Example #10
    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            env_name: PLE environment

            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            isRGB: get color or greyscale version of statespace #isRGB = False,
            game_height,game_width: height and width of environment
            visualize: If set True, the program will visualize the trainings, will slow down training
            lives: number of lives in game. Game resets on game over (ie lives = 0). only in Catcher and Pong (score)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps  # frames per second
        self.force_fps = force_fps  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to hold each action for
        # self.isRGB = isRGB  # PLE always returns color; let TensorForce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game, reward_values=self.rewards, fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps, force_fps=self.force_fps, display_screen=self.visualize,
                       state_preprocessor=process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
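A hedged sketch of how the wrapper typically consumes this mapping later (the method name execute is an assumption; only action_list and self.env come from the snippet):

    def execute(self, action_index):
        # Map the agent's integer action index to the pygame key code
        # (or None for no-op) that PLE's act() expects.
        ple_action = self.action_list[action_index]
        reward = self.env.act(ple_action)
        return self.env.getScreenRGB(), self.env.game_over(), reward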
Example #11
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        act_values = self.model.predict(state)

        return np.argmax(act_values[0])

rewards = {
    "tick": -0.1,  # each time the game steps forward in time the agent gets -0.1
    "positive": 1,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use lower fps so we can see what's happening a little easier
game = WaterWorld(width=100, height=100, num_creeps=15)

# p = PLE(game, reward_values=rewards)
p = PLE(game, fps=30, force_fps=False, display_screen=True,
        reward_values=rewards)

p.init()
actions = p.getActionSet()[:-1]  # drop the last entry (the no-op/None action)
agent = Agent(len(actions))

epochs = 10000000
game_duration = 1000
for epoch in range(epochs):
    p.reset_game()

    for it in range(game_duration):
Example #12
# Water World

from ple import PLE
from ple.games.waterworld import WaterWorld
import random
import sys
import time

game = WaterWorld(width=800, height=800, num_creeps=4)

p = PLE(game, display_screen=True)
p.init()

print(p.getActionSet())
action_set = p.getActionSet()

nb_frames = 1000000

for f in range(nb_frames):
    if p.game_over():
        sys.exit()
    p.act(random.choice(action_set))
    time.sleep(.05)
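A common variation (not in the original): reset on game over instead of exiting, so the random rollout runs for all nb_frames:

for f in range(nb_frames):
    if p.game_over():
        p.reset_game()  # start a fresh episode instead of sys.exit()
    p.act(random.choice(action_set))
    time.sleep(.05)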