def __init__(self, model, screen=False, forcefps=True):
     self.model = model
     self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
     self.env = PLE(self.game,
                    fps=30,
                    display_screen=screen,
                    force_fps=forcefps)
     self.env.init()
     self.env.getGameState = self.game.getGameState
     self.es = Deep_Evolution_Strategy(self.model.get_weights(),
                                       self.get_reward,
                                       self.POPULATION_SIZE, self.SIGMA,
                                       self.LEARNING_RATE)
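Deep_Evolution_Strategy and the get_reward callback are defined elsewhere; a minimal sketch of what get_reward could look like, assuming the model exposes set_weights/predict (hypothetical names) and that numpy is imported as np, is:

def get_reward(self, weights):
    # Sketch under assumed APIs: score one candidate weight vector by
    # playing a full episode and returning the accumulated reward.
    self.model.set_weights(weights)  # assumed model setter
    self.env.reset_game()
    total_reward = 0.0
    while not self.env.game_over():
        state = np.array(list(self.env.getGameState().values()))
        action_idx = np.argmax(self.model.predict(state))  # assumed model API
        total_reward += self.env.act(self.env.getActionSet()[action_idx])
    return total_reward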
Example #2
def main():

    env = PLE(Pixelcopter(),
              fps=30,
              display_screen=True,
              state_preprocessor=None)
    action_dim = len(env.getActionSet())
    obs_shape = len(env.getGameState())

    rpm = ReplayMemory(MEMORY_SIZE)  # experience replay buffer for DQN

    # build the agent using the PARL framework
    model = Model(act_dim=action_dim)
    algorithm = DQN(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(
        algorithm,
        obs_dim=obs_shape,
        act_dim=action_dim,
        e_greed=0.1,  # probability of taking a random action, for exploration
        e_greed_decrement=1e-6)  # gradually reduce exploration as training converges

    # load a previously saved model
    # save_path = './dqn_model.ckpt'
    # agent.restore(save_path)

    # pre-fill the replay buffer so early training has enough diverse samples
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 30000

    # start training
    episode = 0
    while episode < max_episode:  # train for max_episode episodes; test runs are not counted
        # train part
        for i in range(0, 50):
            total_reward = run_episode(env, agent, rpm)
            episode += 1

        # test part
        eval_reward, max_reward = evaluate(env, agent,
                                           render=False)  # set render=True to watch the game
        logger.info(
            'episode:{}    e_greed:{}   test_reward:{}   max_reward:{}'.format(
                episode, agent.e_greed, eval_reward, max_reward))

    # training finished; save the model
    save_path = './dqn_model.ckpt'
    agent.save(save_path)
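run_episode and evaluate are referenced above but not shown. A minimal sketch of run_episode, assuming a PARL-style Agent API (sample/learn), an assumed ReplayMemory append/sample interface, and a hypothetical BATCH_SIZE constant, might be:

def run_episode(env, agent, rpm):
    # Sketch under assumed APIs: one training episode that also fills the replay buffer.
    env.reset_game()
    obs = list(env.getGameState().values())
    total_reward = 0
    while not env.game_over():
        action = agent.sample(obs)  # epsilon-greedy action index (assumed API)
        reward = env.act(env.getActionSet()[action])
        next_obs = list(env.getGameState().values())
        done = env.game_over()
        rpm.append((obs, action, reward, next_obs, done))  # assumed ReplayMemory API
        if len(rpm) > MEMORY_WARMUP_SIZE:
            batch = rpm.sample(BATCH_SIZE)  # BATCH_SIZE is a hypothetical constant
            agent.learn(*batch)  # assumed Agent API
        obs = next_obs
        total_reward += reward
    return total_reward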
Example #3
    def __init__(self, screen=False, forcefps=True):
        self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
        self.env = PLE(self.game,
                       fps=30,
                       display_screen=screen,
                       force_fps=forcefps)
        self.env.init()
        self.env.getGameState = self.game.getGameState

        def conv_layer(x, conv, stride=1):
            return tf.nn.conv2d(x,
                                conv, [1, stride, stride, 1],
                                padding='SAME')

        def pooling(x, k=2, stride=2):
            return tf.nn.max_pool(x,
                                  ksize=[1, k, k, 1],
                                  strides=[1, stride, stride, 1],
                                  padding='SAME')

        self.X = tf.placeholder(tf.float32, [None, 80, 80, 4])
        self.Y = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
        w_conv1 = tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.1))
        b_conv1 = tf.Variable(tf.truncated_normal([32], stddev=0.01))
        conv1 = tf.nn.relu(conv_layer(self.X, w_conv1, stride=4) + b_conv1)
        pooling1 = pooling(conv1)
        w_conv2 = tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.1))
        b_conv2 = tf.Variable(tf.truncated_normal([64], stddev=0.01))
        conv2 = tf.nn.relu(conv_layer(pooling1, w_conv2, stride=2) + b_conv2)
        w_conv3 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.1))
        b_conv3 = tf.Variable(tf.truncated_normal([64], stddev=0.01))
        conv3 = tf.nn.relu(conv_layer(conv2, w_conv3) + b_conv3)
        # size of the flattened conv3 output feeding the fully connected layers
        flatten_size = int(conv3.shape[1]) * int(conv3.shape[2]) * int(
            conv3.shape[3])
        conv3 = tf.reshape(conv3, [-1, flatten_size])
        w_fc1 = tf.Variable(
            tf.truncated_normal([flatten_size, 512], stddev=0.1))
        b_fc1 = tf.Variable(tf.truncated_normal([512], stddev=0.01))
        w_fc2 = tf.Variable(tf.truncated_normal([512, 2], stddev=0.1))
        b_fc2 = tf.Variable(tf.truncated_normal([2], stddev=0.01))
        fc_1 = tf.nn.relu(tf.matmul(conv3, w_fc1) + b_fc1)
        self.logits = tf.matmul(fc_1, w_fc2) + b_fc2
        self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.LEARNING_RATE).minimize(self.cost)
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(tf.global_variables())
        self.rewards = []
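The constructor only builds the graph; a minimal sketch of greedy action selection against it, assuming an 80x80x4 stack of preprocessed frames and numpy imported as np, might be:

    def predict_action(self, state):
        # Sketch: forward pass through the Q-network, pick the greedy action index.
        q_values = self.sess.run(self.logits,
                                 feed_dict={self.X: state.reshape((1, 80, 80, 4))})
        return np.argmax(q_values[0])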
Example #4
 def __init__(self, game="pixelcopter", fps=30):
     os.environ['SDL_VIDEODRIVER'] = 'dummy'
     self.game_name = game
     if game == "flappy":
         engine = FlappyBird()
     elif game == "pixelcopter":
         engine = Pixelcopter()
     else:
         assert False, "This game is not available"
     engine.rewards["loss"] = -5  # reward at terminal state
     self.reward_terminal = -5
     self.game = PLE(engine, fps=fps, display_screen=False)
     self.game.init()
     self.game.act(0)  # Start the game by providing arbitrary key as input
     self.key_input = self.game.getActionSet()
     self.reward = 0
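Only initialization is shown for this wrapper; a minimal sketch of a matching step method, assuming the caller passes an index into self.key_input, could be:

 def step(self, action_index):
     # Sketch: apply one action, then report the reward and terminal flag.
     self.reward = self.game.act(self.key_input[action_index])
     done = self.game.game_over()
     if done:
         self.game.reset_game()
     return self.reward, done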
Example #5
 def __init__(self, screen=False, forcefps=True):
     self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
     self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
     self.env.init()
     self.env.getGameState = self.game.getGameState
     self.X = tf.placeholder(tf.float32, (None, self.INPUT_SIZE))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     input_layer = tf.Variable(tf.random_normal([self.INPUT_SIZE, self.LAYER_SIZE]))
     bias = tf.Variable(tf.random_normal([self.LAYER_SIZE]))
     output_layer = tf.Variable(tf.random_normal([self.LAYER_SIZE, self.OUTPUT_SIZE]))
     feed_forward = tf.nn.relu(tf.matmul(self.X, input_layer) + bias)
     self.logits = tf.matmul(feed_forward, output_layer)
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
     self.saver = tf.train.Saver(tf.global_variables())
     self.rewards = []
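As with the convolutional variant in Example #3, only the graph is built here; a minimal sketch of a single training step on a batch of states and TD targets might be:

 def train_batch(self, states, targets):
     # Sketch: one Adam step on the squared error between targets and Q-values.
     # states: (batch, INPUT_SIZE); targets: (batch, OUTPUT_SIZE).
     cost, _ = self.sess.run([self.cost, self.optimizer],
                             feed_dict={self.X: states, self.Y: targets})
     return cost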
Example #6
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()
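The constructor above exposes self.p, the action set, and the screen size; a minimal sketch of reset/step helpers in the same style, assuming the agent learns from raw RGB frames, could be:

    def reset(self):
        # Sketch: restart the episode and return the first frame.
        self.p.reset_game()
        return self.p.getScreenRGB()

    def step(self, action_index):
        # Sketch: map an action index to a PLE key and advance one frame.
        reward = self.p.act(self.action_set[action_index])
        return self.p.getScreenRGB(), reward, self.p.game_over()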
Example #7
 def test_pixelcopter(self):
     from ple.games.pixelcopter import Pixelcopter
     game = Pixelcopter()
     self.run_a_game(game)
Example #8
    """
  This interprets the human player's keyboard actions. If the user presses 
  the space bar, the pixel will move up.
  """
    def __init__(self, actions):
        self.actions = actions

    def pickAction(self, state):
        if keyboard.is_pressed('space'):
            return self.actions[0]  # move up
        return self.actions[1]  # do nothing


############################################################

game = Pixelcopter(width=200, height=200)
env = PLE(game, fps=70, display_screen=True)

agent = Human(actions=env.getActionSet())
env.init()

scores = []
min_reward = float('inf')
max_reward = float('-inf')

while True:
    episode_reward = 0.0
    print('Press s to start')
    while not keyboard.is_pressed('s'):
        continue
Example #9
    img = img.reshape(1, img.shape[0], img.shape[1], 1)
    return img

def act(state):
    if np.random.rand() <= epsilon:
        # The agent acts randomly
        return act_dict_decode[np.random.randint(2)]
    action_predict = sess.run(y, {x: state})
    # Pick the action based on the predicted reward
    return act_dict_decode[np.argmax(action_predict)]

EPISODES = 6000
OBSERVATIONS = 300
#reward_discount = 0.99
time_per_episode = 1000
game = Pixelcopter(img_size, img_size)
env = PLE(game)
action_size = 2
score_mean = np.zeros(EPISODES//10)
score_std = np.zeros(EPISODES//10)
score_last10 = []
training_count = 0
plt.figure()
max_score = 0

for e in range(EPISODES):
    env.init()
    state = process(env.getScreenGrayscale())
    for time in range(time_per_episode):
        # Set actions
        if time < 3:
Example #10
    def __init__(self, lr):

        self.lr = lr
        self.game = Pixelcopter(width=480, height=480)
        self.p = PLE(self.game, fps=60, display_screen=True)
        self.actions = self.p.getActionSet()
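Only the constructor survives in this example; a minimal sketch of a random-policy rollout over self.actions, useful as a baseline before any learning with self.lr, might be (random assumed imported):

    def run_random_episode(self):
        # Sketch: play one episode with uniformly random actions.
        self.p.reset_game()
        total_reward = 0.0
        while not self.p.game_over():
            total_reward += self.p.act(random.choice(self.actions))
        return total_reward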
Example #11
    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            env_name: PLE environment

            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            isRGB: get color or greyscale version of statespace #isRGB = False,
            game_height,game_width: height and width of environment
            visualize: If set True, the program will visualize the trainings, will slow down training
            lives: number of lives in game. Game resets on game over (ie lives = 0). only in Catcher and Pong (score)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps  # frames per second
        self.force_fps = force_fps  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to hold each action for
        # self.isRGB = isRGB  # always returns color; let TensorForce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown game environment.')

        if self.state_as_image:
            process_state = None
        else:
            # create a preprocessor that reads the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(), dtype=float, count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor=process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
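The stepping logic for this wrapper is not shown; a minimal sketch of an execute method in the same style, converting an action index back to a PLE key and returning a TensorForce-like (state, terminal, reward) triple, could be:

    def execute(self, action):
        # Sketch: PLE applies frame_skip/num_steps internally on each act() call.
        reward = self.env.act(self.action_list[action])
        self.episode_reward += reward
        self.episode_time_steps += 1
        terminal = self.env.game_over()
        if self.state_as_image:
            state = self.env.getScreenRGB()
        else:
            state = self.env.getGameState()  # already a numpy array via state_preprocessor
        return state, terminal, reward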
Example #12
from ple.games.pixelcopter import Pixelcopter
from ple import PLE
import random
import matplotlib.pyplot as plt
import numpy as np
import rl.deep_Q_learning as DQL
import h5py

game = Pixelcopter(500, 500)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()


def process_obs(obs):
    # Flatten the PLE game-state dict into a normalized feature vector.
    values = []
    for item in obs:
        values.append(obs[item])
    if len(values) == 7:
        out = np.array([values]) / np.array([500, 500, 500, 500, 500, 500, 500])
        return out


scores = []

agent = DQL.deep_learner(7, 2, p.getActionSet())

nb_games = 0
nb_max = 1000

for layer in agent.model.layers:
Example #13
        elif state['player_vel'] < deterministic_vel:
            return self.actions[1]
        return self.qLearning.getAction(state)

    def incorporateFeedback(self, state, action, reward, newState):
        self.qLearning.incorporateFeedback(state, action, reward, newState)

    def printWeights(self):
        print(str(self.qLearning.getWeights()))
        print('num weights: %d' % len(self.qLearning.getWeights()))


############################################################
if __name__ == '__main__':
    start_time = datetime.datetime.now()
    game = Pixelcopter(width=200, height=200)
    env = PLE(game, fps=30, display_screen=displayScreen)

    agent = Bot(actions=env.getActionSet())
    env.init()

    total_reward = 0.0
    min_reward = float('inf')
    max_reward = float('-inf')
    min_vel = float('inf')
    max_vel = float('-inf')

    all_episode_scores = []
    plot_episode_scores = []
    plotted_episodes = []
    all_final_velocities = []
Example #14
import matplotlib.pyplot as plt
import numpy as np
import rl.deep_Q_learning as DQL
import h5py


def process_obs(obs):
    # Flatten the PLE game-state dict into a feature vector.
    values = []
    for item in obs:
        values.append(obs[item])
    if len(values) == 7:
        return np.array(values)


game = Pixelcopter(500, 500)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

scores = []

agent = DQL.deep_learner(1, 2, p.getActionSet())
agent.learning_rate = 0.001
agent.epsilon = 0.001
agent.epsilon_decay = 1.0
agent.epsilon_min = 0.00001
#agent.load_model_json()
agent.load_alt()

nb_games = 0
Example #15
from matplotlib import pyplot as plt
import skimage
from PIL import Image
from skimage import color,transform,exposure
from scipy.misc import toimage


PLAY_GAME = False  # set to True if you want the agent to play without training
uses_critic = True
uses_parameter_noising = False


IMG_DIM = 80

ENVIRONMENT_NAME = "Pong-v0"
game = Pixelcopter(width=160, height=160)
p = PLE(game, fps=30, display_screen=True)
num_env_variables = 8
num_env_actions = 2

num_initial_observation = 3
learning_rate = 0.001
apLearning_rate = 0.01

MUTATION_PROB = 0.4

littl_sigma = 0.00006
big_sigma = 0.003
upper_delta = 0.0375
lower_delta = 0.015
#gaussSigma = 0.01
Example #16
            sys.exit(0)
        else:
            action = K_s

    def on_release(key):
        global action
        action = K_s

    # Collect events until released
    with keyboard.Listener(on_press=on_press,
                           on_release=on_release) as listener:
        listener.join()


print("Creating Environment..")
game = Pixelcopter(512, 512)
p = PLE(game, fps=25, force_fps=False, display_screen=True)
p.init()

t = Thread(
    target=start_listen)  # Start listening to key presses and update actions
t.start()

print("Start playing..... :)")
while True:

    r = 0.0
    for _ in range(frame_skip):
        observation = p.getGameState()
        prev_action = action
        action = K_s  # a crude hack to keep things running smoothly