def __init__(self, model, screen=False, forcefps=True):
    self.model = model
    self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
    self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
    self.env.init()
    self.env.getGameState = self.game.getGameState
    self.es = Deep_Evolution_Strategy(self.model.get_weights(),
                                      self.get_reward,
                                      self.POPULATION_SIZE,
                                      self.SIGMA,
                                      self.LEARNING_RATE)
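# NOTE: the get_reward callback handed to Deep_Evolution_Strategy above is not shown here.
# A minimal sketch of what it might look like, assuming numpy is imported as np and the
# model exposes set_weights()/predict(); this is illustrative, not the original code.
def get_reward(self, weights):
    self.model.set_weights(weights)              # evaluate one perturbed weight sample
    self.env.reset_game()
    total_reward = 0.0
    while not self.env.game_over():
        state = np.array(list(self.env.getGameState().values())).reshape(1, -1)
        action_index = np.argmax(self.model.predict(state))
        total_reward += self.env.act(self.env.getActionSet()[action_index])
    return total_reward                          # fitness = accumulated episode reward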
def main():
    env = PLE(Pixelcopter(), fps=30, display_screen=True, state_preprocessor=None)
    action_dim = len(env.getActionSet())
    obs_shape = len(env.getGameState())

    rpm = ReplayMemory(MEMORY_SIZE)  # experience replay buffer for DQN

    # Build the agent with the PARL framework
    model = Model(act_dim=action_dim)
    algorithm = DQN(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(
        algorithm,
        obs_dim=obs_shape,
        act_dim=action_dim,
        e_greed=0.1,  # take a random action with some probability, for exploration
        e_greed_decrement=1e-6)  # gradually reduce exploration as training converges

    # Load a saved model
    # save_path = './dqn_model.ckpt'
    # agent.restore(save_path)

    # Pre-fill the replay buffer so the earliest training batches have enough sample diversity
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 30000

    # Start training
    episode = 0
    while episode < max_episode:  # train for max_episode episodes; test episodes are not counted
        # train part
        for i in range(0, 50):
            total_reward = run_episode(env, agent, rpm)
            episode += 1

        # test part
        eval_reward, max_reward = evaluate(env, agent, render=False)  # render=True to watch the agent play
        logger.info(
            'episode:{} e_greed:{} test_reward:{} max_reward:{}'.format(
                episode, agent.e_greed, eval_reward, max_reward))

    # Training finished, save the model
    save_path = './dqn_model.ckpt'
    agent.save(save_path)
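# NOTE: main() relies on run_episode and evaluate helpers that are not shown here.
# A hedged sketch of run_episode for this PLE-based setup, assuming PARL-style
# agent.sample()/agent.learn() and ReplayMemory.append()/sample(), plus hypothetical
# LEARN_FREQ and BATCH_SIZE constants:
def run_episode(env, agent, rpm):
    total_reward, step = 0, 0
    env.reset_game()
    obs = list(env.getGameState().values())
    while not env.game_over():
        step += 1
        action = agent.sample(obs)                    # epsilon-greedy action index
        reward = env.act(env.getActionSet()[action])  # PLE returns the per-step reward
        next_obs = list(env.getGameState().values())
        done = env.game_over()
        rpm.append((obs, action, reward, next_obs, done))
        if len(rpm) > MEMORY_WARMUP_SIZE and step % LEARN_FREQ == 0:
            agent.learn(*rpm.sample(BATCH_SIZE))      # one gradient step on a sampled batch
        total_reward += reward
        obs = next_obs
    return total_reward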
def __init__(self, screen=False, forcefps=True):
    self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
    self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
    self.env.init()
    self.env.getGameState = self.game.getGameState

    def conv_layer(x, conv, stride=1):
        return tf.nn.conv2d(x, conv, [1, stride, stride, 1], padding='SAME')

    def pooling(x, k=2, stride=2):
        return tf.nn.max_pool(x, ksize=[1, k, k, 1],
                              strides=[1, stride, stride, 1], padding='SAME')

    self.X = tf.placeholder(tf.float32, [None, 80, 80, 4])
    self.Y = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])

    w_conv1 = tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.1))
    b_conv1 = tf.Variable(tf.truncated_normal([32], stddev=0.01))
    conv1 = tf.nn.relu(conv_layer(self.X, w_conv1, stride=4) + b_conv1)
    pooling1 = pooling(conv1)

    w_conv2 = tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.1))
    b_conv2 = tf.Variable(tf.truncated_normal([64], stddev=0.01))
    conv2 = tf.nn.relu(conv_layer(pooling1, w_conv2, stride=2) + b_conv2)

    w_conv3 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.1))
    b_conv3 = tf.Variable(tf.truncated_normal([64], stddev=0.01))
    conv3 = tf.nn.relu(conv_layer(conv2, w_conv3) + b_conv3)

    pulling_size = int(conv3.shape[1]) * int(conv3.shape[2]) * int(conv3.shape[3])
    conv3 = tf.reshape(conv3, [-1, pulling_size])

    w_fc1 = tf.Variable(tf.truncated_normal([pulling_size, 512], stddev=0.1))
    b_fc1 = tf.Variable(tf.truncated_normal([512], stddev=0.01))
    w_fc2 = tf.Variable(tf.truncated_normal([512, 2], stddev=0.1))
    b_fc2 = tf.Variable(tf.truncated_normal([2], stddev=0.01))

    fc_1 = tf.nn.relu(tf.matmul(conv3, w_fc1) + b_fc1)
    self.logits = tf.matmul(fc_1, w_fc2) + b_fc2
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE).minimize(self.cost)

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver(tf.global_variables())
    self.rewards = []
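# NOTE: a hedged sketch of how the (None, 80, 80, 4) input expected by self.X could be
# built from PLE's grayscale screen. The resize/normalize/frame-stacking choices below
# are assumptions (OpenCV is assumed available), not part of the class above.
import numpy as np
import cv2

def preprocess_frame(env):
    frame = env.getScreenGrayscale()             # 2-D uint8 screen buffer from PLE
    frame = cv2.resize(frame, (80, 80))          # downsample to the network input size
    return frame.astype(np.float32) / 255.0      # scale pixels to [0, 1]

# At episode start the same frame is typically repeated 4 times; afterwards the stack
# is rolled forward by one frame per step and fed to self.X as a (1, 80, 80, 4) batch.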
def __init__(self, game="pixelcopter", fps=30): os.environ['SDL_VIDEODRIVER'] = 'dummy' self.game_name = game if game == "flappy": engine = FlappyBird() elif game == "pixelcopter": engine = Pixelcopter() else: assert False, "This game is not available" engine.rewards["loss"] = -5 # reward at terminal state self.reward_terminal = -5 self.game = PLE(engine, fps=fps, display_screen=False) self.game.init() self.game.act(0) # Start the game by providing arbitrary key as input self.key_input = self.game.getActionSet() self.reward = 0
def __init__(self, screen=False, forcefps=True):
    self.game = Pixelcopter(width=int(48 * 5), height=int(48 * 5))
    self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
    self.env.init()
    self.env.getGameState = self.game.getGameState

    self.X = tf.placeholder(tf.float32, (None, self.INPUT_SIZE))
    self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
    input_layer = tf.Variable(tf.random_normal([self.INPUT_SIZE, self.LAYER_SIZE]))
    bias = tf.Variable(tf.random_normal([self.LAYER_SIZE]))
    output_layer = tf.Variable(tf.random_normal([self.LAYER_SIZE, self.OUTPUT_SIZE]))
    feed_forward = tf.nn.relu(tf.matmul(self.X, input_layer) + bias)
    self.logits = tf.matmul(feed_forward, output_layer)
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver(tf.global_variables())
    self.rewards = []
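# NOTE: hypothetical helpers for the two-layer network above (not part of the original
# class): a Q-value prediction and a single fitted-Q update through self.optimizer.
def predict(self, states):
    return self.sess.run(self.logits, feed_dict={self.X: states})

def train_step(self, states, target_q):
    cost, _ = self.sess.run([self.cost, self.optimizer],
                            feed_dict={self.X: states, self.Y: target_q})
    return cost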
def __init__(self, game, display_screen=False):
    from ple import PLE
    assert game in [
        'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
        'puckworld', 'raycastmaze', 'snake', 'waterworld'
    ]
    if game == 'catcher':
        from ple.games.catcher import Catcher
        env = Catcher()
    elif game == 'monsterkong':
        from ple.games.monsterkong import MonsterKong
        env = MonsterKong()
    elif game == 'flappybird':
        from ple.games.flappybird import FlappyBird
        env = FlappyBird()
    elif game == 'pixelcopter':
        from ple.games.pixelcopter import Pixelcopter
        env = Pixelcopter()
    elif game == 'pong':
        from ple.games.pong import Pong
        env = Pong()
    elif game == 'puckworld':
        from ple.games.puckworld import PuckWorld
        env = PuckWorld()
    elif game == 'raycastmaze':
        from ple.games.raycastmaze import RaycastMaze
        env = RaycastMaze()
    elif game == 'snake':
        from ple.games.snake import Snake
        env = Snake()
    elif game == 'waterworld':
        from ple.games.waterworld import WaterWorld
        env = WaterWorld()
    self.p = PLE(env, fps=30, display_screen=display_screen)
    self.action_set = self.p.getActionSet()
    self.action_size = len(self.action_set)
    self.screen_dims = self.p.getScreenDims()
    self.p.init()
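# NOTE: a hedged sketch of how this wrapper might expose a gym-like step; the method
# name and return format are illustrative assumptions, not part of the original class.
def step(self, action_index):
    reward = self.p.act(self.action_set[action_index])  # press the chosen key for one frame
    obs = self.p.getScreenRGB()                          # raw screen observation
    done = self.p.game_over()
    return obs, reward, done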
def test_pixelcopter(self):
    from ple.games.pixelcopter import Pixelcopter
    game = Pixelcopter()
    self.run_a_game(game)
""" This interprets the human player's keyboard actions. If the user presses the space bar, the pixel will move up. """ def __init__(self, actions): self.actions = actions def pickAction(self, state): if keyboard.is_pressed('space'): return self.actions[0] # move up return self.actions[1] # do nothing ############################################################ game = Pixelcopter(width=200, height=200) env = PLE(game, fps=70, display_screen=True) agent = Human(actions=env.getActionSet()) env.init() scores = [] min_reward = float('inf') max_reward = float('-inf') while True: episode_reward = 0.0 print 'Press s to start' while not keyboard.is_pressed('s'): continue
    img = img.reshape(1, img.shape[0], img.shape[1], 1)
    return img

def act(state):
    if np.random.rand() <= epsilon:
        # The agent acts randomly
        return act_dict_decode[np.random.randint(2)]
    action_predict = sess.run(y, {x: state})
    # Pick the action based on the predicted reward
    return act_dict_decode[np.argmax(action_predict)]

EPISODES = 6000
OBSERVATIONS = 300
# reward_discount = 0.99
time_per_episode = 1000

game = Pixelcopter(img_size, img_size)
env = PLE(game)
action_size = 2

score_mean = np.zeros(EPISODES // 10)
score_std = np.zeros(EPISODES // 10)
score_last10 = []
training_count = 0
plt.figure()
max_score = 0

for e in range(EPISODES):
    env.init()
    state = process(env.getScreenGrayscale())
    for time in range(time_per_episode):
        # Set actions
        if time < 3:
def __init__(self, lr):
    self.lr = lr
    self.game = Pixelcopter(width=480, height=480)
    self.p = PLE(self.game, fps=60, display_screen=True)
    self.actions = self.p.getActionSet()
def __init__(self, game_name, rewards, state_as_image=True, fps=30, force_fps=True,
             frame_skip=2, hold_action=2, visualize=False, width=84, height=84, lives=1):
    """
    Initialize Pygame Learning Environment
    https://github.com/ntasfi/PyGame-Learning-Environment

    Args:
        game_name: PLE environment name
        rewards: dict of reward values passed to PLE
        fps: frames per second
        force_fps: False for slower speeds
        frame_skip: number of env frames to skip
        hold_action: number of env frames to hold each action for
        isRGB: get color or greyscale version of the state space (currently unused; see
               the commented-out isRGB handling below)
        width, height: width and height of the environment
        visualize: if set True, the program will visualize the training, which slows it down
        lives: number of lives in the game; the game resets on game over (i.e. lives = 0).
               Only used in Catcher and Pong (score)
    """
    self.env_name = game_name
    self.rewards = rewards
    self.lives = lives
    self.state_as_image = state_as_image
    self.fps = fps  # frames per second
    self.force_fps = force_fps  # False for slower speeds
    self.frame_skip = frame_skip  # frames to skip
    self.ple_num_steps = hold_action  # frames to continue an action for
    # self.isRGB = isRGB  # always returns color; let TensorForce do the processing
    self.visualize = visualize
    self.width = width
    self.height = height

    # testing
    self.reached_terminal = 0
    self.episode_time_steps = 0
    self.episode_reward = 0
    self.total_time_steps = 0

    if self.env_name == 'catcher':
        self.game = Catcher(width=self.width, height=self.height, init_lives=self.lives)
    elif self.env_name == 'pixelcopter':
        self.game = Pixelcopter(width=self.width, height=self.height)
    elif self.env_name == 'pong':
        self.game = Pong(width=self.width, height=self.height, MAX_SCORE=self.lives)
    elif self.env_name == 'puckworld':
        self.game = PuckWorld(width=self.width, height=self.height)
    elif self.env_name == 'raycastmaze':
        self.game = RaycastMaze(width=self.width, height=self.height)
    elif self.env_name == 'snake':
        self.game = Snake(width=self.width, height=self.height)
    elif self.env_name == 'waterworld':
        self.game = WaterWorld(width=self.width, height=self.height)
    elif self.env_name == 'monsterkong':
        self.game = MonsterKong()
    elif self.env_name == 'flappybird':
        self.game = FlappyBird(width=144, height=256)  # FlappyBird limits height and width
    else:
        raise TensorForceError('Unknown Game Environment.')

    if self.state_as_image:
        process_state = None
    else:
        # create a preprocessor that reads the state dictionary as a numpy array
        def process_state(state):
            # ret_value = np.fromiter(state.values(), dtype=float, count=len(state))
            ret_value = np.array(list(state.values()), dtype=np.float32)
            return ret_value

    # make a PLE instance
    self.env = PLE(self.game, reward_values=self.rewards, fps=self.fps,
                   frame_skip=self.frame_skip, num_steps=self.ple_num_steps,
                   force_fps=self.force_fps, display_screen=self.visualize,
                   state_preprocessor=process_state)
    # self.env.init()
    # self.env.act(self.env.NOOP)  # game starts on a black screen
    # self.env.reset_game()
    # self.env.act(self.env.NOOP)
    # self.env.act(self.env.NOOP)
    # self.env.act(self.env.NOOP)
    # self.env.act(self.env.NOOP)
    # self.env.reset_game()

    # set up the game-screen buffer
    if state_as_image:
        w, h = self.env.getScreenDims()
        self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
    else:
        self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
    # if isRGB:
    #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
    # else:
    #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

    # set up the action converter:
    # PLE returns legal action indexes, convert these to just numbers
    self.action_list = self.env.getActionSet()
    # put None (the no-op key) last, then sort the remaining keys in ascending order
    self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
from ple.games.pixelcopter import Pixelcopter
from ple import PLE
import random
import matplotlib.pyplot as plt
import numpy as np
import rl.deep_Q_learning as DQL
import h5py

game = Pixelcopter(500, 500)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

def process_obs(obs):
    # Collect the 7 state features and normalize them by the 500-pixel screen size
    values = []
    max_values = np.array([])
    for item in obs:
        values.append(obs[item])
    if len(values) == 7:
        out = np.array([values]) / np.array([500, 500, 500, 500, 500, 500, 500])
        return out

scores = []
agent = DQL.deep_learner(7, 2, p.getActionSet())
nb_games = 0
nb_max = 1000

for layer in agent.model.layers:
    elif state['player_vel'] < deterministic_vel:
        return self.actions[1]
    return self.qLearning.getAction(state)

def incorporateFeedback(self, state, action, reward, newState):
    self.qLearning.incorporateFeedback(state, action, reward, newState)

def printWeights(self):
    print(str(self.qLearning.getWeights()))
    print('num weights: %d' % len(self.qLearning.getWeights()))

############################################################

if __name__ == '__main__':
    start_time = datetime.datetime.now()
    game = Pixelcopter(width=200, height=200)
    env = PLE(game, fps=30, display_screen=displayScreen)
    agent = Bot(actions=env.getActionSet())
    env.init()

    total_reward = 0.0
    min_reward = float('inf')
    max_reward = float('-inf')
    min_vel = float('inf')
    max_vel = float('-inf')
    all_episode_scores = []
    plot_episode_scores = []
    plotted_episodes = []
    all_final_velocities = []
from ple.games.pixelcopter import Pixelcopter
from ple import PLE
import matplotlib.pyplot as plt
import numpy as np
import rl.deep_Q_learning as DQL
import h5py

def process_obs(obs):
    # Flatten the 7-feature state dict into a numpy array
    values = []
    for item in obs:
        values.append(obs[item])
    if len(values) == 7:
        out = np.array(values)
        return out

game = Pixelcopter(500, 500)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

scores = []
agent = DQL.deep_learner(1, 2, p.getActionSet())
agent.learning_rate = 0.001
agent.epsilon = 0.001
agent.epsilon_decay = 1.0
agent.epsilon_min = 0.00001
# agent.load_model_json()
agent.load_alt()
nb_games = 0
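# NOTE: hedged usage sketch for the evaluation setup above; agent.act is an assumed
# method of rl.deep_Q_learning.deep_learner, named here only for illustration.
p.reset_game()
while not p.game_over():
    state = process_obs(p.getGameState())      # the 7 raw state features
    action_index = agent.act(state)            # greedy action from the loaded model
    p.act(p.getActionSet()[action_index])
scores.append(p.score())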
from matplotlib import pyplot as plt
import skimage
from PIL import Image
from skimage import color, transform, exposure
from scipy.misc import toimage

PLAY_GAME = False  # Set to True if you want the agent to play without training
uses_critic = True
uses_parameter_noising = False

IMG_DIM = 80

ENVIRONMENT_NAME = "Pong-v0"
game = Pixelcopter(width=160, height=160)
p = PLE(game, fps=30, display_screen=True)

num_env_variables = 8
num_env_actions = 2
num_initial_observation = 3
learning_rate = 0.001
apLearning_rate = 0.01
MUTATION_PROB = 0.4
littl_sigma = 0.00006
big_sigma = 0.003
upper_delta = 0.0375
lower_delta = 0.015
# gaussSigma = 0.01
        sys.exit(0)
    else:
        action = K_s

def on_release(key):
    global action
    action = K_s

def start_listen():
    # Collect events until released
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        listener.join()

print("Creating Environment..")
game = Pixelcopter(512, 512)
p = PLE(game, fps=25, force_fps=False, display_screen=True)
p.init()

t = Thread(target=start_listen)  # Start listening to key presses and update actions
t.start()

print("Start playing..... :)")
while True:
    r = 0.0
    for _ in range(frame_skip):
        observation = p.getGameState()
        prev_action = action
        action = K_s  # A bad hack to keep things running smoothly