def make_env():
    """Build the preprocessed ``DoomBasic-v0`` environment.

    The ``ppaquette/DoomBasic-v0`` spec is looked up and its ``id`` is
    overwritten with ``DoomBasic-v0`` before the environment is created.
    The environment is then wrapped, innermost first, with
    ``ToDiscrete("minimal")``, ``SkipWrapper(4)`` and ``PreprocessImage``
    (80x80, grayscale).

    Returns:
        The fully wrapped environment.
    """
    spec = gym.spec('ppaquette/DoomBasic-v0')
    spec.id = 'DoomBasic-v0'
    doom = spec.make()

    # Build the wrapper factories first, then apply them inside-out.
    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    return PreprocessImage(
        skip_frames(to_discrete(doom)),
        width=80,
        height=80,
        grayscale=True,
    )
def make_env(arg_env_spec, arg_env_spec_id):
    """Build a preprocessed environment from an arbitrary gym spec.

    Args:
        arg_env_spec: Name passed to ``gym.spec`` to look the spec up.
        arg_env_spec_id: Value assigned to the spec's ``id`` before the
            environment is instantiated.

    Returns:
        The environment wrapped, innermost first, with
        ``ToDiscrete("minimal")``, ``SkipWrapper(4)`` and an 80x80 grayscale
        ``PreprocessImage``.
    """
    spec = gym.spec(arg_env_spec)
    spec.id = arg_env_spec_id
    base_env = spec.make()

    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    return PreprocessImage(
        skip_frames(to_discrete(base_env)),
        width=80,
        height=80,
        grayscale=True,
    )
def make_env():
    """Build the preprocessed ``MsPacman-v0`` environment.

    The spec is fetched, its ``id`` is (re)assigned to ``MsPacman-v0``, and
    the created environment is wrapped with ``SkipWrapper(4)`` followed by
    an 80x80 grayscale ``PreprocessImage``.

    Returns:
        The wrapped environment.
    """
    spec = gym.spec('MsPacman-v0')
    spec.id = 'MsPacman-v0'
    pacman = spec.make()

    skipped = SkipWrapper(4)(pacman)
    return PreprocessImage(skipped, width=80, height=80, grayscale=True)
def make_env():
    """Build the configured environment, optionally wrapped in a Monitor.

    The environment name is read from ``run.get("defaults", "env")``. The
    image size and colour mode come from the surrounding ``im_width``,
    ``im_height`` and ``grayscale`` names. When ``args.monitor`` is truthy
    the result is additionally wrapped in ``gym.wrappers.Monitor`` writing
    to that location.

    Returns:
        The wrapped environment.
    """
    preprocess = wrappers.PreprocessImage
    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    base_env = gym.make(run.get("defaults", "env"))

    processed = preprocess(
        skip_frames(to_discrete(base_env)),
        width=im_width,
        height=im_height,
        grayscale=grayscale,
    )

    if not args.monitor:
        return processed
    return gym.wrappers.Monitor(processed, args.monitor)
def _thunk():
    """Create one seeded, monitored ``DoomBasic-v0`` worker environment.

    Relies on ``seed`` and ``rank`` from the enclosing scope: the
    environment is seeded with ``seed + rank`` and, when the logger has a
    directory, monitored into ``<logger dir>/<rank>.monitor.json``.

    Returns:
        The environment wrapped in ``ScaleRewardEnv``.
    """
    spec = gym.spec('ppaquette/DoomBasic-v0')
    spec.id = 'DoomBasic-v0'
    doom = spec.make()
    doom.seed(seed + rank)

    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    doom = PreprocessImage(skip_frames(to_discrete(doom)))

    if logger.get_dir():
        monitor_file = "{}.monitor.json".format(rank)
        doom = bench.Monitor(doom, os.path.join(logger.get_dir(), monitor_file))

    # NOTE(review): layout reconstructed from a collapsed line; the log-level
    # change is assumed to run unconditionally -- confirm against the original.
    gym.logger.setLevel(logging.WARN)
    return ScaleRewardEnv(doom)
def make_env(scenario, grayscale, input_shape):
    """Build a preprocessed ``ppaquette`` Doom environment for a scenario.

    Args:
        scenario: Scenario name; the spec ``'ppaquette/' + scenario`` is
            looked up and its ``id`` is set to ``scenario``.
        grayscale: Forwarded to ``PreprocessImage``.
        input_shape: Two-element ``(width, height)`` pair forwarded to
            ``PreprocessImage``.

    Returns:
        The environment wrapped, innermost first, with
        ``ToDiscrete("minimal")``, ``SkipWrapper(4)`` and ``PreprocessImage``.
    """
    frame_width, frame_height = input_shape

    spec = gym.spec('ppaquette/' + scenario)
    spec.id = scenario  # e.g. 'DoomBasic-v0'
    doom = spec.make()

    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    return PreprocessImage(
        skip_frames(to_discrete(doom)),
        width=frame_width,
        height=frame_height,
        grayscale=grayscale,
    )
def simpleSSBMEnv(act_every=3, **kwargs):
    """Create an ``SSBMEnv`` with a simplified discrete action space.

    The environment's ``action_space`` is replaced by a ``Discrete`` space
    with one entry per item in ``ssbm.simpleControllerStates``, and
    ``realController`` is overridden to translate an action index into the
    matching controller state. The result is wrapped in ``BoxWrapper`` and
    then in ``SkipWrapper``.

    Args:
        act_every: Count passed to ``SkipWrapper``. Previously this argument
            was accepted but ignored and the count was hard-coded to 3; the
            default keeps that behaviour for existing callers.
        **kwargs: Forwarded unchanged to ``SSBMEnv``.

    Returns:
        The wrapped environment.
    """
    env = SSBMEnv(**kwargs)

    # TODO: make this a wrapper
    env.action_space = spaces.Discrete(len(ssbm.simpleControllerStates))
    env.realController = lambda action: ssbm.simpleControllerStates[action].realController()

    from .box_wrapper import BoxWrapper
    env = BoxWrapper(env)

    from gym.wrappers import SkipWrapper
    # Honour the caller-supplied skip count instead of a literal 3.
    return SkipWrapper(act_every)(env)
def wrap_env(env):
    """Wrap a standard environment so it can be trained from pixels.

    Wrappers are applied in this order: ``RenderEnv``, ``SkipWrapper(4)``,
    ``WarpFrame``, ``FrameStack(..., 4)``. Reward clipping
    (``ClipRewardEnv``) is deliberately not applied.

    Args:
        env: The environment to wrap.

    Returns:
        The wrapped environment.
    """
    # Turn the standard env into one observed through rendered pixels.
    pixel_env = RenderEnv(env)

    # Apply each action for k=4 frames.
    repeated_env = SkipWrapper(4)(pixel_env)

    # Reduce frames to 84x84, following the DeepMind Atari setup.
    warped_env = WarpFrame(repeated_env)

    # Stack 4 frames to maintain the Markov property.
    return FrameStack(warped_env, 4)
def _make_env():
    """Create the reward-scaled, preprocessed environment for ``env_name``.

    ``env_name`` comes from the enclosing scope and must contain a ``/``:
    the part after the first slash becomes the spec ``id``. The environment
    is set to ``160x120`` resolution, then wrapped with
    ``ToDiscrete("minimal")``, ``SkipWrapper(4)`` and an 80x80
    ``PreprocessImage``.

    Returns:
        The environment wrapped in ``ScaleRewardEnv`` with scale 400.0 when
        ``env_name`` contains ``'DoomBasic'`` and 1.0 otherwise.
    """
    spec = gym.spec(env_name)
    spec.id = env_name.split('/')[1]

    doom = SetResolution('160x120')(spec.make())

    skip_frames = SkipWrapper(4)
    to_discrete = ToDiscrete("minimal")
    doom = PreprocessImage(skip_frames(to_discrete(doom)), width=80, height=80)

    reward_scale = 400.0 if 'DoomBasic' in env_name else 1.0
    return ScaleRewardEnv(doom, reward_scale)
def run(env, n_epochs, discount_factor, plot_stats=False, api_key=None,
        network=None, batch_size=32, buffer_len=10000, initial_epsilon=0.25,
        load=False, gpu_option=0.4, initial_lr=1e-4):
    """Train (or resume) a DQN agent on a gym environment and save it.

    Args:
        env: Name of the gym environment, passed to ``gym.make``.
        n_epochs: Forwarded to ``q_learning``.
        discount_factor: Passed to ``DqnAgent`` as ``gamma``.
        plot_stats: When true, the stats returned by ``q_learning`` are
            handed to ``save_stats``.
        api_key: When not ``None``, 300 sessions are generated under a
            ``gym.wrappers.Monitor`` and uploaded with ``gym.upload``.
        network: Network builder; falls back to ``conv_network`` when falsy.
        batch_size: Stored under ``"batch_size"`` in the agent's ``special``.
        buffer_len: Stored under ``"buffer_len"`` in the agent's ``special``.
        initial_epsilon: Forwarded to ``q_learning``.
        load: When true, restore the checkpoint instead of initialising
            variables.
        gpu_option: Used as ``per_process_gpu_memory_fraction``.
        initial_lr: Stored under ``"lr"`` in the agent's ``special``.

    Note:
        The model directory is ``"./logs_"`` plus the environment name with
        every punctuation character replaced by ``"_"``. The previous code
        called ``str.replace(string.punctuation, "_")``, which only matches
        the whole punctuation string as a single substring and therefore
        never replaced anything (leaving e.g. ``/`` in the path).
        Checkpoints written under the old, unsanitised path must be moved
        before using ``load=True``.
    """
    env_name = env
    env = PreprocessImage(
        SkipWrapper(4)(ToDiscrete("minimal")(gym.make(env_name))),
        width=80, height=80, grayscale=True)

    n_actions = env.action_space.n
    state_shape = env.observation_space.shape

    special = {
        "batch_size": batch_size,
        "buffer_len": buffer_len,
        "lr": initial_lr,
    }

    network = network or conv_network
    agent = DqnAgent(
        state_shape, n_actions, network,
        gamma=discount_factor,
        special=special)

    # Replace each punctuation character individually so the environment
    # name is safe to use as a single directory name.
    safe_name = "".join(
        "_" if ch in string.punctuation else ch for ch in env_name)
    model_dir = "./logs_" + safe_name
    checkpoint_path = "{}/model.ckpt".format(model_dir)
    monitor_dir = "{}/monitor".format(model_dir)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_option)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver = tf.train.Saver()

        if load:
            saver.restore(sess, checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        stats = q_learning(sess, agent, env, n_epochs,
                           initial_epsilon=initial_epsilon)
        create_if_need(model_dir)
        saver.save(sess, checkpoint_path)

        if plot_stats:
            save_stats(stats)

        if api_key is not None:
            env = gym.wrappers.Monitor(env, monitor_dir, force=True)
            # The sessions are generated only so the Monitor records them;
            # their return values are not used.
            for _ in range(300):
                generate_session(sess, agent, env, 0.0, int(1e10))
            env.close()
            gym.upload(monitor_dir, api_key=api_key)
def make_image_env(env_name, n_games=1, episode_limit=None, n_frames=1,
                   autoreload_envs=False, width=64, height=64, grayscale=True,
                   crop=None):
    """Create an image-observation environment, optionally pooled.

    Args:
        env_name: Name passed to ``gym.make``. Names containing
            ``"ppaquette"`` additionally get ``ToDiscrete("minimal")`` and
            ``SkipWrapper(4)``.
        n_games: When greater than 0 the result is an ``EnvPool`` of this
            size; otherwise the bare environment is returned.
        episode_limit: When not ``None``, the ``.env`` attribute of the made
            environment is used and a ``TimeLimit`` with this many steps is
            applied at the end.
        n_frames: When greater than 1, a ``FrameBuffer`` with this many
            frames is applied.
        autoreload_envs: Forwarded to ``EnvPool``.
        width: Forwarded to ``PreprocessImage``.
        height: Forwarded to ``PreprocessImage``.
        grayscale: Forwarded to ``PreprocessImage``.
        crop: Forwarded to ``PreprocessImage``.

    Returns:
        An ``EnvPool`` or a single wrapped environment.
    """
    has_custom_limit = episode_limit is not None

    env = gym.make(env_name)
    if has_custom_limit:
        # presumably unwraps gym's own time limit so ours replaces it -- confirm
        env = env.env

    if "ppaquette" in env_name:
        env = SkipWrapper(4)(ToDiscrete("minimal")(env))

    env = PreprocessImage(
        env, width=width, height=height, grayscale=grayscale, crop=crop)

    if n_frames > 1:
        env = FrameBuffer(env, n_frames=n_frames)

    if has_custom_limit:
        env = TimeLimit(env, max_episode_steps=episode_limit)

    if n_games > 0:
        return EnvPool(env, n_games, autoreload_envs)
    return env
def test_skip():
    """Smoke-test ``SkipWrapper(2)`` on ``FrozenLake-v0``.

    Wraps the environment, resets it and renders once; the test passes as
    long as none of those calls raise.
    """
    skip_two = SkipWrapper(2)
    lake = skip_two(gym.make("FrozenLake-v0"))
    lake.reset()
    lake.render()
def __init__(self, brain, body): self.brain = brain self.body = body def __call__(self, inputs): input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32))) #converting the image into torch format ie. convert input images into numpy array, then convert them into torch tensor and then put it inside torch Variable which contains both tensor and gradient. output = self.brain(input) actions = self.body(output) return actions.data.numpy() # Part 2 - Training the AI with Deep Convolutional Q-Learning # Getting the Doom environment doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width = 80, height = 80, grayscale = True) #gym.make("ppaquette/DoomCorridor-v0") : DoomCorridor-v0 is the environment name of the game which we are playing is imported using gym.make() #We used PreprocessImage class from image_preprocessing.py to pre-process the input images in square format with dimension 80*80 with gray scale that will come into neural network doom_env = gym.wrappers.Monitor(doom_env, "videos", force = True) #whole game's video is imported with above line of code into "videos" folder. After end of the game, we can see the videos of it. number_actions = doom_env.action_space.n #no. of actions (7) for the doom move left/right/straight, turn left/right, run & shoot # Building an AI cnn = CNN(number_actions) softmax_body = SoftmaxBody(T = 1.0) #Temperature = 1.0 ai = AI(brain = cnn, body = softmax_body) # Setting up Experience Replay using Eligibility trace (step size = 10) n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10) memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000) #memory capacity is 10000 ie. memory is dependent on the last 10000 steps performed by the AI. It gonna learn every 10 steps.
def __init__(self, name, sess, env='ppaquette/DoomDefendCenter-v0', eps=100, eps_save=0, time=400, learning=1e-6, gamma=.96, tau=.94, seed=None, render=False, rec=False):
    """Set up one agent: its environment, local network, losses and train ops.

    NOTE(review): this layout was reconstructed from a whitespace-collapsed
    source, so the nesting of the ``with``/``if`` blocks below is a best
    guess -- confirm against the original file.

    Args:
        name: Used to build the agent name ``'agent_' + str(name)``, which
            is also the variable scope of the local network.
        sess: Stored as ``self.sess`` and passed to ``AC``.
        env: Name passed to ``gym.make``.
        eps: Stored as ``self.eps``.
        eps_save: Stored as ``self.eps_save``; when greater than 0 a
            ``tf.train.Saver`` over the global variables is created.
        time: Stored as ``self.time``.
        learning: Passed to ``tf.train.AdagradOptimizer``.
        gamma: Stored as ``self.gamma``.
        tau: Stored as ``self.tau``.
        seed: When not ``None``, passed to ``self.env.seed``.
        render: Stored as ``self.render``.
        rec: When truthy, the environment is wrapped in a
            ``gym.wrappers.Monitor`` writing to ``'videos'``.
    """
    self.name = 'agent_' + str(name)
    self.eps = eps
    self.eps_ran = 0
    self.time = time
    self.render = render
    self.learning = learning
    self.gamma = gamma
    self.tau = tau
    self.sess = sess
    self.eps_save = eps_save

    # Environment: discrete "minimal" actions, SkipWrapper(2), 80x80 grayscale.
    self.env = gym.make(env)
    self.env = ToDiscrete("minimal")(self.env)
    self.env = SkipWrapper(2)(self.env)
    self.env = PreprocessImage(self.env, height=80, width=80, grayscale=True)
    if rec:
        self.env = gym.wrappers.Monitor(self.env, 'videos', force=True)
    if seed is not None:
        self.env.seed(seed)
        # Seeding of `rd` and TensorFlow is currently disabled:
        # rd.seed(seed)
        # tf.set_random_seed(seed)

    # Network sizes: number of discrete actions, and the observation shape
    # with a leading None entry prepended.
    self.output = self.env.action_space.n
    self.input = np.concatenate(
        ([None], list(self.env.observation_space.shape)))

    with tf.variable_scope('global', reuse=True):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        # Trainable variables collected under the 'global' scope; these are
        # the ones the optimizer updates below.
        global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        if self.eps_save > 0:
            vars_save = {v.op.name: v for v in global_vars}
            self.saver = tf.train.Saver(vars_save)
            # `self.save` is defined elsewhere on the class (not visible here).
            self.check_save = self.save
        else:
            # Saving disabled: make check_save a no-op.
            self.check_save = lambda: None

    with tf.variable_scope(self.name):
        self.ac = AC(self.input, self.output, self.sess)
        local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)

        # Critic loss: 0.5 * sum of squared (target - predicted value).
        with tf.variable_scope('loss_critic'):
            self.target_critic = tf.placeholder(tf.float32, [None], name='target_critic')
            self.batch = tf.placeholder(tf.int32, name='batch')
            self.loss_critic = tf.reduce_sum(
                tf.square(self.target_critic - tf.reshape(self.ac.value_critic, [self.batch]), name='square_loss_critic') * .5,
                name='reduce_sum_critic')

        # Actor loss: sum of td_error * cross-entropy of the taken action,
        # minus 0.02 times the summed policy entropy.
        with tf.variable_scope('loss_actor'):
            self.td_error = tf.placeholder(tf.float32, [None], name='td_error')
            self.action = tf.placeholder(tf.int32, [None], name='action')
            self.loss_actor = tf.reduce_sum(
                self.td_error * tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.ac.action_logits,
                    labels=self.action))
            entropy = -tf.reduce_sum(
                tf.nn.softmax(self.ac.action_logits) * tf.nn.log_softmax(self.ac.action_logits))
            self.loss_actor -= entropy * .02

        # Total loss and its gradients w.r.t. the local variables, clipped
        # to a global norm of 10.
        with tf.variable_scope('loss'):
            self.loss_total = self.loss_actor + .5 * self.loss_critic
            gradients = tf.gradients(self.loss_total, local_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, 10.)

    with tf.variable_scope('global', reuse=tf.AUTO_REUSE):
        opt = tf.train.AdagradOptimizer(self.learning)
        # Gradients computed on the local variables are applied to the
        # global variables; global_step advances by the batch size.
        self.train_op = [
            opt.apply_gradients(zip(gradients, global_vars)),
            self.global_step.assign_add(self.batch)
        ]

    # Ops that copy each global variable into the matching local variable.
    self.reset_model = [
        l.assign(g) for l, g in zip(local_vars, global_vars)
    ]
class AI:
    """Callable agent that feeds observations through a brain, then a body."""

    def __init__(self, brain, body):
        """Store the two callables used by ``__call__``.

        Args:
            brain: Called with the input tensor.
            body: Called with the brain's output.
        """
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Return the body's result for ``inputs`` as a NumPy array.

        ``inputs`` is converted to a float32 NumPy array, wrapped in a torch
        ``Variable``, and passed through ``brain`` and then ``body``.
        """
        frames = np.array(inputs, dtype=np.float32)
        tensor = Variable(torch.from_numpy(frames))
        chosen = self.body(self.brain(tensor))
        return chosen.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Warrior environment: discrete "minimal" actions, SkipWrapper(4),
# 80x80 grayscale frames, recorded into the "videos" folder.
warrior_env = gym.make("ppaquette/WarriorCorridor-v0")
warrior_env = ToDiscrete("minimal")(warrior_env)
warrior_env = SkipWrapper(4)(warrior_env)
warrior_env = image_preprocessing.PreprocessImage(
    warrior_env, width=80, height=80, grayscale=True)
warrior_env = gym.wrappers.Monitor(warrior_env, "videos", force=True)
number_actions = warrior_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=warrior_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
class ai:
    """Callable agent that feeds observations through a brain, then a body."""

    def __init__(self, brain, body):
        """Store the two callables used by ``__call__``.

        Args:
            brain: Called with the input tensor.
            body: Called with the brain's output.
        """
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Return the body's result for ``inputs`` as a NumPy array.

        ``inputs`` is converted to a float32 NumPy array, wrapped in a torch
        ``Variable``, and passed through ``brain`` and then ``body``.
        """
        frames = np.array(inputs, dtype=np.float32)
        tensor = Variable(torch.from_numpy(frames))
        chosen = self.body(self.brain(tensor))
        return chosen.data.numpy()


# Environment: discrete "minimal" actions, SkipWrapper(4), 80x80 grayscale
# frames, recorded into the "videos" folder.
env = gym.make('ppaquette/DoomBasic-v0')
game_env = ToDiscrete("minimal")(env)
game_env = SkipWrapper(4)(game_env)
game_env = image_preprocessing.PreprocessImage(
    game_env, width=80, height=80, grayscale=True)
game_env = gym.wrappers.Monitor(game_env, "videos", force=True)
number_actions = game_env.action_space.n

# Building the AI
cnn = CNN(number_actions)
softmaxbody = SoftmaxBody(temp=1.0)
# From here on the name `ai` refers to this instance, not the class above.
ai = ai(brain=cnn, body=softmaxbody)

# Setting up experience replay
n_steps = experience_replay.NStepProgress(env=game_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
class AI:
    """Callable agent that feeds observations through a brain, then a body."""

    def __init__(self, brain, body):
        """Store the two callables used by ``__call__``.

        Args:
            brain: Called with the input tensor.
            body: Called with the brain's output.
        """
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Return the body's result for ``inputs`` as a NumPy array.

        ``inputs`` is converted to a float32 NumPy array, wrapped in a torch
        ``Variable``, and passed through ``brain`` and then ``body``.
        """
        frames = np.array(inputs, dtype=np.float32)
        tensor = Variable(torch.from_numpy(frames))
        chosen = self.body(self.brain(tensor))
        return chosen.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment: discrete "minimal" actions, SkipWrapper(1),
# 80x80 grayscale frames, recorded into the "videos" folder.
doom_env = gym.make("ppaquette/DoomCorridor-v0")
doom_env = ToDiscrete("minimal")(doom_env)
doom_env = SkipWrapper(1)(doom_env)
doom_env = image_preprocessing.PreprocessImage(
    doom_env, width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
# test events, set key states if event.type == pygame.KEYDOWN: if event.key in relevant_keys: pressed_keys.append(event.key) elif event.key == 27: running = False elif event.type == pygame.KEYUP: if event.key in relevant_keys: pressed_keys.remove(event.key) elif event.type == pygame.QUIT: running = False elif event.type == VIDEORESIZE: video_size = event.size screen = pygame.display.set_mode(video_size) print(video_size) pygame.display.flip() clock.tick(fps) pygame.quit() if __name__ == '__main__': env = gym.make('Breakout-v0') wrapper = SkipWrapper(SPEED) # 0 = don't skip env = wrapper(env) env = PreproWrapper(env, prepro=lambda x: downsample(x), shape=(105, 80, 3)) record_game(env, RECORD_FILE, zoom=4)
# Make ai
class AI:
    """Callable agent that runs a brain's ``forward`` and then a body's."""

    def __init__(self, brain, body):
        """Store the two components used by ``__call__``.

        Args:
            brain: Object whose ``forward`` receives the input tensor.
            body: Object whose ``forward`` receives the brain's output.
        """
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Return the body's result for ``inputs`` as a NumPy array.

        ``inputs`` is converted to a float32 NumPy array, wrapped in a torch
        ``Variable``, and passed to ``brain.forward`` and then
        ``body.forward``.
        """
        frames = np.array(inputs, dtype=np.float32)
        tensor = Variable(torch.from_numpy(frames))
        features = self.brain.forward(tensor)
        chosen = self.body.forward(features)
        return chosen.data.numpy()


# Train deep Q conv: discrete "minimal" actions, SkipWrapper(4), 80x80
# grayscale frames, recorded into the "videos" folder.
doom_env = gym.make("ppaquette/DoomDefendLine-v0")
doom_env = ToDiscrete("minimal")(doom_env)
doom_env = SkipWrapper(4)(doom_env)
doom_env = image_preprocessing.PreprocessImage(
    doom_env, height=80, width=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up experience replay (eligibility trace / n-step Q-learning)
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
n_colors = 1 if self.grayscale else 3 self.observation_space = Box(0.0, 1.0, [n_colors, height, width]) def _observation(self, img): img = self.crop(img) img = imresize(img, self.img_size) if self.grayscale: img = img.mean(-1, keepdims=True) img = np.transpose(img, (2, 0, 1)) img = img.astype('float32') / 255. return img # Getting the Doom environment doom_env = PreprocessImage( SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True) doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) print("Environment created!") number_actions = doom_env.action_space.n # Building an AI cnn = CNN(number_actions) softmax_body = SoftmaxBody(T=1.0) ai = AI(brain=cnn, body=softmax_body) # Setting up Replay n_steps = NStepProgress(env=doom_env, ai=ai, n_step=10) memory = ReplayMemory(n_steps=n_steps, capacity=10000)
#Putting data through the brain and taking the output output = self.brain(input_data) #Putting output into body and taking the actions actions = self.body(output) #Returning actions in right format return actions.data.numpy() #Part 2 - Implementing Deep COnvolutional Q-Learning #Getting the doom enviroment #gym.make imports the enviroment #image_preprocessing proccesses images coming in with 80 by 80 size in grayscale doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)( ToDiscrete('minimal')(gym.make('ppaquette/DoomCorridor-v0'))), width=80, height=80, grayscale=True) #Saves videos of AI playing doom into the videos folder doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True) #Getting number of actions from doom_enviroment number_actions = doom_env.action_space.n #Building an AI #Creating an object of our CNN class cnn = CNN(number_actions) #Creating an object of our SoftmaxBoddy class and inputing temperature softmax_body = SoftmaxBody(T=1.0) #Creating an object of our AI class and inputing the brain and body
def __call__(self, inputs): #converting images to correct format input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32))) #converting image to numpy array #converting pixels to float 32 #converting mupy to torch tensor(variable class) output = self.brain(input) #passing tensor to brain actions = self.body(output) #passing output to body return actions.data.numpy() #converting action to numpy # Part 2 - Training the AI with Deep Convolutional Q-Learning # Getting the Doom environment doom_env = image_preprocessing.PreprocessImage( SkipWrapper(4)(ToDiscrete("minimal")( gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True) # importing the environment and preprocessing it #dimensions should be same as neural network doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) #saving to videos number_actions = doom_env.action_space.n # number of actions possible in the environment # Building an AI cnn = CNN(number_actions) #calling cnn softmax_body = SoftmaxBody(T=1.0) #setting value of temperature for softmax ai = AI(brain=cnn, body=softmax_body) #calling brain # Setting up Experience Replay n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai,