def run(self):
    super(MarioEnv, self).run()
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    self.request_action(0, False)
    while True:
        action = self.child_conn.recv()
        next_state, reward, done, info = self.env.step(action)
        self.steps += 1
        self.accum_reward += reward
        next_state = rgb2dataset(next_state)
        if self.is_render and self.idx == 0:
            self.env.render()
        # make a transition: keep only the last 4 frames
        self.transition.append(next_state)
        if len(self.transition) > 4:
            self.transition.pop(0)
        if done:
            self.send_result(info['x_pos'])
            self.reset()
            self.request_action(reward, True)
        else:
            self.request_action(reward, False)
def __init__(self, env_id, is_render, env_idx, child_conn,
             history_size=4, life_done=False, h=84, w=84,
             movement=COMPLEX_MOVEMENT, sticky_action=True, p=0.25):
    super(MarioEnvironment, self).__init__()
    self.daemon = True
    # Use the `movement` argument instead of hardcoding COMPLEX_MOVEMENT,
    # so callers can actually choose the action set.
    self.env = BinarySpaceToDiscreteSpaceEnv(
        gym_super_mario_bros.make(env_id), movement)
    self.is_render = is_render
    self.env_idx = env_idx
    self.steps = 0
    self.episode = 0
    self.rall = 0
    self.recent_rlist = deque(maxlen=100)
    self.child_conn = child_conn
    self.life_done = life_done
    self.sticky_action = sticky_action
    self.last_action = 0
    self.p = p
    self.history_size = history_size
    self.history = np.zeros([history_size, h, w])
    self.h = h
    self.w = w
    self.reset()
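# NOTE: the constructor above stores `sticky_action`, `last_action`, and `p`,
# but the step loop that consumes them is not part of this snippet. A minimal
# sketch of the usual sticky-action rule (repeat the previous action with
# probability p), assuming those attribute names carry over; the helper name
# `apply_sticky_action` is hypothetical:
import random

def apply_sticky_action(self, action):
    # With probability p, ignore the requested action and repeat the last one.
    if self.sticky_action and random.random() <= self.p:
        action = self.last_action
    self.last_action = action
    return action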
def __init__(self, args, env_idx, child_conn, history_size=4, h=84, w=84):
    super(MoMarioEnv, self).__init__()
    self.daemon = True
    self.env = BinarySpaceToDiscreteSpaceEnv(
        gym_super_mario_bros.make(args.env_id), SIMPLE_MOVEMENT)
    self.is_render = args.render
    self.env_idx = env_idx
    self.steps = 0
    self.episode = 0
    self.rall = 0
    self.coin = 0
    self.x_pos = 0
    self.time = 0
    self.score = 0
    self.n_mo = 5
    self.morall = np.zeros(self.n_mo)
    self.recent_rlist = deque(maxlen=100)
    self.recent_morlist = deque(maxlen=100)
    self.child_conn = child_conn
    self.life_done = args.life_done
    self.single_stage = args.single_stage
    self.stage_bonus = 0
    self.history_size = history_size
    self.history = np.zeros([history_size, h, w])
    self.h = h
    self.w = w
    self.reset()
def __init__(self, env_id, is_render, env_idx, child_conn,
             history_size=4, h=84, w=84):
    super(MarioEnvironment, self).__init__()
    self.daemon = True
    # `movement` is assumed to be a module-level action list
    # (e.g. SIMPLE_MOVEMENT or COMPLEX_MOVEMENT) defined elsewhere.
    self.env = BinarySpaceToDiscreteSpaceEnv(
        gym_super_mario_bros.make(env_id), movement)
    self.is_render = is_render
    self.env_idx = env_idx
    self.steps = 0
    self.episode = 0
    self.rall = 0
    self.recent_rlist = deque(maxlen=100)
    self.child_conn = child_conn
    self.history_size = history_size
    self.history = np.zeros([history_size, h, w])
    self.h = h
    self.w = w
    self.reset()
def main():
    # env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v1')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)

    timestart = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M:%S')
    # env = VideoRecorderWrapper(env, PROJ_DIR + "/../video", str(timestart), 50)
    env = VideoRecorderWrapper(env, PROJ_DIR + "/../video/final",
                               str(timestart), 1)
    env = DownsampleEnv(env, (84, 84))
    env = PenalizeDeathEnv(env, penalty=-25)
    env = FrameStackEnv(env, 4)

    # good
    # act = deepq.load(PROJ_DIR + "/../models/mario_model_2018-08-12-13:00:58.pkl")
    # better
    act = deepq.load(PROJ_DIR + "/../models/mario_model_2018-08-12-19:21:50.pkl")

    episode = 0
    while True:
        obs, done = env.reset(), False
        stepnr = 0
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            if stepnr % 20 == 0:
                plot_obs(obs)
            episode_rew += rew
            stepnr += 1
        print("Episode reward", episode_rew, episode)
        episode += 1
def mariocontext(marioEnv):
    mario_env = 'SuperMarioBros' + marioEnv.noFrameSkip + '-' + str(
        marioEnv.world) + '-' + str(marioEnv.stage) + '-v' + str(
        marioEnv.version)
    env = gym_super_mario_bros.make(mario_env)
    env = BinarySpaceToDiscreteSpaceEnv(env, marioEnv.action_encoding)
    yield env
    env.close()
def run(self):
    super(MarioEnv, self).run()
    self.model = A3C(
        self.s_dim,
        self.a_dim,
        gamma=0.95,
        epsilon_start=1.0,
        epsilon_end=0.1,
        epsilon_length=100000,
        use_cuda=self.use_cuda,
    )
    self.model.l_net.load_state_dict(self.g_net.state_dict())
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    while True:
        # Act randomly until the frame stack is full.
        if len(self.transition) != 4:
            action = self.model.get_action(self.transition, is_random=True)
        else:
            action = self.model.get_action(self.transition, is_random=False)
        next_state, reward, done, info = self.env.step(action)
        self.steps += 1
        self.accum_reward += reward
        next_state = rgb2dataset(next_state)
        if self.is_render and self.idx == 0:
            self.env.render()
        self.buffer_state.append(self.transition)
        self.buffer_action.append(action)
        self.buffer_reward.append(reward)
        if len(self.buffer_state) > 0 and self.steps % self.update_iter == 0:
            next_transition = self.transition[1:]
            next_transition.append(next_state)
            self.train(next_transition, done)
            self.buffer_state.clear()
            self.buffer_action.clear()
            self.buffer_reward.clear()
        # make a transition: keep only the last 4 frames
        self.transition.append(next_state)
        if len(self.transition) > 4:
            self.transition.pop(0)
        if done:
            self.send_result(info['x_pos'])
            self.reset()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    # parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-freq', type=int, default=10)
    parser.add_argument('--checkpoint-path', type=str, default=None)
    args = parser.parse_args()

    logger.configure()
    set_global_seeds(args.seed)

    # env = make_atari(args.env)
    env = gym_super_mario_bros.make('SuperMarioBros-v3')
    # env = gym_super_mario_bros.make('SuperMarioBrosNoFrameskip-v3')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    env = ProcessFrame84(env)

    print("logger.get_dir():", logger.get_dir())
    print("PROJ_DIR:", PROJ_DIR)

    env = bench.Monitor(env, logger.get_dir())
    # env = deepq.wrap_atari_dqn(env)

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )

    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        # checkpoint_path=args.checkpoint_path,
        # callback=deepq_callback,
        print_freq=1,
    )

    print("Saving model to mario_model.pkl")
    act.save("../models/mario_model_{}.pkl".format(
        datetime.datetime.now().isoformat()))
    env.close()
class MarioBrosEnvironment(AbstractEnvironment):
    def __init__(self, config):
        self.config = config
        if config.env == 'mario':
            from gym_super_mario_bros.actions import RIGHT_ONLY
            from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
            import gym_super_mario_bros
            env = gym_super_mario_bros.make('SuperMarioBros-v0')
            self.env = BinarySpaceToDiscreteSpaceEnv(env, RIGHT_ONLY)
        elif config.env == 'montezuma':
            self.env = gym.make('MontezumaRevengeDeterministic-v0')
        self.state_buffer = []
        self.reward_buffer = []
        self.counter = 0

    def process_image(self, s, subsample=4):
        # Use integer division so the resize target is an integral shape.
        if self.config.env == 'mario':
            s = skimage.color.rgb2gray(s)
            s = skimage.transform.resize(
                s, (s.shape[0] // subsample, s.shape[1] // subsample),
                anti_aliasing=True, mode='constant')
            s = torch.from_numpy(s)
        elif self.config.env == 'montezuma':
            s = s[34:34 + 160, :160]
            s = skimage.color.rgb2gray(s)
            s = skimage.transform.resize(
                s, (s.shape[0] // subsample, s.shape[1] // subsample),
                anti_aliasing=True, mode='constant')
            s = torch.from_numpy(s).float()
        return s

    def reset(self):
        self.counter = 0
        self.state_buffer = []
        for _ in range(self.config.n_input_frames):
            state = self.process_image(self.env.reset(),
                                       self.config.image_subsample)
            self.state_buffer.append(state)
        return torch.stack(self.state_buffer)

    def step(self, action):
        total_reward = 0
        for _ in range(self.config.n_action_repeat):
            state, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        state = self.process_image(state, self.config.image_subsample)
        self.state_buffer.append(state)
        # Keep only the most recent n_input_frames observations.
        self.state_buffer = self.state_buffer[-self.config.n_input_frames:]
        return torch.stack(self.state_buffer), total_reward, done, info
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U

    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])

    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)

    # env = make_atari(env_id)
    env = gym_super_mario_bros.make('SuperMarioBros-v1')
    # env = gym_super_mario_bros.make('SuperMarioBrosNoFrameskip-v3')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    env = ProcessFrame84(env)
    env = FrameMemoryWrapper(env)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space,
                                    ac_space=ac_space)

    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    # env = wrap_deepmind(env)

    def render_callback(lcl, _glb):
        # print(lcl['episode_rewards'])
        total_steps = lcl['env'].total_steps
        # if total_steps % 1000 == 0:
        #     print("Saving model to mario_model.pkl")
        #     act.save("../models/mario_model_{}.pkl".format(modelname))
        env.render()

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=2048,
                        clip_param=0.2,
                        entcoeff=0.01,
                        optim_epochs=4,
                        optim_stepsize=1e-3,  # 3e-4
                        optim_batchsize=64,  # 256
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear',
                        callback=render_callback)
    env.close()
class Environment:
    actionMap = {
        0: 'NOOP',
        1: 'Right',
        2: 'Right-Jump',
        3: 'Right-Sprint',
        4: 'Right-Jump-Sprint',
        5: 'Jump',
        6: 'Left'
    }

    def __init__(self, rows=19, columns=16, verbose=True, raw=True, variant=1):
        self.verbose = verbose
        self.raw = raw
        self.variant = variant
        # Pass the constructor arguments through instead of hardcoding 19x16.
        self.img2state = Img2State(rows=rows, columns=columns)
        self.game = BinarySpaceToDiscreteSpaceEnv(
            gym_super_mario_bros.make('SuperMarioBros-v3'), SIMPLE_MOVEMENT)
        self.state = self.img2state.transfrom(self.game.reset(),
                                              raw=self.raw,
                                              variant=self.variant)
        self.reward = 0
        # Actions
        self.A = list(Environment.actionMap.keys())

    def step(self, action: int):
        if action not in self.A:
            raise Exception('Wrong Action...')
        state, self.reward, done, info = self.game.step(action)
        self.state = self.img2state.transfrom(state, raw=self.raw,
                                              variant=self.variant)
        if done and self.state[8]:
            self.reward = 100
        elif self.state[8]:
            self.reward = 30
        elif self.state[9]:
            self.reward = 15
        if self.verbose:
            self.game.render()
        return done

    def reset(self):
        self.state = self.img2state.transfrom(self.game.reset(),
                                              raw=self.raw,
                                              variant=self.variant)
        self.reward = 0
def make_env(env_id):
    """
    Return an OpenAI Gym environment wrapped with appropriate wrappers.

    Raises a ValueError if env_id is not recognized.

    Parameters
    ----------
    env_id : str
        OpenAI Gym ID for environment.

    Returns
    -------
    env
        Wrapped OpenAI Gym environment.
    """
    if env_id in ['CartPole-v0', 'LunarLander-v2', 'Acrobot-v1',
                  'MountainCar-v0']:
        env = gym.make(env_id)
        env = TorchWrapper(env)
    elif env_id == 'PongNoFrameskip-v4':
        env = make_atari(env_id)
        env = wrap_deepmind(env, frame_stack=True)
        env = TorchWrapper(env)
        env = AtariPermuteWrapper(env)
    elif env_id in ['SuperMarioBros-v0', 'SuperMarioBros-v1']:
        # Both Mario variants share the same wrapper stack.
        from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
        import gym_super_mario_bros
        from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
        env = gym_super_mario_bros.make(env_id)
        env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
        env = WarpFrame(env)
        env = ScaledFloatFrame(env)
        env = TorchWrapper(env)
        env = AtariPermuteWrapper(env)
    else:
        raise ValueError('{} is not a supported environment.'.format(env_id))
    return env
class Environment(threading.Thread):
    stop_signal = False

    def __init__(self, render=False, eps_start=EPS_START, eps_end=EPS_STOP,
                 eps_steps=EPS_STEPS):
        threading.Thread.__init__(self)
        self.render = render
        # Make the Super Mario gym environment and apply wrappers
        self.env = gym.make(ENV)
        self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
        self.env = preprocess.GrayScaleImage(self.env, height=HIGHT,
                                             width=WIDTH, grayscale=True)
        # self.env = wrappers.Monitor(self.env, "./Super_Mario_AI/videos",
        #                             force=True, write_upon_reset=True)
        self.agent = Agent(TEMPERATURE)

    def runEpisode(self):
        s = self.env.reset()
        R = 0
        while True:
            time.sleep(THREAD_DELAY)  # yield
            if self.render:
                self.env.render()
            a = self.agent.act(s)
            s_, r, done, info = self.env.step(a)
            if done:  # terminal state
                s_ = None
            self.agent.train(s, a, r, s_)
            s = s_
            R += r
            if done or self.stop_signal:
                break
        print("Total R:", R)

    def run(self):
        while not self.stop_signal:
            self.runEpisode()

    def stop(self):
        self.stop_signal = True
def run(self, solution, level, render, mode):
    env = gym_super_mario_bros.make(level)
    env = BinarySpaceToDiscreteSpaceEnv(env, COMPLEX_MOVEMENT)
    done = True
    reason_finish = "no_more_commands"
    pos = 0
    total_r = 0
    for step in range(len(solution)):
        if done:
            state = env.reset()
        state, reward, done, info = env.step(solution[pos])
        pos += 1
        if reward == -15:  # Mario died
            reason_finish = "death"
            break
        if mode == "level" and info['flag_get']:
            reason_finish = "win"
            break
        total_r = total_r + reward
        if render == "true":
            env.render()
    env.close()
    return total_r, pos, info, reason_finish
def __init__(self, os='mac', display=False):
    self.display = display
    if os in ('mac', 'linux'):
        env = gym_super_mario_bros.make('SuperMarioBros-v0')
        self.env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    else:
        raise Exception("bad os")
    self.act_dim = self.env.action_space.n
    self.obs_dim = (1, 128, 128)
    print("env created with act_dim", self.act_dim, "obs_dim", self.obs_dim)
    self.transform = transforms.Compose([
        # Chain two transforms together using a list.
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
def build_mario(lvl):
    from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
    env = base_env(lvl)
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    env = WarpFrame(env)
    env = ClipScaleReward(env, scale=.01, lower_bound=-1, upper_bound=1.)
    env = ScaledFloatFrame(env)
    env = MaxAndSkipEnv(env)
    env = StickyActionEnv(env)
    env = FrameStackWrapper(env)
    env.recognized = "Mario"
    return env
def make_test():
    """
    Create an environment with some standard wrappers.
    """
    # Make the environment
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = BinarySpaceToDiscreteSpaceEnv(env, RIGHT_ONLY)
    print(env.action_space)

    # Build the actions array
    # env = ActionsDiscretizer(env)

    # Scale the rewards
    # env = RewardScaler(env)

    # Preprocess frames
    env = PreprocessFrame(env)

    # Stack 6 frames
    env = FrameStack(env, 6)

    # Allow backtracking so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on in the level.
    # env = AllowBacktracking(env)

    return env
def make_env(env_idx):
    """
    Create an environment with some standard wrappers.
    """
    # Make the environment
    levelList = [
        'SuperMarioBros-1-1-v2',
        'SuperMarioBros-2-1-v0',
        'SuperMarioBros-3-1-v0',
        'SuperMarioBros-4-1-v0',
        'SuperMarioBros-5-1-v0',
        'SuperMarioBros-6-1-v0',
        'SuperMarioBros-7-1-v0',
        'SuperMarioBros-8-1-v0',
    ]
    # record_path = "./records/" + dicts[env_idx]['state']
    # Alternatives: 'SuperMarioBros-v0', 'SuperMarioBrosRandomStages'
    env = gym_super_mario_bros.make(levelList[env_idx])
    # env = BinarySpaceToDiscreteSpaceEnv(env, RIGHT_ONLY)
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)

    # Scale the rewards
    # env = RewardScaler(env)

    # Preprocess frames
    env = PreprocessFrame(env)

    # Stack 4 frames
    # env = FrameStack(env, 4)

    # Allow backtracking so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on in the level.
    env = AllowBacktracking(env)
    return env
def __init__(self, game_id, obs_size, skip_frame=4, mode='train'):
    self.game_id = game_id
    env = gym_super_mario_bros.make(game_id)
    temp_obs = env.reset()
    height, width, _ = temp_obs.shape
    self.env = BinarySpaceToDiscreteSpaceEnv(env, COMPLEX_MOVEMENT)
    # Buffer of the last two raw frames, used for max-pooling over frames.
    self.obs_last2max = np.zeros((2, obs_size, obs_size, 1), np.uint8)
    # Stack of the four most recent processed observations.
    self.obstack = np.zeros((obs_size, obs_size, 4))
    self.rewards = []
    self.lives = 3
    self.skip = skip_frame
    self.mode = mode
    if self.mode == 'play':
        self.monitor = Monitor(width=width, height=height)
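# NOTE: the `obs_last2max` and `skip` fields above suggest the common
# max-and-skip scheme (repeat each action `skip` times, then max-pool the last
# two frames to suppress NES sprite flicker). The step method is not shown in
# this snippet; a minimal sketch under that assumption, where `preprocess_obs`
# is a hypothetical resize/grayscale helper:
def skip_step(self, action):
    total_reward = 0.0
    done, info = False, {}
    for i in range(self.skip):
        obs, reward, done, info = self.env.step(action)
        total_reward += reward
        if i >= self.skip - 2:
            # Keep only the two most recent raw frames.
            self.obs_last2max[i % 2] = preprocess_obs(obs)
        if done:
            break
    # Max over the last two frames to remove flicker.
    frame = self.obs_last2max.max(axis=0)
    return frame, total_reward, done, info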
def setup_env(env_id: str, monitor_dir: str = None) -> gym.Env:
    """
    Make an environment and set it up with wrappers.

    Args:
        env_id: the id for the environment to load
        monitor_dir: the output directory to route monitor output to

    Returns:
        a loaded and wrapped OpenAI Gym environment
    """
    if 'Tetris' in env_id:
        import gym_tetris
        env = gym_tetris.make(env_id)
        env = gym_tetris.wrap(env, clip_rewards=False)
    elif 'SuperMarioBros' in env_id:
        import gym_super_mario_bros
        env = gym_super_mario_bros.make(env_id)
        env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
        env = nes_py_wrap(env)
    else:
        env = build_atari_environment(env_id)
    if monitor_dir is not None:
        env = gym.wrappers.Monitor(env, monitor_dir, force=True)
    return env
def __init__(self, movements, max_steps):
    """
    Creates a new Simulator. The Simulator lets individuals play the game
    and assigns their resulting fitness to them.

    :param movements: a list of movements the individuals are allowed to make
    :param max_steps: the maximum number of simulation steps an individual
        is allowed to use
    """
    self.movements = movements
    self.max_steps = max_steps
    # TODO maybe another name for "env_expanded"?
    self.env_expanded = gym_super_mario_bros.SuperMarioBrosEnv(
        frames_per_step=1, rom_mode='vanilla')
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env_expanded,
                                             self.movements)
    # self.env.metadata['video.frames_per_second'] = 120
    # self.env_expanded.metadata['video.frames_per_second'] = 120
    self._log = logging.getLogger('MLProject.Simulator')
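# NOTE: the docstring above says the Simulator plays episodes and assigns the
# resulting fitness, but the evaluation loop itself is not part of this
# snippet. A minimal sketch, assuming a hypothetical `individual` object with
# a list of action indices and a writable `fitness` attribute:
def simulate(self, individual):
    # Replay the individual's actions for at most max_steps and sum the reward.
    state = self.env.reset()
    total_reward = 0.0
    for step in range(self.max_steps):
        action = individual.actions[step % len(individual.actions)]
        state, reward, done, info = self.env.step(action)
        total_reward += reward
        if done:
            break
    individual.fitness = total_reward
    return total_reward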
def build_environment(mario_env_name: str,
                      action_space: list = actions.COMPLEX_MOVEMENT,
                      stochastic: bool = True) -> gym.Env:
    env = gym_super_mario_bros.make(mario_env_name)
    env = ResizeFrameEnvWrapper(env, grayscale=True)
    env = ReshapeRewardEnvWrapper(env)
    if stochastic:
        env = StochasticFrameSkipEnvWrapper(env, n_frames=4)
    return BinarySpaceToDiscreteSpaceEnv(env, action_space)
def main(path="./models/deepq/mario_reward_1736.7.pkl"): step_mul = 16 steps = 200 FLAGS = flags.FLAGS flags.DEFINE_string("env", "SuperMarioBros-v0", "RL environment to train.") flags.DEFINE_string("algorithm", "deepq", "RL algorithm to use.") FLAGS(sys.argv) # 1. Create gym environment env = gym_super_mario_bros.make('SuperMarioBros-v0') env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT) act = deepq.load(path) nstack = 4 nh, nw, nc = env.observation_space.shape history = np.zeros((1, nh, nw, nc * nstack), dtype=np.uint8) obs, done = env.reset(), False # history = update_history(history, obs) episode_rew = 0 while not done: env.render() action = act([obs])[0] obs, rew, done, _ = env.step(action) # history = update_history(history, obs) episode_rew += rew print("action : %s reward : %s" % (action, rew)) print("Episode reward", episode_rew)
def run(self):
    super(MarioEnv, self).run()
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    self.request_action(0, False)
    while True:
        action = self.child_conn.recv()
        # print(SIMPLE_MOVEMENT[action])
        next_state, reward, done, info = self.env.step(action)
        force_done = False
        if reward == -15:
            force_done = True
        self.steps += 1
        self.accum_reward += reward
        next_state = rgb2dataset(next_state)
        if self.is_render and self.idx == 0:
            self.env.render()
        # make a transition: shift the frame stack and append the new frame
        self.transition[:3, :, :] = self.transition[1:, :, :]
        self.transition[3, :, :] = next_state
        if done:
            self.send_result(self.prev_xpos)
            self.reset()
            self.request_action(reward, force_done)
        else:
            self.request_action(reward, force_done)
        self.prev_xpos = info['x_pos']
def __init__(self, game_name, task_name, action_mode=SIMPLE_MOVEMENT,
             state_size=None):
    """
    Args:
        game_name : string
            Name of the game (e.g. SuperMarioBros-5-1-v0).
        task_name : string
            Name of the task.
        state_size : list or tuple or None
            Size of the state, [h, w] or [h, w, c].
    """
    self.game_name = game_name
    self.task_name = task_name
    self.action_mode = action_mode
    self.env = gym_super_mario_bros.make(game_name)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, self.action_mode)
    self.n_action = self.env.action_space.n
    self.actions = list(range(self.n_action))
    self.new_episode()
def create_super_mario_env():
    import gym
    from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
    import gym_super_mario_bros
    from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT

    env = gym_super_mario_bros.make('SuperMarioBrosRandomStages-v1')
    env = BinarySpaceToDiscreteSpaceEnv(env, COMPLEX_MOVEMENT)
    # env = wrappers.MaxAndSkipEnv(env, skip=4)
    env = wrappers.wrap_deepmind(env, episode_life=False, clip_rewards=False,
                                 frame_stack=True, scale=True)
    return env
def make_atari(env_id, max_episode_steps=None):
    if "SuperMario" in env_id:
        import gym_super_mario_bros
        from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
        from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
        env = gym_super_mario_bros.make(env_id)
        env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    else:
        env = gym.make(env_id)
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
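# NOTE: every snippet above follows the same core pattern: build the raw NES
# environment, wrap it in BinarySpaceToDiscreteSpaceEnv to reduce the button
# combinations to a small discrete action set, then drive it through the
# standard Gym loop. A minimal self-contained sketch of that pattern with a
# random agent (no preprocessing or learning):
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv

env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)

state, done = env.reset(), False
total_reward = 0
while not done:
    action = env.action_space.sample()  # sample a random discrete action
    state, reward, done, info = env.step(action)
    total_reward += reward
env.close()
print('Episode reward:', total_reward)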