def make_sonic_env(game=None, state=None, test=False, record=False):
    """Build a Sonic environment wrapped for multi-instance use.

    Registers the project's ``sonic_wrapper`` factory with retrowrapper,
    then returns a ``RetroWrapper`` for the requested game/state.
    """
    retrowrapper.set_retro_make(sonic_wrapper)
    return retrowrapper.RetroWrapper(
        game=game, state=state, test=test, record=record)
def __init__(self, game, start_state, states_size, frame_skip, path, thread, test):
    """Set up one worker's emulator instance and pre-allocated buffers.

    Parameters
    ----------
    game : str
        Retro game identifier (e.g. ``'Pong-Atari2600'``).
    start_state : str
        Save state episodes start from.
    states_size : int
        Number of stacked frames kept in ``self.states``.
    frame_skip : int
        Frame-skip value stored for later use.
    path : str
        Recording path, used only by thread 0.
    thread : int
        Worker index; thread 0 also records.
    test : bool
        Evaluation-mode flag stored for later use.
    """
    self.game = game
    self.test = test
    # Thread 0 is used for logging/debugging, saving the model and
    # rendering one env when necessary.
    # BUGFIX: was `if thread is 0` — identity comparison against an int
    # literal relies on CPython's small-int caching and raises a
    # SyntaxWarning on modern Python; use equality.
    if thread == 0:
        self.env = retrowrapper.RetroWrapper(
            game=self.game, state=start_state, record=path)
    else:
        self.env = retrowrapper.RetroWrapper(
            game=self.game, state=start_state)
    self.frame_skip = frame_skip
    self.states_size = states_size
    self.states_counter = 0
    # Accelerate training by making the action space fit the actual
    # actions for the game.  (SpaceInvaders and Breakout shared the same
    # reduced action set in the original duplicated branches.)
    if game in ('SpaceInvaders-Atari2600', 'Breakout-Atari2600'):
        self.action_space = [1, 2, 3]
    elif game == 'Pong-Atari2600':
        self.action_space = [1, 4, 5]
    else:
        # Otherwise, use the actions specified by Open AI.
        self.action_space = range(self.env.action_space.n)
    self.action_size = len(self.action_space)
    self.observation_shape = np.array(self.env.observation_space.shape)
    # Account for the fact that we halve the size of each frame on the
    # pre-processing step.
    self.width = np.uint32(self.env.observation_space.shape[0] / 2)
    self.height = np.uint32(self.env.observation_space.shape[1] / 2)
    # Initialize vectors of the right shapes and types to speed up training.
    self.action = np.zeros(self.env.action_space.n, dtype=np.uint8)
    self.states = np.zeros(
        (1, self.width, self.height, self.states_size), dtype=np.float32)
    # NOTE(review): int8 saturates at 127 while raw RGB pixels reach 255 —
    # confirm the pre-processing step tolerates this before changing it.
    self.frame_temp1 = np.zeros(
        (self.observation_shape[0], self.observation_shape[1],
         self.observation_shape[2]), dtype=np.int8)
    self.frame_temp2 = np.zeros(
        (self.observation_shape[0], self.observation_shape[1],
         self.observation_shape[2]), dtype=np.int8)
def __init__(self, render=False, multi=True, skip=True):
    '''
    Wrapper class for street fighter II environment. This has
    implementations for simple movements for easy training as well as
    wait times for move animations.
    '''
    game_id = 'StreetFighterIISpecialChampionEdition-Genesis'
    # retrowrapper allows several emulator instances in one process;
    # plain retro.make supports only one.
    if multi:
        self.env = retrowrapper.RetroWrapper(game_id)
    else:
        self.env = retro.make(game=game_id)
    self.render = render
    self.ob = None
    self.reward = None
    self.done = None
    self.info = None
    # Simple button presses plus the three special-move macros.
    self.actions_space = len(ACTIONS) + len(COMBOS)
    self.actions_names = list(ACTIONS.keys()) + [
        'HURRICANE_KICK', 'SHORYUKEN', 'HADOKEN']
    self.dead = False
    self.skip = skip
def __init__(self, config):
    """Build this actor's batch of environments and its A3C agent.

    Creates ``config['env_num']`` retro environments, bundles them in a
    ``VectorEnv`` for batched stepping, and instantiates the agent.
    """
    self.config = config
    # Spawn the configured number of game environments.
    self.envs = []
    for _ in range(config['env_num']):
        game_env = retrowrapper.RetroWrapper(
            game=self.config['env_name'],
            use_restricted_actions=retro.Actions.DISCRETE,
            skill_frame=self.config['skill_frame'],
            resize_shape=self.config['obs_shape'],
            render_preprocess=False,
            is_train=True)
        self.envs.append(game_env)
    # Pack every env so a single call can step all of them at once.
    self.vector_env = VectorEnv(self.envs)
    # Initial observation batch from all environments.
    self.obs_batch = self.vector_env.reset()
    self.obs_dim = game_env.observation_space.shape
    # Per-actor model / algorithm / agent stack.
    net = Model(self.config['action_dim'])
    alg = parl.algorithms.A3C(
        net, vf_loss_coeff=self.config['vf_loss_coeff'])
    self.agent = Agent(alg, self.config, self.obs_dim)
import retrowrapper

if __name__ == "__main__":
    game = "Airstriker-Genesis"
    # Two emulator instances in the same process — the point of retrowrapper.
    env1 = retrowrapper.RetroWrapper(game)
    env2 = retrowrapper.RetroWrapper(game)
    _obs = env1.reset()
    _obs = env2.reset()
    done = False
    # Step both envs with random actions; the shared `done` flag is
    # overwritten by each step, so the loop exits on env2's episode end.
    while not done:
        for env in (env1, env2):
            action = env.action_space.sample()
            _obs, _rew, done, _info = env.step(action)
import retrowrapper
import retro

if __name__ == "__main__":
    # retrowrapper makes it possible to create several retro game
    # instances at the same time.
    env1 = retrowrapper.RetroWrapper(
        game='SuperMarioBros-Nes',
        use_restricted_actions=retro.Actions.DISCRETE,
        skill_frame=1,
        resize_shape=(1, 112, 112),
        render_preprocess=True)
    env2 = retrowrapper.RetroWrapper(
        game='SuperMarioBros-Nes',
        use_restricted_actions=retro.Actions.DISCRETE,
        skill_frame=1,
        resize_shape=(1, 112, 112),
        render_preprocess=True)
    _obs = env1.reset()
    _obs2 = env2.reset()
    # Step both environments forever with random actions, resetting each
    # one as soon as its episode finishes.
    while True:
        action = env1.action_space.sample()
        _obs, _rew, done, _info = env1.step(action)
        env1.render()
        if done:
            env1.reset()
        action = env2.action_space.sample()
        _obs2, _rew2, done2, _info2 = env2.step(action)
        env2.render()
        if done2:
            # NOTE(review): the source is truncated at this point —
            # presumably `env2.reset()`, mirroring env1 above; confirm
            # against the original file.
def create_train_env(game, skill_frame=4, resize_shape=(1, 84, 84), render_preprocess=False):
    """Factory for a training environment.

    Args:
        game: retro game identifier.
        skill_frame: frame-skip setting forwarded to the wrapper.
        resize_shape: target shape forwarded to the wrapper.
        render_preprocess: whether the wrapper displays preprocessed frames.

    Returns:
        A configured ``retrowrapper.RetroWrapper`` instance.
    """
    return retrowrapper.RetroWrapper(
        game=game,
        skill_frame=skill_frame,
        resize_shape=resize_shape,
        render_preprocess=render_preprocess)
import retrowrapper

if __name__ == "__main__":
    game = "SonicTheHedgehog-Genesis"
    state = "GreenHillZone.Act1"
    # Two emulator instances in one process, both on the same save state.
    env1 = retrowrapper.RetroWrapper(game, state=state)
    env2 = retrowrapper.RetroWrapper(game, state=state)
    _obs = env1.reset()
    _obs = env2.reset()
    done = False
    while not done:
        action = env1.action_space.sample()
        _obs, _rew, done, _info = env1.step(action)
        env1.render()
        action = env2.action_space.sample()
        # BUGFIX: `env2.step(action)` was called twice back-to-back with
        # the same action, discarding the first transition and advancing
        # env2 two frames per loop iteration.  Step each env once per
        # iteration, matching env1's handling.
        _obs, _rew, done, _info = env2.step(action)
        env2.render()
def work(self):
    """Evaluate this worker's NEAT genome across every save state.

    Builds a recurrent network from ``self.genome`` and runs it on each
    state in the module-level ``states`` list, accumulating a fitness
    score across all states.  Returns the total fitness, floored at -1.
    """
    best_fitness = 0
    # NOTE(review): `randstate` is computed but never used below.
    randstate = random.choice(states)
    fitness_current = 0
    current_max_fitness = 0
    net = neat.nn.recurrent.RecurrentNetwork.create(
        self.genome, self.config)
    # Fitness accumulates across states rather than resetting per state.
    for st in states:
        frame = 0
        counter = 0  # frames since fitness last improved (stall counter)
        xpos = 0
        xpos_max = 0
        health = 2048
        health_current = 2048
        done = False
        self.env = retrowrapper.RetroWrapper('FZero-Snes', state=st)
        ob = self.env.reset()
        ac = self.env.action_space.sample()  # NOTE(review): unused
        inx, iny, inc = self.env.observation_space.shape
        # Downscale 8x per dimension to shrink the network's input size.
        inx = int(inx / 8)
        iny = int(iny / 8)
        # Prime the loop with the first action so `info` is populated.
        ob, rew, done, info = self.env.step(actions[0])
        while not done:
            frame += 1
            self.env.render()
            # Grayscale + resize the frame, then flatten it for the net.
            ob = cv2.resize(ob, (inx, iny))
            ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
            ob = np.reshape(ob, (inx, iny))
            imgarray = np.ndarray.flatten(ob)
            #imgarray = [info['x'], info['y'], info['pos'], info['speed'], info['health']]
            nnOutput = net.activate(imgarray)
            # Take the action with the highest network output.
            ob, rew, done, info = self.env.step(
                actions[np.argmax(nnOutput)])
            health = info['health']
            xpos = info['pos']
            if health < health_current:
                health_current = health
            # Reward forward progress past the best position so far...
            if xpos > xpos_max:
                fitness_current += 10
                xpos_max = xpos
            # ...and reward moving forward without damage or reversing;
            # penalize otherwise.
            if info['speed'] > 0 and health == health_current and info[
                'reverse'] == 0:
                fitness_current += 1
            else:
                fitness_current -= 1
            # Presumably a course-completion threshold — large bonus and
            # stop.  TODO confirm where 9030 comes from.
            if fitness_current > 9030:
                fitness_current += 100000
                done = True
            #fitness_current += rew
            if fitness_current > current_max_fitness:
                current_max_fitness = fitness_current
                counter = 0
            else:
                counter += 1
            # Stop early after 250 frames with no fitness improvement.
            if done or counter == 250:
                done = True
                #self.env.close()
                if fitness_current > best_fitness:
                    best_fitness = fitness_current
                    print("Best Fitness So Far!")
                    # Persist the genome that achieved the new best.
                    with open('best_yet.pkl', 'wb') as output:
                        pickle.dump(self.genome, output, 1)
                print("Current Fitness: ", fitness_current,
                      "Best Ever: ", best_fitness)
    # Floor negative scores so NEAT never sees a fitness below -1.
    if fitness_current < 0:
        fitness_current = -1
    return fitness_current
https://colab.research.google.com/drive/1z4IUxUtPWAf8xz6PCY672reMz2lFjnPW
"""

!apt-get install pkg-config lua5.1 build-essential ffmpeg git
!pip install tqdm retrowrapper gym-retro
!pip install -U git+https://github.com/frenchie4111/dumbrain.git
!python -m dumbrain.rl.retro_contest.install_games http://aiml.mikelyons.org/datasets/sonic/Sonic%20Roms.zip

import retro
import retrowrapper

env = retrowrapper.RetroWrapper(
    game='SonicTheHedgehog2-Genesis',
    state='MetropolisZone.Act1'
)

import retrowrapper
import matplotlib.pyplot as plt

observation = env.reset()
for i in range(3600):
    random_action = env.action_space.sample()
    observation, reward, done, info = env.step(
        random_action)
    if done:
        observation = env.reset()
plt.imshow(observation)
# NOTE(review): the next three statements are the tail of a function whose
# definition starts before this excerpt.
print("Finished building the model")
print(model.summary())
return model


def get_empty_action_space(num_actions):
    """
    Returns an action space with nothing selected.
    """
    return [0] * num_actions  # len(env.BUTTONS)


if __name__ == "__main__":
    epsilon = .1  # exploration
    env = retrowrapper.RetroWrapper(game='AlteredBeast-Genesis',
                                    state='Level1')
    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=MAX_MEMORY)
    model = build_model()
    # Uncomment the line below to continue training
    # NOTE(review): the line below is active here despite the comment —
    # it may have been commented out in the true original; confirm.
    model.load_weights("model.h5")
    # Train
    tick = 0  # Frame count
    loss = 0  # Cumulative loss
    q_max = 0
    print('starting')
    state = 'exploring'
    for episode in range(MAX_EPISODES):
        # NOTE(review): excerpt is truncated here — the training-loop body
        # continues beyond this view.
def store_effect(self, idx, action, reward, done):
    """Store effects of action taken after observing frame stored at index idx.

    The reason `store_frame` and `store_effect` is broken up into two
    functions is so that one can call `encode_recent_observation` in
    between.

    Parameters
    ----------
    idx: int
        Index in buffer of recently observed frame
        (returned by `store_frame`).
    action: int
        Action that was performed upon observing this frame.
    reward: float
        Reward that was received when the action was performed.
    done: bool
        True if episode was finished after performing that action.
    """
    self.action[idx] = action
    self.reward[idx] = reward
    self.done[idx] = done


# Pre-build one environment per (game, state) pair so sampling is cheap.
# NOTE(review): assumes `sonic_envs` maps game name -> iterable of state
# names — confirm against its definition elsewhere in the project.
game_states = [(game, state) for game in sonic_envs
               for state in sonic_envs[game]]
game_envs = [retrowrapper.RetroWrapper(game=game, state=state)
             for game, state in game_states]


def sample_env():
    # env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    # return env
    # Pick one of the pre-built environments uniformly at random.
    return random.choice(game_envs)