def load_movie(movie_file):
    """Open a movie and build an emulator positioned at its recorded start.

    The file is opened twice: a throwaway handle is stepped to exhaustion to
    measure the movie's length, then a fresh handle is advanced by one step
    and used to configure the emulator.

    Returns:
        A ``(emulator, movie, duration)`` tuple. ``duration`` is the number
        of successful ``step()`` calls of the counting pass, minus one.
    """
    # Counting pass: exhaust a separate handle so the returned one stays fresh.
    counter = retro.Movie(movie_file)
    frames_stepped = 0
    while counter.step():
        frames_stepped += 1
    duration = frames_stepped - 1

    movie = retro.Movie(movie_file)
    del counter
    movie.step()

    emulator = retro.make(
        game=movie.get_game(),
        state=retro.State.NONE,
        use_restricted_actions=retro.Actions.ALL,
        players=movie.players,
    )
    emulator.initial_state = movie.get_state()
    emulator.reset()
    return emulator, movie, duration
def render(file):
    """Replay the movie at *file*, drawing the emulator window periodically.

    Every movie frame is fed to the environment; the window is only redrawn
    once the skip counter reaches ``render_interval`` to keep playback fast.
    """
    print(file)
    movie = retro.Movie(file)
    movie.step()

    env = retro.make(
        game=movie.get_game(),
        state=retro.State.NONE,
        use_restricted_actions=retro.Actions.ALL,
    )
    env.initial_state = movie.get_state()
    env.reset()

    button_count = len(env.buttons)
    render_interval = 4
    skipped = 0
    while movie.step():
        time.sleep(0.001)
        if skipped == render_interval:
            env.render()
            skipped = 0
        else:
            skipped += 1

        # Only the first player's buttons are read from the movie.
        pressed = [movie.get_key(button, 0) for button in range(button_count)]
        env.step(pressed)

    env.close()
def main(argv=sys.argv[1:]):
    """Play back one or more movies, optionally logging results to a CSV.

    Args:
        argv: Command-line arguments (without the program name).

    When ``--csv-out`` is given, a monitor-style CSV is created: a ``#``
    prefixed JSON header line followed by a CSV header whose reward columns
    depend on the player count of the first movie. Each movie is then handed
    to ``_play`` through the executor.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('movies', type=str, nargs='+')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--jobs', '-j', type=int, default=1)
    group.add_argument('--csv-out', '-c', type=str)
    parser.add_argument('--ending', '-e', type=int)
    parser.add_argument('--viewer', '-v', type=str)
    parser.add_argument('--no-video', '-V', action='store_true')
    parser.add_argument('--info-dict', '-i', action='store_true')
    parser.add_argument('--npy-actions', '-a', action='store_true')
    parser.add_argument('--lossless', '-L', type=str, choices=['mp4', 'mp4rgb', 'png', 'ffv1'])
    args = parser.parse_args(argv)

    monitor_csv = None
    monitor_file = None
    # try/finally so the CSV file is closed even when playback raises.
    try:
        if args.csv_out:
            m0 = retro.Movie(args.movies[0])
            game = m0.get_game()
            reward_fields = ['r'] if m0.players == 1 else ['r%d' % i for i in range(m0.players)]
            # newline='' is what the csv module requires of its file object;
            # without it rows get an extra '\r' on platforms that translate
            # line endings.
            monitor_file = open(args.csv_out, 'w', newline='')
            monitor_file.write('#{"t_start": 0.0, "gym_version": "gym_retro", "env_id": "%s"}\n' % game)
            monitor_csv = csv.DictWriter(monitor_file, fieldnames=reward_fields + ['l', 't'])
            monitor_csv.writeheader()

        with Executor(args.jobs or None) as pool:
            # zip(*...) turns the per-movie tuples into the three parallel
            # argument sequences that pool.map expects.
            list(pool.map(_play, *zip(*[(movie, args, monitor_csv) for movie in args.movies])))
    finally:
        if monitor_file:
            monitor_file.close()
def render(file):
    """Replay the movie at *file* and brighten the visited cells of ``level_map``.

    After every emulator step the ``x``/``y`` values reported in the step
    info select a patch of up to 8x8 pixels in the module-level
    ``level_map``. Each of the first three channels of every pixel in that
    patch is raised by the module-level ``hf`` and capped at 255.
    """
    movie = retro.Movie(file)
    movie.step()
    env = retro.make(game=movie.get_game(), state=retro.STATE_NONE,
                     use_restricted_actions=retro.ACTIONS_ALL)
    try:
        env.initial_state = movie.get_state()
        env.reset()
        while movie.step():
            keys = [movie.get_key(i) for i in range(env.NUM_BUTTONS)]
            _obs, _rew, _done, _info = env.step(keys)
            y = _info['y']
            x = _info['x']

            patch = level_map[y:(y + 8), x:(x + 8)]
            if not patch.size:
                # Position lies outside the map; nothing to highlight.
                continue
            # Iterate over the pixels actually present so a patch clipped at
            # the map edge (fewer than 8 columns) no longer raises
            # IndexError. The loop names are distinct from the coordinates.
            highlight = [
                [[min(channel + hf, 255) for channel in pixel[:3]] for pixel in row]
                for row in patch
            ]
            level_map[y:(y + 8), x:(x + 8)] = highlight
    finally:
        # Release the emulator even if a step fails.
        env.close()
def main():
    """Play back each movie given on the command line.

    With ``--csv-out`` a monitor-style CSV (``#`` JSON header line plus
    ``r``/``l``/``t`` columns) is created and its writer is handed to
    ``playback_movie``. With ``--ending N`` the playback delay is the movie
    duration minus ``N``; otherwise it is 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('movies', type=str, nargs='+')
    parser.add_argument('--csv-out', '-c', type=str)
    parser.add_argument('--ending', '-e', type=int)
    parser.add_argument('--viewer', '-v', type=str)
    args = parser.parse_args()

    monitor_csv = None
    monitor_file = None
    # try/finally so the CSV file is closed even when a playback raises.
    try:
        if args.csv_out:
            game = retro.Movie(args.movies[0]).get_game()
            # newline='' is what the csv module requires of its file object.
            monitor_file = open(args.csv_out, 'w', newline='')
            monitor_file.write(
                '#{"t_start": 0.0, "gym_version": "gym_retro", "env_id": "%s"}\n' % game)
            monitor_csv = csv.DictWriter(monitor_file, fieldnames=['r', 'l', 't'])
            monitor_csv.writeheader()

        for movie in args.movies:
            emulator, m, duration = load_movie(movie)
            delay = duration - args.ending if args.ending is not None else 0
            playback_movie(emulator, m, monitor_csv,
                           movie.replace('.bk2', '.mp4'), args.viewer, delay)
            # Drop the emulator reference before the next movie is loaded.
            del emulator
    finally:
        if monitor_file:
            monitor_file.close()
def extract_human_data(movie_path):
    """Load a replay file and extract its series of state-action pairs.

    Args:
        movie_path: Path to the replay file.

    Returns:
        A ``(states, actions, next_states, rewards, dones)`` tuple of lists
        with one entry per movie frame. Observations have their last axis
        moved to the front (channels first, for pytorch); each action is an
        ``int8`` array of the first player's button states.
    """
    movie = retro.Movie(movie_path)
    movie.step()

    env = retro.make(game=movie.get_game(), state=retro.State.NONE,
                     use_restricted_actions=retro.Actions.ALL)
    states, actions, next_states, rewards, dones = [], [], [], [], []
    try:
        env.initial_state = movie.get_state()
        # Same axis order as swapaxes(0, 2) followed by swapaxes(1, 2).
        state = np.transpose(env.reset(), (2, 0, 1))

        button_count = len(env.buttons)
        while movie.step():
            keys = [movie.get_key(i, 0) for i in range(button_count)]
            next_state, reward, done, _info = env.step(keys)

            # Switch the channels to be first for pytorch
            next_state = np.transpose(next_state, (2, 0, 1))

            actions.append(np.int8(keys))
            states.append(state)
            next_states.append(next_state)
            rewards.append(reward)
            dones.append(done)

            state = next_state
    finally:
        # The original never closed the emulator; release it so this
        # function can be called again in the same process.
        env.close()

    return states, actions, next_states, rewards, dones
def _setup(self, exp_win):
    """Open the movie, build the emulator from it, then run the parent ``_setup``.

    The emulator is created for ``self.game_name`` with the movie's player
    count, and its initial state is taken from the movie.
    """
    movie = retro.Movie(self.movie_filename)
    self.movie = movie

    emulator_options = dict(
        record=False,
        state=retro.State.NONE,
        scenario=self.scenario,
        # use_restricted_actions=retro.Actions.ALL,
        players=movie.players,
    )
    self.emulator = retro.make(self.game_name, **emulator_options)
    self.emulator.initial_state = movie.get_state()

    super()._setup(exp_win)
def add_movies(agent):
    """Feed every ``.bk2`` movie under ``./movies`` into the agent's memory.

    Each movie is replayed in a fresh environment. Per frame the shaped
    reward (environment reward plus twice ``compute_added_reward``) and the
    downscaled frame are recorded; once more than ``stack_size`` steps have
    been taken a transition is memorized, and the agent is trained every
    10th step. Replay of a movie stops as soon as the environment reports
    ``done``.

    Args:
        agent: Object providing ``memorize(...)`` and ``train()``.
    """
    path = os.path.join(os.getcwd(), "movies")
    for dirpath, _, files in os.walk(path):
        for file in files:
            if os.path.splitext(file)[1] != ".bk2":
                continue
            print(file)

            # Join with the directory being walked, not the top-level path,
            # so movies inside sub-directories are found.
            movie = retro.Movie(os.path.join(dirpath, file))
            movie.step()
            environment = retro.make(game=movie.get_game(), state=None,
                                     use_restricted_actions=retro.Actions.ALL,
                                     players=movie.players)
            try:
                environment.initial_state = movie.get_state()
                environment.reset()

                steps = 0
                stack_size = 1
                prev = None
                actions = []
                rewards = []
                stacked_frames = []
                while movie.step():
                    steps += 1
                    action = [
                        movie.get_key(index, player)
                        for player in range(movie.players)
                        for index in range(environment.num_buttons)
                    ]
                    current_frame, reward, done, info = environment.step(action)
                    reward += 2 * compute_added_reward(prev, info, current_frame)
                    if done:
                        break

                    current_frame = downscale(current_frame, info['x'], info['y'])
                    stacked_frames.append(current_frame)
                    info["action"] = action
                    info["reward"] = reward
                    actions.append(action)
                    rewards.append(reward)

                    # With stack_size == 1 this loop is empty; it is kept so
                    # a larger stack size keeps working as before.
                    for i in range(1, stack_size):
                        if steps - i >= 0:
                            stacked_frames[steps - i] = np.hstack(
                                (stacked_frames[steps - i], current_frame))

                    if steps > stack_size:
                        offset = steps - (stack_size + 1)
                        average = np.array(rewards[-offset:]).mean()
                        agent.memorize(stacked_frames[offset], actions[offset],
                                       average, stacked_frames[offset + 1], done)
                    if steps % 10 == 0:
                        agent.train()
                    prev = info
            finally:
                # Release the emulator even if replay or training raises.
                environment.close()
            print()
def setup(self, exp_win, output_path, output_fname_base):
    """Run the parent setup, then prepare the emulator and the A/V stimuli.

    The emulator is created for ``self.game_name`` with the movie's player
    count, started from the movie's state, and reset. An image stimulus
    sized to the window and a stereo sound stream are created afterwards.
    """
    super().setup(exp_win, output_path, output_fname_base)

    movie = retro.Movie(self.movie_filename)
    self.movie = movie

    emulator_options = dict(
        record=False,
        state=retro.State.NONE,
        scenario=self.scenario,
        #use_restricted_actions=retro.Actions.ALL,
        players=movie.players,
    )
    emulator = retro.make(self.game_name, **emulator_options)
    self.emulator = emulator
    emulator.initial_state = movie.get_state()
    emulator.reset()

    self.game_vis_stim = visual.ImageStim(
        exp_win,
        size=exp_win.size,
        units='pixels',
        autoLog=False,
    )
    self.game_sound = SoundDeviceBlockStream(stereo=True, blockSize=735)
def play_back(bk2_file):
    """Replay the movie at *bk2_file* on screen, sleeping 1/60 s per frame.

    Args:
        bk2_file: Path to the ``.bk2`` recording.
    """
    movie = retro.Movie(bk2_file)
    movie.step()
    env = retro.make(game=movie.get_game(), use_restricted_actions=retro.ACTIONS_ALL)
    try:
        env.initial_state = movie.get_state()
        env.reset()
        while movie.step():
            keys = [movie.get_key(i) for i in range(env.NUM_BUTTONS)]
            env.step(keys)
            env.render()
            time.sleep(1/60)
    finally:
        # The original never closed the emulator; release it so another
        # environment can be created afterwards in the same process.
        env.close()


# play_back(NEAT_DIR + '/SonicTheHedgehog-Genesis-GreenHillZone-000000.bk2')
def render(file):
    """Replay the movie at *file*, showing an occasional frame with matplotlib.

    Every movie frame is stepped through the environment; a frame is only
    drawn once the skip counter reaches ``framerate * 250``.
    """
    movie = retro.Movie(file)
    movie.step()

    env = retro.make(
        game=movie.get_game(),
        state=retro.STATE_NONE,
        use_restricted_actions=retro.ACTIONS_ALL,
    )
    env.initial_state = movie.get_state()
    env.reset()

    framerate = 2
    draw_threshold = framerate * 250
    skipped = 0
    while movie.step():
        if skipped == draw_threshold:
            plt.imshow(env.render("rgb_array"))
            # A tiny pause lets matplotlib process the draw.
            plt.pause(0.000000000000000000000000000000000001)
            skipped = 0
        else:
            skipped += 1

        pressed = [movie.get_key(button) for button in range(env.NUM_BUTTONS)]
        env.step(pressed)

    env.close()
def replay(recording):
    """Replay the movie at *recording* on screen, frame by frame.

    Args:
        recording: Path to the ``.bk2`` file.
    """
    movie = retro.Movie(recording)
    movie.step()

    env = retro.make(
        game=movie.get_game(),
        state=None,
        # bk2s can contain any button presses, so allow everything
        use_restricted_actions=retro.Actions.ALL,
        players=movie.players,
    )
    try:
        env.initial_state = movie.get_state()
        env.reset()

        while movie.step():
            # One flat list holding every player's buttons, player by player.
            keys = [
                movie.get_key(i, p)
                for p in range(movie.players)
                for i in range(env.num_buttons)
            ]
            env.step(keys)
            env.render()
    finally:
        # The original never closed the emulator; release it so replay()
        # can be called again in the same process.
        env.close()
def add_movies(agent, positions):
    """Feed every ``.bk2`` movie under ``./movies`` into the agent's memory.

    Each movie is replayed in a fresh environment. For every frame the
    transition (previous downscaled frame, action, shaped reward, next
    downscaled frame, done) is memorized and the agent is trained every
    10th step. Replay of a movie stops as soon as the environment reports
    ``done``.

    Args:
        agent: Object providing ``memorize(...)`` and ``train()``.
        positions: Passed through to ``compute_added_reward``.
    """
    path = os.path.join(os.getcwd(), "movies")
    for dirpath, _, files in os.walk(path):
        for file in files:
            if os.path.splitext(file)[1] != ".bk2":
                continue
            print(file)

            # Join with the directory being walked, not the top-level path,
            # so movies inside sub-directories are found.
            movie = retro.Movie(os.path.join(dirpath, file))
            movie.step()
            environment = retro.make(game=movie.get_game(), state=None,
                                     use_restricted_actions=retro.Actions.ALL,
                                     players=movie.players)
            try:
                environment.initial_state = movie.get_state()
                current_frame = downscale(environment.reset(), 0, 0)

                steps = 0
                prev = None
                while movie.step():
                    steps += 1
                    action = [
                        movie.get_key(index, player)
                        for player in range(movie.players)
                        for index in range(environment.num_buttons)
                    ]
                    next_frame, reward, done, info = environment.step(action)
                    reward += 2 * compute_added_reward(
                        positions, prev, info, movie=True)
                    if done:
                        break

                    next_frame = downscale(next_frame, info['x'], info['y'])
                    info["reward"] = reward
                    info["action"] = action
                    agent.memorize(current_frame, action, reward, next_frame, done)
                    if steps % 10 == 0:
                        agent.train()
                    prev = info
                    current_frame = next_frame
            finally:
                # Release the emulator even if replay or training raises.
                environment.close()
            print()
def main():
    """Replay ``movies/level1_1.bk2``, then continue with random actions.

    While the movie plays, every distinct key combination is printed and
    collected together with its action meaning, and the emulator state
    after each step is captured. Afterwards the set of combinations is
    printed, the environment is restarted (from the last captured state if
    there is one) and random actions are played until ``done``.
    """
    import os

    # os.path.join keeps the path valid on non-Windows systems; on Windows
    # it yields the same string the original hard-coded.
    movie = retro.Movie(os.path.join("movies", "level1_1.bk2"))
    movie.step()
    environment = retro.make(game=movie.get_game(), state=None,
                             use_restricted_actions=retro.Actions.ALL,
                             players=movie.players)
    try:
        environment.initial_state = movie.get_state()
        environment.reset()

        actions = set()
        last_state = None
        while movie.step():
            keys = [
                movie.get_key(index, player)
                for player in range(movie.players)
                for index in range(environment.num_buttons)
            ]
            described = (str(keys), str(environment.get_action_meaning(keys)))
            print(described)
            actions.add(described)

            _next_state, _reward, done, _information = environment.step(keys)
            environment.render()
            last_state = environment.em.get_state()
            if done:
                break
        print(actions)

        # NOTE(review): the source's indentation was lost; this assumes only
        # the initial_state assignment is guarded. The alternative nesting
        # behaves the same whenever the movie has at least one frame.
        if last_state is not None:
            environment.initial_state = last_state
        environment.reset()
        while True:
            _next_state, _reward, done, _information = environment.step(
                environment.action_space.sample())
            environment.render()
            if done:
                break
    finally:
        # Release the emulator even if a step raises.
        environment.close()
def render(file):
    """Replay the movie at *file* and encode it to ``<basename>.mp4``.

    The output is written to the current directory using the module-level
    ``fourcc``, ``FPS``, ``rwidth`` and ``rheight`` settings. A resized
    frame is written whenever the frame counter equals the module-level
    ``timedelay``; the emulator window is rendered on every step.
    """
    movie = retro.Movie(file)
    file_out = splitext(basename(file))[0]+'.mp4'
    movie.step()
    video = VideoWriter(file_out, fourcc, float(FPS), (rwidth, rheight))
    env = None
    try:
        env = retro.make(game=movie.get_game(), state=retro.State.NONE,
                         use_restricted_actions=retro.Actions.ALL)
        env.initial_state = movie.get_state()
        # Seed `resized` with the reset observation so the first write
        # cannot hit an unassigned variable (it did when timedelay == 0).
        resized = cv2.resize(env.reset(), (rwidth, rheight))

        frame = 0
        while movie.step():
            if frame == timedelay:
                video.write(cv2.cvtColor(resized, cv2.COLOR_RGB2BGR))
                frame = 0
            keys = [movie.get_key(i, 0) for i in range(env.num_buttons)]
            _obs, _rew, _done, _info = env.step(keys)
            resized = cv2.resize(_obs, (rwidth, rheight))
            frame += 1
            env.render()
    finally:
        # Always close the emulator and release the writer, including on
        # error, so neither resource is left open.
        if env is not None:
            env.close()
        video.release()
def play_bk2(self, render=False, write=True):
    """Replay every ``<game>-<state>*.bk2`` in ``self.path`` and collect its keys.

    Args:
        render: Draw the emulator window while the episode is running.
        write: Dump the collected keys to ``<game>-<state>.json`` in
            ``self.path`` via ``to_json``.

    For each movie the per-frame button states are stored as lists of ints
    under the episode number parsed from the trailing ``-<n>`` of the file
    name.
    """
    keysdict = {}
    for f in glob.glob(os.path.join(self.path,'{}-{}*.bk2'.format(self.game,self.state))):
        movie = retro.Movie(f)
        movie.step()

        env = retro.make(game=movie.get_game(), state=retro.STATE_NONE,
                         use_restricted_actions=retro.ACTIONS_ALL)
        try:
            env.initial_state = movie.get_state()
            env.reset()

            base = os.path.splitext(os.path.basename(f))[0]
            ep = int(base.split('-')[-1])

            keysarr = []
            episode_over = False
            while movie.step():
                keys = [movie.get_key(i) for i in range(env.NUM_BUTTONS)]
                keysarr.append([int(k) for k in keys])
                if episode_over:
                    # Keep recording the remaining movie keys, but do not
                    # step the environment after it reported `done` (the
                    # original closed the env here and then stepped it).
                    continue
                _obs, _rew, episode_over, _info = env.step(keys)
                if render and not episode_over:
                    env.render()
            keysdict[ep] = keysarr
        finally:
            # Close exactly once per movie, whether or not `done` was seen.
            env.close()

    # NOTE(review): the source's indentation was lost; the output name does
    # not depend on the episode, so writing once after the loop yields the
    # same final file as writing per movie.
    if write:
        with open(os.path.join(self.path,'{}-{}.json'.format(self.game,self.state)), 'w') as outfile:
            outfile.write(to_json(keysdict))
def bk2_to_events(bk2_file, buttons_names=SNES_BUTTON_NAMES, frame_rate=60):
    """Convert the first player's button presses in a movie to an events table.

    Args:
        bk2_file: Path to the ``.bk2`` recording.
        buttons_names: Names of the buttons, in movie key order.
        frame_rate: Frames per second, used to convert frame indices to
            seconds.

    Returns:
        A ``pandas.DataFrame`` with columns ``trial_type`` (button name),
        ``onset`` and ``duration`` (both in seconds), sorted by onset.
        The frame is empty (with those columns) if no button is pressed.
    """
    movie = retro.Movie(bk2_file)

    # Honor the caller-supplied button list; the original always used
    # SNES_BUTTON_NAMES and ignored this parameter.
    n_buttons = len(buttons_names)
    all_released = [False] * n_buttons

    # Pad with an all-released frame at both ends so every press has a
    # matching release in the diff below.
    keys = [all_released]
    while movie.step():
        keys.append([movie.get_key(i, 0) for i in range(n_buttons)])
    keys.append(all_released)

    keys = np.asarray(keys).astype(np.int8)
    # +1 where a button goes down, -1 where it comes back up.
    key_diff = np.diff(keys, 1, 0)

    per_button = []
    for name, kd in zip(buttons_names, key_diff.T):
        onsets = np.argwhere(kd > 0)[:, 0]
        if len(onsets) == 0:
            continue
        releases = np.argwhere(kd < 0)[:, 0]
        per_button.append(pd.DataFrame({
            "trial_type": [name] * len(onsets),
            "onset": onsets / frame_rate,
            "duration": (releases - onsets) / frame_rate,
        }))

    if not per_button:
        # Sorting a column-less frame by "onset" would raise KeyError.
        return pd.DataFrame(columns=["trial_type", "onset", "duration"])
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat(per_button).sort_values("onset")
import pickle
from baselines.common.atari_wrappers import WarpFrame, FrameStack
import numpy as np
#from sonic_util_test import AllowBacktracking #, make_env
from collections import OrderedDict

# Mode flag read from the command line:
# 1 for viewing videos, 0 for creating waypoints
debug = int(sys.argv[1])

# Which recording to load.
level_string = 'BustAMove.1pplay.Level1'  #'BustAMove.Challengeplay0'
replay_number = '0'
#movie_path = 'human/BustAMove-Snes/scenario/BustAMove-Snes-{}.state-0{}.bk2'.format(level_string,replay_number)
movie_path = 'videos/BustAMove-Snes-{}-00000{}.bk2'.format(
    level_string,
    replay_number,
)
print(movie_path)

movie = retro.Movie(movie_path)
movie.step()

# Build the environment for the movie's game and start it from the
# state stored in the movie.
scenario_string = 'scenario'  #'test_retro' #'trajectory_max'
env = retro.make(
    game=movie.get_game(),
    state=level_string,
    scenario=scenario_string,
)
env.initial_state = movie.get_state()
env.reset()

# Button names, one per key index (a list, despite the name).
button_dict = [
    'B',
    'A',
    'MODE',
    'START',
    'UP',
    'DOWN',
    'LEFT',
    'RIGHT',
    'C',
    'Y',
    'X',
    'Z',
]
num_buttons = 12  #len(button_dict)
def record_movie(self, path):
    """Create a movie at *path*, store it on ``self.movie`` and configure it.

    The movie is configured with ``self.gamename`` and ``self.em``; when
    ``self.initial_state`` is truthy it is stored in the movie as well.
    """
    # NOTE(review): the positional ``True`` presumably selects recording
    # mode - confirm against the retro.Movie signature.
    self.movie = retro.Movie(path, True, self.players)
    recorder = self.movie
    recorder.configure(self.gamename, self.em)

    if not self.initial_state:
        return
    recorder.set_state(self.initial_state)
def main():
    """Run ``check_env`` against a set of games or against a recorded movie.

    Without ``--movie-file``, every game whose name ends with ``--suffix``
    is checked using ``CHUNK_LENGTH * 2`` actions sampled from a seeded
    action space, and the games that fail are listed at the end. With
    ``--movie-file``, the actions recorded in the movie are replayed
    against that movie's game instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--deterministic',
                        action='store_true',
                        help='use deterministic wrapper')
    parser.add_argument('--suffix',
                        default='',
                        help='run against games matching this suffix')
    parser.add_argument(
        '--movie-file',
        help=
        'load a bk2 and use states obtained from replaying actions from the bk2'
    )
    args = parser.parse_args()

    # The original ended in an `else: raise ...` after testing both
    # `is None` and `is not None`; that branch could never run and has
    # been removed.
    if args.movie_file is None:
        games = [
            g for g in sorted(retro.data.list_games())
            if g.endswith(args.suffix)
        ]
        failed_games = []
        for game in games:
            print(game)

            def make_env():
                env = retro.make(game=game)
                if args.deterministic:
                    env = MoreDeterministicRetroState(env)
                else:
                    env = MoreDeterministicRetroState(env, reset_on_step=False)
                return env

            # A throwaway env is only used to sample a reproducible action
            # sequence; check_env builds its own environments via make_env.
            env = make_env()
            env.action_space.seed(0)
            acts = [env.action_space.sample() for _ in range(CHUNK_LENGTH * 2)]
            env.close()
            if not check_env(make_env, acts, timeout=128):
                failed_games.append(game)
        for game in failed_games:
            print('failed:', game)
    else:
        movie = retro.Movie(args.movie_file)
        movie.step()

        def make_env():
            # NOTE(review): `players` is not forwarded although actions are
            # collected for every movie player below - confirm whether
            # multi-player movies are expected here.
            env = retro.make(movie.get_game(),
                             state=retro.State.DEFAULT,
                             use_restricted_actions=retro.Actions.ALL)
            env.initial_state = movie.get_state()
            if args.deterministic:
                env = MoreDeterministicRetroState(env)
            else:
                env = RetroState(env)
            return env

        env = make_env()
        acts = []
        while movie.step():
            acts.append([
                movie.get_key(i, p)
                for p in range(movie.players)
                for i in range(env.num_buttons)
            ])
        env.close()
        check_env(make_env, acts, verbose=True)
import numpy as np import pandas as pd # Hacen lista de los archivos que generaro jugadas = [] for numeroJugada in range(27): jugadas.append('SonicTheHedgehog-Genesis-GreenHillZone.Act1-000000' + str(numeroJugada) + '.bk2') for i in jugadas: print(i) for n in jugadas: print() movie = retro.Movie("records/" + n) merge = pd.read_csv('dataset.csv', names=["data", "target"], header=None, skiprows=1) # Los cargan movie.step() ## Y leen otra vez el csv ,de manera tal que si se va la luz, se cae la compu, o paso algo fortuito, ustedes siguen desdel punto que quedaron ###vuelven a leer csv #pd.read_csv('dataset.csv') ### env = retro.make( game=movie.get_game(), state=None, # bk2s can contain any button presses, so allow everything use_restricted_actions=retro.Actions.ALL,
import retro
import argparse

# Command line: the movie to play is mandatory. Without it the original
# passed None to retro.Movie.
parser = argparse.ArgumentParser(description='Play a vid')
parser.add_argument('--vid', type=str, required=True, help='bk2 file name')
args = parser.parse_args()
print(args.vid)

movie = retro.Movie(args.vid)
movie.step()

# Create the environment with as many players as the movie recorded, so
# the per-player keys gathered below line up with what the env consumes.
env = retro.make(game=movie.get_game(),
                 state=None,
                 use_restricted_actions=retro.Actions.ALL,
                 players=movie.players)
try:
    env.initial_state = movie.get_state()
    env.reset()

    while movie.step():
        # One flat list holding every player's buttons, player by player.
        keys = [
            movie.get_key(i, p)
            for p in range(movie.players)
            for i in range(env.num_buttons)
        ]
        _obs, _rew, _done, _info = env.step(keys)
        env.render()
finally:
    # Release the emulator when playback ends or fails.
    env.close()
def parse_demo(env, rep_buffer, movie_path, nsteps=10):
    """Replay a demonstration movie and push its transitions into *rep_buffer*.

    Args:
        env: Environment to replay the movie in; its ``initial_state`` is
            overwritten with the movie's state.
        rep_buffer: Buffer handed to ``add_transition``.
        movie_path: Path to the demonstration movie.
        nsteps: Passed through to ``add_transition``.

    Returns:
        ``rep_buffer``, after all transitions have been added.
    """
    print("Parsing demo:", movie_path)
    movie = retro.Movie(movie_path)
    movie.step()
    env.initial_state = movie.get_state()
    curr_obs = env.reset()
    # Guarantees the final flush below has an observation to pass even when
    # the movie contains no frames (the original raised NameError there).
    _obs = curr_obs

    button_dict = [
        'B', 'A', 'MODE', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'C', 'Y',
        'X', 'Z'
    ]
    num_buttons = len(button_dict)
    action_dict = define_action_dict()

    parse_ts = 0
    episode_start_ts = 0
    nstep_gamma = 0.99
    nstep_state_deque = deque()
    nstep_action_deque = deque()
    nstep_rew_list = []
    nstep_nexts_deque = deque()
    nstep_done_deque = deque()

    while movie.step():
        keys = [movie.get_key(i) for i in range(num_buttons)]
        # Map the raw key combination onto the agent's action.
        game_a = action_dict[game_get_dict_key(keys)]
        _obs, _rew, _done, _info = env.step(keys)
        episode_start_ts += 1
        parse_ts += 1

        # paper limits reward
        _rew = np.sign(_rew) * np.log(1. + np.abs(_rew))

        nstep_state_deque.append(curr_obs)
        nstep_action_deque.append(game_a)
        nstep_rew_list.append(_rew)
        nstep_nexts_deque.append(_obs)
        nstep_done_deque.append(_done)

        # NOTE(review): the threshold is a literal 10 while the n-step
        # length is the `nsteps` parameter - confirm whether they should be
        # tied together.
        if episode_start_ts > 10:
            add_transition(rep_buffer, nstep_state_deque, nstep_action_deque,
                           nstep_rew_list, nstep_nexts_deque,
                           nstep_done_deque, _obs, False, nsteps, nstep_gamma)

        # if episode done we reset
        if _done:
            # emptying the deques
            add_transition(rep_buffer, nstep_state_deque, nstep_action_deque,
                           nstep_rew_list, nstep_nexts_deque,
                           nstep_done_deque, _obs, True, nsteps, nstep_gamma)
            # reset the environment, get the current state
            curr_obs = env.reset()
            nstep_state_deque.clear()
            nstep_action_deque.clear()
            nstep_rew_list.clear()
            nstep_nexts_deque.clear()
            nstep_done_deque.clear()
            episode_start_ts = 0
        else:
            # resulting state becomes the current state
            curr_obs = _obs

    # replay is over emptying the deques
    add_transition(rep_buffer, nstep_state_deque, nstep_action_deque,
                   nstep_rew_list, nstep_nexts_deque, nstep_done_deque, _obs,
                   True, nsteps, nstep_gamma)
    print('Parse finished. {} expert samples added.'.format(parse_ts))
    return rep_buffer
import retro
import time

# Open the recording and advance to its first frame.
movie = retro.Movie('GalagaDemonsOfDeath-Nes-1Player.Level1-000079.bk2')
movie.step()

# Build an environment matching the movie and start it from the movie's state.
env = retro.make(
    game=movie.get_game(),
    state=None,
    use_restricted_actions=retro.Actions.ALL,
    players=movie.players,
)
env.initial_state = movie.get_state()
env.reset()

# Feed each recorded frame to the emulator and draw it.
while movie.step():
    keys = [
        movie.get_key(i, p)
        for p in range(movie.players)
        for i in range(env.num_buttons)
    ]
    _obs, _rew, _done, _info = env.step(keys)
    env.render()
    time.sleep(0.01)
import retro

# Open the recording and advance to its first frame.
movie = retro.Movie('1140best.bk2')
movie.step()

make_options = dict(
    game=movie.get_game(),
    state=None,
    # bk2s can contain any button presses, so allow everything
    use_restricted_actions=retro.Actions.ALL,
    players=movie.players,
)
env = retro.make(**make_options)
env.initial_state = movie.get_state()
env.reset()

# Step the emulator through every recorded frame (no rendering).
while movie.step():
    keys = [
        movie.get_key(i, p)
        for p in range(movie.players)
        for i in range(env.num_buttons)
    ]
    env.step(keys)
import retro if __name__ == "__main__": # Path to the replay data and the files contained # therein. data_path = os.path.abspath("./data/human/") data_files = os.listdir(data_path) # Start processing the human replays for data_file in data_files: level = data_file.split("-")[-2] print("Processing level: {}".format(level)) # Load the movie from the replay file movie = retro.Movie(os.path.join(data_path, data_file)) movie.step() # Setup an environment for the agent to play in and # get rewards out from. env = retro.make(game=movie.get_game(), state=retro.State.NONE, use_restricted_actions=retro.Actions.ALL) env.initial_state = movie.get_state() state = env.reset() # Initialize an empty set of places to store the data # and begin stepping through the movie frames. states, actions, next_states, rewards, dones = [], [], [], [], [] while movie.step():
def render(file):
    """Replay the movie at *file* and brighten the visited cells of ``level_map``.

    After every emulator step the ``x``/``y`` values reported in the step
    info select a patch of up to 8x8 pixels in the module-level
    ``level_map``. Each of the first three channels of every pixel in that
    patch is raised by the module-level ``hf`` and capped at 255.

    The emulator window is not drawn. The original kept a frame counter
    around a commented-out ``env.render()`` call; it had no effect and has
    been removed.
    """
    movie = retro.Movie(file)
    movie.step()
    env = retro.make(game=movie.get_game(),
                     state=retro.STATE_NONE,
                     use_restricted_actions=retro.ACTIONS_ALL)
    try:
        env.initial_state = movie.get_state()
        env.reset()
        while movie.step():
            keys = [movie.get_key(i) for i in range(env.NUM_BUTTONS)]
            _obs, _rew, _done, _info = env.step(keys)
            y = _info['y']
            x = _info['x']

            patch = level_map[y:(y + 8), x:(x + 8)]
            if not patch.size:
                # Position lies outside the map; nothing to highlight.
                continue
            # Iterate over the pixels actually present so a patch clipped at
            # the map edge (fewer than 8 columns) no longer raises
            # IndexError. The loop names are distinct from the coordinates.
            highlight = [
                [[min(channel + hf, 255) for channel in pixel[:3]] for pixel in row]
                for row in patch
            ]
            level_map[y:(y + 8), x:(x + 8)] = highlight
    finally:
        # Release the emulator even if a step fails.
        env.close()
def savetonumpy(game, state):
    """Replay a human recording and save observations/actions as ``.npy`` batches.

    The movie ``human/<game>/contest/<game>-<state>-0000.bk2`` is replayed.
    Frames are grouped into sequences of 300 steps, and every 10 sequences
    are saved as one batch under ``./data``. Whatever is in the current
    batch when the movie ends is saved as a final batch; an unfinished
    sequence (fewer than 300 steps) is not included.

    Args:
        game: Game name, used in the movie path and the output file names.
        state: State name, used in the movie path and the output file names.
    """
    movie_path = 'human/' + game + '/contest/' + game + '-' + state + '-0000.bk2'
    movie = retro.Movie(movie_path)
    movie.step()

    env = retro.make(game=movie.get_game(), state=retro.STATE_NONE,
                     use_restricted_actions=retro.ACTIONS_ALL)

    def save_batch(index, observations, actions):
        # Write one batch of sequences to the two output files.
        print("Saving dataset for batch {}".format(index))
        np.save('./data/obs_data_' + game + '_' + state + '_' + str(index), observations)
        np.save('./data/action_data_' + game + '_' + state + '_' + str(index), actions)

    try:
        env.initial_state = movie.get_state()
        observation = adjust_obs(env.reset())

        # array hierarchy: batches -> sequences -> steps
        obs_batch = []
        action_batch = []
        obs_sequence = []
        action_sequence = []

        step_count = 0
        episode_count = 0
        batch_count = 0

        print('stepping movie')
        while movie.step():
            # populate batches with sequences of 300 timesteps
            if step_count == 300:
                obs_batch.append(obs_sequence)
                action_batch.append(action_sequence)

                # print progress
                print("Batch {} Episode {} finished after {} timesteps".format(batch_count, episode_count, step_count))
                print("Current batch contains {} observations".format(sum(map(len, obs_batch))))

                # start a new sequence
                step_count = 0
                obs_sequence = []
                action_sequence = []
                episode_count += 1

                # save batches of 10 sequences
                if episode_count == 10:
                    save_batch(batch_count, obs_batch, action_batch)
                    episode_count = 0
                    obs_batch = []
                    action_batch = []
                    batch_count += 1

            keys = [movie.get_key(i) for i in range(env.NUM_BUTTONS)]
            action_sequence.append(keys)
            obs_sequence.append(observation)
            step_count += 1

            observation, _rew, _done, _info = env.step(keys)
            observation = adjust_obs(observation)
            # The original also fetched env.em.get_state() here on every
            # frame and discarded it; that work has been dropped.
            env.render()

        save_batch(batch_count, obs_batch, action_batch)
    finally:
        # Release the emulator even if replay or saving fails.
        env.close()
import retro

# Open the recording and advance to its first frame.
movie = retro.Movie('SonicTheHedgehog-Genesis-GreenHillZone.Act1-0000.bk2')
movie.step()

# Build an environment for the movie's game and start from the movie's state.
env = retro.make(
    game=movie.get_game(),
    state=None,
    use_restricted_actions=retro.ACTIONS_ALL,
)
env.initial_state = movie.get_state()
env.reset()

# Step the emulator through every recorded frame.
while movie.step():
    keys = [movie.get_key(button) for button in range(env.NUM_BUTTONS)]
    _obs, _rew, _done, _info = env.step(keys)
from keras import models, losses
import keras.backend as K


# Some models use the l1_loss function
def l1_loss(y_true, y_pred):
    """Return the sum of absolute differences over the last axis.

    Both tensors are printed before the loss is computed.
    """
    print(y_true, y_pred)
    absolute_error = K.abs(y_pred - y_true)
    return K.sum(absolute_error, axis=-1)


# Expose the loss on keras.losses before loading the model; presumably so
# that saved models referring to it by name can be restored.
losses.l1_loss = l1_loss

# argv[1]: model file, argv[2]: movie file.
model = models.load_model(sys.argv[1])

movie = retro.Movie(sys.argv[2])
movie.step()

env = retro.make(
    game=movie.get_game(),
    state=retro.STATE_NONE,
    use_restricted_actions=retro.ACTIONS_ALL,
)
env.initial_state = movie.get_state()
obs = env.reset()


def fixEmuColors(_obs):
    """Quantize *_obs* to multiples of 32 and rescale to float32 by 1/255."""
    quantized = (_obs / 32).astype(np.uint8) * 32
    return quantized.astype(np.float32) / 255