def run_online_eval(spec, info_space, ckpt):
    '''
    Run lab in eval mode for the given checkpoint by launching run_lab.py through util.run_cmd.
    The command is built from the trial and session prepaths, mirroring the manual bash cmd, e.g.
    python run_lab.py data/dqn_cartpole_2018_12_19_224811/dqn_cartpole_t0_spec.json dqn_cartpole eval@dqn_cartpole_t0_s1_ckpt-epi10-totalt1000
    @returns the result of util.run_cmd for the constructed command
    '''
    trial_prepath = util.get_prepath(spec, info_space, unit='trial')
    session_prepath = util.get_prepath(spec, info_space, unit='session')
    # only the prename and spec_name components are needed for the command
    _, _, prename, spec_name, _, _ = util.prepath_split(session_prepath)
    eval_cmd = f'python run_lab.py {trial_prepath}_spec.json {spec_name} eval@{prename}_ckpt-{ckpt}'
    logger.info(f'Running online eval for ckpt-{ckpt}')
    return util.run_cmd(eval_cmd)
def plot_trial(trial_spec, info_space):
    '''
    Build and plot the single-pane trial graph: one mean/error-envelope reward trace per aeb,
    gathered across all sessions of the trial, each aeb drawn in its own palette color.
    @returns the combined figure
    '''
    trial_prepath = util.get_prepath(trial_spec, info_space)
    predir, _, _, _, _, _ = util.prepath_split(trial_prepath)
    session_datas = session_datas_from_file(predir, trial_spec, info_space.get('trial'))
    # any session works as the reference for the set of aebs; take the first key
    first_key = list(session_datas.keys())[0]
    ref_session_data = session_datas[first_key]
    max_tick_unit = ps.get(trial_spec, 'env.0.max_tick_unit')
    palette = viz.get_palette(len(ref_session_data))

    fig = None
    for idx, (a, e, b) in enumerate(ref_session_data):
        aeb = (a, e, b)
        rewards_df = gather_aeb_rewards_df(aeb, session_datas, max_tick_unit)
        aeb_fig = build_aeb_reward_fig(rewards_df, f'{a}{e}{b}', palette[idx], max_tick_unit)
        if fig is None:
            # the first aeb figure becomes the base figure
            fig = aeb_fig
            continue
        fig.add_traces(aeb_fig.data)

    fig.layout.update(
        title=f'trial graph: {trial_spec["name"]} t{info_space.get("trial")}, {len(session_datas)} sessions',
        width=500,
        height=600,
    )
    viz.plot(fig)
    return fig
def session_retro_eval(session):
    '''Session-level retro_eval, used at the end of a session to rerun failed evals'''
    session_prepath = util.get_prepath(session.spec, session.info_space, unit='session')
    # retro_eval works on the directory component of the prepath
    predir, _, _, _, _, _ = util.prepath_split(session_prepath)
    retro_eval(predir, session.index)
def init_thread_vars(spec, info_space, unit):
    '''
    Set up per-thread variables for a lab unit that are not carried over properly from master:
    tick the unit if it has no index yet, and re-init the logger with PREPATH when logger.to_init says so.
    '''
    unit_is_unset = info_space.get(unit) is None
    if unit_is_unset:
        info_space.tick(unit)
    if not logger.to_init(spec, info_space):
        return
    # the logger reads PREPATH on (re)load, so set it before reloading
    os.environ['PREPATH'] = util.get_prepath(spec, info_space)
    reload(logger)
def save_algorithm(algorithm, ckpt=None):
    '''
    Save the model and optimizer of every net listed in algorithm.net_names.
    Files go to the session prepath, suffixed with `_ckpt{ckpt}` when ckpt is given.
    Unless ckpt is 'last', the `_ckptlast*.pth` files under the resulting prepath are then removed.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    save_prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    if ckpt is not None:
        save_prepath = f'{save_prepath}_ckpt{ckpt}'
    logger.info(f'Saving algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        net = getattr(algorithm, net_name)
        save(net, f'{save_prepath}_{net_name}_model.pth')
        save(net.optim, f'{save_prepath}_{net_name}_optim.pth')
    if ckpt == 'last':
        return
    # remove checkpoint files at the end
    # NOTE(review): the glob is built from the (possibly ckpt-suffixed) prepath - confirm this is intended
    ckpt_path = f'{save_prepath}_ckptlast*.pth'
    subprocess.run(
        f'rm {ckpt_path}',
        cwd=ROOT_DIR,
        shell=True,
        stderr=DEVNULL,
        stdout=DEVNULL,
        close_fds=True,
    )
    logger.info(f'Removed all checkpoint model files {ckpt_path}')
def run_by_mode(spec_file, spec_name, lab_mode):
    '''
    Load the spec and run the lab according to lab_mode.

    Handled modes:
    - 'search': tick the experiment and run an Experiment
    - 'train' or 'train@{prepath}': run a Trial; with '@' the spec and info_space are restored from the prepath
    - 'enjoy@{prepath}': restore spec and info_space from the prepath and run a Session
    - 'dev': override the spec with util.override_dev_spec and run a Trial
    Any other mode only logs a warning.

    @param {str} spec_file Spec file passed to spec_util.get
    @param {str} spec_name Name of the spec inside spec_file
    @param {str} lab_mode Mode string; the part before '@' is exported as the lab_mode env var
    '''
    logger.info(f'Running lab in mode: {lab_mode}')
    spec = spec_util.get(spec_file, spec_name)
    info_space = InfoSpace()
    analysis.save_spec(spec, info_space, unit='experiment')

    # '@' is reserved for 'enjoy@{prepath}'
    os.environ['lab_mode'] = lab_mode.split('@')[0]
    os.environ['PREPATH'] = util.get_prepath(spec, info_space)
    reload(logger)  # to set PREPATH properly

    if lab_mode == 'search':
        info_space.tick('experiment')
        Experiment(spec, info_space).run()
    elif lab_mode.startswith('train'):
        if '@' in lab_mode:
            prepath = lab_mode.split('@')[1]
            spec, info_space = util.prepath_to_spec_info_space(prepath)
        else:
            info_space.tick('trial')
        Trial(spec, info_space).run()
    elif lab_mode.startswith('enjoy'):
        # a second, byte-identical `enjoy` branch used to follow this one; it was unreachable and has been dropped
        prepath = lab_mode.split('@')[1]
        spec, info_space = util.prepath_to_spec_info_space(prepath)
        Session(spec, info_space).run()
    elif lab_mode == 'dev':
        spec = util.override_dev_spec(spec)
        info_space.tick('trial')
        Trial(spec, info_space).run()
    else:
        logger.warn('lab_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')
def session_data_dict_for_dist(spec, info_space):
    '''
    Retrieve the session datas from file once a trial with distributed sessions is done,
    to avoid messy multiprocessing data communication.
    @returns {list} session datas ordered by their sorted keys
    '''
    trial_prepath = util.get_prepath(spec, info_space)
    predir, _, _, _, _, _ = util.prepath_split(trial_prepath)
    data_by_key = session_data_dict_from_file(predir, info_space.get('trial'))
    ordered_datas = []
    for key in sorted(data_by_key.keys()):
        ordered_datas.append(data_by_key[key])
    return ordered_datas
def plot_trial(trial_spec, info_space):
    '''
    Build and plot the single-pane trial graph: one mean/error-envelope reward trace per aeb
    across all sessions, each aeb drawn in its own palette color.
    @returns the combined figure
    '''
    trial_prepath = util.get_prepath(trial_spec, info_space)
    predir = util.prepath_to_predir(trial_prepath)
    session_datas = session_datas_from_file(predir, trial_spec, info_space.get('trial'))
    # session 0 serves as the reference for the set of aebs
    ref_session_data = session_datas[0]
    palette = get_palette(len(ref_session_data))

    fig = None
    for idx, (a, e, b) in enumerate(ref_session_data):
        rewards_df = gather_aeb_rewards_df((a, e, b), session_datas)
        aeb_fig = build_aeb_reward_fig(rewards_df, f'{a}{e}{b}', palette[idx])
        if fig is None:
            fig = aeb_fig
            continue
        fig.data.extend(aeb_fig.data)

    graph_title = f'trial graph: {trial_spec["name"]} t{info_space.get("trial")}'
    fig.layout.update(title=graph_title, width=500, height=600)
    viz.plot(fig)
    return fig
def tick(spec, unit):
    '''
    Advance the index of a lab unit (experiment, trial, session) stored in spec['meta'].
    Lower units are reset to -1 so that their next tick brings them to 0;
    higher units still at -1 are bumped so the hierarchy is valid.
    Afterwards the prepath and the per-folder prepaths (graph, info, log, model) are stored
    in the meta spec and their directories are created.
    Example:
    spec_util.tick(spec, 'session')
    session = Session(spec)
    @returns the same spec, mutated in place
    '''
    meta_spec = spec['meta']
    if unit not in ('experiment', 'trial', 'session'):
        raise ValueError(f'Unrecognized lab unit to tick: {unit}')

    if unit == 'experiment':
        meta_spec['experiment_ts'] = util.get_ts()
        meta_spec['experiment'] += 1
        meta_spec['trial'] = -1
        meta_spec['session'] = -1
    else:
        # both trial and session ticks require an initialized experiment index
        if meta_spec['experiment'] == -1:
            meta_spec['experiment'] += 1
        if unit == 'trial':
            meta_spec['trial'] += 1
            meta_spec['session'] = -1
        else:
            if meta_spec['trial'] == -1:
                meta_spec['trial'] += 1
            meta_spec['session'] += 1

    # set prepath since it is determined at this point
    prepath = util.get_prepath(spec, unit)
    meta_spec['prepath'] = prepath
    for folder in ('graph', 'info', 'log', 'model'):
        folder_prepath = util.insert_folder(prepath, folder)
        folder_predir = os.path.dirname(f'{ROOT_DIR}/{folder_prepath}')
        os.makedirs(folder_predir, exist_ok=True)
        assert os.path.exists(folder_predir)
        meta_spec[f'{folder}_prepath'] = folder_prepath
    return spec
def run_by_mode(spec_file, spec_name, lab_mode):
    '''
    Load the spec, export PREPATH and lab_mode to the environment, then dispatch on lab_mode:
    search, train, enjoy@{prepath}, generate_benchmark, benchmark or dev.
    An unrecognized mode only logs a warning.
    '''
    logger.info(f'Running lab in mode: {lab_mode}')
    spec = spec_util.get(spec_file, spec_name)
    info_space = InfoSpace()
    os.environ['PREPATH'] = util.get_prepath(spec, info_space)
    reload(logger)  # to set PREPATH properly
    # expose to runtime, '@' is reserved for 'enjoy@{prepath}'
    os.environ['lab_mode'] = lab_mode.split('@')[0]

    if lab_mode == 'search':
        info_space.tick('experiment')
        Experiment(spec, info_space).run()
        return
    if lab_mode == 'train':
        info_space.tick('trial')
        Trial(spec, info_space).run()
        return
    if lab_mode.startswith('enjoy'):
        enjoy_prepath = lab_mode.split('@')[1]
        spec, info_space = util.prepath_to_spec_info_space(enjoy_prepath)
        Session(spec, info_space).run()
        return
    if lab_mode == 'generate_benchmark':
        benchmarker.generate_specs(spec, const='agent')
        return
    if lab_mode == 'benchmark':
        # TODO allow changing const to env
        run_benchmark(spec, const='agent')
        return
    if lab_mode == 'dev':
        spec = util.override_dev_spec(spec)
        info_space.tick('trial')
        Trial(spec, info_space).run()
        return
    logger.warn('lab_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')
def save_experiment_data(spec, info_space, experiment_df, experiment_fig):
    '''Write experiment_df as csv and experiment_fig as png under the experiment prepath, then plot the best sessions'''
    exp_prepath = util.get_prepath(spec, info_space, unit='experiment')
    logger.info(f'Saving experiment data to {exp_prepath}')
    df_path = f'{exp_prepath}_experiment_df.csv'
    graph_path = f'{exp_prepath}_experiment_graph.png'
    util.write(experiment_df, df_path)
    viz.save_image(experiment_fig, graph_path)
    # TODO tmp hack
    plot_best_sessions(experiment_df, exp_prepath)
def save_experiment_data(spec, info_space, experiment_df, experiment_fig):
    '''Write experiment_df as csv and experiment_fig as png under the experiment prepath, then zip the whole predir'''
    exp_prepath = util.get_prepath(spec, info_space, unit='experiment')
    logger.info(f'Saving experiment data to {exp_prepath}')
    df_path = f'{exp_prepath}_experiment_df.csv'
    graph_path = f'{exp_prepath}_experiment_graph.png'
    util.write(experiment_df, df_path)
    viz.save_image(experiment_fig, graph_path)
    # zip for ease of upload; the archive is created next to predir as {predir}.zip
    predir, _, _, _, _, _ = util.prepath_split(exp_prepath)
    shutil.make_archive(predir, 'zip', predir)
    logger.info(f'All experiment data zipped to {predir}.zip')
def load_algorithm(algorithm):
    '''Load all the nets of an algorithm from their model files under the session prepath'''
    agent = algorithm.agent
    net_names = algorithm.net_names
    session_prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        load(getattr(algorithm, net_name), f'{session_prepath}_model_{net_name}.pth')
def save_trial_data(spec, info_space, trial_fitness_df, trial_fig):
    '''
    Save the trial data: trial_fitness_df as csv and trial_fig as png under the trial prepath.
    When the lab mode is 'train' or 'eval', the whole predir is additionally zipped to {predir}.zip.
    @param {dict} spec The trial spec, used to resolve the prepath
    @param info_space The info space, used to resolve the prepath
    @param trial_fitness_df Data written to {prepath}_trial_fitness_df.csv
    @param trial_fig Figure saved to {prepath}_trial_graph.png
    '''
    prepath = util.get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {prepath}')
    util.write(trial_fitness_df, f'{prepath}_trial_fitness_df.csv')
    viz.save_image(trial_fig, f'{prepath}_trial_graph.png')
    # membership test: comparing the mode to the tuple with `==` was never true, so the archive was never made
    if util.get_lab_mode() in ('train', 'eval'):
        predir, _, _, _, _, _ = util.prepath_split(prepath)
        shutil.make_archive(predir, 'zip', predir)
        logger.info(f'All trial data zipped to {predir}.zip')
def save_algorithm(algorithm, epi=None):
    '''
    Save the model of every net listed in algorithm.net_names under the session prepath,
    suffixed with `_epi_{epi}` when epi is given.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    save_prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    if epi is not None:
        save_prepath = f'{save_prepath}_epi_{epi}'
    logger.info(f'Saving algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        save(getattr(algorithm, net_name), f'{save_prepath}_model_{net_name}.pth')
def run_trial(experiment, config):
    '''
    Run one trial of the experiment from a search config and persist its result.
    `trial_index` is popped from config (config is mutated), the trial is run on a copy of the
    experiment info_space, and the merged config/fitness data is written to {prepath}_trial_data.json.
    @returns {dict} trial_data
    '''
    trial_index = config.pop('trial_index')
    spec = spec_from_config(experiment, config)
    # copy so setting the trial index does not leak into the experiment's own info_space
    info_space = deepcopy(experiment.info_space)
    info_space.set('trial', trial_index)
    trial_fitness_df = experiment.init_trial_and_run(spec, info_space)
    fitness_vec = trial_fitness_df.iloc[0].to_dict()
    fitness = analysis.calc_fitness(trial_fitness_df)

    trial_data = dict(config)
    trial_data.update(fitness_vec)
    trial_data['fitness'] = fitness
    trial_data['trial_index'] = trial_index

    trial_prepath = util.get_prepath(spec, info_space, unit='trial')
    util.write(trial_data, f'{trial_prepath}_trial_data.json')
    return trial_data
def save_algorithm(algorithm, ckpt=None):
    '''
    Save the model and optimizer of every net listed in algorithm.net_names under the session prepath,
    suffixed with `_ckpt-{ckpt}` when ckpt is given.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    save_prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    if ckpt is not None:
        save_prepath = f'{save_prepath}_ckpt-{ckpt}'
    logger.info(f'Saving algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        net = getattr(algorithm, net_name)
        save(net, f'{save_prepath}_{net_name}_model.pth')
        save(net.optim, f'{save_prepath}_{net_name}_optim.pth')
def save_session_data(spec, info_space, session_data, session_fitness_df, session_fig, body_df_kind='eval'):
    '''
    Save the session data: session_df, session_fitness_df, session_graph.
    File names get a 'train' prefix when body_df_kind is 'train'.
    session_data is saved as session_df; multi-indexed with (a,e,b), 3 extra levels
    to read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3], index_col=0)
    session_data = util.session_df_to_data(session_df)
    '''
    prepath = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {prepath}')
    if body_df_kind == 'train':
        prefix = 'train'
    else:
        prefix = ''
    # the session_df itself is only saved when PREPATH does not mention retro_analyze
    in_retro_analyze = 'retro_analyze' in os.environ['PREPATH']
    if not in_retro_analyze:
        save_session_df(session_data, f'{prepath}_{prefix}session_df.csv', info_space)
    util.write(session_fitness_df, f'{prepath}_{prefix}session_fitness_df.csv')
    viz.save_image(session_fig, f'{prepath}_{prefix}session_graph.png')
def save_session_data(spec, info_space, session_data, session_fitness_df, session_fig):
    '''
    Save the session data: session_df, session_fitness_df, session_graph.
    session_data is saved as session_df; multi-indexed with (a,e,b), 3 extra levels
    to read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3])
    session_data = util.session_df_to_data(session_df)
    '''
    prepath = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {prepath}')
    # the session_df itself is only saved when PREPATH does not mention retro_analyze
    in_retro_analyze = 'retro_analyze' in os.environ['PREPATH']
    if not in_retro_analyze:
        util.write(pd.concat(session_data, axis=1), f'{prepath}_session_df.csv')
    util.write(session_fitness_df, f'{prepath}_session_fitness_df.csv')
    viz.save_image(session_fig, f'{prepath}_session_graph.png')
def load_algorithm(algorithm):
    '''
    Load the model and optimizer of every net listed in algorithm.net_names.
    In eval lab modes the files come from info_space.eval_model_prepath, otherwise from the session prepath.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    # load specific model in eval mode
    load_prepath = (
        agent.info_space.eval_model_prepath
        if util.in_eval_lab_modes()
        else util.get_prepath(agent.spec, agent.info_space, unit='session')
    )
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        net = getattr(algorithm, net_name)
        load(net, f'{load_prepath}_{net_name}_model.pth')
        load(net.optim, f'{load_prepath}_{net_name}_optim.pth')
def run_trial(experiment, config):
    '''
    Run one trial of the experiment from a search config and persist its result.
    `trial_index` is popped from config (config is mutated), the trial is run on a copy of the
    experiment info_space, and the merged config/fitness data is written to {prepath}_trial_data.json.
    @returns {dict} trial_data
    '''
    trial_index = config.pop('trial_index')
    spec = spec_from_config(experiment, config)
    # copy so setting the trial index does not leak into the experiment's own info_space
    info_space = deepcopy(experiment.info_space)
    info_space.set('trial', trial_index)
    trial_fitness_df = experiment.init_trial_and_run(spec, info_space)
    first_row = trial_fitness_df.iloc[0]
    fitness_vec = first_row.to_dict()
    fitness = analysis.calc_fitness(trial_fitness_df)

    trial_data = dict(config)
    trial_data.update(fitness_vec)
    trial_data.update({'fitness': fitness, 'trial_index': trial_index})

    trial_prepath = util.get_prepath(spec, info_space, unit='trial')
    util.write(trial_data, f'{trial_prepath}_trial_data.json')
    return trial_data
def calc_trial_df(trial_spec, info_space):
    '''
    Calculate trial_df as the mean of all session dfs.
    Per aeb, the session dfs are sorted and indexed by max_tick_unit, concatenated,
    and averaged over equal index values; the per-aeb results are joined column-wise.
    '''
    from slm_lab.experiment import retro_analysis
    trial_prepath = util.get_prepath(trial_spec, info_space)
    predir, _, _, _, _, _ = util.prepath_split(trial_prepath)
    session_datas = retro_analysis.session_datas_from_file(
        predir, trial_spec, info_space.get('trial'), ps.get(info_space, 'ckpt'))
    # use the first session as the reference for the set of aebs
    first_key = list(session_datas.keys())[0]
    dfs_by_aeb = {aeb: [] for aeb in session_datas[first_key]}
    max_tick_unit = ps.get(trial_spec, 'meta.max_tick_unit')

    for session_data in session_datas.values():
        for aeb, aeb_df in session_data.items():
            sorted_df = aeb_df.sort_values(by=[max_tick_unit])
            dfs_by_aeb[aeb].append(sorted_df.set_index(max_tick_unit, drop=False))

    trial_data = {
        aeb: pd.concat(df_list).groupby(level=0).mean().reset_index(drop=True)
        for aeb, df_list in dfs_by_aeb.items()
    }
    return pd.concat(trial_data, axis=1)
def analyze_session(session, eager_analyze_trial=False, tmp_space_session_sub=False):
    '''
    Gather session data, plot, and return fitness df for high level agg.
    The 'train' body data is analyzed first, then the 'eval' body data; the eval result is returned.
    With eager_analyze_trial, the eval trial is also analyzed right away for a live trial graph.
    @returns {DataFrame} session_fitness_df Single-row df of session fitness vector (avg over aeb), indexed with session index.
    '''
    logger.info('Analyzing session')
    train_data = get_session_data(session, body_df_kind='train')
    _analyze_session(session, train_data, body_df_kind='train')
    eval_data = get_session_data(session, body_df_kind='eval', tmp_space_session_sub=tmp_space_session_sub)
    session_fitness_df = _analyze_session(session, eval_data, body_df_kind='eval')
    if not eager_analyze_trial:
        return session_fitness_df

    # for live trial graph, analyze trial after analyzing session, this only takes a second
    from slm_lab.experiment import retro_analysis
    session_prepath = util.get_prepath(session.spec, session.info_space, unit='session')
    # use new ones to prevent side effects
    fresh_spec, fresh_info_space = util.prepath_to_spec_info_space(session_prepath)
    predir, _, _, _, _, _ = util.prepath_split(session_prepath)
    retro_analysis.analyze_eval_trial(fresh_spec, fresh_info_space, predir)
    return session_fitness_df
def save_session_data(spec, info_space, session_mdp_data, session_data, session_fitness_df, session_fig):
    '''
    Save the session data: session_mdp_df, session_df, session_fitness_df, session_graph.
    session_data is saved as session_df; multi-indexed with (a,e,b), 3 extra levels
    to read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3])
    session_data = util.session_df_to_data(session_df)
    Likewise for session_mdp_df
    '''
    prepath = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {prepath}')
    from_retro_analysis = session_mdp_data is None
    if not from_retro_analysis:
        # the mdp df is still built although its saving is currently disabled
        session_mdp_df = pd.concat(session_mdp_data, axis=1)
        # TODO reactivate saving when get to the transition matrix research
        # util.write(session_mdp_df, f'{prepath}_session_mdp_df.csv')
        util.write(pd.concat(session_data, axis=1), f'{prepath}_session_df.csv')
    util.write(session_fitness_df, f'{prepath}_session_fitness_df.csv')
    viz.save_image(session_fig, f'{prepath}_session_graph.png')
def __init__(self, spec):
    '''
    Initialize the env wrapper from spec: run the parent initializer, register a custom gym env if needed,
    resolve max_t, and wrap the env in a video-recording Monitor when video recording is enabled.
    NOTE(review): the gym env construction below is commented out, so self.u_env is presumably
    provided by the parent initializer or set elsewhere - confirm.
    @param {dict} spec The lab spec; 'meta.random_seed' is read from it
    '''
    super().__init__(spec)
    try_register_env(spec)  # register if it's a custom gym env
    # seed is only consumed by the disabled construction code below
    seed = ps.get(spec, 'meta.random_seed')
    # if self.is_venv:  # make vector environment
    #     self.u_env = make_gym_venv(self.name, self.num_envs, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    # else:
    #     self.u_env = make_gym_env(self.name, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    # self._set_attr_from_u_env(self.u_env)
    # fall back to the episode step limit of the env spec when max_t is not set
    self.max_t = self.max_t or self.u_env.spec.max_episode_steps
    assert self.max_t is not None
    logger.info(util.self_desc(self))
    if util.to_record_video():
        # videos go into a 'video' folder inserted into the session prepath
        video_prepath = util.insert_folder(
            util.get_prepath(spec, unit='session'), 'video')
        self.u_env = gym.wrappers.Monitor(self.u_env, video_prepath, force=True)
        logger.info(f'Recorded videos will be saved in {video_prepath}')
def save_session_data(spec, info_space, session_mdp_data, session_data, session_fitness_df, session_fig):
    '''
    Save the session data: session_mdp_df, session_df, session_fitness_df, session_graph.
    The session graph is only saved when the lab_mode env var is 'train'.
    session_data is saved as session_df; multi-indexed with (a,e,b), 3 extra levels
    to read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3])
    session_data = util.session_df_to_data(session_df)
    Likewise for session_mdp_df
    '''
    prepath = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {prepath}')
    from_retro_analysis = session_mdp_data is None
    if not from_retro_analysis:
        # the mdp df is still built although its saving is currently disabled
        session_mdp_df = pd.concat(session_mdp_data, axis=1)
        # TODO reactivate saving when get to the transition matrix research
        # util.write(session_mdp_df, f'{prepath}_session_mdp_df.csv')
        util.write(pd.concat(session_data, axis=1), f'{prepath}_session_df.csv')
    util.write(session_fitness_df, f'{prepath}_session_fitness_df.csv')
    # TODO replaced by plot_best_sessions until Feb 2018 https://github.com/plotly/plotly.py/issues/880
    in_train_mode = os.environ.get('lab_mode') == 'train'
    if in_train_mode:
        viz.save_image(session_fig, f'{prepath}_session_graph.png')
def plot_trial(trial_spec, info_space):
    '''
    Plot the trial graph in one pane: for each aeb, the mean and error envelope of the reward graphs
    from all sessions, with a distinct palette color per aeb.
    @returns the combined figure
    '''
    trial_prepath = util.get_prepath(trial_spec, info_space)
    predir = util.prepath_to_predir(trial_prepath)
    session_datas = session_datas_from_file(predir, trial_spec, info_space.get('trial'))
    # session 0 serves as the reference for the set of aebs
    ref_session_data = session_datas[0]
    aeb_count = len(ref_session_data)
    palette = get_palette(aeb_count)

    fig = None
    for idx, (a, e, b) in enumerate(ref_session_data):
        aeb = (a, e, b)
        aeb_label = f'{a}{e}{b}'
        aeb_fig = build_aeb_reward_fig(gather_aeb_rewards_df(aeb, session_datas), aeb_label, palette[idx])
        if fig is not None:
            fig.data.extend(aeb_fig.data)
        else:
            fig = aeb_fig

    fig.layout.update(
        title=f'trial graph: {trial_spec["name"]} t{info_space.get("trial")}',
        width=500,
        height=600,
    )
    viz.plot(fig)
    return fig
def save_trial_data(spec, info_space, trial_fitness_df, trial_fig):
    '''Write trial_fitness_df as csv and trial_fig as png under the trial prepath'''
    trial_prepath = util.get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {trial_prepath}')
    fitness_path = f'{trial_prepath}_trial_fitness_df.csv'
    graph_path = f'{trial_prepath}_trial_graph.png'
    util.write(trial_fitness_df, fitness_path)
    viz.save_image(trial_fig, graph_path)
def save(spec, unit='experiment'):
    '''Write the spec as json to {prepath}_spec.json for the given lab unit. Called at Experiment or Trial init.'''
    spec_path = f'{util.get_prepath(spec, unit)}_spec.json'
    util.write(spec, spec_path)
def save_experiment_data(spec, info_space, experiment_df, experiment_fig):
    '''Write experiment_df as csv and experiment_fig as png under the experiment prepath'''
    exp_prepath = util.get_prepath(spec, info_space, unit='experiment')
    logger.info(f'Saving experiment data to {exp_prepath}')
    df_path = f'{exp_prepath}_experiment_df.csv'
    graph_path = f'{exp_prepath}_experiment_graph.png'
    util.write(experiment_df, df_path)
    viz.save_image(experiment_fig, graph_path)
def save_spec(spec, info_space, unit='experiment'):
    '''Write the spec as json to {prepath}_spec.json for the given lab unit. Called at Experiment or Trial init.'''
    spec_path = f'{util.get_prepath(spec, info_space, unit)}_spec.json'
    util.write(spec, spec_path)
import os # NOTE increase if needed. Pytorch thread overusage https://github.com/pytorch/pytorch/issues/975 os.environ['OMP_NUM_THREADS'] = '1' from slm_lab.agent import Agent from slm_lab.env import OpenAIEnv from slm_lab.experiment import analysis from slm_lab.experiment.monitor import Body, InfoSpace, enable_aeb_space from slm_lab.lib import logger, util from slm_lab.spec import spec_util from irl_benchmark.rl.slm_session import Session spec = spec_util.get(spec_file='ppo.json', spec_name='ppo_mlp_shared_pendulum') info_space = InfoSpace() os.environ['PREPATH'] = util.get_prepath(spec, info_space) os.environ['lab_mode'] = 'training' spec['env'][0]['max_episode'] = 10 session = Session(spec, info_space) data, agent = session.run() print(data)