def _anonymize_config(from_file, storage_name, root_dir):
    logger = create_logger(name="ANONYMIZE CONFIG", loglevel=logging.INFO)
    logger.info("\nANONYMIZING Config")

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, logger)]

    for storage_to_copy in storage_dirs:
        logger.info(str(storage_to_copy))
        seeds_to_copy = get_all_seeds(storage_to_copy)

        # Finds the path to all the config files
        for seed_dir in seeds_to_copy:
            config_path = seed_dir / 'config.json'
            config = load_dict_from_json(str(config_path))

            if 'experiment_name' in config:
                logger.info(f"ANONYMIZE -- Removing experiment_name from {str(config_path)}")
                del config['experiment_name']
            else:
                logger.info(f"PASS -- {str(config_path)} has no experiment_name.")

            save_dict_to_json(config, filename=str(config_path))
def _update_config_unique(from_file, storage_name, root_dir):
    logger = create_logger(name="VERIFY CONFIG", loglevel=logging.INFO)
    logger.info("\nVERIFYING Config Unique")

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, logger)]

    for storage_to_copy in storage_dirs:
        logger.info(str(storage_to_copy))
        seeds_to_copy = get_all_seeds(storage_to_copy)

        # Finds the path to all the config files
        for seed_dir in seeds_to_copy:
            config_path = seed_dir / 'config.json'
            config_unique_path = seed_dir / 'config_unique.json'
            config = load_config_from_json(str(config_path))
            config_unique_dict = load_dict_from_json(str(config_unique_path))

            try:
                # Checks that config_unique is coherent with config
                validate_config_unique(config, config_unique_dict)
            except Exception:
                # If not, updates config_unique from config
                logger.info(f"{str(seed_dir)} config_unique is not coherent with config.\n"
                            f"Updating {str(config_unique_path)}")

                for key in config_unique_dict.keys():
                    config_unique_dict[key] = config.__dict__[key]

                # Validates again
                validate_config_unique(config, config_unique_dict)

                # Saves the updated config_unique
                save_dict_to_json(config_unique_dict, filename=str(config_unique_path))
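# --- Illustrative sketch (not part of the original module; file contents are hypothetical) ---
# Assumed relationship between the two files handled above: 'config.json' holds the full
# hyperparameter set for a seed, while 'config_unique.json' holds only the subset that
# varies across the experiments of a search, e.g.:
#
#   config.json        -> {"alg_name": "ppo", "task_name": "cartpole",
#                          "seed": 10, "lr": 0.001, "batch_size": 64}
#   config_unique.json -> {"lr": 0.001}
#
# _update_config_unique() re-copies each key of config_unique from config so that the
# two files stay coherent when config.json has been edited after creation.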
def _work_on_schedule(storage_dirs, n_processes, n_experiments_per_proc, use_pbar, logger, root_dir,
                      process_i=0):
    call_i = 0

    try:
        time.sleep(np.random.uniform(low=0., high=1.5))

        # For all storage_dirs...
        for storage_dir in storage_dirs:

            # Gets unhatched seed directories for the current storage_dir
            unhatched_seeds = get_some_seeds(storage_dir, file_check='UNHATCHED')

            while len(unhatched_seeds) > 0:
                start_time = time.time()

                # Checks whether this process has exceeded its number of experiments to run
                if call_i >= n_experiments_per_proc:
                    logger.info(f"Limit of {n_experiments_per_proc} experiments reached.")
                    break

                # Selects the next seed directory
                unhatched_seeds = get_some_seeds(storage_dir, file_check='UNHATCHED')

                if len(unhatched_seeds) > 0:
                    seed_dir = unhatched_seeds[0]
                else:
                    logger.info(f"{storage_dir} - No more unhatched seeds")
                    break

                # Removes its unhatched flag (atomic claim: only one process can succeed)
                try:
                    os.remove(str(seed_dir / 'UNHATCHED'))
                except FileNotFoundError:
                    logger.info(f"{seed_dir} - Already hatched")
                    continue

                # Loads the config and tries to train the model
                try:
                    config = load_config_from_json(str(seed_dir / 'config.json'))

                    dir_tree = DirectoryTree.init_from_seed_path(seed_dir, root=root_dir)

                    experiment_logger = create_logger(
                        name=f'PROCESS{process_i}:'
                             f'{dir_tree.storage_dir.name}/'
                             f'{dir_tree.experiment_dir.name}/'
                             f'{dir_tree.seed_dir.name}',
                        loglevel=logging.INFO,
                        logfile=dir_tree.seed_dir / 'logger.out',
                        streamHandle=not use_pbar)

                    if use_pbar:
                        pbar = tqdm(position=process_i + (1 + n_processes) * call_i)
                        pbar.desc = f"PROCESS{process_i}:"
                    else:
                        pbar = None

                    logger.info(f"{seed_dir} - Launching...")

                    main(config=config, dir_tree=dir_tree, logger=experiment_logger, pbar=pbar)

                    open(str(seed_dir / 'COMPLETED'), 'w+').close()
                    call_i += 1

                    end_time = time.time()
                    logger.info(f"{seed_dir} - "
                                f"COMPLETED ({formatted_time_diff(total_time_seconds=end_time - start_time)} elapsed)")

                except Exception as e:
                    with open(str(seed_dir / 'CRASH.txt'), 'w+') as f:
                        f.write(f'Crashed at: {datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")}.\n')
                        f.write(f'Error: {e}\n')
                        f.write(traceback.format_exc())

            # If all experiments have been completed (or have at least been attempted but crashed)...
            all_seeds = get_all_seeds(storage_dir)
            unhatched_seeds = get_some_seeds(storage_dir, file_check='UNHATCHED')
            crashed_seeds = get_some_seeds(storage_dir, file_check='CRASH.txt')
            completed_seeds = get_some_seeds(storage_dir, file_check='COMPLETED')

            if len(unhatched_seeds) == 0 and len(crashed_seeds) == 0 \
                    and len(completed_seeds) == len(all_seeds):

                # Creates comparative plots
                if not (storage_dir / 'PLOT_ARRAYS_ONGOING').exists() \
                        and not (storage_dir / 'PLOT_ARRAYS_COMPLETED').exists():
                    open(str(storage_dir / 'PLOT_ARRAYS_ONGOING'), 'w+').close()
                    logger.info(f"{storage_dir} - MAKING COMPARATIVE PLOTS")

                    try:
                        create_plot_arrays(from_file=None,
                                           storage_name=storage_dir.name,
                                           root_dir=root_dir,
                                           remove_none=True,
                                           logger=logger,
                                           plots_to_make=alfred.defaults.DEFAULT_PLOTS_ARRAYS_TO_MAKE)

                        open(str(storage_dir / 'PLOT_ARRAYS_COMPLETED'), 'w+').close()

                    except Exception as e:
                        logger.info(f"{type(e)}: unable to plot comparative graphs"
                                    f"\n\n{e}\n{traceback.format_exc()}")

                    os.remove(str(storage_dir / 'PLOT_ARRAYS_ONGOING'))

            # If all experiments are completed, benchmarks them
            if all_seeds == completed_seeds \
                    and not (storage_dir / "summary" / "SUMMARY_ONGOING").exists():

                if not (storage_dir / "summary" / "SUMMARY_ONGOING").exists() \
                        and not (storage_dir / "summary" / "SUMMARY_COMPLETED").exists():
                    os.makedirs(str(storage_dir / "summary"), exist_ok=True)
                    open(str(storage_dir / "summary" / 'SUMMARY_ONGOING'), 'w+').close()
                    logger.info(f"{storage_dir} - SUMMARIZING SEARCH")

                    try:
                        summarize_search(storage_name=storage_dir.name,
                                         x_metric=alfred.defaults.DEFAULT_BENCHMARK_X_METRIC,
                                         y_metric=alfred.defaults.DEFAULT_BENCHMARK_Y_METRIC,
                                         y_error_bars="bootstrapped_CI",
                                         n_eval_runs=None,
                                         performance_metric=alfred.defaults.DEFAULT_BENCHMARK_PERFORMANCE_METRIC,
                                         performance_aggregation="mean_on_last_20_percents",
                                         re_run_if_exists=False,
                                         make_performance_chart=True,
                                         make_learning_plots=True,
                                         logger=logger,
                                         root_dir=root_dir)

                        os.remove(str(storage_dir / "summary" / 'SUMMARY_ONGOING'))
                        open(str(storage_dir / "summary" / 'SUMMARY_COMPLETED'), 'w+').close()

                    except Exception as e:
                        logger.info(f"{type(e)}: unable to run 'summarize_search'"
                                    f"\n{e}\n{traceback.format_exc()}")
                        os.remove(str(storage_dir / "summary" / 'SUMMARY_ONGOING'))
                        open(str(storage_dir / "summary" / 'SUMMARY_FAILED'), 'w+').close()

            if call_i >= n_experiments_per_proc:
                break

        logger.info("Done. Shutting down.")

    except Exception as e:
        logger.info(f"The process CRASHED with the following error:\n{e}")

    return call_i
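# --- Illustrative sketch (not part of the original module) ---
# _work_on_schedule() coordinates concurrent workers through empty flag-files dropped
# in each seed directory, rather than through locks:
#
#   UNHATCHED  -> seed is waiting to be picked up; os.remove() of this file is the
#                 atomic "claim" step (only one process can succeed)
#   COMPLETED  -> main() returned without raising
#   CRASH.txt  -> main() raised; the traceback is stored in the file itself
#
# A minimal sketch of the claim step, assuming a hypothetical seed_dir:
#
#   try:
#       os.remove(str(seed_dir / 'UNHATCHED'))   # atomic on POSIX filesystems
#   except FileNotFoundError:
#       pass  # another worker claimed this seed first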
def launch_schedule(from_file, storage_name, n_processes, n_experiments_per_proc, use_pbar, check_hash,
                    run_clean_interrupted, root_dir, log_level):
    set_up_alfred()

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Creates the master logger
    logger_id = str(random.randint(1, 999999)).zfill(6)
    master_logger = create_logger(name=f'ID:{logger_id} - MASTER',
                                  loglevel=log_level,
                                  logfile=None,
                                  streamHandle=True)

    # Sanity-checks that storage_dirs exist; if not, they are skipped
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, master_logger)]

    # Sanity-checks that storage_dirs have the correct hash, if required
    if check_hash:
        storage_dirs = [storage_dir for storage_dir in storage_dirs
                        if sanity_check_hash(storage_dir, master_logger)]

    # Continues with the sanity-checked storage_dir list
    for storage_dir in storage_dirs:
        file_handler = create_new_filehandler(master_logger.name,
                                              logfile=storage_dir / 'alfred_launch_schedule_logger.out')
        master_logger.addHandler(file_handler)

    master_logger.debug("Storage Directories to be launched:")
    for storage_dir in storage_dirs:
        master_logger.debug(storage_dir)

    # Logs some info
    master_logger.debug(f"\n\n{'=' * 200}\n"
                        f"\nRunning schedule for:\n"
                        f"\nfrom_file={from_file}"
                        f"\nstorage_name={storage_name}"
                        f"\nn_processes={n_processes}"
                        f"\nn_experiments_per_proc={n_experiments_per_proc}"
                        f"\nuse_pbar={use_pbar}"
                        f"\ncheck_hash={check_hash}"
                        f"\nroot={get_root(root_dir)}"
                        f"\n")

    # Cleans the storage_dirs if asked to
    if run_clean_interrupted:
        for storage_dir in storage_dirs:
            clean_interrupted(from_file=None,
                              storage_name=storage_dir.name,
                              clean_crashes=False,
                              ask_for_validation=False,
                              logger=master_logger,
                              root_dir=root_dir)

    # Launches multiple processes
    if n_processes > 1:
        # TODO: the logger is not natively supported with multiprocessing (queues would be needed)
        n_calls = None  # for now, n_calls is only returned when running with a single process

        processes = []

        for i in range(n_processes):
            # Creates a process logger (logs to the first storage_dir by default)
            logger_id = str(random.randint(1, 999999)).zfill(6)
            logger = create_logger(name=f'ID:{logger_id} - SUBPROCESS_{i}',
                                   loglevel=log_level,
                                   logfile=storage_dirs[0] / 'alfred_launch_schedule_logger.out',
                                   streamHandle=True)

            # Adds logfiles to the logger if there are multiple storage_dirs
            if len(storage_dirs) > 1:
                for storage_dir in storage_dirs[1:]:
                    file_handler = create_new_filehandler(logger.name,
                                                          logfile=storage_dir / 'alfred_launch_schedule_logger.out')
                    logger.addHandler(file_handler)

            # Creates the process
            processes.append(Process(target=_work_on_schedule,
                                     args=(storage_dirs,
                                           n_processes,
                                           n_experiments_per_proc,
                                           use_pbar,
                                           logger,
                                           root_dir,
                                           i)))

        try:
            # Starts the processes
            for p in processes:
                p.start()
                time.sleep(0.5)

            # Waits for all processes to end
            dead_processes = []
            while any([p.is_alive() for p in processes]):

                # Checks whether some processes have died
                for i, p in enumerate(processes):
                    if not p.is_alive() and i not in dead_processes:
                        master_logger.info(f'PROCESS_{i} has died.')
                        dead_processes.append(i)

                time.sleep(3)

        except KeyboardInterrupt:
            master_logger.info("KEYBOARD INTERRUPT. Killing all processes")

            # Terminates all processes
            for process in processes:
                process.terminate()

        master_logger.info("All processes are done. Closing '__main__'\n\n")

    # No additional processes
    else:
        n_calls = _work_on_schedule(storage_dirs=storage_dirs,
                                    n_processes=n_processes,
                                    n_experiments_per_proc=n_experiments_per_proc,
                                    use_pbar=use_pbar,
                                    logger=master_logger,
                                    root_dir=root_dir)

    return n_calls
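# --- Illustrative usage sketch (not part of the original module); argument values
# --- below are hypothetical:
#
#   n_calls = launch_schedule(from_file=None,
#                             storage_name="1_a1b2c3_ppo_cartpole_search",
#                             n_processes=1,
#                             n_experiments_per_proc=np.inf,
#                             use_pbar=True,
#                             check_hash=True,
#                             run_clean_interrupted=False,
#                             root_dir=None,
#                             log_level=logging.INFO)
#
# With n_processes > 1, the same schedule is split across multiprocessing Processes
# and n_calls is None (per the TODO above).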
                  storage_dirs=storage_dirs,
                  visuals_file=visuals_file,
                  additional_curves_file=additional_curves_file,
                  make_box_plot=True,
                  queried_performance_metric=performance_metric,
                  queried_performance_aggregation=performance_aggregation,
                  load_dir="summary",
                  save_dir="benchmark",
                  logger=logger)

    return


if __name__ == '__main__':
    benchmark_args = get_benchmark_args()
    logger = create_logger(name="BENCHMARK - MAIN", loglevel=benchmark_args.log_level)

    # Gets the list of storage_dirs
    storage_dirs = select_storage_dirs(from_file=benchmark_args.from_file,
                                       storage_name=benchmark_args.storage_names,
                                       root_dir=benchmark_args.root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, logger)]
    # Finds a unique filename for the variations plot
    j = 1
    while (dir_tree.storage_dir / f'variations{j}.png').exists():
        j += 1

    fig.savefig(str(dir_tree.storage_dir / f'variations{j}.png'))
    plt.close(fig)

    open(str(dir_tree.storage_dir / 'RANDOM_SEARCH'), 'w+').close()

    # Prints a summary
    logger.info(f'Created directories '
                f'{str(dir_tree.storage_dir)}/experiment{first_experiment_created}-{last_experiment_created}')

    # Saves the list of created storage_dirs in a text file located with the provided schedule_file
    schedule_name = Path(schedule.__file__).parent.stem
    with open(Path(schedule.__file__).parent / f"list_searches_{schedule_name}.txt", "a+") as f:
        for storage_dir in all_storage_dirs:
            f.write(f"{storage_dir.name}\n")

    logger.info(f"\nEach of these experiments contains directories for the following seeds: {SEEDS}")


if __name__ == '__main__':
    logger = create_logger(name="PREPARE_SCHEDULE - MAIN", loglevel=logging.DEBUG)
    kwargs = vars(get_prepare_schedule_args())
    prepare_schedule(**kwargs, logger=logger, ask_for_validation=True)
            else:
                continue

            open(str(seed_dir / 'UNHATCHED'), 'w+').close()
            logger.info('Done')

    else:
        logger.info('No seed_dir to clean.')

    # Cleans the plot flag-files
    if (storage_dir / "PLOT_ARRAYS_ONGOING").exists():
        os.remove(str(storage_dir / "PLOT_ARRAYS_ONGOING"))
    if (storage_dir / "PLOT_ARRAYS_COMPLETED").exists():
        os.remove(str(storage_dir / "PLOT_ARRAYS_COMPLETED"))

    # Cleans the summary folder
    if (storage_dir / "summary").exists():
        shutil.rmtree(storage_dir / "summary")

    # Cleans the benchmark folder
    if (storage_dir / "benchmark").exists():
        shutil.rmtree(storage_dir / "benchmark")


if __name__ == '__main__':
    kwargs = vars(get_clean_interrupted_args())
    logger = create_logger(name="CLEAN_INTERRUPTED - MAIN", loglevel=logging.INFO)
    clean_interrupted(**kwargs, logger=logger)
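# --- Illustrative note (not part of the original module) ---
# After clean_interrupted() runs, a storage_dir is back to its "pre-launch" state for
# the affected seeds: interrupted seeds are re-flagged UNHATCHED and the derived
# artifacts (PLOT_ARRAYS_* flag-files, summary/, benchmark/) are removed, so that
# launch_schedule() can safely re-run the schedule from scratch.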
                                     xlim=x_lim,
                                     ylim=y_lim,
                                     title=y_metric)
                    except Exception as e:
                        logger.debug(f'Plotting error: {e}')

                except FileNotFoundError:
                    logger.debug('Training recorder not found')
                    current_ax.text(0.2, 0.2, "'train_recorder'\nnot found",
                                    transform=current_ax.transAxes,
                                    fontsize=24,
                                    fontweight='bold',
                                    color='red')
                    continue

            plt.tight_layout()
            fig.savefig(str(storage_dir / f'{group_key}_comparative_{y_metric}_over_{x_metric}.png'))
            plt.close(fig)


if __name__ == '__main__':
    logger = create_logger("PLOTS", logging.DEBUG, logfile=None)
    kwargs = vars(get_make_plots_args())
    create_plot_arrays(**kwargs, logger=logger)
logger.info(f"Std(n_transitions) = {np.std(all_n_transitions):.2f}") logger.info(f"Max(n_transitions) = {np.max(all_n_transitions)}") logger.info(f"Min(n_transitions) = {np.min(all_n_transitions)}") def run_from_wrapped(args, logger): env = make_env(task_name=args.task_name) # Run the episodes just like OpenAI Gym for i_episode in range(args.n_episodes): env.reset() done = False logger.debug(env.action_space) logger.debug(env.observation_space) while not done: if args.render: env.render() action = 0 obs, reward, done, info = env.step(action) logger.debug(f"reward={reward}") logger.debug(f'Episode {i_episode} finished') env.close() if __name__ == '__main__': args = get_args() logger = create_logger(name="run_pommerman", loglevel=args.log_level) if args.task_name in TASKS: run_from_wrapped(args, logger) else: run_from_unwrapped(args, logger)
def copy_configs(from_file, storage_name, new_desc, append_new_desc, additional_params, root_dir):
    logger = create_logger(name="COPY CONFIG", loglevel=logging.INFO)
    logger.info("\nCOPYING Config")

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, logger)]

    # Imports the schedule file so that DirectoryTree.git_repos_to_track uses the same settings
    if from_file:
        schedule_file = str([path for path in Path(from_file).parent.iterdir()
                             if 'schedule' in path.name and path.name.endswith('.py')][0])
        # [:-3] drops the '.py' suffix (str.strip('.py') would also strip leading/trailing '.', 'p', 'y' chars)
        schedule_module = ".".join(schedule_file.split('/'))[:-3]
        schedule = import_module(schedule_module)

    for storage_to_copy in storage_dirs:
        seeds_to_copy = get_all_seeds(storage_to_copy)
        config_path_list = []
        config_unique_path_list = []

        # Finds the paths to all the config files
        for seed_dir in seeds_to_copy:
            config_path_list.append(seed_dir / 'config.json')
            config_unique_path_list.append(seed_dir / 'config_unique.json')

        # Extracts the old description from the storage name...
        _, _, _, _, old_desc = \
            DirectoryTree.extract_info_from_storage_name(storage_to_copy.name)

        # ... and gets a fresh storage_name_id and git_hashes from a temporary DirectoryTree
        tmp_dir_tree = DirectoryTree(alg_name="nope", task_name="nap", desc="nip", seed=1, root=root_dir)
        storage_name_id, git_hashes, _, _, _ = \
            DirectoryTree.extract_info_from_storage_name(str(tmp_dir_tree.storage_dir.name))

        if new_desc is None:
            desc = old_desc
        elif append_new_desc:
            desc = f"{old_desc}_{new_desc}"
        else:
            desc = new_desc

        # Creates the new folders from the loaded configs, overwriting desc (and any additional params)
        dir_tree = None
        for config_path, config_unique_path in zip(config_path_list, config_unique_path_list):
            config = load_config_from_json(str(config_path))
            config.desc = desc
            expe_name = config_path.parents[1].name
            experiment_num = int(''.join([s for s in expe_name if s.isdigit()]))
            config_unique_dict = load_dict_from_json(str(config_unique_path))

            if additional_params is not None:
                for (key, value) in additional_params:
                    config.__dict__[key] = value
                    config_unique_dict[key] = value

            dir_tree = DirectoryTree(id=storage_name_id,
                                     alg_name=config.alg_name,
                                     task_name=config.task_name,
                                     desc=config.desc,
                                     seed=config.seed,
                                     experiment_num=experiment_num,
                                     git_hashes=git_hashes,
                                     root=root_dir)

            dir_tree.create_directories()
            print(f"Creating {str(dir_tree.seed_dir)}\n")

            save_config_to_json(config, filename=str(dir_tree.seed_dir / "config.json"))
            validate_config_unique(config, config_unique_dict)
            save_dict_to_json(config_unique_dict, filename=str(dir_tree.seed_dir / "config_unique.json"))
            open(str(dir_tree.seed_dir / 'UNHATCHED'), 'w+').close()

        open(str(dir_tree.seed_dir.parents[1] / f'config_copied_from_{str(storage_to_copy.name)}'),
             'w+').close()
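# --- Illustrative sketch (not part of the original module; values are hypothetical) ---
# Effect of the desc handling above:
#
#   old_desc="search", new_desc=None                        -> desc="search"
#   old_desc="search", new_desc="v2", append_new_desc=True  -> desc="search_v2"
#   old_desc="search", new_desc="v2", append_new_desc=False -> desc="v2"
#
# Each copied seed directory is re-created under a fresh storage_name_id and flagged
# UNHATCHED, so that launch_schedule() will pick it up as a brand new run.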
    # Asks for validation before syncing the storages
    answer = input("\nShould we proceed? [y or n]")
    if answer.lower() not in ['y', 'yes']:
        logger.debug("Aborting...")
        return

    logger.info("Starting...")

    for child in child_dirs:
        # Gets all wandb run folders
        wandb_dirs = child.glob('**/wandb/*run*/')
        for to_sync in wandb_dirs:
            logger.info(subprocess.run(command_line + str(to_sync.name),
                                       shell=True,
                                       cwd=str(to_sync.parent),
                                       check=True))

        logger.info(f'Storage {child} has been synced \n')


if __name__ == '__main__':
    kwargs = vars(get_synch_wandb_args())
    logger = create_logger(name="SYNCH TO WANDB", loglevel=logging.INFO)
    sync_wandb(**kwargs, logger=logger)
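# --- Illustrative note (not part of the original module) ---
# command_line is built earlier in this file (not shown here). From the call pattern
# above (run folder name appended, executed with cwd set to the parent wandb/
# directory), it presumably invokes the wandb CLI, roughly:
#
#   wandb sync <run_folder>        # executed from inside .../wandb/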
def create_retrain_best(from_file, storage_name, best_experiments_mapping, n_retrain_seeds,
                        train_time_factor, root_dir):
    logger = create_logger(name="CREATE_RETRAIN", loglevel=logging.INFO)
    logger.info("\nCREATING retrainBest directories")

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs
                    if sanity_check_exists(storage_dir, logger)]

    # Imports the schedule file so that DirectoryTree.git_repos_to_track uses the same settings
    if from_file:
        schedule_file = str([path for path in Path(from_file).parent.iterdir()
                             if 'schedule' in path.name and path.name.endswith('.py')][0])
        schedule_module = ".".join(schedule_file.split('/'))[:-3]  # [:-3] drops the '.py' suffix
        schedule = import_module(schedule_module)

    # Creates the retrainBest directories
    retrainBest_storage_dirs = []
    new_retrainBest_storage_dirs = []
    for storage_dir in storage_dirs:
        try:
            # Checks if a retrainBest directory already exists for this search
            search_storage_id = storage_dir.name.split('_')[0]
            corresponding_retrain_directories = [
                path for path in get_root(root_dir).iterdir()
                if f"retrainBest{search_storage_id}" in path.name.split('_')]

            if len(corresponding_retrain_directories) > 0:
                assert len(corresponding_retrain_directories) == 1
                retrainBest_dir = corresponding_retrain_directories[0]

                logger.info(f"Existing retrainBest\n\n"
                            f"\t{storage_dir.name} -> {retrainBest_dir.name}")

                retrainBest_storage_dirs.append(retrainBest_dir)
                continue

            else:
                # The retrainBest directory will contain one experiment with the bestConfig from the search...
                if best_experiments_mapping is None:
                    # ... the bestConfig is found in the summary/ folder of the search
                    best_config_list = [path for path in (storage_dir / "summary").iterdir()
                                        if path.name.startswith("bestConfig")]
                    assert len(best_config_list) == 1
                    best_config = best_config_list[0]
                else:
                    # ... the bestConfig is loaded based on the specified --best_experiment_mapping
                    best_experiments_mapping_dict = load_dict_from_json(best_experiments_mapping)
                    assert storage_dir.name in best_experiments_mapping_dict.keys()

                    best_experiment_num = best_experiments_mapping_dict[storage_dir.name]
                    seed_dir = DirectoryTree.get_all_seeds(
                        experiment_dir=storage_dir / f"experiment{best_experiment_num}")[0]
                    best_config = seed_dir / "config.json"

                config_dict = load_dict_from_json(filename=str(best_config))

                # Scales the retraining budget by train_time_factor (e.g. 2. retrains for twice as long)
                if config_dict['max_episodes'] is not None:
                    config_dict['max_episodes'] = int(config_dict['max_episodes'] * train_time_factor)
                elif config_dict['max_steps'] is not None:
                    config_dict['max_steps'] = int(config_dict['max_steps'] * train_time_factor)
                else:
                    raise ValueError("At least one of max_episodes or max_steps should be defined")

                # Updates the description
                if "random" in config_dict['desc'] or "grid" in config_dict['desc']:
                    new_desc = config_dict['desc'] \
                        .replace("random", f"retrainBest{search_storage_id}") \
                        .replace("grid", f"retrainBest{search_storage_id}")
                else:
                    new_desc = config_dict['desc'] + f"_retrainBest{search_storage_id}"

                config_dict['desc'] = new_desc

                # Creates a config Namespace from the loaded config_dict
                config = argparse.ArgumentParser().parse_args("")  # empty Namespace
                config_pointer = vars(config)
                config_pointer.update(config_dict)  # updates config in place

                config_unique_dict = {'alg_name': config.alg_name,
                                      'task_name': config.task_name,
                                      'seed': config.seed}

                # Gets a new storage_name_id
                tmp_dir_tree = DirectoryTree(alg_name="", task_name="", desc="", seed=1, root=root_dir)
                retrain_storage_id = tmp_dir_tree.storage_dir.name.split('_')[0]

                # Creates the new storage_dir for retrainBest
                dir_tree = create_experiment_dir(storage_name_id=retrain_storage_id,
                                                 config=config,
                                                 config_unique_dict=config_unique_dict,
                                                 SEEDS=[i * 10 for i in range(n_retrain_seeds)],
                                                 root_dir=root_dir,
                                                 git_hashes=DirectoryTree.get_git_hashes())

                retrainBest_storage_dirs.append(dir_tree.storage_dir)
                new_retrainBest_storage_dirs.append(dir_tree.storage_dir)

                logger.info(f"New retrainBest:\n\n"
                            f"\t{storage_dir.name} -> {dir_tree.storage_dir.name}")

        except Exception as e:
            logger.info(f"Could not create retrainBest storage_dir for {storage_dir}")
            logger.info(f"\n\n{e}\n{traceback.format_exc()}")

    # Saves the list of created storage_dirs in a text file located with the provided schedule_file
    schedule_name = Path(from_file).parent.stem
    with open(Path(from_file).parent / f"list_retrains_{schedule_name}.txt", "a+") as f:
        for storage_dir in new_retrainBest_storage_dirs:
            f.write(f"{storage_dir.name}\n")

    return retrainBest_storage_dirs
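# --- Illustrative sketch (not part of the original module; storage names are hypothetical) ---
# Naming convention implied by the code above:
#
#   search storage : "3_a1b2c3_ppo_cartpole_random"        (search_storage_id = "3")
#   retrain storage: "7_d4e5f6_ppo_cartpole_retrainBest3"
#
# The "retrainBest{search_storage_id}" token written into the new desc is exactly what
# the existence check at the top of the loop looks for, so re-running
# create_retrain_best() on the same search is a no-op.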