def run_simulations(args, save_result, local_mode, init_ray=True):
    """Run ``args.n_reps`` repetitions as Ray remote tasks and aggregate the losses.

    Every repetition is submitted with ``run_rep.remote`` up front; the results
    are then collected in submission order and written into a
    ``(n_reps, n_config_grid, n_grid)`` array.

    Args:
        args: Run configuration. This function reads ``seed``, ``n_reps``,
            ``param_grid_def`` and ``config_grid_def`` and forwards the object
            to the logging / saving helpers and (as a deep copy) to ``run_rep``.
        save_result: When true, the result directory is created and the partial
            and final statistics are stored with ``save_run_data``.
        local_mode: Forwarded to ``start_ray`` and written to the log.
        init_ray: When false, ``start_ray`` is not called (presumably because
            the caller already started Ray - confirm against callers).

    Returns:
        dict: ``planing_loss_avg`` / ``planing_loss_std`` (statistics over the
        repetition axis), ``alg_param_grid`` and ``n_reps_finished``.
        An empty dict when ``args.n_reps`` is 0.
    """
    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        # NOTE(review): the source formatting was collapsed; this log line is
        # assumed to belong to the ``save_result`` branch - confirm.
        write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)
    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    n_config_grid = len(get_grid(args.config_grid_def))
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}

    # ----- Submit all repetitions to run in parallel ----- #
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # Each task receives its own copy of the arguments.
        args_r = deepcopy(args)
        # .remote() returns an object id, not the result itself.
        loss_rep_id_lst.append(
            run_rep.remote(i_rep, alg_param_grid, args_r.config_grid_def, args_r))

    # ----- Collect the results in submission order ----- #
    # Log roughly every 1% of the repetitions (every repetition if n_reps < 100).
    log_period = max(n_reps // 100, 1)
    for i_rep, loss_rep_id in enumerate(loss_rep_id_lst):
        loss_rep = ray.get(loss_rep_id)
        if i_rep % log_period == 0:
            time_str = time.strftime(
                "%H hours, %M minutes and %S seconds",
                time.gmtime(timeit.default_timer() - start_time))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    i_rep + 1, n_reps, time_str),
                args)
        planing_loss[i_rep] = loss_rep
        # Only the rows collected so far take part in the statistics: the
        # remaining rows are still the zeros of the initial allocation and
        # would bias the partial results. After the last repetition this slice
        # is the whole array, so the final result is unaffected.
        finished_loss = planing_loss[:i_rep + 1]
        info_dict = {
            'planing_loss_avg': finished_loss.mean(axis=0),
            'planing_loss_std': finished_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': i_rep + 1,
        }
        if save_result:
            # Checkpoint the results gathered so far.
            save_run_data(args, info_dict, verbose=0)

    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    # save_result is passed as write_to_log's third positional argument; its
    # meaning is defined by write_to_log, which is not visible here.
    write_to_log(
        'Total runtime: ' + time.strftime(
            "%H hours, %M minutes and %S seconds",
            time.gmtime(stop_time - start_time)),
        args, save_result)
    return info_dict
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
    """Fill the missing (NaN) entries of ``loss`` by running experiments with Ray.

    For every repetition and every hyper-parameter grid value, one
    ``run_exp`` task is submitted per regularization parameter whose entry in
    ``loss`` is still NaN; entries that already hold a value are skipped.
    The results are saved after each repetition.

    Args:
        args: Run configuration. This function reads ``seed``, ``n_reps`` and
            ``reg_types`` and forwards the object to the helpers.
        hyper_grid_vals: Iterable of hyper-parameter values; each is applied to
            a copy of ``args`` with ``set_hyper_param``.
        loss: Mapping ``reg_type -> array`` indexed as
            ``[i_hyper_grid, i_reg_param, i_rep]``. Updated in place.
        reg_grids: Mapping ``reg_type -> sequence`` of regularization parameters.
        local_mode: Forwarded to ``start_ray`` and written to the log.

    Returns:
        dict: ``hyper_grid_vals``, ``loss``, ``reg_grids`` and
        ``n_reps_finished``. When ``args.n_reps`` is 0 the dict carries the
        unchanged inputs with ``n_reps_finished == 0`` (instead of an empty
        dict, which a caller would otherwise persist over existing results).
    """
    start_ray(local_mode)
    write_to_log('local_mode == {}'.format(local_mode), args)
    SetMrpArgs(args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)
    reg_types = args.reg_types
    n_reps = args.n_reps
    # Start from the current state so that a zero-repetition run still returns
    # a complete result record.
    results_dict = {
        'hyper_grid_vals': hyper_grid_vals,
        'loss': loss,
        'reg_grids': reg_grids,
        'n_reps_finished': 0,
    }
    write_to_log('***** Starting {} reps'.format(n_reps), args)

    for i_rep in range(n_reps):
        for i_hyper_grid, hyper_grid_val in enumerate(hyper_grid_vals):
            args_run = deepcopy(args)
            set_hyper_param(args_run, hyper_grid_val)

            # Submit jobs: None marks an entry that needs no computation.
            out_ids = {reg_type: [None] * len(reg_grids[reg_type])
                       for reg_type in reg_types}
            for reg_type in reg_types:
                for i_reg_param, reg_param in enumerate(reg_grids[reg_type]):
                    if np.isnan(loss[reg_type][i_hyper_grid, i_reg_param, i_rep]):
                        out_ids[reg_type][i_reg_param] = run_exp.remote(
                            i_rep, args_run, reg_type, reg_param)

            # Gather results of the jobs submitted above.
            for reg_type in reg_types:
                for i_reg_param, out_id in enumerate(out_ids[reg_type]):
                    if out_id is not None:
                        loss[reg_type][i_hyper_grid, i_reg_param, i_rep] = ray.get(out_id)

        # Save results so far.
        results_dict = {
            'hyper_grid_vals': hyper_grid_vals,
            'loss': loss,
            'reg_grids': reg_grids,
            'n_reps_finished': i_rep + 1,
        }
        save_run_data(args, results_dict, verbose=0)
        time_str = time.strftime(
            "%H hours, %M minutes and %S seconds",
            time.gmtime(timeit.default_timer() - start_time))
        write_to_log(
            'Finished: {} out of {} reps, time: {}'.format(i_rep + 1, n_reps, time_str),
            args)

    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' + time.strftime(
            "%H hours, %M minutes and %S seconds",
            time.gmtime(stop_time - start_time)),
        args)
    return results_dict
def run_simulations(args, save_result, local_mode):
    """Run ``args.n_reps`` repetitions with Ray and summarize the loss matrix.

    Args:
        args: Run configuration. Reads ``seed``, ``n_reps``, ``param_grid_def``
            and ``config_grid_def``; ``args.n_configs`` is set here as a side
            effect before the tasks are submitted.
        save_result: When true, the result directory is created and the final
            statistics are stored with ``save_run_data``.
        local_mode: Forwarded to ``start_ray`` and written to the log.

    Returns:
        dict: ``loss_avg`` / ``loss_std`` (statistics over the repetition
        axis), ``param_val_grid`` and ``config_grid``.
    """
    # Snapshot of the arguments as given, taken before this function adds
    # ``n_configs``; it is what gets printed at the end of the log.
    original_args = deepcopy(args)
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        # NOTE(review): the source formatting was collapsed; this log line is
        # assumed to belong to the ``save_result`` branch - confirm.
        write_to_log('local_mode == {}'.format(local_mode), args)
    t_begin = timeit.default_timer()
    set_random_seed(args.seed)
    total_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    grid_len = param_val_grid.shape[0]
    config_grid = get_grid(args.config_grid_def)
    args.n_configs = len(config_grid)
    loss_mat = np.zeros((total_reps, args.n_configs, grid_len))

    # ----- Submit every repetition; .remote() returns object ids ----- #
    pending_ids = [
        run_rep.remote(rep, param_val_grid, config_grid, args)
        for rep in range(total_reps)
    ]

    # ----- Collect the results in submission order ----- #
    for rep, pending_id in enumerate(pending_ids):
        rep_loss = ray.get(pending_id)
        write_to_log('Finished: {} out of {} reps'.format(rep + 1, total_reps), args)
        loss_mat[rep] = rep_loss

    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid,
    }
    if save_result:
        save_run_data(args, info_dict)

    elapsed = timeit.default_timer() - t_begin
    runtime_str = time.strftime("%H hours, %M minutes and %S seconds", time.gmtime(elapsed))
    write_to_log('Total runtime: ' + runtime_str, args)
    write_to_log(
        ['-' * 10 + 'Defined args: ', pretty_print_args(original_args), '-' * 20],
        args)
    return info_dict
def run_simulations(args, local_mode):
    """Evaluate the loss over a 2-D grid of L2 factors and gamma values.

    For each ``(l2, gamma)`` pair a copy of ``args`` is configured with a
    single-value ``param_grid_def`` and ``default_gamma``, and
    ``run_main_control`` is called with saving, plotting and Ray
    initialization disabled (Ray is started once, here).

    Args:
        args: Run configuration. Reads ``seed``, ``l2_grid_def`` and
            ``gam_grid_def`` and forwards the object to the helpers.
        local_mode: Forwarded to ``start_ray`` and written to the log.

    Returns:
        dict: ``l2_grid``, ``gam_grid`` and the ``loss_avg`` / ``loss_std``
        arrays of shape ``(len(l2_grid), len(gam_grid))``.
    """
    start_ray(local_mode)
    # The result directory is created exactly once, before the first log
    # message (the original called create_result_dir a second time after
    # logging had already started).
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)

    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)
    n_runs = loss_avg.size

    for i0, l2_factor in enumerate(l2_grid):
        for i1, gamma in enumerate(gam_grid):
            args_run = deepcopy(args)
            # Restrict the inner run to the single L2 factor of this grid point.
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_factor],
            }
            args_run.default_gamma = gamma
            info_dict = run_main_control(
                args_run, save_result=False, plot=False, init_ray=False)
            # The first entry of the returned statistics is used
            # (presumably the first configuration - confirm against
            # run_main_control).
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            print("Finished {}/{}".format(i0 * grid_shape[1] + i1 + 1, n_runs))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std,
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' + time.strftime(
            "%H hours, %M minutes and %S seconds",
            time.gmtime(stop_time - start_time)),
        args)
    return grid_results_dict
n_hyper_grid = len(hyper_grid_vals) n_reps = args.n_reps reg_types = args.reg_types # define search grids for regularization parameters reg_grids = dict() for reg_type in reg_types: reg_param_grid_def = args.search_grid_def[reg_type] reg_grids[reg_type] = get_grid(reg_param_grid_def) # init result matrix with nan (no result) loss = {reg_type: np.full((n_hyper_grid, len(reg_grids[reg_type]), n_reps), np.nan) for reg_type in reg_types} # run results_dict = run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode) save_run_data(args, results_dict) # ********************************* elif run_mode == 'Continue': loaded_args, loaded_results_dict = load_run_data(result_dir_to_load) args = loaded_args args.result_dir = result_dir_to_load # update the path, in case the result folder moved hyper_grid_vals = loaded_results_dict['hyper_grid_vals'] loss = loaded_results_dict['loss'] reg_grids = loaded_results_dict['reg_grids'] results_dict = run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode) save_run_data(args, results_dict) # ********************************* else: raise AssertionError('Unrecognized run_mode') # *********************************
result_reward_mat[i_grid, i_rep]): continue # skip output, args_run = ray.get(out_id[i_grid][i_rep]) result_reward_mat[i_grid, i_rep] = output # note: the final reward is an average performance on eval_episodes=10 of final policy write_to_log( f'Finished Rep: {i_rep + 1}/{n_reps_per_point[i_grid]} of Grid point {i_grid}/{len(alg_param_grid)}' f' ({args_run.job_name}), Reward : {output}, Time now: {time_now()}', args) # Save results so far: stop_time = timeit.default_timer() run_time += stop_time - start_time start_time = timeit.default_timer() save_run_data(args, { 'alg_param_grid': alg_param_grid, 'result_reward_mat': result_reward_mat, 'run_time': run_time }, verbose=1) # end for i_grid # end for i_rep write_to_log( 'Total runtime: ' + time.strftime( "%H hours, %M minutes and %S seconds", time.gmtime(run_time)), args) # --------------------------------------------------------------------------------------------------------------------# # Plot results # --------------------------------------------------------------------------------------------------------------------# mean_reward = []