def run_simulations(args, save_result, local_mode, init_ray=True):
    """Run all repetitions of the planning experiment as parallel ray tasks.

    One `run_rep` worker is launched per repetition; each worker sweeps the
    full configuration grid x algorithm-parameter grid. Results are gathered
    in dispatch order, aggregated, and (optionally) checkpointed after every
    repetition.

    Args:
        args: experiment arguments (seed, n_reps, param_grid_def,
            config_grid_def, ...).
        save_result: if True, create a result dir and persist results/logs.
        local_mode: passed to `start_ray`; True runs ray serially (debug).
        init_ray: if True, initialize ray before dispatching tasks.

    Returns:
        info_dict with aggregated losses ('planing_loss_avg',
        'planing_loss_std'), the parameter grid, and 'n_reps_finished'.
    """
    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}

    # ----- Dispatch one ray task per repetition (runs in parallel) -----
    rep_task_ids = []
    for i_rep in range(n_reps):
        # each worker gets its own copy of args so in-worker mutations
        # (e.g. train_sampling_def) cannot leak between repetitions
        args_r = deepcopy(args)
        task_id = run_rep.remote(i_rep, alg_param_grid,
                                 args_r.config_grid_def, args_r)
        rep_task_ids.append(task_id)

    # ----- Gather results in dispatch order -----
    for i_rep, task_id in enumerate(rep_task_ids):
        loss_rep = ray.get(task_id)
        # progress log roughly every 1% of reps (and at least every rep)
        if i_rep % max(n_reps // 100, 1) == 0:
            time_str = time.strftime(
                "%H hours, %M minutes and %S seconds",
                time.gmtime(timeit.default_timer() - start_time))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    i_rep + 1, n_reps, time_str), args)
        planing_loss[i_rep] = loss_rep
        # NOTE: mean/std are taken over all n_reps rows, so rows for reps
        # not yet gathered contribute zeros to these running aggregates.
        info_dict = {
            'planing_loss_avg': planing_loss.mean(axis=0),
            'planing_loss_std': planing_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': i_rep + 1
        }
        if save_result:
            # checkpoint partial results so a crash loses at most one rep
            save_run_data(args, info_dict, verbose=0)

    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args, save_result)
    return info_dict
def run_rep(i_rep, alg_param_grid, config_grid_def, args_r):
    """Run a single repetition: sweep config grid x algorithm-parameter grid.

    For each configuration value a fresh MDP is generated and its optimal
    policy computed; then, for each algorithm parameter, a policy is learned
    and its value loss relative to the optimum is recorded.

    Args:
        i_rep: repetition index, used to de-correlate the random seed.
        alg_param_grid: 1-D grid of algorithm (regularization) parameters.
        config_grid_def: dict defining the configuration grid; its 'type'
            key selects which experiment setting the grid values control.
        args_r: per-repetition copy of the experiment arguments (mutated
            here for some config types, e.g. train_sampling_def).

    Returns:
        loss_rep: np.ndarray of shape (n_config_grid, n_grid) holding the
        mean absolute value-function error |V_opt - V_t| per grid point.

    Raises:
        AssertionError: if config_grid_def['type'] is not recognized.
    """
    # distinct but reproducible seed per repetition
    set_random_seed(args_r.seed + i_rep)
    config_grid_vals = get_grid(config_grid_def)
    n_config_grid = len(config_grid_vals)
    n_grid = len(alg_param_grid)
    loss_rep = np.zeros((n_config_grid, n_grid))
    gammaEval = args_r.gammaEval
    config_type = config_grid_def['type']

    for i_config, config_val in enumerate(config_grid_vals):
        n_traj = args_r.n_trajectories
        if config_type == 'n_trajectories':
            n_traj = config_val
        elif config_type == 'states_actions_TV_dist_from_uniform':
            args_r.train_sampling_def = {
                'type': 'Generative',
                'states_TV_dist_from_uniform': config_val,
                'actions_TV_dist_from_uniform': config_val
            }
        elif config_type == 'chain_mix_time':
            args_r.train_sampling_def = {
                'type': 'chain_mix_time',
                'mix_time': config_val
            }
        elif config_type == 'n_episodes':
            args_r.n_episodes = config_val
        elif config_type == 'None':
            pass
        else:
            # FIX: name the offending value instead of a bare AssertionError,
            # so a typo in config_grid_def is diagnosable from the traceback.
            raise AssertionError(
                'Unrecognized config_grid_def type: {}'.format(config_type))

        # Generate MDP:
        M = MDP(args_r)
        # Optimal policy and value functions for this MDP:
        pi_opt, V_opt, Q_opt = PolicyIteration(M, gammaEval)

        # Sweep the regularization-parameter grid:
        for i_grid, alg_param in enumerate(alg_param_grid):
            gamma_guidance, l1_factor, l2_factor = get_regularization_params(
                args_r, alg_param, args_r.param_grid_def['type'])
            # run the learning episodes:
            pi_t = run_learning_method(args_r, M, n_traj, gamma_guidance,
                                       l2_factor, l1_factor)
            # Evaluate performance of the learned policy:
            V_t, _ = PolicyEvaluation(M, pi_t, gammaEval)
            loss_rep[i_config, i_grid] = (np.abs(V_opt - V_t)).mean()
    return loss_rep
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
    """Fill in the `loss` tensors over (hyper-param, reg-param, rep) cells.

    For every repetition and hyper-parameter value, one ray task per
    regularization (type, param) cell is dispatched; only cells whose entry
    in `loss` is still NaN are (re)computed, so a partially filled `loss`
    acts as a resume checkpoint. Results are saved after every repetition.

    Args:
        args: experiment arguments (seed, n_reps, reg_types, ...).
        hyper_grid_vals: values of the swept hyper-parameter.
        loss: dict mapping reg_type -> array of shape
            (n_hyper_grid, n_reg_params, n_reps); NaN marks "not yet run".
        reg_grids: dict mapping reg_type -> grid of regularization params.
        local_mode: passed to `start_ray`; True runs ray serially (debug).

    Returns:
        results_dict with the grids, the filled `loss`, and
        'n_reps_finished'.
    """
    start_ray(local_mode)
    write_to_log('local_mode == {}'.format(local_mode), args)
    SetMrpArgs(args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)
    reg_types = args.reg_types
    n_hyper_grid = len(hyper_grid_vals)
    n_reps = args.n_reps
    results_dict = dict()
    write_to_log('***** Starting {} reps'.format(n_reps), args)

    for i_rep in range(n_reps):
        for i_hyper_grid, hyper_grid_val in enumerate(hyper_grid_vals):
            args_run = deepcopy(args)
            set_hyper_param(args_run, hyper_grid_val)
            # Dispatch one task per not-yet-computed (reg_type, param) cell:
            out_ids = {rt: [None] * len(reg_grids[rt]) for rt in reg_types}
            for reg_type in reg_types:
                for i_param, reg_param in enumerate(reg_grids[reg_type]):
                    if np.isnan(loss[reg_type][i_hyper_grid, i_param, i_rep]):
                        out_ids[reg_type][i_param] = run_exp.remote(
                            i_rep, args_run, reg_type, reg_param)
            # Gather results for the tasks actually dispatched:
            for reg_type in reg_types:
                for i_param, reg_param in enumerate(reg_grids[reg_type]):
                    task_id = out_ids[reg_type][i_param]
                    if task_id is not None:
                        loss[reg_type][i_hyper_grid, i_param, i_rep] = \
                            ray.get(task_id)
        # Checkpoint everything computed so far:
        results_dict = {'hyper_grid_vals': hyper_grid_vals,
                        'loss': loss,
                        'reg_grids': reg_grids,
                        'n_reps_finished': i_rep + 1}
        save_run_data(args, results_dict, verbose=0)
        time_str = time.strftime(
            "%H hours, %M minutes and %S seconds",
            time.gmtime(timeit.default_timer() - start_time))
        write_to_log('Finished: {} out of {} reps, time: {}'.format(
            i_rep + 1, n_reps, time_str), args)

    stop_time = timeit.default_timer()
    write_to_log('Total runtime: ' +
                 time.strftime("%H hours, %M minutes and %S seconds",
                               time.gmtime(stop_time - start_time)), args)
    return results_dict
def run_simulations(args, save_result, local_mode):
    """Run all repetitions of the experiment as parallel ray tasks.

    Dispatches one `run_rep` worker per repetition over the parameter and
    configuration grids, gathers the per-rep loss matrices, and aggregates
    mean/std across repetitions.

    Args:
        args: experiment arguments (seed, n_reps, param_grid_def,
            config_grid_def, ...); `args.n_configs` is set here.
        save_result: if True, create a result dir and persist results/logs.
        local_mode: passed to `start_ray`; True runs ray serially (debug).

    Returns:
        info_dict with 'loss_avg'/'loss_std' (aggregated across reps) and
        the two grids.
    """
    # keep a pristine copy so the originally-defined args can be logged
    args_def = deepcopy(args)
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]
    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs
    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Dispatch one ray task per repetition (runs in parallel) -----
    rep_task_ids = [
        run_rep.remote(i_rep, param_val_grid, config_grid, args)
        for i_rep in range(n_reps)
    ]

    # ----- Gather results in dispatch order -----
    for i_rep, task_id in enumerate(rep_task_ids):
        loss_mat[i_rep] = ray.get(task_id)
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps),
                     args)

    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid
    }
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    write_to_log(
        ['-' * 10 + 'Defined args: ', pretty_print_args(args_def), '-' * 20],
        args)
    return info_dict
def run_simulations(args, local_mode):
    """Sweep an (L2-factor x gamma) grid, running the control experiment per cell.

    For each (l2, gamma) pair, the main control experiment is run with a
    single-point parameter grid and the resulting mean/std planning losses
    are collected into matrices.

    Args:
        args: experiment arguments (seed, l2_grid_def, gam_grid_def, ...).
        local_mode: passed to `start_ray`; True runs ray serially (debug).

    Returns:
        grid_results_dict with the two grids and the 'loss_avg'/'loss_std'
        matrices of shape (len(l2_grid), len(gam_grid)).
    """
    start_ray(local_mode)
    create_result_dir(args)
    # FIX: create_result_dir(args) was called a second time here; the
    # duplicate call has been removed (one result dir per run).
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    run_idx = 0
    for i0 in range(grid_shape[0]):
        for i1 in range(grid_shape[1]):
            args_run = deepcopy(args)
            # single-point parameter grid: only this cell's L2 factor
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i0]]
            }
            args_run.default_gamma = gam_grid[i1]
            info_dict = run_main_control(args_run,
                                         save_result=False,
                                         plot=False,
                                         init_ray=False)
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, loss_avg.size))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
def run_exp(i_rep, args_run, reg_type, reg_param):
    """Run one value-estimation experiment cell and return its evaluation loss.

    Generates an MRP, samples data from it, runs the value-estimation method
    with the given regularization setting, and scores the estimate against
    the true value function.

    Args:
        i_rep: repetition index, used to de-correlate the random seed.
        args_run: experiment arguments for this run.
        reg_type: which regularization scheme `reg_param` controls.
        reg_param: the regularization parameter value for this cell.

    Returns:
        The scalar evaluation loss of the value estimate.
    """
    # distinct but reproducible seed per repetition
    set_random_seed(args_run.seed + i_rep)

    # Generate the MRP (with the sampling distribution specified in args):
    M = MRP(args_run)
    gammaEval = args_run.gammaEval

    # Translate (reg_type, reg_param) into the concrete regularisation knobs:
    gamma_guidance, l2_TD, l2_fp, l2_proj = get_regularization_params(
        args_run, reg_param, reg_type)

    # Sample data and run the value-estimation method:
    data = M.SampleDataMrp(args_run)
    V_est, V_true = run_value_estimation_method(
        data, M, args_run, gamma_guidance, l2_proj, l2_fp, l2_TD)

    # Score the estimate (no policy involved in the MRP setting):
    loss_type = args_run.evaluation_loss_type
    pi = None
    eval_loss = evaluate_value_estimation(loss_type, V_true, V_est, M, pi,
                                          gammaEval, gamma_guidance)
    return eval_loss