def generate_discrete_prob_TV(args, n, tv_dist_nrml=0., tol=1e-1, max_iter=5e6):
    """
    Randomly generates a discrete distribution of dimension n whose total-variation (TV)
    distance from the uniform distribution is (approximately) the desired one.

    Candidates are drawn with draw_prob_at_dist(n, l1_dist) and their distance is re-checked.
    Drawing stops when the normalized TV distance is within tol of the target, or after
    max_iter draws, in which case the best candidate seen so far is returned and a
    failure message is written to the log.

    :param args: run arguments, only forwarded to write_to_log
    :param n: dimension of the distribution
    :param tv_dist_nrml: desired TV distance, normalized to [0,1]
                         (1 corresponds to the maximal TV distance (n - 1) / n)
    :param tol: accepted absolute error of the normalized TV distance
    :param max_iter: maximal number of draws
    :return: the probability vector (the uniform one if tv_dist_nrml == 0 or n == 1)
    """
    assert 0 <= tv_dist_nrml <= 1
    # For n == 1 the maximal TV distance is 0, so the normalization below would divide by
    # zero and no draw could ever be accepted; the only distribution is the uniform one.
    if tv_dist_nrml == 0. or n == 1:
        return np.ones(n) / n

    tv_dist_max = (n - 1) / n  # note: tv_dist = 0.5 * L1 dist
    tv_dist = tv_dist_nrml * tv_dist_max  # de-normalize
    l1_dist = 2 * tv_dist  # the desired L1 dist

    probs = None
    best_probs = None
    best_err = float('inf')
    best_tv_dist_nrml = None
    done = False
    i = 0
    while not done:
        i += 1
        # presumably returns a length-n numpy array; it is used in array arithmetic below
        probs = draw_prob_at_dist(n, l1_dist)
        # Check that we indeed got the correct L1 distance from a uniform distrib
        curr_l1_dist = np.sum(np.abs(probs - 1 / n))
        curr_tv_dist_nrml = 0.5 * curr_l1_dist / tv_dist_max
        curr_err = np.abs(curr_tv_dist_nrml - tv_dist_nrml)

        if curr_err < best_err:
            best_err = curr_err
            best_probs = probs
            best_tv_dist_nrml = curr_tv_dist_nrml

        if curr_err < tol:
            done = True
        elif i >= max_iter:
            # Out of budget without an accepted draw: fall back to the best candidate.
            # (Only reported when the last draw was not accepted.)
            write_to_log([
                'rejection sampling failed -', 'desired tv_dist [normalized]: ',
                tv_dist_nrml, ', best_tv_dist_nrml: ', best_tv_dist_nrml,
                'best_err: ', best_err
            ], args)
            probs = best_probs
            done = True
    return probs
def run_simulation_remote(args_run, job_name, job_info):
    """
    Run a single simulation job with the algorithm selected by args_run.alg.

    Only 'TD3' and 'OurDDPG' are handled (both by run_simulation_TD3); any other value
    raises AssertionError. The job name is appended to args_run.run_name (in place)
    before the simulation starts.

    :param args_run: run arguments; reads .alg and modifies .run_name
    :param job_name: name of the job, used in the log line and in the run name
    :param job_info: forwarded unchanged to run_simulation_TD3
    :return: tuple (result of run_simulation_TD3, args_run)
    """
    supported_algs = {'TD3', 'OurDDPG'}
    if args_run.alg not in supported_algs:
        # note: a SAC variant is not available here
        raise AssertionError

    # imported lazily, only when a TD3-family algorithm is actually requested
    from TD3_Code.mainTD3 import run_simulation_TD3

    start_msg = 'Starting: {}, time: {}'.format(job_name, time_now())
    write_to_log(start_msg, args_run)
    args_run.run_name += ':' + job_name
    result = run_simulation_TD3(args_run, job_info)
    return result, args_run
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
    """
    Run all experiments whose entry in `loss` is still NaN and fill in their results.

    For every repetition and every hyper-parameter value, one remote job (run_exp.remote)
    is launched per (regularization type, regularization parameter) pair that has no
    result yet; the outputs are then collected with ray.get and written into
    loss[reg_type][i_hyper_grid, i_reg_param, i_rep] (in place).
    After each repetition the results so far are saved with save_run_data.

    :param args: run arguments; reads .seed, .reg_types and .n_reps
    :param hyper_grid_vals: the hyper-parameter values to iterate over
    :param loss: mapping reg_type -> array indexed [i_hyper_grid, i_reg_param, i_rep]
    :param reg_grids: mapping reg_type -> sequence of regularization parameters
    :param local_mode: forwarded to start_ray
    :return: the last saved results dict (an empty dict if args.n_reps is 0)
    """
    runtime_fmt = "%H hours, %M minutes and %S seconds"

    start_ray(local_mode)
    write_to_log('local_mode == {}'.format(local_mode), args)
    SetMrpArgs(args)
    t_start = timeit.default_timer()
    set_random_seed(args.seed)

    reg_types = args.reg_types
    n_reps = args.n_reps
    results_dict = dict()
    write_to_log('***** Starting {} reps'.format(n_reps), args)

    for i_rep in range(n_reps):
        for i_hyper, hyper_val in enumerate(hyper_grid_vals):
            args_run = deepcopy(args)
            set_hyper_param(args_run, hyper_val)

            # One slot per regularization parameter; None marks "nothing launched"
            job_ids = {
                reg_type: [None] * len(reg_grids[reg_type])
                for reg_type in reg_types
            }

            # Launch a job for every entry that was not computed yet
            for reg_type in reg_types:
                type_loss = loss[reg_type]
                for i_reg, reg_param in enumerate(reg_grids[reg_type]):
                    if not np.isnan(type_loss[i_hyper, i_reg, i_rep]):
                        continue
                    job_ids[reg_type][i_reg] = run_exp.remote(
                        i_rep, args_run, reg_type, reg_param)

            # Collect the outputs of the launched jobs
            for reg_type in reg_types:
                for i_reg, job_id in enumerate(job_ids[reg_type]):
                    if job_id is None:
                        continue
                    loss[reg_type][i_hyper, i_reg, i_rep] = ray.get(job_id)

        # Checkpoint after each finished repetition
        results_dict = {
            'hyper_grid_vals': hyper_grid_vals,
            'loss': loss,
            'reg_grids': reg_grids,
            'n_reps_finished': i_rep + 1
        }
        save_run_data(args, results_dict, verbose=0)
        elapsed = timeit.default_timer() - t_start
        time_str = time.strftime(runtime_fmt, time.gmtime(elapsed))
        write_to_log(
            'Finished: {} out of {} reps, time: {}'.format(i_rep + 1, n_reps, time_str),
            args)

    t_stop = timeit.default_timer()
    total_str = time.strftime(runtime_fmt, time.gmtime(t_stop - t_start))
    write_to_log('Total runtime: ' + total_str, args)
    return results_dict
def print_status(result_dir_to_load):
    """
    Print / log the progress of a saved run.

    Loads the run data from result_dir_to_load, scans the pickled job files in its
    'jobs' sub-directory and reports, for each grid point, how many repetitions have a
    final result and how many time-steps each started repetition reached.

    :param result_dir_to_load: path of the result directory of the run
    """
    args, info_dict = load_run_data(result_dir_to_load)
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = len(alg_param_grid)
    n_reps = args.n_reps
    print('Loaded parameters: \n', args, '\n', '-' * 20)

    if 'result_reward_mat' in info_dict:
        result_reward_mat = info_dict['result_reward_mat']
    else:
        # no final results were saved yet
        result_reward_mat = np.full(shape=(n_grid, n_reps), fill_value=np.nan)

    jobs_dir = os.path.join(result_dir_to_load, 'jobs')
    all_files = glob.glob(os.path.join(jobs_dir, "*.p"))

    # Builtin int is used as dtype: the np.int alias was removed from NumPy.
    n_rep_finish_per_point = np.full(shape=n_grid, fill_value=0, dtype=int)
    # -1 marks a (grid point, repetition) pair for which no job file was found
    n_steps_finished = np.full(shape=(n_grid, n_reps), fill_value=-1, dtype=int)

    for f_path in all_files:
        # NOTE: pickle.load can run arbitrary code - only load job files you trust.
        with open(f_path, "rb") as job_file:
            save_dict = pickle.load(job_file)
        job_info = save_dict['job_info']
        timesteps_snapshots = save_dict['timesteps_snapshots']
        # NOTE(review): with side='right', a grid_param equal to alg_param_grid[k] gives
        # k + 1 (and the largest grid value gives len, i.e. is skipped) - confirm that
        # this indexing is the intended one.
        i_grid = np.searchsorted(alg_param_grid, job_info['grid_param'], 'right')
        if i_grid == len(alg_param_grid):
            # the loaded param is not in our defined alg_param_grid
            continue
            # TODO: add option to load all files, even if not in the defined alg_param_grid (show_all_saved_results flag)
        i_rep = job_info['i_rep']
        if not np.isnan(result_reward_mat[i_grid, i_rep]):
            # a final result exists, so the repetition ran to the end
            n_steps_finished[i_grid, i_rep] = args.max_timesteps
            n_rep_finish_per_point[i_grid] += 1
        else:
            n_steps_finished[i_grid, i_rep] = timesteps_snapshots[-1]

    for i_grid, grid_param in enumerate(alg_param_grid):
        write_to_log(
            'Grid point {}/{}, val: {}, Number of finished reps loaded: {}'.
            format(1 + i_grid, len(alg_param_grid), grid_param,
                   n_rep_finish_per_point[i_grid]), args)
        for i_rep in range(n_reps):
            if n_steps_finished[i_grid, i_rep] != -1:
                print('Rep: {}, Finished Time-Steps: {}'.format(
                    i_rep, n_steps_finished[i_grid, i_rep]))
def generate_prob_given_entropy(args, n, ent_nrml, tol=1e-1, max_iter=5e6):
    """
    Randomly generates a discrete distribution of dimension n with (approximately) a given entropy.

    Uses rejection sampling: candidates are drawn with sample_simplex(n) until the
    normalized entropy (base 2, divided by log2(n)) is within tol of the target, or
    until max_iter draws were made, in which case the best candidate seen so far is
    returned and a failure message is written to the log.

    :param args: run arguments, only forwarded to write_to_log
    :param n: dimension of the distribution
    :param ent_nrml: desired entropy, normalized to [0,1] (1 corresponds to log2(n))
    :param tol: accepted absolute error of the normalized entropy
    :param max_iter: maximal number of draws
    :return: the probability vector (the uniform one if ent_nrml == 1 or n == 1)
    """
    assert 0. <= ent_nrml <= 1.
    # For n == 1 the maximal entropy log2(1) is 0, so the normalization below would divide
    # by zero and no draw could ever be accepted; the only distribution is the uniform one.
    if ent_nrml == 1. or n == 1:
        return np.ones(n) / n

    ent_max = np.log2(n)
    ent = ent_nrml * ent_max  # de-normalize

    # use rejection sampling
    probs = None
    best_probs = None
    best_err = float('inf')
    best_ent_nrml = None
    done = False
    i = 0
    while not done:
        i += 1
        probs = sample_simplex(n)
        curr_ent = sps.entropy(probs, base=2)
        curr_ent_nrml = curr_ent / ent_max
        cur_err = np.abs(ent_nrml - curr_ent_nrml)

        if cur_err < best_err:
            best_err = cur_err
            best_probs = probs
            best_ent_nrml = curr_ent_nrml

        if cur_err < tol:
            done = True
        elif i >= max_iter:
            # Out of budget without an accepted draw: fall back to the best candidate.
            # (Only reported when the last draw was not accepted.)
            write_to_log([
                'rejection sampling failed -', 'desired ent: ', ent,
                'best_err: ', best_err, ', best_ent_nrml: ', best_ent_nrml
            ], args)
            probs = best_probs
            done = True
    return probs
def run_simulations(args, save_result, local_mode):
    """
    Run args.n_reps repetitions in parallel and aggregate the resulting losses.

    Each repetition is launched with run_rep.remote and collected with ray.get; its
    output fills one row of a (n_reps, n_configs, n_grid) loss array, which is then
    averaged over the repetitions.

    :param args: run arguments; reads .seed, .n_reps, .param_grid_def and
                 .config_grid_def, and sets .n_configs
    :param save_result: if True, a result dir is created and the results are saved
    :param local_mode: forwarded to start_ray
    :return: dict with keys 'loss_avg', 'loss_std', 'param_val_grid', 'config_grid'
    """
    # snapshot of the arguments as given, before args.n_configs is added below
    args_def = deepcopy(args)

    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)
    t_start = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]
    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs
    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Launch all repetitions (each call returns an object id) -----#
    pending_ids = [
        run_rep.remote(i_rep, param_val_grid, config_grid, args)
        for i_rep in range(n_reps)
    ]

    # ----- Collect the results, in launch order -----#
    for i_rep, rep_id in enumerate(pending_ids):
        loss_rep = ray.get(rep_id)
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps), args)
        loss_mat[i_rep] = loss_rep

    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid
    }
    if save_result:
        save_run_data(args, info_dict)

    t_stop = timeit.default_timer()
    runtime_str = time.strftime("%H hours, %M minutes and %S seconds",
                                time.gmtime(t_stop - t_start))
    write_to_log('Total runtime: ' + runtime_str, args)
    write_to_log(
        ['-' * 10 + 'Defined args: ', pretty_print_args(args_def), '-' * 20],
        args)
    return info_dict
def run_simulations(args, local_mode):
    """
    Evaluate the planning loss on a 2-D grid of (L2 factor, gamma) values.

    For every grid point a copy of args is made, its param_grid_def is set to the single
    L2 value and its default_gamma to the gamma value, and run_main_control is called
    (without saving, plotting or re-initializing ray). The first entry of the returned
    'planing_loss_avg' / 'planing_loss_std' is stored for that grid point.

    :param args: run arguments; reads .seed, .l2_grid_def and .gam_grid_def
    :param local_mode: forwarded to start_ray
    :return: dict with keys 'l2_grid', 'gam_grid', 'loss_avg', 'loss_std'
             (the loss arrays have shape (len(l2_grid), len(gam_grid)))
    """
    start_ray(local_mode)
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    t_start = timeit.default_timer()
    # NOTE(review): create_result_dir is called a second time here - confirm whether
    # this repetition is intended.
    create_result_dir(args)
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)

    n_l2, n_gam = len(l2_grid), len(gam_grid)
    loss_avg = np.zeros((n_l2, n_gam))
    loss_std = np.zeros((n_l2, n_gam))

    for i_l2 in range(n_l2):
        for i_gam in range(n_gam):
            args_run = deepcopy(args)
            # a one-element list grid: only the current L2 factor is evaluated
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i_l2]]
            }
            args_run.default_gamma = gam_grid[i_gam]
            point_info = run_main_control(args_run,
                                          save_result=False,
                                          plot=False,
                                          init_ray=False)
            loss_avg[i_l2, i_gam] = point_info['planing_loss_avg'][0]
            loss_std[i_l2, i_gam] = point_info['planing_loss_std'][0]
            n_done = i_l2 * n_gam + i_gam + 1
            print("Finished {}/{}".format(n_done, loss_avg.size))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)

    t_stop = timeit.default_timer()
    runtime_str = time.strftime("%H hours, %M minutes and %S seconds",
                                time.gmtime(t_stop - t_start))
    write_to_log('Total runtime: ' + runtime_str, args)
    return grid_results_dict
def run_simulations(args, save_result, local_mode, init_ray=True):
    """
    Run args.n_reps repetitions in parallel and aggregate the planning loss.

    Each repetition is launched with run_rep.remote (on its own deep copy of args) and
    collected with ray.get; its output fills one row of a
    (n_reps, n_config_grid, n_grid) array. After every collected repetition the
    statistics are recomputed and, if save_result is set, saved.

    :param args: run arguments; reads .seed, .n_reps, .param_grid_def and .config_grid_def
    :param save_result: if True, a result dir is created and results are saved
    :param local_mode: forwarded to start_ray
    :param init_ray: if True, start_ray is called first
    :return: dict with keys 'planing_loss_avg', 'planing_loss_std', 'alg_param_grid',
             'n_reps_finished' (an empty dict if args.n_reps is 0)
    """
    runtime_fmt = "%H hours, %M minutes and %S seconds"

    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)
    t_start = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}

    # ----- Launch all repetitions (each call returns an object id) -----#
    pending_ids = []
    for i_rep in range(n_reps):
        rep_args = deepcopy(args)
        pending_ids.append(
            run_rep.remote(i_rep, alg_param_grid, rep_args.config_grid_def, rep_args))

    # ----- Collect the results, in launch order -----#
    log_every = max(n_reps // 100, 1)  # report progress about 100 times at most
    for i_rep, rep_id in enumerate(pending_ids):
        loss_rep = ray.get(rep_id)
        if i_rep % log_every == 0:
            elapsed = timeit.default_timer() - t_start
            time_str = time.strftime(runtime_fmt, time.gmtime(elapsed))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    i_rep + 1, n_reps, time_str), args)
        planing_loss[i_rep] = loss_rep
        # NOTE(review): the statistics are taken over all n_reps rows, including the
        # still-zero rows of repetitions not collected yet - confirm this is intended
        # for the intermediate snapshots.
        info_dict = {
            'planing_loss_avg': planing_loss.mean(axis=0),
            'planing_loss_std': planing_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': i_rep + 1
        }
        if save_result:
            save_run_data(args, info_dict, verbose=0)

    if save_result:
        save_run_data(args, info_dict)

    t_stop = timeit.default_timer()
    total_str = time.strftime(runtime_fmt, time.gmtime(t_stop - t_start))
    write_to_log('Total runtime: ' + total_str, args, save_result)
    return info_dict
else: n_reps_per_point[i_grid] = finished_reps_per_point[i_grid] # now take completed results from loaded data: result_reward_mat = np.full((n_grid, np.max(n_reps_per_point)), np.nan) for i_grid, grid_param in enumerate(new_alg_param_grid): if grid_param in loaded_alg_param_grid: load_idx = np.nonzero(loaded_alg_param_grid == grid_param) for i_rep in range(finished_reps_per_point[i_grid]): result_reward_mat[i_grid, i_rep] = loaded_result_mat[load_idx, i_rep] # end for i_rep # end if # end for i_grid write_to_log( 'Continue run with new grid def {}, {}'.format(new_param_grid_def, time_now()), args) write_to_log('Run parameters: \n' + str(args) + '\n' + '-' * 20, args) pretty_print_args(args) alg_param_grid = new_alg_param_grid # --------------------------------------------------------------------------------------------------------------------# elif run_mode == 'New': # Start from scratch run_time = 0 create_result_dir(args) os.makedirs(os.path.join(args.result_dir, 'jobs')) alg_param_grid = get_grid(args.param_grid_def) n_grid = len(alg_param_grid) n_reps = args.n_reps n_reps_per_point = np.full(shape=n_grid, fill_value=n_reps, dtype=np.int)