import time
import timeit
from copy import deepcopy

import numpy as np
import ray

# Project-specific helpers used below (start_ray, create_result_dir, write_to_log,
# get_grid, set_random_seed, save_run_data, MDP, MRP, PolicyIteration,
# PolicyEvaluation, run_learning_method, get_regularization_params, ...) are
# assumed to be importable from the surrounding package.


def run_simulations(args, save_result, local_mode, init_ray=True):
    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}
    # ----- Run simulations in parallel processes -----------------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns Ray object ids:
        args_r = deepcopy(args)
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid,
                                             args_r.config_grid_def, args_r)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # end for i_rep
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        if i_rep % max(n_reps // 100, 1) == 0:
            time_str = time.strftime(
                "%H hours, %M minutes and %S seconds",
                time.gmtime(timeit.default_timer() - start_time))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    i_rep + 1, n_reps, time_str), args)
        # end if
        planing_loss[i_rep] = loss_rep
        info_dict = {
            'planing_loss_avg': planing_loss.mean(axis=0),
            'planing_loss_std': planing_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': i_rep + 1
        }
        if save_result:
            save_run_data(args, info_dict, verbose=0)
        # end if
    # end for i_rep
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args, save_result)
    return info_dict
@ray.remote
def run_rep(i_rep, alg_param_grid, config_grid_def, args_r):
    """Runs a single repetition of the experiment."""
    set_random_seed(args_r.seed + i_rep)
    config_grid_vals = get_grid(config_grid_def)
    n_config_grid = len(config_grid_vals)
    n_grid = len(alg_param_grid)
    loss_rep = np.zeros((n_config_grid, n_grid))
    gammaEval = args_r.gammaEval
    config_type = config_grid_def['type']

    # loop over the configuration grid (e.g., number of trajectories to generate)
    for i_config, config_val in enumerate(config_grid_vals):
        n_traj = args_r.n_trajectories
        if config_type == 'n_trajectories':
            n_traj = config_val
        elif config_type == 'states_actions_TV_dist_from_uniform':
            args_r.train_sampling_def = {
                'type': 'Generative',
                'states_TV_dist_from_uniform': config_val,
                'actions_TV_dist_from_uniform': config_val
            }
        elif config_type == 'chain_mix_time':
            args_r.train_sampling_def = {
                'type': 'chain_mix_time',
                'mix_time': config_val
            }
        elif config_type == 'n_episodes':
            args_r.n_episodes = config_val
        elif config_type == 'None':
            pass
        else:
            raise AssertionError('Unrecognized config_type: {}'.format(config_type))

        # Generate MDP:
        M = MDP(args_r)

        # Optimal policy for the MDP:
        pi_opt, V_opt, Q_opt = PolicyIteration(M, gammaEval)

        # loop over the grid of regularization parameters
        for i_grid, alg_param in enumerate(alg_param_grid):
            gamma_guidance, l1_factor, l2_factor = get_regularization_params(
                args_r, alg_param, args_r.param_grid_def['type'])

            # run the learning episodes:
            pi_t = run_learning_method(args_r, M, n_traj, gamma_guidance,
                                       l2_factor, l1_factor)

            # Evaluate performance of learned policy:
            V_t, _ = PolicyEvaluation(M, pi_t, gammaEval)

            loss_rep[i_config, i_grid] = (np.abs(V_opt - V_t)).mean()
        # end for grid
    #  end for i_config
    return loss_rep
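
A minimal usage sketch for the pair of functions above (an assumption, not part of the original code): it builds an argparse-style `args` namespace with the attributes the code reads; the grid-definition dicts mirror the {'type', 'spacing', 'list'} format used later in this file, the values are hypothetical placeholders, and any environment-specific fields required by MDP(args) are omitted.

if __name__ == '__main__':
    from argparse import Namespace

    # Hypothetical arguments; attribute names follow those accessed above.
    args = Namespace(
        seed=1,
        n_reps=10,
        n_trajectories=5,
        gammaEval=0.99,
        param_grid_def={'type': 'L2_factor', 'spacing': 'list', 'list': [0.0, 0.1]},
        config_grid_def={'type': 'n_trajectories', 'spacing': 'list', 'list': [1, 5, 10]},
    )
    info = run_simulations(args, save_result=False, local_mode=True)
    print(info['planing_loss_avg'])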
Example #3
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
	start_ray(local_mode)
	write_to_log('local_mode == {}'.format(local_mode), args)
	SetMrpArgs(args)
	start_time = timeit.default_timer()
	set_random_seed(args.seed)

	reg_types = args.reg_types

	n_hyper_grid = len(hyper_grid_vals)
	n_reps = args.n_reps
	results_dict = dict()

	write_to_log('***** Starting {} reps'.format(n_reps), args)
	for i_rep in range(n_reps):

		for i_hyper_grid, hyper_grid_val in enumerate(hyper_grid_vals):
			args_run = deepcopy(args)
			set_hyper_param(args_run, hyper_grid_val)

			# send jobs:
			out_ids = {reg_type: [None for _ in range(len(reg_grids[reg_type]))] for reg_type in reg_types}
			for reg_type in reg_types:
				for i_reg_param, reg_param in enumerate(reg_grids[reg_type]):
					# launch a remote job only for entries not yet computed (NaN entries):
					if np.isnan(loss[reg_type][i_hyper_grid, i_reg_param, i_rep]):
						out_ids[reg_type][i_reg_param] = run_exp.remote(i_rep, args_run, reg_type, reg_param)
					# end if
				# end for i_reg_param
			# end for reg_type

			# Gather results:
			for reg_type in reg_types:
				for i_reg_param, reg_param in enumerate(reg_grids[reg_type]):
					# fetch the result of the corresponding remote job:
					if out_ids[reg_type][i_reg_param] is not None:
						out = ray.get(out_ids[reg_type][i_reg_param])
						loss[reg_type][i_hyper_grid, i_reg_param, i_rep] = out
					# end if
				# end for i_reg_param
			# end for reg_type
		# end for i_hyper_grid

		# Save results so far
		results_dict = {'hyper_grid_vals': hyper_grid_vals, 'loss': loss, 'reg_grids': reg_grids, 'n_reps_finished': i_rep + 1}
		save_run_data(args, results_dict, verbose=0)
		time_str = time.strftime("%H hours, %M minutes and %S seconds",  time.gmtime(timeit.default_timer() - start_time))
		write_to_log('Finished: {} out of {} reps, time: {}'.format(i_rep + 1, n_reps, time_str), args)

	# end for i_rep

	stop_time = timeit.default_timer()
	write_to_log('Total runtime: ' + time.strftime("%H hours, %M minutes and %S seconds", time.gmtime(stop_time - start_time)), args)
	return results_dict
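
The np.isnan check in the dispatch loop above implies that `loss` is pre-allocated with NaNs, so entries already computed in an earlier (interrupted) run are skipped on resume. A minimal pre-allocation sketch, assuming `args`, `hyper_grid_vals` and `reg_grids` (a dict mapping each regularization type to a 1-D array of parameter values) are already defined; this is an illustration, not part of the original code.

# Hypothetical pre-allocation; the shape follows the indexing
# loss[reg_type][i_hyper_grid, i_reg_param, i_rep] used above.
loss = {
	reg_type: np.full((len(hyper_grid_vals), len(reg_grids[reg_type]), args.n_reps), np.nan)
	for reg_type in args.reg_types
}
results = run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode=False)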
Example #4
def run_simulations(args, save_result, local_mode):
    args_def = deepcopy(args)
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Run simulations in parallel processes -----------------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns Ray object ids:
        loss_mat_rep_id = run_rep.remote(i_rep, param_val_grid, config_grid,
                                         args)
        loss_rep_id_lst.append(loss_mat_rep_id)
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps),
                     args)
        loss_mat[i_rep] = loss_rep
    # end for i_rep
    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid
    }
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    write_to_log(
        ['-' * 10 + 'Defined args: ',
         pretty_print_args(args_def), '-' * 20], args)
    return info_dict
def run_simulations(args, local_mode):
    start_ray(local_mode)
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    run_idx = 0
    for i0 in range(grid_shape[0]):
        for i1 in range(grid_shape[1]):
            args_run = deepcopy(args)
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i0]]
            }
            args_run.default_gamma = gam_grid[i1]

            info_dict = run_main_control(args_run,
                                         save_result=False,
                                         plot=False,
                                         init_ray=False)
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, loss_avg.size))
        # end for
    # end for
    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
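
A hypothetical sketch of the grid definitions consumed by get_grid above; the keys mirror the param_grid_def format used inside the function, though get_grid may require additional keys (e.g. 'type') that are not shown here.

args.l2_grid_def = {'spacing': 'list', 'list': [0.0, 1e-3, 1e-2]}   # hypothetical values
args.gam_grid_def = {'spacing': 'list', 'list': [0.9, 0.95, 0.99]}  # hypothetical values
grid_results = run_simulations(args, local_mode=True)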
Example #6
@ray.remote
def run_exp(i_rep, args_run, reg_type, reg_param):
	# set seed
	set_random_seed(args_run.seed + i_rep)

	# Generate MDP and sampling distribution (with specified uniformity)
	M = MRP(args_run)

	gammaEval = args_run.gammaEval

	# set regularisation parameters
	gamma_guidance, l2_TD, l2_fp, l2_proj = get_regularization_params(args_run, reg_param, reg_type)

	# Generate data:
	data = M.SampleDataMrp(args_run)

	V_est, V_true = run_value_estimation_method(data, M, args_run, gamma_guidance, l2_proj, l2_fp, l2_TD)

	loss_type = args_run.evaluation_loss_type
	pi = None
	eval_loss = evaluate_value_estimation(loss_type, V_true, V_est, M, pi, gammaEval, gamma_guidance)

	return eval_loss
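
With the @ray.remote decorator, a single experiment can be dispatched and fetched as below; this is only a sketch: the reg_type string and parameter value are hypothetical and must match whatever get_regularization_params expects, and an `args_run` namespace like the one used in the earlier examples is assumed.

# Hypothetical single-task dispatch; assumes Ray was already initialized (e.g. via start_ray).
eval_loss = ray.get(run_exp.remote(0, args_run, 'l2_TD', 0.01))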