def simulate_random_state_action_distribution():
    """Run one simulation in which both agents act on random distributions.

    A separate random state-action distribution is computed for each of the
    two agents, a start state is drawn with ``random.choice`` from
    ``task_start_states_list``, and both distributions are handed to
    ``sf.run_simulation``.

    Returns:
        The value returned by ``sf.run_simulation``, which is logged here as
        the total number of actions taken by the agents.
    """
    r1_dist = compute_random_state_action_distribution()
    r2_dist = compute_random_state_action_distribution()
    start_state = random.choice(task_start_states_list)
    n_actions = sf.run_simulation(r1_dist, r2_dist, start_state)
    # The message previously said "expert policy", but this function runs the
    # random policies; the args are passed lazily so formatting only happens
    # when DEBUG logging is enabled.
    lgr.debug("Total number of actions by agents using random policy is %d", n_actions)
    return n_actions
# Example #2
# 0
    # NOTE(review): the enclosing ``def`` line and the setup of ``n_trials``
    # and ``n_actions_expert`` are not part of this excerpt; both names are
    # used below and must already be defined by the time this code runs.
    #
    # For every start state, run ``n_trials`` simulations with the expert,
    # random and learned state-action distributions and log summary
    # statistics of the per-trial results returned by ``sf.run_simulation``.
    n_actions_random = np.zeros(n_trials)
    n_actions_learned = np.zeros(n_trials)
    lgr.info("Loading best_dists.pickle file")
    # Pickle streams are binary data, so the file must be opened in "rb" mode
    # (text mode fails on Python 3; "rb" also works on Python 2).
    # NOTE(review): unpickling can execute arbitrary code -- only load this
    # file if it comes from a trusted source.
    with open("../pickles/best_dists.pickle", "rb") as best_dists_file:
        # The file holds two consecutive pickles, read in this order.
        r1_best_dists = pickle.load(best_dists_file)
        r2_best_dists = pickle.load(best_dists_file)

    def _mode(values):
        # Older SciPy returns the mode as a length-1 array, newer releases
        # return a scalar for 1-D input; atleast_1d accepts both shapes.
        return np.atleast_1d(stats.mode(values)[0])[0]

    bold = ['bold']
    separator = "************************************************************************************************************"
    # (row label, statistic) pairs, in the order the rows are logged.
    summary_rows = (
        ("MIN:", np.amin),
        ("MAX:", np.amax),
        ("MEAN:", np.mean),
        ("MODE:", _mode),
        ("MEDIAN:", np.median),
        ("VAR:", np.var),
    )

    for start_state in task_start_states_list:
        # One learned distribution per agent is picked for this start state
        # and reused across all of its trials.
        r1_best_dist = random.choice(r1_best_dists[start_state])
        r2_best_dist = random.choice(r2_best_dists[start_state])

        for i in range(n_trials):
            # Both agents share the same distribution in the expert and random runs.
            expert_state_action_distribution = ex.compute_expert_state_action_distribution()
            n_actions_expert[i] = sf.run_simulation(expert_state_action_distribution, expert_state_action_distribution, start_state)

            random_state_action_distribution = compute_random_state_action_distribution()
            n_actions_random[i] = sf.run_simulation(random_state_action_distribution, random_state_action_distribution, start_state)

            n_actions_learned[i] = sf.run_simulation(r1_best_dist, r2_best_dist, start_state)

        lgr.info("%s", colored("Number of trials = %d" % n_trials, 'white', attrs=bold))
        lgr.info("%s", colored("Metric: Number of actions per trial", 'white', attrs=bold))
        lgr.info("%s", colored("Start State: %s" % str(start_state), 'magenta', attrs=bold))
        lgr.info("%s", colored(separator, 'white', attrs=bold))
        lgr.info("%s%s%s", colored("                Expert Policy            ", 'red', attrs=bold), colored("Learned Policy        ", 'green', attrs=bold), colored("Random Policy", 'blue', attrs=bold))
        lgr.info("%s", colored(separator, 'white', attrs=bold))
        # Column order matches the header row: expert, learned, random.
        for label, statistic in summary_rows:
            lgr.info(
                "%s\t\t%s\t\t\t%s\t\t\t%s",
                colored(label, 'white', attrs=bold),
                colored(format(statistic(n_actions_expert), '.3f'), 'red', attrs=bold),
                colored(format(statistic(n_actions_learned), '.3f'), 'green', attrs=bold),
                colored(format(statistic(n_actions_random), '.3f'), 'blue', attrs=bold),
            )