Python compute_random_state_action_distribution示例

编程语言: Python

命名空间/包名称: helperFuncs

方法/功能: compute_random_state_action_distribution

hotexamples.com的示例: 2

Python compute_random_state_action_distribution - 已找到2个示例。以下是从开源项目中提取的、评价最高的 helperFuncs.compute_random_state_action_distribution 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： randomActionDistribution.py 项目： sudarshan85/ht_model

def simulate_random_state_action_distribution():
    """Run one simulation in which both agents act on random distributions.

    A separate distribution is obtained for each of the two agents from
    ``compute_random_state_action_distribution()``, a start state is picked
    at random from ``task_start_states_list``, and the three are handed to
    ``sf.run_simulation``.

    Returns:
        The value returned by ``sf.run_simulation`` (logged below as the
        total number of actions taken by the agents).
    """
    r1_dist = compute_random_state_action_distribution()
    r2_dist = compute_random_state_action_distribution()
    start_state = random.choice(task_start_states_list)
    n_actions = sf.run_simulation(r1_dist, r2_dist, start_state)
    # Both agents use random distributions here, so the message must say
    # "random policy"; the previous text ("expert policy") was misleading.
    # Arguments are passed lazily so formatting only happens when DEBUG is on.
    lgr.debug("Total number of actions by agents using random policy is %d", n_actions)
    return n_actions

示例#2

显示文件

文件： comapreDistributions.py 项目： sudarshan85/ht_model

    n_actions_random = np.zeros(n_trials)
    n_actions_learned = np.zeros(n_trials)
    lgr.info("Loading best_dists.pickle file")
    with open("../pickles/best_dists.pickle", "r") as best_dists_file:
        r1_best_dists = pickle.load(best_dists_file)
        r2_best_dists = pickle.load(best_dists_file)

    for start_state in task_start_states_list:
        r1_best_dist = random.choice(r1_best_dists[start_state])
        r2_best_dist = random.choice(r2_best_dists[start_state])

        for i in range(n_trials):
            expert_state_action_distribution = ex.compute_expert_state_action_distribution()
            n_actions_expert[i] = sf.run_simulation(expert_state_action_distribution, expert_state_action_distribution, start_state)

            random_state_action_distribution = compute_random_state_action_distribution()
            n_actions_random[i] = sf.run_simulation(random_state_action_distribution, random_state_action_distribution, start_state)

            n_actions_learned[i] = sf.run_simulation(r1_best_dist, r2_best_dist, start_state)
        lgr.info("%s", colored("Number of trials = %d" % n_trials, 'white', attrs = ['bold']))
        lgr.info("%s", colored("Metric: Number of actions per trial", 'white', attrs = ['bold']))
        lgr.info("%s", colored("Start State: %s" % str(start_state), 'magenta', attrs = ['bold']))
        lgr.info("%s", colored("************************************************************************************************************", 'white', attrs = ['bold']))
        lgr.info("%s%s%s", colored("                Expert Policy            ", 'red', attrs = ['bold']), colored("Learned Policy        ", 'green', attrs = ['bold']), colored("Random Policy", 'blue', attrs = ['bold']))
        lgr.info("%s", colored("************************************************************************************************************", 'white', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MIN:", 'white', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MAX:", 'white', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MEAN:", 'white', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MODE:", 'white', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_expert)[0][0], '.3f'), 'red', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_learned)[0][0], '.3f'), 'green', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_random)[0][0], '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MEDIAN:", 'white', attrs = ['bold']), colored("%s" % format(np.median(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.median(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.median(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("VAR:", 'white', attrs = ['bold']), colored("%s" % format(np.var(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.var(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.var(n_actions_random), '.3f'), 'blue', attrs = ['bold']))