def simulate_random_state_action_distribution():
    """Run one simulation in which both agents act on random distributions.

    A separate random state-action distribution is computed for each of the
    two agents, a start state is picked at random from
    ``task_start_states_list``, and all three are passed to
    ``sf.run_simulation``.

    Returns:
        The value returned by ``sf.run_simulation``, which is logged here as
        the total number of actions taken by the agents.
    """
    r1_dist = compute_random_state_action_distribution()
    r2_dist = compute_random_state_action_distribution()
    start_state = random.choice(task_start_states_list)

    n_actions = sf.run_simulation(r1_dist, r2_dist, start_state)

    # This function exercises the *random* policy; the message previously
    # said "expert policy", which made the debug log misleading.
    lgr.debug("Total number of actions by agents using random policy is %d", n_actions)
    return n_actions
# Compare the expert, learned and random policies: for every task start state,
# run `n_trials` simulations per policy and log summary statistics of the
# number of actions each simulation reported.
# NOTE(review): `n_trials` and `n_actions_expert` are read here but are not
# defined in this part of the file -- presumably they are set up just above.
n_actions_random = np.zeros(n_trials)
n_actions_learned = np.zeros(n_trials)

lgr.info("Loading best_dists.pickle file")
# Pickle data is a byte stream, so the file has to be opened in binary mode;
# text mode ("r") makes pickle.load fail on Python 3.
# NOTE: unpickling executes arbitrary code -- only load files you trust.
with open("../pickles/best_dists.pickle", "rb") as best_dists_file:
    # The file holds two consecutive pickles: agent 1's table, then agent 2's.
    r1_best_dists = pickle.load(best_dists_file)
    r2_best_dists = pickle.load(best_dists_file)

separator_line = "************************************************************************************************************"

# (row label, statistic) pairs, in the order the rows are logged.
summary_statistics = [
    ("MIN:", np.amin),
    ("MAX:", np.amax),
    ("MEAN:", np.mean),
    # Older SciPy returns the mode as a length-1 array, newer SciPy returns a
    # scalar for 1-D input; np.atleast_1d makes `[0]` valid for both.
    ("MODE:", lambda samples: np.atleast_1d(stats.mode(samples)[0])[0]),
    ("MEDIAN:", np.median),
    ("VAR:", np.var),
]

for start_state in task_start_states_list:
    # Pick one learned distribution per agent for this start state.
    r1_best_dist = random.choice(r1_best_dists[start_state])
    r2_best_dist = random.choice(r2_best_dists[start_state])

    # The result arrays are fully overwritten for every start state.
    for i in range(n_trials):
        # Expert and random baselines use the same distribution for both agents.
        expert_state_action_distribution = ex.compute_expert_state_action_distribution()
        n_actions_expert[i] = sf.run_simulation(expert_state_action_distribution, expert_state_action_distribution, start_state)

        random_state_action_distribution = compute_random_state_action_distribution()
        n_actions_random[i] = sf.run_simulation(random_state_action_distribution, random_state_action_distribution, start_state)

        n_actions_learned[i] = sf.run_simulation(r1_best_dist, r2_best_dist, start_state)

    # Report header for this start state.
    lgr.info("%s", colored("Number of trials = %d" % n_trials, 'white', attrs=['bold']))
    lgr.info("%s", colored("Metric: Number of actions per trial", 'white', attrs=['bold']))
    lgr.info("%s", colored("Start State: %s" % str(start_state), 'magenta', attrs=['bold']))
    lgr.info("%s", colored(separator_line, 'white', attrs=['bold']))
    lgr.info("%s%s%s", colored(" Expert Policy ", 'red', attrs=['bold']), colored("Learned Policy ", 'green', attrs=['bold']), colored("Random Policy", 'blue', attrs=['bold']))
    lgr.info("%s", colored(separator_line, 'white', attrs=['bold']))

    # One row per statistic; columns are expert / learned / random.
    for label, statistic in summary_statistics:
        lgr.info(
            "%s\t\t%s\t\t\t%s\t\t\t%s",
            colored(label, 'white', attrs=['bold']),
            colored("%s" % format(statistic(n_actions_expert), '.3f'), 'red', attrs=['bold']),
            colored("%s" % format(statistic(n_actions_learned), '.3f'), 'green', attrs=['bold']),
            colored("%s" % format(statistic(n_actions_random), '.3f'), 'blue', attrs=['bold'])
        )