    def calculate_pairwise_distances(self):
        self.distances_purchase = []
        for u, v in itertools.combinations(self.purchases, 2):
            wd = pe.get_wd(u, v, normalize)
            self.distances_purchase.append(wd)

        self.distances_no_purchase = []
        for u, v in itertools.combinations(self.no_purchases, 2):
            wd = pe.get_wd(u, v, normalize)
            self.distances_no_purchase.append(wd)
    def calc_avg_dist_from_centroid(self):
        temp = []
        for d in self.purchases:
            temp.append(pe.get_wd(d, self.avg_purchase, normalize))
        self.avg_dist_purchase = np.mean(temp)

        temp = []
        for d in self.no_purchases:
            temp.append(pe.get_wd(d, self.avg_no_purchase, normalize))
        self.avg_dist_no_purchase = np.mean(temp)
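
# The two methods above, and the evaluation functions below, rely on pe.get_wd for the
# EMD/Wasserstein comparisons. Its implementation lives in the pe helper module and is not
# shown here; the sketch below only illustrates one plausible reading, assuming get_wd
# computes the 1D Wasserstein distance between two histograms and, when `normalize` is set,
# first rescales them to probability distributions. The name _get_wd_sketch is hypothetical
# and is not used anywhere in this script; it relies on scipy.stats.wasserstein_distance,
# which compare_clusters below also uses.
def _get_wd_sketch(u, v, normalize):
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    if normalize and u.sum() > 0 and v.sum() > 0:
        u = u / u.sum()
        v = v / v.sum()
    # Interpret the histograms as weights over their (shared) bin indices
    support = np.arange(len(u))
    return wasserstein_distance(support, support, u_weights=u, v_weights=v)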
def plot_distr_categories(
        args,
        info,
        ending_png,
        expert_actions,
        agent_actions,
        expert_purchase,
        agent_purchase,
        expert_no_purchase,
        agent_no_purchase,
        agent_n_shopping_days,
        expert_n_shopping_days,
        category=1
):
    # Calculate Wasserstein distances
    wd_purchase = pe.get_wd(expert_purchase, agent_purchase, normalize)
    wd_no_purchase = pe.get_wd(expert_no_purchase, agent_no_purchase, normalize)

    n_experts = 2 if (args['state_rep'] == 24 or args['state_rep'] == 31) else args['n_experts']

    agent_shopping_ratio = format(agent_n_shopping_days / (args['n_experts'] * sample_length), '.3f')
    expert_shopping_ratio = format(expert_n_shopping_days / (n_experts * sample_length), '.3f')

    expert_str = 'Expert (p.r.: ' + str(expert_shopping_ratio) + ')'
    agent_str = 'Agent (p.r.: ' + str(agent_shopping_ratio) + ')'

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle('Comparison at population level for category ' + str(category))

    # Plot (purchase)
    data = {expert_str: expert_purchase, agent_str: agent_purchase}
    pe.bar_plot(ax1, data, colors=None, total_width=0.7)
    ax1.set_xticks([], [])
    ax1.set_title('Purchase | EMD: {:.5f}'.format(wd_purchase))
    # ax1.set_title('Last week | Purchase today')
    ax1.set_ylabel('Probability')

    # Plot (no purchase)
    data = {expert_str: expert_no_purchase, agent_str: agent_no_purchase}
    pe.bar_plot(ax2, data, colors=None, total_width=0.7)
    ax2.set_xticks([], [])
    ax2.set_title('No purchase | EMD: {:.5f}'.format(wd_no_purchase))
    # ax2.set_title('Last week | No purchase today')
    ax2.set_ylabel('Probability')

    if show_info:
        fig.text(0.5, 0.025, info, ha='center')

    if save_plots:
        save_plt_as_png(fig, path=join(dir_path, 'figs', 'pop' + 'cat' + str(category) + ending_png))

    if args['state_rep'] == 23:
        # Plot histogram of purchase amounts
        expert_amounts = np.ravel(expert_actions)[np.flatnonzero(expert_actions)]
        agent_amounts = np.ravel(agent_actions)[np.flatnonzero(agent_actions)]

        fig, ax = plt.subplots()
        ax.hist(expert_amounts, bins=np.arange(1, 11), alpha=0.8, density=True, label='Expert')
        ax.hist(agent_amounts, bins=np.arange(1, 11), alpha=0.8, density=True, label='Agent')
        ax.set_xlabel('Purchase amount')
        ax.set_ylabel('Normalized frequency')
        ax.legend()

        if show_info:
            fig.text(0.5, 0.025, info, ha='center')

        if save_plots:
            save_plt_as_png(fig, path=join(dir_path, 'figs', 'pop_amounts' + ending_png))
def evaluate_policy_at_population_level(args, model_dir_path, ending_eps, ending_png, info):
    # Load environment, model and observation normalizer
    env, model, obs_normalizer = pe.get_env_and_model(args, model_dir_path, sample_length, model_path=model_path)

    # Get possible validation states
    possible_val_states = pe.get_possible_val_states(n_last_days, max_n_purchases_per_n_last_days)

    # Sample expert data
    expert_trajectories = env.generate_expert_trajectories(out_dir=None)
    expert_states = expert_trajectories['states']
    expert_actions = expert_trajectories['actions']

    expert_purchase, expert_no_purchase, expert_n_shopping_days = pe.get_cond_distribs(
        expert_states,
        expert_actions,
        n_last_days,
        max_n_purchases_per_n_last_days,
        normalize,
        case=args['state_rep']
    )

    n_experts = 2 if (args['state_rep'] == 24 or args['state_rep'] == 31) else args['n_experts']

    # Sample agent data
    agent_states = []
    agent_actions = []
    for i in range(n_experts):
        initial_state = random.choice(expert_states[i])
        # What happens if the agent is repeatedly initialized with history from expert with unique behavior?
        # Should we collect more than n_experts samples (especially if n_experts <= 10)?
        temp_states, temp_actions = pe.sample_from_policy(env, model, obs_normalizer, initial_state=initial_state)
        agent_states.append(temp_states)
        agent_actions.append(temp_actions)

    agent_purchase, agent_no_purchase, agent_n_shopping_days = pe.get_cond_distribs(
        agent_states,
        agent_actions,
        n_last_days,
        max_n_purchases_per_n_last_days,
        normalize,
        case=args['state_rep']
    )

    # Calculate Wasserstein distances
    wd_purchase = pe.get_wd(expert_purchase, agent_purchase, normalize)
    wd_no_purchase = pe.get_wd(expert_no_purchase, agent_no_purchase, normalize)

    agent_shopping_ratio = format(agent_n_shopping_days / (n_experts * sample_length), '.3f')
    expert_shopping_ratio = format(expert_n_shopping_days / (n_experts * sample_length), '.3f')

    expert_str = 'Expert (p.r.: ' + str(expert_shopping_ratio) + ')'
    agent_str = 'Agent (p.r.: ' + str(agent_shopping_ratio) + ')'

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle('Comparison at population level')

    # Plot (purchase)
    data = {expert_str: expert_purchase, agent_str: agent_purchase}
    pe.bar_plot(ax1, data, colors=None, total_width=0.7)
    ax1.set_xticks([], [])
    ax1.set_title('Purchase | EMD: {:.5f}'.format(wd_purchase))
    # ax1.set_title('Last week | Purchase today')
    ax1.set_ylabel('Probability')

    # Plot (no purchase)
    data = {expert_str: expert_no_purchase, agent_str: agent_no_purchase}
    pe.bar_plot(ax2, data, colors=None, total_width=0.7)
    ax2.set_xticks([], [])
    ax2.set_title('No purchase | EMD: {:.5f}'.format(wd_no_purchase))
    # ax2.set_title('Last week | No purchase today')
    ax2.set_ylabel('Probability')

    if show_info:
        fig.text(0.5, 0.025, info, ha='center')

    if save_plots:
        save_plt_as_png(fig, path=join(dir_path, 'figs', 'pop' + ending_png))

    if not show_plots:
        plt.close(fig)

    if args['state_rep'] == 23:
        # Plot histogram of purchase amounts
        expert_amounts = np.ravel(expert_actions)[np.flatnonzero(expert_actions)]
        agent_amounts = np.ravel(agent_actions)[np.flatnonzero(agent_actions)]

        fig, ax = plt.subplots()
        ax.hist(expert_amounts, bins=np.arange(1, 11), alpha=0.8, density=True, label='Expert')
        ax.hist(agent_amounts, bins=np.arange(1, 11), alpha=0.8, density=True, label='Agent')
        ax.set_xlabel('Purchase amount')
        ax.set_ylabel('Normalized frequency')
        ax.legend()

        if show_info:
            fig.text(0.5, 0.025, info, ha='center')

        if save_plots:
            save_plt_as_png(fig, path=join(dir_path, 'figs', 'pop_amounts' + ending_png))

        if not show_plots:
            plt.close(fig)

    if show_plots:
        plt.show()
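
# pe.get_possible_val_states enumerates the validation states that index the conditional
# purchase / no-purchase distributions compared throughout these evaluation functions (and
# that pe.set_xticks uses for axis labels). Its exact definition is in the pe module; the
# sketch below shows one plausible reading, assuming a validation state is a binary purchase
# history over the last n_last_days with at most max_n_purchases purchases. The name
# _possible_val_states_sketch is hypothetical and is not used anywhere in this script.
def _possible_val_states_sketch(n_last_days, max_n_purchases):
    # All length-n_last_days binary purchase histories with at most max_n_purchases purchases;
    # histories above the cap may be handled separately by the real helper.
    return [list(s) for s in itertools.product([0, 1], repeat=n_last_days)
            if sum(s) <= max_n_purchases]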
def evaluate_policy_at_individual_level(args, model_dir_path, ending_eps, ending_png, info):
    # Load environment, model and observation normalizer
    env, model, obs_normalizer = pe.get_env_and_model(args, model_dir_path, sample_length, model_path=model_path)

    # Get possible validation states
    possible_val_states = pe.get_possible_val_states(n_last_days, max_n_purchases_per_n_last_days)

    # Sample expert data to calculate average expert behavior
    expert_trajectories = env.generate_expert_trajectories(out_dir=None, n_demos_per_expert=1, n_expert_time_steps=sample_length)
    expert_states = expert_trajectories['states']
    expert_actions = expert_trajectories['actions']
    sex = ['F' if s == 1 else 'M' for s in expert_trajectories['sex']]
    age = [int(a) for a in expert_trajectories['age']]

    avg_expert_purchase, avg_expert_no_purchase, avg_expert_n_shopping_days = pe.get_cond_distribs(
        expert_states,
        expert_actions,
        n_last_days,
        max_n_purchases_per_n_last_days,
        normalize,
        case=args['state_rep']
    )

    n_experts = 2 if (args['state_rep'] == 24 or args['state_rep'] == 31) else args['n_experts']
    avg_expert_shopping_ratio = format(avg_expert_n_shopping_days / (n_experts * sample_length), '.2f')

    # Split the experts into groups of (at most) four, one figure pair per group
    all_expert_indices = range(n_experts)
    expert_indices_list = [all_expert_indices[i:i+4] for i in range(0, n_experts, 4)]

    for j, expert_indices in enumerate(expert_indices_list):
        fig1, axes1 = plt.subplots(2, 2, sharex='col')
        fig2, axes2 = plt.subplots(2, 2, sharex='col')

        for i, ax1, ax2 in zip(expert_indices, axes1.flat, axes2.flat):
            # Sample agent data starting with expert's history
            initial_state = random.choice(expert_states[i])
            agent_states, agent_actions = pe.sample_from_policy(env, model, obs_normalizer, initial_state=initial_state)

            agent_purchase, agent_no_purchase, agent_n_shopping_days = pe.get_cond_distribs(
                [agent_states],
                [agent_actions],
                n_last_days,
                max_n_purchases_per_n_last_days,
                normalize,
                case=args['state_rep']
            )
            agent_shopping_ratio = format(agent_n_shopping_days / sample_length, '.3f')

            expert_purchase, expert_no_purchase, expert_n_shopping_days = pe.get_cond_distribs(
                [expert_states[i]],
                [expert_actions[i]],
                n_last_days,
                max_n_purchases_per_n_last_days,
                normalize,
                case=args['state_rep']
            )
            expert_shopping_ratio = format(expert_n_shopping_days / sample_length, '.3f')

            if args['state_rep'] == 23:
                expert_histo, _ = np.histogram(expert_actions[i], bins=range(11))
                agent_histo, _ = np.histogram(agent_actions, bins=range(11))

            # Calculate Wasserstein distances
            wd_purchase = pe.get_wd(expert_purchase, agent_purchase, normalize)
            wd_purchase_avg = pe.get_wd(avg_expert_purchase, agent_purchase, normalize)
            wd_no_purchase = pe.get_wd(expert_no_purchase, agent_no_purchase, normalize)
            wd_no_purchase_avg = pe.get_wd(avg_expert_no_purchase, agent_no_purchase, normalize)

            expert_str = 'Expert (p.r.: ' + str(expert_shopping_ratio) + ')'
            agent_str = 'Agent (p.r.: ' + str(agent_shopping_ratio) + ')'
            avg_expert_str = 'Avg. expert (p.r.: ' + str(avg_expert_shopping_ratio) + ')'

            # Plot (purchase)
            data = {expert_str: expert_purchase, agent_str: agent_purchase, avg_expert_str: avg_expert_purchase}
            pe.bar_plot(ax1, data, colors=None, total_width=0.7)
            ax1.set_title('Expert {}: {}, age {}\nEMD (expert): {:.5f} | EMD (avg. expert): {:.5f}'.format(
                i+1, sex[i], age[i], wd_purchase, wd_purchase_avg))

            # Plot (no purchase)
            data = {expert_str: expert_no_purchase, agent_str: agent_no_purchase, avg_expert_str: avg_expert_no_purchase}
            pe.bar_plot(ax2, data, colors=None, total_width=0.7)
            ax2.set_title('Expert {}: {}, age {}\nEMD (expert): {:.5f} | EMD (avg. expert): {:.5f}'.format(
                i+1, sex[i], age[i], wd_no_purchase, wd_no_purchase_avg))

        fig1.suptitle('Comparison at individual level (purchase)')
        fig2.suptitle('Comparison at individual level (no purchase)')

        if show_info:
            for ax1, ax2 in zip(axes1[1][:], axes2[1][:]):
                ax1.set_xticks([], [])
                ax2.set_xticks([], [])
            fig1.text(0.5, 0.025, info, ha='center')
            fig2.text(0.5, 0.025, info, ha='center')
        else:
            fig1.subplots_adjust(bottom=0.2)
            fig2.subplots_adjust(bottom=0.2)
            for ax1, ax2 in zip(axes1[1][:], axes2[1][:]):
                pe.set_xticks(ax1, possible_val_states, max_n_purchases_per_n_last_days)
                pe.set_xticks(ax2, possible_val_states, max_n_purchases_per_n_last_days)

        if save_plots:
            save_plt_as_png(fig1, path=join(dir_path, 'figs', 'ind_purchase_' + str(j+1) + ending_png))
            save_plt_as_png(fig2, path=join(dir_path, 'figs', 'ind_no_purchase_' + str(j+1) + ending_png))

        if not show_plots:
            plt.close(fig1)
            plt.close(fig2)

    if show_plots:
        plt.show()
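
# Hypothetical usage sketch (not part of the original script): how the evaluation routines
# above might be driven for a single trained model. The args dict, directory name, file
# endings and info string are placeholder assumptions; in the real script these come from
# the loaded training configuration and the module-level settings (sample_length, dir_path,
# model_path, show_plots, save_plots, ...). compare_clusters below takes the same arguments
# and could be invoked in the same way. This sketch is never called.
def _example_evaluation_run():
    example_args = {'state_rep': 22, 'n_experts': 10}   # placeholder configuration values
    example_model_dir = join(dir_path, 'example_run')   # placeholder model directory
    example_info = 'example run'
    evaluate_policy_at_population_level(example_args, example_model_dir, '.eps', '.png', example_info)
    evaluate_policy_at_individual_level(example_args, example_model_dir, '.eps', '.png', example_info)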
def compare_clusters(args, model_dir_path, ending_eps, ending_png, info):
    # Load environment, model and observation normalizer
    env, model, obs_normalizer = pe.get_env_and_model(args, model_dir_path, sample_length, model_path=model_path)

    # Get possible validation states
    possible_val_states = pe.get_possible_val_states(n_last_days, max_n_purchases_per_n_last_days)

    # Get multiple samples from each expert
    assert (sample_length % n_demos_per_expert) == 0
    expert_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=n_demos_per_expert,
        n_expert_time_steps=int(sample_length / n_demos_per_expert)
    )
    expert_states = np.array(expert_trajectories['states'])
    expert_actions = np.array(expert_trajectories['actions'])
    sex = ['F' if s == 1 else 'M' for s in expert_trajectories['sex']]
    age = [int(a) for a in expert_trajectories['age']]

    n_experts = 2 if (args['state_rep'] == 24 or args['state_rep'] == 31) else args['n_experts']

    experts = []
    for states, actions in zip(np.split(expert_states, n_experts), np.split(expert_actions, n_experts)):  # Loop over experts
        purchases = []
        no_purchases = []

        for s, a in zip(states, actions):  # Loop over demonstrations
            temp_purchase, temp_no_purchase, _ = pe.get_cond_distribs(
                [s],
                [a],
                n_last_days,
                max_n_purchases_per_n_last_days,
                normalize,
                case=args['state_rep']
            )
            purchases.append(temp_purchase)
            no_purchases.append(temp_no_purchase)

        avg_purchase, avg_no_purchase, _ = pe.get_cond_distribs(
            states,
            actions,
            n_last_days,
            max_n_purchases_per_n_last_days,
            normalize,
            case=args['state_rep']
        )

        experts.append(Expert(purchases, no_purchases, avg_purchase, avg_no_purchase))

    # Calculate average expert behavior
    expert_trajectories = env.generate_expert_trajectories(out_dir=None, n_demos_per_expert=1, n_expert_time_steps=sample_length)
    expert_states = expert_trajectories['states']
    expert_actions = expert_trajectories['actions']

    avg_expert_purchase, avg_expert_no_purchase, _ = pe.get_cond_distribs(
        expert_states,
        expert_actions,
        n_last_days,
        max_n_purchases_per_n_last_days,
        normalize,
        case=args['state_rep']
    )

    # Clustering is skipped for the two-expert state representations (24 and 31)
    do_cluster_comparison = cluster_comparison and args['state_rep'] not in (24, 31)

    if do_cluster_comparison:
        # Cluster expert data (purchase)
        X = np.array([e.avg_purchase for e in experts])
        T_purchase = fclusterdata(X, 3, 'maxclust', method='single', metric=lambda u, v: wasserstein_distance(u, v))
        T_purchase = pe.get_cluster_labels(T_purchase)

        # Cluster expert data (no purchase)
        X = np.array([e.avg_no_purchase for e in experts])
        T_no_purchase = fclusterdata(X, 3, 'maxclust', method='single', metric=lambda u, v: wasserstein_distance(u, v))
        T_no_purchase = pe.get_cluster_labels(T_no_purchase)

        assert np.array_equal(T_purchase, T_no_purchase)
        cluster_indices = [np.argwhere(T_purchase == i) for i in [1, 2, 3]]

    distances_purchase = []
    distances_no_purchase = []
    all_distances_purchase = []
    all_distances_no_purchase = []

    for i in range(n_experts):
        # Sample agent data starting with expert's history
        initial_state = random.choice(expert_states[i])
        agent_states, agent_actions = pe.sample_from_policy(env, model, obs_normalizer, initial_state=initial_state)

        agent_purchase, agent_no_purchase, _ = pe.get_cond_distribs(
            [agent_states],
            [agent_actions],
            n_last_days,
            max_n_purchases_per_n_last_days,
            normalize,
            case=args['state_rep']
        )

        e = experts[i]

        # Compare distributions (purchase)
        if do_cluster_comparison:
            temp = [e.avg_dist_purchase]
            temp.append(pe.get_wd(e.avg_purchase, agent_purchase, normalize))
            temp.append(pe.get_wd(avg_expert_purchase, agent_purchase, normalize))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_purchase, agent_purchase, normalize) for j in cluster_indices[0]]))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_purchase, agent_purchase, normalize) for j in cluster_indices[1]]))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_purchase, agent_purchase, normalize) for j in cluster_indices[2]]))
            distances_purchase.append(temp)

        temp = [pe.get_wd(e.avg_purchase, agent_purchase, normalize) for e in experts]
        temp.append(pe.get_wd(avg_expert_purchase, agent_purchase, normalize))
        all_distances_purchase.append(temp)

        # Compare distributions (no purchase)
        if do_cluster_comparison:
            temp = [e.avg_dist_no_purchase]
            temp.append(pe.get_wd(e.avg_no_purchase, agent_no_purchase, normalize))
            temp.append(pe.get_wd(avg_expert_no_purchase, agent_no_purchase, normalize))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_no_purchase, agent_no_purchase, normalize) for j in cluster_indices[0]]))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_no_purchase, agent_no_purchase, normalize) for j in cluster_indices[1]]))
            temp.append(np.mean([pe.get_wd(experts[j[0]].avg_no_purchase, agent_no_purchase, normalize) for j in cluster_indices[2]]))
            distances_no_purchase.append(temp)

        temp = [pe.get_wd(e.avg_no_purchase, agent_no_purchase, normalize) for e in experts]
        temp.append(pe.get_wd(avg_expert_no_purchase, agent_no_purchase, normalize))
        all_distances_no_purchase.append(temp)

    if do_cluster_comparison:
        ##### Plot distance to one expert #####
        columns = ['Var. in expert cluster', 'Dist. to expert', 'Dist. to avg. expert',
                   'Dist. to 1st cluster', 'Dist. to 2nd cluster', 'Dist. to 3rd cluster']
        index = ['E{}\n({})'.format(i + 1, int(T_purchase[i])) for i in range(n_experts)]

        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.subplots_adjust(bottom=0.30)

        # Plot distance (purchase)
        distances_purchase = pd.DataFrame(distances_purchase, columns=columns, index=index)
        seaborn.heatmap(distances_purchase, cmap='BuPu', ax=ax1, linewidth=1, cbar_kws={'label': 'EMD'})
        ax1.set_title('Purchase')

        # Plot distance (no purchase)
        distances_no_purchase = pd.DataFrame(distances_no_purchase, columns=columns, index=index)
        seaborn.heatmap(distances_no_purchase, cmap='BuPu', ax=ax2, linewidth=1, cbar_kws={'label': 'EMD'})
        ax2.set_title('No purchase')

        if show_info:
            fig.text(0.5, 0.025, info, ha='center')

        if save_plots:
            save_plt_as_png(fig, path=join(dir_path, 'figs', 'heatmap' + ending_png))

        if not show_plots:
            plt.close(fig)

    ##### Plot distance to all experts #####
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey='row')
    fig.subplots_adjust(bottom=0.25)
    columns = ['Expert {}'.format(i + 1) for i in range(n_experts)]
    columns.append('Avg. expert')
    index = ['Agent {}'.format(i + 1) for i in range(n_experts)]

    # Plot the distance between each agent sample and every expert (purchase)
    all_distances_purchase = pd.DataFrame(all_distances_purchase, columns=columns, index=index)
    seaborn.heatmap(all_distances_purchase, cmap='BuPu', ax=ax1, linewidth=1, cbar_kws={'label': 'EMD'})
    ax1.set_title('Purchase')

    # Plot the distance between each agent sample and every expert (no purchase)
    all_distances_no_purchase = pd.DataFrame(all_distances_no_purchase, columns=columns, index=index)
    seaborn.heatmap(all_distances_no_purchase, cmap='BuPu', ax=ax2, linewidth=1, cbar_kws={'label': 'EMD'})
    ax2.set_title('No purchase')

    if show_info:
        fig.text(0.5, 0.025, info, ha='center')

    if save_plots:
        save_plt_as_png(fig, path=join(dir_path, 'figs', 'heatmap_all' + ending_png))

    if not show_plots:
        plt.close(fig)

    if show_plots:
        plt.show()

'''