def plot_action_v010(action, action_name=None):
    """Plot every action dimension of a trajectory in its own stacked subplot.

    Args:
        action: 2-D array read as ``action[time, dim]`` (its ``shape[0]`` is
            used as the horizon and ``shape[1]`` as the number of subplots).
        action_name (str, optional): Name used in the figure title and in the
            y-axis labels. ``'Action'`` is used when it is None.

    The figure is shown in non-blocking mode; nothing is returned.
    """
    horizon, n_dims = action.shape[0], action.shape[1]

    fig, axs = subplots(n_dims, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)

    name = 'Action' if action_name is None else action_name
    fig.suptitle('%s Trajectory' % name, fontweight='bold')

    time_steps = np.arange(horizon)
    for dim, ax in enumerate(axs):
        ax.plot(time_steps, action[:, dim], color=ACTION_COLORS[dim],
                linestyle='-')
        ax.set_ylabel('%s %02d' % (name, dim))

    # Only the bottom subplot carries the shared x-axis label.
    axs[-1].set_xlabel('Time')

    plt.show(block=False)
def plot_state_v010(state, state_name=None):
    """Plot every state dimension of a trajectory in its own stacked subplot.

    Args:
        state: 2-D array read as ``state[time, dim]`` (its ``shape[0]`` is
            used as the horizon and ``shape[1]`` as the number of subplots).
        state_name (str, optional): Name used in the figure title and in the
            y-axis labels. ``'State'`` is used when it is None.

    The figure is shown in non-blocking mode; nothing is returned.
    """
    H = state.shape[0]
    dS = state.shape[1]

    fig, axs = subplots(dS, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)

    if state_name is None:
        state_name = 'State'

    fig.suptitle('%s Trajectory' % state_name, fontweight='bold')

    ts = np.arange(H)
    for ss in range(dS):
        axs[ss].plot(ts, state[:, ss], color=STATE_COLORS[ss], linestyle='-')
        # Use the caller-supplied name here as well (the title already does),
        # mirroring plot_action_v010. With the default name the label is
        # unchanged: 'State 00', 'State 01', ...
        axs[ss].set_ylabel('%s %02d' % (state_name, ss))

    # Only the bottom subplot carries the shared x-axis label.
    axs[-1].set_xlabel('Time')

    plt.show(block=False)
def plot_weigths_unintentionals(path_list, block=False):
    """Plot the weights of the set of unintentional policies.

    Args:
        path_list (dict): Must contain ``'agent_infos'``, a sequence with one
            dict per time step. Each dict is expected to hold a
            ``'mixing_coeff'`` 2-D array; its ``shape[0]`` gives the number of
            subplots and ``shape[1]`` the number of curves per subplot.
        block (bool): Forwarded to ``plt.show``.

    Returns:
        tuple or None: ``(fig, axs)`` when something was plotted. None when
        ``'agent_infos'`` is empty or its last entry has no
        ``'mixing_coeff'`` key (a message is printed in that case).
    """
    agent_infos = path_list['agent_infos']

    # An empty path has no last entry to inspect; treat it like missing data
    # instead of failing with IndexError on agent_infos[-1].
    if len(agent_infos) == 0 or 'mixing_coeff' not in agent_infos[-1]:
        print('There is not mixing_coeff. Then not plotting anything!')
        return None

    H = len(agent_infos)
    act_dim = agent_infos[-1]['mixing_coeff'].shape[0]
    n_unintentional = agent_infos[-1]['mixing_coeff'].shape[1]

    fig, axs = subplots(act_dim, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Mixing weights for Unintentional Policies',
                 fontweight='bold')

    # Stack the per-step coefficients: data[t, row, col].
    data = np.zeros((H, act_dim, n_unintentional))
    for tt in range(H):
        data[tt] = agent_infos[tt]['mixing_coeff']

    ts = np.arange(H)
    for aa in range(act_dim):
        # Passing a 2-D slice draws one line per column in the same axes.
        axs[aa].plot(ts, data[:, aa, :], linestyle=':')
        axs[aa].set_ylabel('U - %02d' % aa)
        axs[aa].set_xlabel('Time step')

    plt.show(block=block)

    return fig, axs
def plot_weights(agent_infos, observations, obj_idxs, latex_plot=False,
                 block=False):
    """Plot the mixing weights per action dimension plus a proximity flag.

    Args:
        agent_infos: Sequence with one dict per time step, each holding a
            ``'mixing_coeff'`` 2-D array. Its ``shape[0]`` is used as the
            number of weight curves and ``shape[1]`` as the number of
            subplots (one per action dimension).
        observations: Sequence indexed per time step; ``observations[t]`` is
            indexed with ``obj_idxs``.
        obj_idxs: Index (or indices) selecting the entries of an observation
            whose Euclidean norm is compared against 0.1 to build the
            'Close to cylinder' curve.
            NOTE(review): presumably a relative object position -- confirm.
        latex_plot (bool): When True, ``set_latex_plot()`` is called before
            the figure is created.
        block (bool): Forwarded to ``plt.show``.
    """
    T = len(agent_infos)
    nUnint = agent_infos[-1]['mixing_coeff'].shape[0]
    dA = agent_infos[-1]['mixing_coeff'].shape[1]

    all_data = np.zeros((T, nUnint, dA))
    touching = np.zeros(T)

    for t in range(T):
        all_data[t] = agent_infos[t]['mixing_coeff']
        dist = np.linalg.norm(observations[t][obj_idxs])
        print(t, dist)
        # Stored as 0.0 / 1.0 so it can be drawn next to the weights.
        touching[t] = dist < 0.1

    if latex_plot:
        set_latex_plot()

    fig, axs = subplots(dA)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])

    lines = list()
    labels = list()

    # The x axis is the step index scaled by this factor and labelled in
    # seconds.
    Ts = 1e-3
    time = np.arange(T) * Ts

    for aa, ax in enumerate(axs):
        for uu in range(nUnint):
            plot_w = ax.plot(time, all_data[:, uu, aa])[0]
            # Legend handles are collected from the first subplot only, so
            # every entry appears once in the shared figure legend.
            if aa == 0:
                lines.append(plot_w)
                labels.append('Weight U-%02d' % uu)

        ax.set_ylabel('Action %d' % (aa + 1), fontsize=35)

        plot_t = ax.plot(time, touching)[0]
        if aa == 0:
            lines.append(plot_t)
            labels.append('Close to cylinder')

    axs[-1].set_xlabel('Time (s)', fontsize=35)

    legend = fig.legend(lines, labels, loc='lower center', ncol=3,
                        labelspacing=0., prop={'size': 30}, fancybox=True)
    fig.set_size_inches(19, 11)  # 1920 x 1080
    fig.tight_layout()
    # Leave room at the bottom for the figure-level legend.
    fig.subplots_adjust(bottom=0.15)

    # Legend.draggable() was removed in Matplotlib 3.1; prefer the
    # replacement and fall back only on installations that predate it.
    if hasattr(legend, 'set_draggable'):
        legend.set_draggable(True)
    else:
        legend.draggable(True)

    plt.show(block=block)
def plot_process_general_data(csv_file, block=False):
    """Plot the general learning-progress columns of a progress CSV file.

    One subplot is drawn per requested column, stacked vertically; only the
    bottom subplot shows x tick labels.

    Args:
        csv_file: Passed as-is to ``get_csv_data``.
        block (bool): Forwarded to ``plt.show``.
    """
    # Columns requested from the CSV. Other columns that were listed here but
    # disabled: 'mean-sq-bellman-error', 'Bellman Residual (QFcn)',
    # 'Surrogate Reward (Policy)', 'return-average', 'episode-length-min',
    # 'episode-length-max', 'Log Pis'.
    column_names = ['Exploration Returns Mean', 'Test Returns Mean']

    # No intentional/unintentional prefixes are added in this plot, so there
    # is exactly one subplot per column.
    n_axes = len(column_names)

    data = get_csv_data(csv_file, column_names)

    fig, axs = subplots(n_axes)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('General Info', fontweight='bold')

    for row, ax in enumerate(axs):
        ax.plot(data[row])
        ax.set_ylabel(column_names[row])
        plt.setp(ax.get_xticklabels(), visible=False)

    bottom_ax = axs[-1]
    bottom_ax.set_xlabel('Episodes')
    plt.setp(bottom_ax.get_xticklabels(), visible=True)

    plt.show(block=block)
def plot_reward_composition_v010(cost_list, ignore_last=False,
                                 plot_last=False):
    """Plot each reward term and, in a final subplot, the terms with their sum.

    Args:
        cost_list: Sequence of reward terms; each entry is a sequence with one
            value per time step (the length of the last entry is used as the
            horizon).
        ignore_last (bool): When True, the last term is left out of the final
            (summary) subplot. It is still included in the total.
        plot_last (bool): When False, the final time step is dropped from the
            summary subplot.

    The figure is shown in non-blocking mode; nothing is returned.
    """
    n_terms = len(cost_list)
    horizon = len(cost_list[-1])

    fig, axs = subplots(n_terms + 1, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Reward composition', fontweight='bold')

    # Rows 0..n_terms-1 hold the individual terms, the last row their sum.
    data = np.zeros((n_terms + 1, horizon))
    time_steps = np.arange(horizon)

    for idx, term in enumerate(cost_list):
        data[idx, :] = term
        axs[idx].plot(time_steps, data[idx, :], color=COMPO_COLORS[idx],
                      linestyle=':')
        axs[idx].set_ylabel('Reward %02d' % idx)

    data[-1, :] = data[:n_terms, :].sum(axis=0)

    n_shown = n_terms - 1 if ignore_last else n_terms
    last_t = horizon if plot_last else horizon - 1

    summary_ax = axs[-1]
    for idx in range(n_shown):
        summary_ax.plot(time_steps[:last_t], data[idx, :last_t],
                        linestyle=':', label='%02d' % idx,
                        color=COMPO_COLORS[idx])

    summary_ax.plot(time_steps[:last_t], data[-1, :last_t], linewidth=2,
                    color=COMPO_COLORS[n_terms], label='Total Reward')
    summary_ax.set_xlabel('Time')
    summary_ax.legend()

    plt.show(block=False)
def plot_reward_composition(path_list, ignore_last=True, block=False):
    """Plot each entry of the per-step reward vector and their sum.

    Args:
        path_list (dict): Must contain ``'env_infos'``, a sequence with one
            dict per time step, each holding a ``'reward_vector'`` sequence.
        ignore_last (bool): When True, the last reward term is left out of
            the final (summary) subplot. It is still included in the total.
        block (bool): Forwarded to ``plt.show``.

    Returns:
        tuple: ``(fig, axs)``.
    """
    env_infos = path_list['env_infos']
    n_terms = len(env_infos[-1]['reward_vector'])
    horizon = len(env_infos)

    fig, axs = subplots(n_terms + 1, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Composition of Rewards', fontweight='bold')

    # Rows 0..n_terms-1 hold the individual terms, the last row their sum.
    data = np.zeros((n_terms + 1, horizon))
    time_steps = np.arange(horizon)

    for idx in range(n_terms):
        for step in range(horizon):
            data[idx, step] = env_infos[step]['reward_vector'][idx]

        term_ax = axs[idx]
        term_ax.plot(time_steps, data[idx, :], color=COMPO_COLORS[idx],
                     linestyle=':')
        term_ax.set_ylabel('%02d' % idx)

    data[-1, :] = data[:n_terms, :].sum(axis=0)

    n_shown = n_terms - 1 if ignore_last else n_terms

    summary_ax = axs[-1]
    for idx in range(n_shown):
        summary_ax.plot(time_steps, data[idx, :], linestyle=':',
                        label='%02d' % idx, color=COMPO_COLORS[idx])

    summary_ax.plot(time_steps, data[-1, :], linewidth=2,
                    color=COMPO_COLORS[n_terms], label='Reward')
    summary_ax.set_ylabel('Reward')
    summary_ax.set_xlabel('Time step')
    summary_ax.legend()

    plt.show(block=block)

    return fig, axs
def plot_reward_iu(path_list, block=False):
    """Plot the unintentional (multigoal) rewards and the intentional reward.

    Args:
        path_list (dict): Must contain ``'rewards'`` (an object supporting
            ``len`` and ``.squeeze()``) and ``'env_infos'``, a sequence with
            one dict per time step, each holding a ``'reward_multigoal'``
            sequence.
        block (bool): Forwarded to ``plt.show``.

    Returns:
        tuple: ``(fig, axs)``; the last axes shows the intentional reward.
    """
    rewards = path_list['rewards']
    env_infos = path_list['env_infos']

    horizon = len(rewards)
    n_unint = len(env_infos[-1]['reward_multigoal'])

    fig, axs = subplots(n_unint + 1, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Reward of Intentional and Unintentional Policies',
                 fontweight='bold')

    # One row per unintentional reward; the last row is path_list['rewards'].
    data = np.zeros((n_unint + 1, horizon))
    time_steps = np.arange(horizon)

    for step in range(horizon):
        multigoal = env_infos[step]['reward_multigoal']
        for uu in range(n_unint):
            data[uu, step] = multigoal[uu]

    data[-1, :] = rewards.squeeze()

    for uu, ax in enumerate(axs[:-1]):
        ax.plot(time_steps, data[uu, :], linestyle=':', label='U-%02d' % uu,
                color=COMPO_COLORS[uu])
        ax.set_ylabel('Reward U-%02d' % uu)

    main_ax = axs[-1]
    main_ax.plot(time_steps, data[-1, :], linewidth=2,
                 color=COMPO_COLORS[n_unint + 1], label='I')
    main_ax.set_ylabel('Reward Intentional')
    main_ax.set_xlabel('Time step')

    plt.show(block=block)

    return fig, axs
def plot_process_iu_alphas(csv_file, n_unintentional=None, block=False):
    """Plot the 'Alphas' columns of a progress CSV file, one subplot each.

    Args:
        csv_file: Passed as-is to ``get_csv_data``.
        n_unintentional (int, optional): When None, only the ``'[I] Alphas'``
            column is requested. Otherwise the value is incremented by one and
            that many ``'[U-xx] Alphas'`` columns are requested ahead of the
            ``'[I]'`` one.
        block (bool): Forwarded to ``plt.show``.

    Returns:
        None. When the data cannot be loaded a message is printed and nothing
        is plotted.
    """
    labels_to_plot = ['Alphas']

    if n_unintentional is None:
        n_unintentional = 0
    else:
        n_unintentional += 1

    # Add Intentional-Unintentional Label
    new_labels = list()
    for label in labels_to_plot:
        for uu in range(n_unintentional):
            new_labels.append(('[U-%02d] ' % uu) + label)
        new_labels.append('[I] ' + label)

    n_subplots = len(labels_to_plot) * (n_unintentional + 1)

    try:
        data = get_csv_data(csv_file, new_labels)
    except Exception:
        # Best effort: a missing file/column just means there is nothing to
        # show. ``Exception`` (instead of a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        print("There is no alphas data to show!!")
        return None

    fig, axs = subplots(n_subplots)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Alphas', fontweight='bold')

    for aa, ax in enumerate(axs):
        ax.plot(data[aa])
        ax.set_ylabel(new_labels[aa])
        plt.setp(ax.get_xticklabels(), visible=False)

    # Only the bottom subplot shows x tick labels.
    axs[-1].set_xlabel('Episodes')
    plt.setp(axs[-1].get_xticklabels(), visible=True)

    plt.show(block=block)
def plot_process_haarnoja(csv_file, n_unintentional=None, block=False):
    """Plot return, episode length and policy statistics from a progress CSV.

    One subplot is drawn per requested column, stacked vertically; only the
    bottom subplot shows x tick labels.

    Args:
        csv_file: Passed as-is to ``get_csv_data``.
        n_unintentional (int, optional): When None, only the plain columns are
            requested. Otherwise the value is incremented by one and that many
            ``'[U-xx] '``-prefixed columns are requested ahead of each plain
            column.
        block (bool): Forwarded to ``plt.show``.
    """
    base_columns = ['return-average', 'episode-length-avg', 'log-pi-mean',
                    'log-sigs-mean']

    n_prefixed = 0 if n_unintentional is None else n_unintentional + 1

    # For every base column: its '[U-xx] ' variants first, then the plain
    # (unprefixed) column.
    column_names = []
    for column in base_columns:
        column_names.extend(
            ('[U-%02d] ' % uu) + column for uu in range(n_prefixed)
        )
        column_names.append(column)

    n_axes = len(base_columns) * (n_prefixed + 1)

    data = get_csv_data(csv_file, column_names)

    fig, axs = subplots(n_axes)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Avg Return and Avg Reward', fontweight='bold')

    for row, ax in enumerate(axs):
        ax.plot(data[row])
        ax.set_ylabel(column_names[row])
        plt.setp(ax.get_xticklabels(), visible=False)

    bottom_ax = axs[-1]
    bottom_ax.set_xlabel('Episodes')
    plt.setp(bottom_ax.get_xticklabels(), visible=True)

    plt.show(block=block)
def plot_process_iu_avg_rewards(csv_file, n_unintentional=None, block=False):
    """Plot the 'Test Rewards Mean' columns of a progress CSV file.

    One subplot is drawn per requested column, stacked vertically; only the
    bottom subplot shows x tick labels. The number of entries of the last
    column is printed before the figure is shown.

    Args:
        csv_file: Passed as-is to ``get_csv_data``.
        n_unintentional (int, optional): When None, only the ``'[I] '`` column
            is requested. Otherwise the value is incremented by one and that
            many ``'[U-xx] '`` columns are requested ahead of the ``'[I] '``
            one.
        block (bool): Forwarded to ``plt.show``.
    """
    base_columns = ['Test Rewards Mean']

    n_prefixed = 0 if n_unintentional is None else n_unintentional + 1

    # For every base column: its '[U-xx] ' variants first, then '[I] '.
    column_names = []
    for column in base_columns:
        column_names.extend(
            ('[U-%02d] ' % uu) + column for uu in range(n_prefixed)
        )
        column_names.append('[I] ' + column)

    n_axes = len(base_columns) * (n_prefixed + 1)

    data = get_csv_data(csv_file, column_names)

    fig, axs = subplots(n_axes)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Rewards Mean', fontweight='bold')

    for row, ax in enumerate(axs):
        ax.plot(data[row])
        ax.set_ylabel(column_names[row])
        plt.setp(ax.get_xticklabels(), visible=False)

    bottom_ax = axs[-1]
    bottom_ax.set_xlabel('Episodes')
    plt.setp(bottom_ax.get_xticklabels(), visible=True)

    print('total_iters:', len(data[-1]))
    plt.show(block=block)
def plot_q_vals(path_list, q_fcn, block=False):
    """Plot the Q-values of the state-action pairs visited along a path.

    Args:
        path_list (dict): Must contain ``'observations'`` (an array whose
            ``shape[0]`` is the number of time steps) and ``'actions'``.
        q_fcn: Object exposing ``get_values(obs, actions)``; the first element
            of its return value is taken as the Q-values.
        block (bool): Forwarded to ``plt.show``.

    Returns:
        tuple: ``(fig, axs)``.
    """
    obs = path_list['observations']
    actions = path_list['actions']
    H = obs.shape[0]

    fig, axs = subplots(1, sharex=True)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # indexed like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Q-vals', fontweight='bold')

    q_values = q_fcn.get_values(obs, actions)[0]
    # squeeze() returns a new array rather than modifying in place, so the
    # result has to be kept for the trailing axis to actually be dropped.
    q_values = q_values.squeeze(-1)

    ts = np.arange(H)

    axs[-1].plot(ts, q_values)
    axs[-1].set_ylabel('Q-Value')
    axs[-1].set_xlabel('Time step')

    plt.show(block=block)

    return fig, axs
def main(args):
    """Plot the 'AverageEpRet' column of a space-separated progress file.

    Args:
        args: Object with a ``file`` attribute (passed to ``get_csv_data``)
            and a ``max_iter`` attribute. When ``max_iter`` is greater than
            zero only that many leading entries are plotted; otherwise the
            whole column is.

    The figure is shown in blocking mode.
    """
    column_names = ['AverageEpRet']
    data = get_csv_data(args.file, column_names, space_separated=True)

    fig, axs = subplots(1)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Avg Return', fontweight='bold')

    n_available = len(data[-1])
    n_shown = args.max_iter if args.max_iter > 0 else n_available

    for row, ax in enumerate(axs):
        ax.plot(data[row][:n_shown])
        ax.set_ylabel(column_names[row])
        plt.setp(ax.get_xticklabels(), visible=False)

    bottom_ax = axs[-1]
    bottom_ax.set_xlabel('Episodes')
    plt.setp(bottom_ax.get_xticklabels(), visible=True)

    plt.show(block=True)
def plot_multiple_process_iu_returns(
        csv_file_dict, block=False,
        max_iter=500,
        steps_per_iter=None,
        latex_plot=True,
        fig_name_prefix=None,
):
    """Plot the mean/std over seeds of 'Test Returns Mean' per experiment.

    One figure is created per category. Inside a figure every experiment
    contributes one curve (mean over seeds, with a +/- 0.5 std band) for each
    requested policy index. If the algorithm of an experiment is HIU, the
    unintentional data is treated as an independent curve.

    The keys of the categories dict are used for the figure (window) title.
    The keys of the experiments dict are used for the labels in the legend.

    Args:
        csv_file_dict (dict): A dictionary of categories.
            - Category (dict): One figure per category.
                - Experiment (list): One entry per seed.
                    - Seed (tuple): ``(log_dir, policy_idxs, rew_scales)``
                        log_dir: Directory holding the variant file and the
                            progress log of that run.
                        policy_idxs: Policies to plot; -1 is the intentional
                            (main) policy, 0.. are the unintentional ones.
                            The indices of the first seed are the ones used
                            for plotting.
                        rew_scales: One scale per entry of ``policy_idxs``;
                            the corresponding returns are divided by it.

            dict(
                Criteria1 = dict(
                    Experiment1 = list(
                        ('full_path_of_experiment_with_seed_X', [-1], [1.])
                        ('full_path_of_experiment_with_seed_Y', [-1], [1.])
                    )
                    Experiment2 = list(
                        ('full_path_of_experiment_with_seed_X',
                         [-1, 0, 1], [1., 1., 1.])
                        ('full_path_of_experiment_with_seed_Y',
                         [-1, 0, 1], [1., 1., 1.])
                    )
                )
                Criteria2 = dict(...)
            )

        block (bool): Forwarded to ``plt.show``.
        max_iter (int): Number of leading iterations to plot. Every log must
            contain at least this many.
        steps_per_iter (int, optional): When given, the x axis is expressed
            in time steps (iteration index times this value) instead of
            iterations.
        latex_plot (bool): When True, ``set_latex_plot()`` is called first.
        fig_name_prefix (str, optional): Prefix of the figure window title.

    Raises:
        ValueError: If a log has fewer than ``max_iter`` iterations.
    """
    labels_to_plot = ['Test Returns Mean']
    labels_y_axis = ['Average Return']

    if latex_plot:
        set_latex_plot()

    if fig_name_prefix is None:
        fig_name_prefix = ""

    # Column names of the intentional and unintentional policies.
    i_labels = list()
    u_labels = list()
    for label in labels_to_plot:
        for uu in range(N_UNINTENTIONS):
            u_labels.append(('[U-%02d] ' % uu) + label)
        i_labels.append('[I] ' + label)

    categories = list(csv_file_dict.keys())

    if steps_per_iter is None:
        x_data = np.arange(0, max_iter)
        x_label = 'Iterations'
    else:
        x_data = np.arange(0, max_iter) * steps_per_iter
        x_label = 'Time steps (%s)' % '{:.0e}'.format(steps_per_iter)

    for cate in categories:
        # ######## #
        # Get data #
        # ######## #
        catego_dict = csv_file_dict[cate]
        n_subplots = len(i_labels)
        expts = list(catego_dict.keys())

        nexpts = len(expts)
        # The number of seeds is taken from the last experiment.
        nseeds = len(catego_dict[expts[-1]])
        niters = max_iter
        nunint = N_UNINTENTIONS

        # One array per subplot: [experiment, seed, policy, iteration], where
        # policy 0 is the intentional one.
        all_data = [
            np.zeros((nexpts, nseeds, nunint + 1, niters))
            for _ in i_labels
        ]

        algos = list()
        infos = list()

        for ee, expt in enumerate(expts):
            seeds = catego_dict[expt]
            algos.append(list())

            for ss in range(len(seeds)):
                data_dir = catego_dict[expt][ss][0]
                # Because Main is 0 not -1
                info = [ii + 1 for ii in catego_dict[expt][ss][1]]

                variant_file = os.path.join(data_dir, VARIANT_FILE)
                with open(variant_file) as json_data:
                    algo_name = json.load(json_data)['algo_name']
                algos[-1].append(algo_name)

                if ss == 0:
                    infos.append(info)

                csv_file = os.path.join(data_dir, LOG_FILE)
                if algo_name.upper() in ['HIUSAC', 'HIUSACNEW', 'HIUDDPG']:
                    data_csv = get_csv_data(csv_file, i_labels + u_labels)
                else:
                    data_csv = get_csv_data(csv_file, i_labels)

                for dd in range(n_subplots):
                    if data_csv.shape[-1] < max_iter:
                        raise ValueError(
                            'por ahora hay solo %02d iters. En %s'
                            % (data_csv.shape[-1], csv_file)
                        )
                    n_data = data_csv.shape[0]
                    all_data[dd][ee, ss, :n_data, :] = data_csv[:, :max_iter]

                # TODO: Assuming only AvgReturn
                rew_scales = catego_dict[expt][ss][2]
                for ii, rew_scale in zip(info, rew_scales):
                    all_data[-1][ee, ss, ii, :] *= 1 / rew_scale

        # ############# #
        # Plot the data #
        # ############# #
        fig, axs = subplots(n_subplots)
        # A single subplot is returned as a bare Axes; wrap it so it can be
        # iterated like the multi-subplot case.
        if not isinstance(axs, np.ndarray):
            axs = np.array([axs])
        fig.subplots_adjust(hspace=0)

        fig_title = (fig_name_prefix + 'Expected Return '
                     + str(cate)).replace(" ", "_")
        # FigureCanvas.set_window_title() was removed in Matplotlib 3.6; the
        # figure manager offers the same call on old and new versions. There
        # is no manager (hence no window to name) on some backends.
        manager = getattr(fig.canvas, 'manager', None)
        if manager is not None:
            manager.set_window_title(fig_title)

        lines = list()
        labels = list()

        for aa, ax in enumerate(axs):
            for ee, expt in enumerate(expts):
                print('----> cat:', cate, '|', expt,
                      all_data[aa][ee, :, :, :].shape, '| info:', infos[ee])

                # The algorithm is a property of the experiment, not of the
                # plotted policy: read it from the first seed (the one that
                # also provided infos[ee]) rather than indexing the per-seed
                # list with the policy counter, which fails as soon as more
                # policies than seeds are plotted.
                # NOTE(review): only 'HIUSAC' gets [I]/[U-xx] suffixes here,
                # while loading also treats 'HIUSACNEW' and 'HIUDDPG' as HIU
                # -- confirm whether that is intended.
                is_hiu = algos[ee][0].upper() == 'HIUSAC'

                for iu_idx in infos[ee]:
                    data_mean = np.mean(all_data[aa][ee, :, iu_idx, :],
                                        axis=0)
                    data_std = np.std(all_data[aa][ee, :, iu_idx, :],
                                      axis=0)
                    # 85:1.440, 90:1.645, 95:1.960, 99:2.576
                    ax.fill_between(
                        x_data,
                        (data_mean - 0.5 * data_std),
                        (data_mean + 0.5 * data_std), alpha=.3)
                    mean_plot = ax.plot(x_data, data_mean)[0]

                    # Legend handles are collected from the first subplot
                    # only.
                    if aa == 0:
                        lines.append(mean_plot)
                        if is_hiu:
                            if iu_idx == 0:
                                i_suffix = ' [I]'
                            else:
                                i_suffix = ' [U-%02d]' % iu_idx
                            labels.append(expt + i_suffix)
                        else:
                            labels.append(expt)

            # Pad the x range by one step on both sides.
            xdiff = x_data[1] - x_data[0]
            ax.set_xlim(x_data[0] - xdiff, x_data[-1] + xdiff)
            ax.set_ylabel(labels_y_axis[aa], fontsize=50)
            plt.setp(ax.get_xticklabels(), visible=False)

            ax.xaxis.set_major_locator(plt.MultipleLocator(50))
            ax.xaxis.set_minor_locator(plt.MultipleLocator(10))

        # Only the bottom subplot shows x tick labels.
        axs[-1].set_xlabel(x_label, fontsize=50)
        plt.setp(axs[-1].get_xticklabels(), visible=True)

        legend = fig.legend(
            lines, labels, loc='lower right', ncol=1,
            labelspacing=0., prop={'size': 40})
        fig.set_size_inches(19, 11)  # 1920 x 1080
        fig.tight_layout()

        # Legend.draggable() was removed in Matplotlib 3.1; prefer the
        # replacement and fall back only on installations that predate it.
        if hasattr(legend, 'set_draggable'):
            legend.set_draggable(True)
        else:
            legend.draggable(True)

    plt.show(block=block)
def plot_process_iu_policies(csv_file, n_unintentional=None, block=False,
                             plot_initial=False, plot_intentional=False,
                             deterministic=False):
    """Plot policy-related columns of a progress CSV, one subplot per metric.

    Every subplot overlays one curve per ``'[U-xx] '`` column and, for all
    metrics except the first one, optionally the ``'[I] '`` column.

    Args:
        csv_file: Passed as-is to ``get_csv_data``.
        n_unintentional (int, optional): When None, no ``'[U-xx] '`` columns
            are requested. Otherwise the value is incremented by one and that
            many are requested per metric.
        block (bool): Forwarded to ``plt.show``.
        plot_initial (bool): When False, the first entry of every column is
            skipped.
        plot_intentional (bool): When True, the ``'[I] '`` column is added for
            every metric but the first.
        deterministic (bool): Selects which set of metrics is plotted.
    """
    if deterministic:
        # 'Raw Policy Loss' was listed here but disabled.
        metric_names = ['Mixing Weights', 'Policy Loss', 'Rewards']
    else:
        # 'Log Policy Target', 'Policy Mean' and 'Policy Std' were listed
        # here but disabled.
        metric_names = ['Mixing Weights', 'Pol KL Loss', 'Rewards',
                        'Policy Entropy']

    n_prefixed = 0 if n_unintentional is None else n_unintentional + 1
    first_idx = 0 if plot_initial else 1

    def curve_tags(metric_idx):
        """Legend tags of the curves drawn for the given metric, in order."""
        tags = ['[U-%02d] ' % uu for uu in range(n_prefixed)]
        if metric_idx > 0 and plot_intentional:
            tags.append('[I]')
        return tags

    # Column names follow the same order the curves are drawn in.
    column_names = []
    for metric_idx, metric in enumerate(metric_names):
        column_names.extend(
            ('[U-%02d] ' % uu) + metric for uu in range(n_prefixed)
        )
        if metric_idx > 0 and plot_intentional:
            column_names.append('[I] ' + metric)

    n_axes = len(metric_names)

    data = get_csv_data(csv_file, column_names)

    fig, axs = subplots(n_axes)
    # A single subplot is returned as a bare Axes; wrap it so it can be
    # iterated like the multi-subplot case.
    if not isinstance(axs, np.ndarray):
        axs = np.array([axs])
    fig.subplots_adjust(hspace=0)
    fig.suptitle('Policy Properties', fontweight='bold')

    next_row = 0
    lines = []
    labels = []
    for metric_idx, ax in enumerate(axs):
        for tag in curve_tags(metric_idx):
            line, = ax.plot(data[next_row, first_idx:], label=tag)
            next_row += 1
            # Legend handles come from the second subplot, the first one
            # that can also contain the '[I]' curve.
            if metric_idx == 1:
                lines.append(line)
                labels.append(tag)

        ax.set_ylabel(metric_names[metric_idx])
        plt.setp(ax.get_xticklabels(), visible=False)

    bottom_ax = axs[-1]
    bottom_ax.set_xlabel('Episodes')
    plt.setp(bottom_ax.get_xticklabels(), visible=True)

    fig.legend(lines, labels, loc='right', ncol=1, labelspacing=0.)

    plt.show(block=block)