def prepare_dataset_(self):
    """Load pickled transitions and set up the training/validation datasets.

    Steps, in order:
      1. Unpickle the transitions stored at ``self.load_path`` and, when
         ``self.data_limit`` is not None, keep only the first
         ``self.data_limit`` of them.
      2. Convert them with ``self.transitions_to_dataset_`` and plot
         histograms of the rewards, state labels and next-state labels.
      3. When ``self.oversample`` is truthy, call ``self.dataset.oversample``
         with the mask ``rewards > 0``.
      4. Shuffle, then store the result of ``self.dataset.split(...)`` for
         ``int(num_samples * validation_fraction)`` samples as
         ``self.valid_dataset``.
      5. Derive ``self.epoch_size`` from the number of states and the batch
         size, and log the dataset statistics.
    """
    # NOTE: pickle.load can execute arbitrary code; load_path must be trusted.
    with open(self.load_path, "rb") as handle:
        loaded = pickle.load(handle)

    limit = self.data_limit
    if limit is not None:
        loaded = loaded[:limit]

    self.dataset = self.transitions_to_dataset_(loaded)

    histogram_keys = (
        constants.REWARDS,
        constants.STATE_LABELS,
        constants.NEXT_STATE_LABELS,
    )
    histogram_names = [
        "rewards_hist",
        "state_labels_hist",
        "next_state_labels_hist",
    ]
    vis_utils.plot_many_histograms(
        [self.dataset[key] for key in histogram_keys],
        histogram_names,
        xlabel="items",
        num_bins=10,
        saver=self.saver,
    )

    if self.oversample:
        positive_reward_mask = self.dataset[constants.REWARDS] > 0
        self.dataset.oversample(positive_reward_mask)

    self.dataset.shuffle()

    self.num_samples = self.dataset.size
    self.valid_samples = int(self.num_samples * self.validation_fraction)
    self.valid_dataset = self.dataset.split(self.valid_samples)

    # Count states only after the split call above, as the original does.
    remaining_states = self.dataset[constants.STATES].shape[0]
    self.epoch_size = remaining_states // self.batch_size

    self.log_dataset_stats_()
def prepare_dataset_(self):
    """Load pickled MinAtar transitions and set up train/validation data.

    Unpickles the transitions at ``self.load_path``, converts them with
    ``prepare_minatar_dataset``, plots histograms of the q-values and next
    q-values (each passed through ``reshape(-1)``), shuffles the dataset and
    stores the result of ``self.dataset.split(...)`` for
    ``int(size * validation_fraction)`` samples as ``self.valid_dataset``.
    Finally computes ``self.epoch_size`` and the negated ``DONES`` arrays for
    both datasets, and logs the dataset statistics.
    """
    # NOTE: pickle.load can execute arbitrary code; load_path must be trusted.
    with open(self.load_path, "rb") as handle:
        loaded = pickle.load(handle)

    self.dataset = prepare_minatar_dataset(loaded)

    q_value_series = [
        self.dataset[constants.Q_VALUES].reshape(-1),
        self.dataset[constants.NEXT_Q_VALUES].reshape(-1),
    ]
    vis_utils.plot_many_histograms(
        q_value_series,
        ["q_values_hist", "next_q_values_hist"],
        xlabel="q-values",
        num_bins=50,
        saver=self.saver,
    )

    self.dataset.shuffle()

    total = self.dataset.size
    held_out = int(total * self.validation_fraction)
    self.valid_dataset = self.dataset.split(held_out)

    # Count states only after the split call above, as the original does.
    remaining_states = self.dataset[constants.STATES].shape[0]
    self.epoch_size = remaining_states // self.batch_size

    train_dones = self.dataset[constants.DONES]
    self.train_not_dones = np.logical_not(train_dones)
    valid_dones = self.valid_dataset[constants.DONES]
    self.valid_not_dones = np.logical_not(valid_dones)

    self.log_dataset_stats_()
def plot_cluster_failures_(self):
    """Plot a 50-bin histogram of ``self.cluster_failures``.

    The values are passed through ``reshape(-1)`` and handed to
    ``vis_utils.plot_many_histograms`` under the name
    ``"cluster_failure_probs"``, using ``self.saver``.
    """
    series = [self.cluster_failures.reshape(-1)]
    titles = ["cluster_failure_probs"]
    vis_utils.plot_many_histograms(
        series,
        titles,
        xlabel="bins",
        num_bins=50,
        saver=self.saver,
    )
# prepare_dataset_: load pickled transitions and build the normalized,
# shuffled training/validation datasets used with q-values.
#
# What the statements below do, in order:
#   * Unpickle the transitions at self.load_path and pass them to
#     prepare_bisim_dataset_with_q_values together with grid_size, num_pucks,
#     gt_q_values, only_one_q_value, include_goal_states, and
#     shift_qs_by (self.discount when self.shift_q_values is truthy, else None).
#   * Store dataset[NEW_DONES] = rewards[:, self.active_task_indices] > 0.0.
#   * Plot histograms of the q-values / next q-values (after reshape(-1)) and
#     of the state labels / next-state labels.
#   * Assert that the first state has a 2-element shape with equal entries,
#     and set self.disable_resize = True when that size equals
#     self.RESIZE_SIZE. When resizing is not disabled, call
#     self.dataset.resize_tf for STATES and NEXT_STATES and keep its two
#     return values in self.images_pl and self.resized_t.
#   * If self.oversample is truthy, call dataset.oversample with the mask
#     rewards > 0.
#   * Call dataset.normalize_together on STATES and NEXT_STATES with
#     std_threshold=0.001 and keep the returned values in self.means / self.stds.
#   * Shuffle, store dataset.split(int(size * validation_fraction)) as
#     self.valid_dataset, compute self.epoch_size, compute the negated
#     NEW_DONES arrays for both datasets, and log the dataset statistics.
#
# NOTE(review): this file's whitespace has been collapsed, so it is not
# possible to tell from here whether `self.depth_size = self.RESIZE_SIZE`
# belongs inside the `if original_size == self.RESIZE_SIZE:` branch or runs
# unconditionally after it. Confirm against the original source before
# reformatting; the logger call that follows reads self.depth_size.
# NOTE(review): pickle.load can execute arbitrary code from the file it
# reads; self.load_path should only ever point at trusted data.
def prepare_dataset_(self): with open(self.load_path, "rb") as file: transitions = pickle.load(file) self.dataset = prepare_bisim_dataset_with_q_values( transitions, self.grid_size, self.num_pucks, gt_q_values=self.gt_q_values, only_one_q_value=self.only_one_q_value, include_goal_state=self.include_goal_states, shift_qs_by=self.discount if self.shift_q_values else None ) self.dataset[constants.NEW_DONES] = self.dataset[constants.REWARDS][:, self.active_task_indices] > 0.0 vis_utils.plot_many_histograms( [self.dataset[constants.Q_VALUES].reshape(-1), self.dataset[constants.NEXT_Q_VALUES].reshape(-1)], ["q_values_hist", "next_q_values_hist"], xlabel="q-values", num_bins=50, saver=self.saver ) vis_utils.plot_many_histograms( [self.dataset[constants.STATE_LABELS], self.dataset[constants.NEXT_STATE_LABELS]], ["state_labels_hist", "next_state_labels_hist"], xlabel="label index", num_bins=10, saver=self.saver ) original_size = self.dataset[constants.STATES][0].shape[0] assert len(self.dataset[constants.STATES][0].shape) == 2 assert original_size == self.dataset[constants.STATES][0].shape[1] if original_size == self.RESIZE_SIZE: self.disable_resize = True self.depth_size = self.RESIZE_SIZE if not self.disable_resize: self.images_pl, self.resized_t = \ self.dataset.resize_tf(original_size, self.RESIZE_SIZE, [constants.STATES, constants.NEXT_STATES]) self.logger.info("original depth size: {:d}, new size: {:d}".format(original_size, self.depth_size)) if self.oversample: self.dataset.oversample((self.dataset[constants.REWARDS] > 0)) self.means, self.stds = self.dataset.normalize_together( [constants.STATES, constants.NEXT_STATES], std_threshold=0.001 ) self.dataset.shuffle() num_samples = self.dataset.size valid_samples = int(num_samples * self.validation_fraction) self.valid_dataset = self.dataset.split(valid_samples) self.epoch_size = self.dataset[constants.STATES].shape[0] // self.batch_size self.train_not_dones = np.logical_not(self.dataset[constants.NEW_DONES]) 
self.valid_not_dones = np.logical_not(self.valid_dataset[constants.NEW_DONES]) self.log_dataset_stats_()
def plot_cluster_q_values_(self):
    """Print ``self.cluster_q_values`` and plot them as a 50-bin histogram.

    The values are passed through ``reshape(-1)`` and handed to
    ``vis_utils.plot_many_histograms`` under the name ``"cluster_q_values"``,
    using ``self.saver``.
    """
    q_values = self.cluster_q_values
    print(q_values)

    plot_options = {"xlabel": "bins", "num_bins": 50, "saver": self.saver}
    vis_utils.plot_many_histograms(
        [self.cluster_q_values.reshape(-1)],
        ["cluster_q_values"],
        **plot_options,
    )
def plot_perplexities_(self):
    """Plot 50-bin histograms of train and validation perplexities.

    The perplexities are obtained by passing ``self.train_cluster_log_probs``
    and ``self.valid_cluster_log_probs`` to ``evaluate.get_perplexities``;
    the two results are plotted as ``"train_state_perplexities"`` and
    ``"valid_state_perplexities"`` using ``self.saver``.
    """
    from_train = evaluate.get_perplexities(self.train_cluster_log_probs)
    from_valid = evaluate.get_perplexities(self.valid_cluster_log_probs)

    series = [from_train, from_valid]
    titles = ["train_state_perplexities", "valid_state_perplexities"]
    vis_utils.plot_many_histograms(
        series,
        titles,
        xlabel="perplexity",
        num_bins=50,
        saver=self.saver,
    )
def plot_cluster_rewards_(self, cluster_rewards, name):
    """Plot a 50-bin histogram of ``cluster_rewards`` under the given name.

    Args:
        cluster_rewards: object exposing ``reshape``; it is passed through
            ``reshape(-1)`` before plotting.
        name: name given to ``vis_utils.plot_many_histograms`` for this
            histogram.
    """
    series = [cluster_rewards.reshape(-1)]
    titles = [name]
    vis_utils.plot_many_histograms(
        series,
        titles,
        xlabel="bins",
        num_bins=50,
        saver=self.saver,
    )