def prepare_dataset_(self):
    """Load pickled transitions, build the dataset and carve out a validation split.

    Side effects: sets self.dataset, self.valid_dataset, self.num_samples,
    self.valid_samples and self.epoch_size, and saves histogram plots via
    self.saver.
    """

    # Deserialize the recorded transitions from disk.
    with open(self.load_path, "rb") as pickle_file:
        transitions = pickle.load(pickle_file)

    # Optionally truncate the data to the configured limit.
    if self.data_limit is not None:
        transitions = transitions[:self.data_limit]

    self.dataset = self.transitions_to_dataset_(transitions)

    # Visualize the marginal distributions of rewards and state labels.
    hist_values = [
        self.dataset[constants.REWARDS],
        self.dataset[constants.STATE_LABELS],
        self.dataset[constants.NEXT_STATE_LABELS],
    ]
    hist_names = ["rewards_hist", "state_labels_hist", "next_state_labels_hist"]
    vis_utils.plot_many_histograms(
        hist_values, hist_names, xlabel="items", num_bins=10, saver=self.saver
    )

    # Rebalance towards positive-reward transitions if requested.
    if self.oversample:
        self.dataset.oversample((self.dataset[constants.REWARDS] > 0))

    self.dataset.shuffle()

    # Split off the validation fraction after shuffling.
    self.num_samples = self.dataset.size
    self.valid_samples = int(self.num_samples * self.validation_fraction)
    self.valid_dataset = self.dataset.split(self.valid_samples)

    # Number of full batches per training epoch.
    self.epoch_size = self.dataset[constants.STATES].shape[0] // self.batch_size

    self.log_dataset_stats_()
예제 #2
0
    def prepare_dataset_(self):
        """Load pickled MinAtar transitions and set up train/valid datasets.

        Side effects: sets self.dataset, self.valid_dataset, self.epoch_size,
        self.train_not_dones and self.valid_not_dones, and saves q-value
        histogram plots via self.saver.
        """

        # Deserialize the recorded transitions from disk.
        with open(self.load_path, "rb") as pickle_file:
            transitions = pickle.load(pickle_file)

        self.dataset = prepare_minatar_dataset(transitions)

        # Plot the distributions of current and next q-values.
        q_flat = self.dataset[constants.Q_VALUES].reshape(-1)
        next_q_flat = self.dataset[constants.NEXT_Q_VALUES].reshape(-1)
        vis_utils.plot_many_histograms(
            [q_flat, next_q_flat],
            ["q_values_hist", "next_q_values_hist"],
            xlabel="q-values", num_bins=50, saver=self.saver)

        self.dataset.shuffle()

        # Split off the validation fraction after shuffling.
        total_samples = self.dataset.size
        held_out = int(total_samples * self.validation_fraction)
        self.valid_dataset = self.dataset.split(held_out)

        # Number of full batches per training epoch.
        self.epoch_size = (
            self.dataset[constants.STATES].shape[0] // self.batch_size)

        # Boolean masks selecting non-terminal transitions in each split.
        self.train_not_dones = np.logical_not(self.dataset[constants.DONES])
        self.valid_not_dones = np.logical_not(
            self.valid_dataset[constants.DONES])

        self.log_dataset_stats_()
예제 #3
0
    def plot_cluster_failures_(self):
        """Save a histogram of the flattened cluster failure probabilities."""

        flat_failures = self.cluster_failures.reshape(-1)
        vis_utils.plot_many_histograms(
            [flat_failures], ["cluster_failure_probs"],
            xlabel="bins", num_bins=50, saver=self.saver)
예제 #4
0
    def prepare_dataset_(self):
        """Load pickled transitions and build the bisimulation dataset.

        Pipeline: load -> derive new dones -> plot histograms -> (optionally)
        resize depth images -> (optionally) oversample positives -> normalize
        -> shuffle -> train/valid split -> build not-done masks.

        Side effects: sets self.dataset, self.valid_dataset, self.epoch_size,
        self.means, self.stds, self.depth_size, self.train_not_dones,
        self.valid_not_dones, possibly self.disable_resize, self.images_pl
        and self.resized_t, and saves histogram plots via self.saver.
        """

        with open(self.load_path, "rb") as file:
            transitions = pickle.load(file)

        # Build the dataset with q-values; shift_qs_by passes the discount
        # only when q-value shifting is enabled.
        self.dataset = prepare_bisim_dataset_with_q_values(
            transitions, self.grid_size, self.num_pucks, gt_q_values=self.gt_q_values,
            only_one_q_value=self.only_one_q_value, include_goal_state=self.include_goal_states,
            shift_qs_by=self.discount if self.shift_q_values else None
        )

        # A transition counts as "done" when any active task yields a
        # positive reward.
        self.dataset[constants.NEW_DONES] = self.dataset[constants.REWARDS][:, self.active_task_indices] > 0.0

        # Plot the distributions of current and next q-values.
        vis_utils.plot_many_histograms(
            [self.dataset[constants.Q_VALUES].reshape(-1), self.dataset[constants.NEXT_Q_VALUES].reshape(-1)],
            ["q_values_hist", "next_q_values_hist"], xlabel="q-values",
            num_bins=50, saver=self.saver
        )

        # Plot the distributions of state and next-state labels.
        vis_utils.plot_many_histograms(
            [self.dataset[constants.STATE_LABELS], self.dataset[constants.NEXT_STATE_LABELS]],
            ["state_labels_hist", "next_state_labels_hist"], xlabel="label index",
            num_bins=10, saver=self.saver
        )

        # Side length of a single state image; states are expected to be
        # square 2D images (checked below).
        original_size = self.dataset[constants.STATES][0].shape[0]

        assert len(self.dataset[constants.STATES][0].shape) == 2
        assert original_size == self.dataset[constants.STATES][0].shape[1]

        # Skip resizing when the images already have the target size.
        if original_size == self.RESIZE_SIZE:
            self.disable_resize = True

        self.depth_size = self.RESIZE_SIZE
        if not self.disable_resize:
            # Build the TF resize graph for both state and next-state images.
            self.images_pl, self.resized_t = \
                self.dataset.resize_tf(original_size, self.RESIZE_SIZE, [constants.STATES, constants.NEXT_STATES])

        self.logger.info("original depth size: {:d}, new size: {:d}".format(original_size, self.depth_size))

        # Rebalance towards positive-reward transitions if requested.
        if self.oversample:
            self.dataset.oversample((self.dataset[constants.REWARDS] > 0))

        # Normalize states and next states with shared statistics so both
        # live in the same input space.
        self.means, self.stds = self.dataset.normalize_together(
            [constants.STATES, constants.NEXT_STATES], std_threshold=0.001
        )

        self.dataset.shuffle()

        # Split off the validation fraction after shuffling.
        num_samples = self.dataset.size
        valid_samples = int(num_samples * self.validation_fraction)
        self.valid_dataset = self.dataset.split(valid_samples)

        # Number of full batches per training epoch.
        self.epoch_size = self.dataset[constants.STATES].shape[0] // self.batch_size

        # Boolean masks selecting non-terminal transitions in each split.
        self.train_not_dones = np.logical_not(self.dataset[constants.NEW_DONES])
        self.valid_not_dones = np.logical_not(self.valid_dataset[constants.NEW_DONES])

        self.log_dataset_stats_()
예제 #5
0
    def plot_cluster_q_values_(self):
        """Print the cluster q-values and save a histogram of them."""

        # NOTE(review): looks like a leftover debug print; kept to preserve
        # observable behavior.
        print(self.cluster_q_values)

        flat_q_values = self.cluster_q_values.reshape(-1)
        vis_utils.plot_many_histograms(
            [flat_q_values], ["cluster_q_values"],
            xlabel="bins", num_bins=50, saver=self.saver)
예제 #6
0
    def plot_perplexities_(self):
        """Save histograms of per-state cluster-assignment perplexities.

        Computes perplexities from the train and valid cluster log
        probabilities and plots both distributions.
        """

        perplexities = [
            evaluate.get_perplexities(self.train_cluster_log_probs),
            evaluate.get_perplexities(self.valid_cluster_log_probs),
        ]
        names = ["train_state_perplexities", "valid_state_perplexities"]

        vis_utils.plot_many_histograms(
            perplexities, names, xlabel="perplexity",
            num_bins=50, saver=self.saver)
    def plot_cluster_rewards_(self, cluster_rewards, name):
        """Save a histogram of the flattened cluster rewards under *name*.

        :param cluster_rewards: array of per-cluster rewards; flattened
            before plotting.
        :param name: file name for the saved histogram.
        """

        flat_rewards = cluster_rewards.reshape(-1)
        vis_utils.plot_many_histograms(
            [flat_rewards], [name],
            xlabel="bins", num_bins=50, saver=self.saver)