Пример #1
0
 def _plot_impl(self,
                y="return",
                x="learning_steps",
                save=False,
                show=True):
     """Plot one recorded quantity of ``self.result`` against another.

     :param y: key into ``self.result`` used for the vertical axis.
     :param x: key into ``self.result`` used for the horizontal axis.
     :param save: when true, store the figure as
         ``<exp_id>-performance.pdf`` (id zero-padded to three digits)
         inside ``self.full_path``.
     :param show: when true, switch interactive mode off and call
         ``plt.show()``.
     """
     axis_names = rlpy.tools.results.default_labels
     figure = plt.figure("Performance")
     data = self.result
     plt.plot(data[x], data[y], lw=2, markersize=4, marker=MARKERS[0])
     # Leave 1% of head-room to the right of the last x value.
     plt.xlim(0, data[x][-1] * 1.01)

     values = np.array(data[y])
     low = values.min()
     high = values.max()
     span = high - low
     if span > 0:
         # Pad the y-range by 10% of its span plus a constant 0.1 on both
         # sides; a flat curve keeps matplotlib's automatic limits.
         plt.ylim(low - 0.1 * span - 0.1, high + 0.1 * span + 0.1)

     # Fall back to the raw key when no display label is registered for it.
     x_text = x
     if x in axis_names:
         x_text = axis_names[x]
     y_text = y
     if y in axis_names:
         y_text = axis_names[y]
     plt.xlabel(x_text, fontsize=16)
     plt.ylabel(y_text, fontsize=16)

     if save:
         target = os.path.join(
             self.full_path,
             "{:03}-performance.pdf".format(self.exp_id))
         figure.savefig(target, transparent=True, pad_inches=0.1)
     if show:
         plt.ioff()
         plt.show()
Пример #2
0
 def show_domain(self, a=0):
     """Draw the board held in ``self.state`` and mark the cell of action ``a``.

     On the first call (``self.domain_fig`` is ``None``) the image, the tick
     labels and an initial marker are created. Every call then replaces the
     marker and refreshes the image data.

     :param a: action id; decoded into ``(row, column)`` by ``id2vec`` with
         dimensions ``[BOARD_SIZE, BOARD_SIZE]``.
     """
     size = self.BOARD_SIZE
     board = self.state

     if self.domain_fig is None:
         # First call: build the static parts of the figure.
         axes = self.move_fig = plt.subplot(111)
         board = board.reshape((size, size))
         self.domain_fig = plt.imshow(board,
                                      cmap="FlipBoard",
                                      interpolation="nearest",
                                      vmin=0,
                                      vmax=1)
         plt.xticks(np.arange(size), fontsize=FONTSIZE)
         plt.yticks(np.arange(size), fontsize=FONTSIZE)
         row, col = id2vec(a, [size, size])
         # NOTE(review): this marker is removed and re-drawn right below on
         # the same call; it only matters for what plt.show() displays.
         self.move_fig = axes.plot(col, row, "kx", markersize=30.0)
         plt.show()

     row, col = id2vec(a, [size, size])
     # Drop the previously drawn marker before plotting the new one.
     stale_marker = self.move_fig.pop(0)
     stale_marker.remove()
     # Other marker styles such as 'D' or 'o' work here as well.
     self.move_fig = plt.plot(col, row, "kx", markersize=30.0)
     board = board.reshape((size, size))
     self.domain_fig.set_data(board)
     plt.draw()
Пример #3
0
    def show_domain(self, a):
        """Draw the map together with the current ally and intruder positions.

        The map image is created once (while ``self.domain_fig`` is ``None``).
        On every call the previously drawn ally/intruder markers are removed
        and new ones are plotted from ``self.state``.

        :param a: action argument; not used by this method.
        """
        s = self.state
        # Draw the environment
        fig = plt.figure("IntruderMonitoring")
        if self.domain_fig is None:
            self.domain_fig = plt.imshow(
                self.map,
                cmap="IntruderMonitoring",
                interpolation="nearest",
                vmin=0,
                vmax=3,
            )
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            plt.show()
        if self.ally_fig is not None:
            # Remove the markers drawn by the previous call.
            self.ally_fig.pop(0).remove()
            self.intruder_fig.pop(0).remove()

        # The first NUMBER_OF_AGENTS * 2 state entries belong to the allies,
        # the rest to the intruders; each group is viewed as rows of pairs.
        split = self.NUMBER_OF_AGENTS * 2
        s_ally = s[0:split].reshape((-1, 2))
        s_intruder = s[split:].reshape((-1, 2))
        # Column 1 of each pair is plotted on the x-axis, column 0 on the
        # y-axis.
        self.ally_fig = plt.plot(
            s_ally[:, 1],
            s_ally[:, 0],
            "bo",
            markersize=30.0,
            alpha=0.7,
            markeredgecolor="k",
            markeredgewidth=2,
        )
        # Marker-only format string: the colour comes solely from the
        # ``color`` keyword.  The former "g>" also encoded green, which
        # matplotlib reported as a redundant colour definition on every
        # frame before letting the keyword take precedence.
        self.intruder_fig = plt.plot(
            s_intruder[:, 1],
            s_intruder[:, 0],
            ">",
            color="gray",
            markersize=30.0,
            alpha=0.7,
            markeredgecolor="k",
            markeredgewidth=2,
        )
        fig.canvas.draw()
        fig.canvas.flush_events()
Пример #4
0
 def show_domain(self, a=None):
     """Draw the swimmer as a poly-line and display the selected action.

     :param a: index into ``self.actions`` or ``None``; the looked-up action
         (or ``None``) is shown as text in the figure.
     """
     shown_action = None if a is None else self.actions[a]

     # One (cos, sin) row per entry of self.theta.
     directions = np.empty((self.d, 2))
     directions[:, 0] = np.cos(self.theta)
     directions[:, 1] = np.sin(self.theta)
     # presumably the segment centres relative to the centre of mass --
     # TODO confirm against the definition of self.P
     centres = np.dot(self.P, directions)
     half_extent = 0.5 * self.lengths[:, None] * directions
     starts = centres - half_extent
     ends = centres + half_extent
     xs = np.hstack([starts[:, 0], ends[:, 0]]) + self.pos_cm[0]
     ys = np.hstack([starts[:, 1], ends[:, 1]]) + self.pos_cm[1]

     fig = plt.figure("Swimmer")
     if self.swimmer_lines is not None:
         # Artists already exist: only update their data.
         self.swimmer_lines.set_data(xs, ys)
         self.action_text.set_text(str(shown_action))
     else:
         # First call: red dot at the origin, the swimmer line, the action
         # label and fixed axis limits.
         plt.plot(0.0, 0.0, "ro")
         self.swimmer_lines = plt.plot(xs, ys)[0]
         self.action_text = plt.text(-2, -8, str(shown_action))
         plt.xlim(-5, 15)
         plt.ylim(-10, 10)
     fig.canvas.draw()
     fig.canvas.flush_events()
Пример #5
0
    def batch_discover(self, td_errors, phi, states):
        """
        Move the most relevant features from the remaining bag into the
        selected set (OMPTD batch discovery).

        The relevance of a remaining feature is the absolute inner product of
        its column in ``self.fullphi`` with ``td_errors``.  Features are
        visited from highest to lowest relevance; at most
        ``self.max_batch_discovery`` are accepted and the scan stops at the
        first one whose relevance is below ``self.batch_threshold``.

        :param td_errors: p-by-1 vector, error associated with each state.
        :param phi: p-by-n feature matrix; not used by this method.
        :param states: p-by-(state dimension) matrix, handed to
            ``self.calculate_full_phi_normalized``.
        :return: ``True`` if at least one feature was added, else ``False``.
        """
        if len(self.remaining_features) == 0:
            # Nothing left in the bag to expand.
            return False

        self.calculate_full_phi_normalized(states)

        relevances = np.zeros(len(self.remaining_features))
        for slot, feature in enumerate(self.remaining_features):
            column = self.fullphi[:, feature]
            relevances[slot] = np.abs(np.dot(column, td_errors))

        if self.SHOW_RELEVANCES:
            flat = relevances.flatten()
            curve = np.sort(flat[flat != 0])
            plt.plot(curve, linewidth=3)
            # Show the plot with interactive mode off, then switch it on again.
            plt.ioff()
            plt.show()
            plt.ion()

        # Indices into the bag, ordered from highest to lowest relevance.
        ranking = np.argsort(relevances)[::-1]
        top_relevance = relevances[ranking[0]]
        self.logger.debug("OMPTD Batch: Max Relevance = %0.3f" % top_relevance)

        picked = []  # bag positions of the accepted features
        budget = min(self.max_batch_discovery, len(relevances))
        for rank in range(budget):
            position = ranking[rank]
            score = relevances[position]
            if not score >= self.batch_threshold:
                # The ranking is sorted, so nothing further can qualify.
                break
            feature = self.remaining_features[position]
            self.logger.debug("New Feature %d: %s, Relevance = %0.3f" % (
                self.features_num,
                str(np.sort(list(self.iFDD.getFeature(feature).f_set))),
                score,
            ))
            picked.append(position)
            self.selected_features.append(feature)
            self.features_num += 1

        self.remaining_features = np.delete(self.remaining_features, picked)
        return len(picked) > 0
Пример #6
0
    def batch_discover(self, td_errors, phi, states):
        """
        Discover new pairwise features with iFDD in the batch setting.

        For every sample the outer product of its feature vector with itself
        is accumulated, once weighted by the sample's TD error (relevance)
        and once unweighted (co-activation count).  Pairs are then visited
        from highest to lowest relevance and handed to ``self.inspectPair``
        until ``self.max_batch_discovery`` pairs were accepted or a pair's
        relevance is not above ``self.batch_threshold``.

        :param td_errors: p-by-1 (how much error was observed for each sample).
        :param phi: feature matrix read as ``phi[i, :]`` for sample ``i``,
            i.e. one row per sample with ``self.features_num`` entries.
        :param states: not used by this method.
        :return: ``True`` if at least one feature was added, else ``False``.
        """
        limit = self.max_batch_discovery
        num_features = self.features_num
        counts = np.zeros((num_features, num_features))
        relevances = np.zeros((num_features, num_features))
        for i, error in enumerate(td_errors):
            pair_activation = np.outer(phi[i, :], phi[i, :])
            # iFDD+ keeps the sign of the error while summing; plain iFDD
            # sums absolute errors.
            weight = error if self.iFDDPlus else abs(error)
            relevances += pair_activation * weight
            counts += pair_activation

        # Keep only the strict upper triangle: the diagonal and the lower
        # part are zeroed so every unordered pair is considered once.
        relevances = np.triu(relevances, 1)
        if self.iFDDPlus:
            # iFDD+ relevance: |sum of errors| / sqrt(co-activation count).
            active = np.nonzero(relevances)
            relevances[active] = (
                np.abs(relevances[active]) / np.sqrt(counts[active])
            )
        # Plain iFDD (Geramifard11_ICML) uses the accumulated sums as they are.

        # F1 and F2 are the parent features of each candidate pair.
        (F1, F2) = relevances.nonzero()
        relevances = relevances[F1, F2]
        if len(relevances) == 0:
            # No pair was ever co-active with a non-zero weight.
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        if self.debug:
            flat = relevances.flatten()
            curve = np.sort(flat[flat != 0])
            plt.ioff()
            plt.plot(curve, linewidth=3)
            plt.show()

        # Candidate indices ordered from highest to lowest relevance.
        ranking = np.argsort(relevances)[::-1]
        top_relevance = relevances[ranking[0]]
        self.logger.debug("iFDD Batch: Max Relevance = {0:g}".format(top_relevance))

        discovered = 0
        for candidate in ranking:
            if discovered >= limit:
                break
            score = relevances[candidate]
            if not score > self.batch_threshold:
                # The ranking is sorted, so nothing further can qualify.
                break
            if self.inspectPair(F1[candidate], F2[candidate], np.inf):
                newest = self.features_num - 1
                self.logger.debug(
                    "New Feature %d: %s, Relevance = %0.3f"
                    % (newest, self.getStrFeatureSet(newest), score)
                )
                discovered += 1
        # Tells the caller whether the representation has been expanded.
        return discovered > 0