Пример #1
0
 def showDomain(self, a=0):
     """Render the current block configuration as a colored grid.

     Block ``A`` is drawn in the cell directly above the block
     ``self.state[A]`` it rests on; a block whose state entry equals its
     own index is on the table and is drawn in row 0 of its own column.

     :param a: Not used by this method; kept so existing callers that pass
         an action keep working.
     """
     s = self.state
     # Cell value 0 is rendered as white, so block A is stored as A + 1.
     world = np.zeros((self.blocks, self.blocks), 'uint8')
     undrawn_blocks = np.arange(self.blocks)
     while len(undrawn_blocks):
         A = undrawn_blocks[0]
         B = s[A]
         undrawn_blocks = undrawn_blocks[1:]
         if B == A:  # A is on the table
             world[0, A] = A + 1
         else:
             # A rests on B, so it can only be placed once B is drawn.
             # findElemArray2D presumably returns the row and column
             # indices at which B + 1 occurs in world -- TODO confirm.
             i, j = findElemArray2D(B + 1, world)
             if len(i):
                 world[i + 1, j] = A + 1
             else:
                 # B is not drawn yet: retry A after the remaining blocks.
                 undrawn_blocks = np.hstack((undrawn_blocks, [A]))
     # Address the figure by name so that the image is never created in,
     # and the redraw never targets, some other figure that is current.
     fig = plt.figure("Domain")
     if self.domain_fig is None:
         self.domain_fig = plt.imshow(
             world,
             cmap='BlocksWorld',
             origin='lower',
             interpolation='nearest')
         plt.xticks(np.arange(self.blocks), fontsize=FONTSIZE)
         plt.yticks(np.arange(self.blocks), fontsize=FONTSIZE)
         plt.axis('off')
         plt.show()
     else:
         self.domain_fig.set_data(world)
         fig.canvas.draw()
         # Let the GUI event loop process the redraw.
         fig.canvas.flush_events()
Пример #2
0
 def showDomain(self, a=0):
     """Draw the block towers described by ``self.state``.

     A block whose state entry is its own index sits on the table (row 0
     of its own column); any other block is drawn one row above the block
     it rests on.

     :param a: Not used by this method.
     """
     state = self.state
     grid = np.zeros((self.blocks, self.blocks), 'uint8')
     pending = np.arange(self.blocks)
     while len(pending):
         block, pending = pending[0], pending[1:]
         support = state[block]
         if support == block:
             # On the table. Values are offset by one because 0 is white.
             grid[0, block] = block + 1
             continue
         # Look for the supporting block in what has been drawn so far.
         rows, cols = findElemArray2D(support + 1, grid)
         if len(rows):
             grid[rows + 1, cols] = block + 1
         else:
             # Support not drawn yet: queue this block again at the back.
             pending = np.hstack((pending, [block]))

     if self.domain_fig is not None:
         # Later calls only swap the image data and refresh the canvas.
         self.domain_fig.set_data(grid)
         plt.figure("Domain").canvas.draw()
         plt.figure("Domain").canvas.flush_events()
         return

     # First call: build the image inside the named "Domain" figure.
     plt.figure("Domain")
     image_options = dict(cmap='BlocksWorld',
                          origin='lower',
                          interpolation='nearest')
     self.domain_fig = plt.imshow(grid, **image_options)
     plt.xticks(np.arange(self.blocks), fontsize=FONTSIZE)
     plt.yticks(np.arange(self.blocks), fontsize=FONTSIZE)
     plt.axis('off')
     plt.show()
Пример #3
0
 def showDomain(self, a=0):
     """Draw the board and mark the cell addressed by action ``a``.

     The state vector is reshaped to ``BOARD_SIZE x BOARD_SIZE`` and shown
     as an image; a black ``x`` is plotted at the row/column pair that
     ``id2vec`` returns for ``a``.

     :param a: Action id, converted to ``(row, column)`` with ``id2vec``.
     """
     board = self.state.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
     a_row, a_col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
     # Address the figure by name so that neither the plotting calls nor
     # the redraw end up in some other figure that happens to be current.
     fig = plt.figure("Domain")
     if self.domain_fig is None:
         plt.subplot(111)
         self.domain_fig = plt.imshow(
             board,
             cmap='FlipBoard',
             interpolation='nearest',
             vmin=0,
             vmax=1)
         plt.xticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         plt.yticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         # Plot the marker once; it is already in place when show() runs.
         self.move_fig = plt.plot(a_col, a_row, 'kx', markersize=30.0)
         plt.show()
     else:
         # Replace the previous marker instead of stacking a new one.
         self.move_fig.pop(0).remove()
         self.move_fig = plt.plot(a_col, a_row, 'kx', markersize=30.0)
         self.domain_fig.set_data(board)
     fig.canvas.draw()
     fig.canvas.flush_events()
Пример #4
0
 def showDomain(self, a=0):
     """Draw the chain as a row of circles and highlight the current state.

     Every circle is reset to white, the circles listed in
     ``self.GOAL_STATES`` are filled green and the circle of the current
     state is filled black.

     :param a: Not used by this method; kept for signature compatibility.
     """
     s = self.state
     if self.circles is None:
         plt.figure("Domain")
         self.domain_fig = plt.subplot(3, 1, 1)
         # NOTE(review): this selects figure number 1, which presumably
         # is the "Domain" figure only when that was the first figure
         # created -- confirm the intent before changing it.
         plt.figure(1, (self.chainSize * 2 / 10.0, 2))
         self.domain_fig.set_xlim(0, self.chainSize * 2 / 10.0)
         self.domain_fig.set_ylim(0, 2)
         # Slightly larger circle behind the last state, so that the last
         # state appears as a double circle.
         self.domain_fig.add_patch(
             mpatches.Circle((old_div(1, 5.0) + 2 / 10.0 *
                              (self.chainSize - 1), self.Y),
                             self.RADIUS * 1.1,
                             fc="w"))
         self.domain_fig.xaxis.set_visible(False)
         self.domain_fig.yaxis.set_visible(False)
         self.circles = [
             mpatches.Circle((old_div(1, 5.0) + 2 / 10.0 * i, self.Y),
                             self.RADIUS,
                             fc="w") for i in range(self.chainSize)
         ]
         for circle in self.circles:
             self.domain_fig.add_patch(circle)
         # Show once, after all circles exist. Calling show() inside the
         # loop above displayed the figure after the first circle only and
         # repeated the call for every state.
         plt.show()
     for p in self.circles:
         p.set_facecolor('w')
     for p in self.GOAL_STATES:
         self.circles[p].set_facecolor('g')
     self.circles[s].set_facecolor('k')
     plt.figure("Domain").canvas.draw()
     plt.figure("Domain").canvas.flush_events()
Пример #5
0
    def showDomain(self, a=0, s=None):
        """Draw the grid map and place the agent marker on it.

        :param a: Not used by this method.
        :param s: State to display, indexed as ``s[0]`` (plotted on the y
            axis) and ``s[1]`` (plotted on the x axis); defaults to
            ``self.state``.
        """
        state = self.state if s is None else s
        marker_size = 20.0 - self.COLS

        if self.domain_fig is None:
            # First call: create the map image in the "Domain" figure.
            # agent_fig holds the figure only until the marker is plotted.
            self.agent_fig = plt.figure("Domain")
            map_options = dict(cmap='GridWorld',
                               interpolation='nearest',
                               vmin=0,
                               vmax=5)
            self.domain_fig = plt.imshow(self.map, **map_options)
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            axes = plt.gca()
            self.agent_fig = axes.plot(state[1], state[0], 'kd',
                                       markersize=marker_size)
            plt.show()

        # Drop the previous marker, then draw the agent at its new cell.
        old_marker = self.agent_fig.pop(0)
        old_marker.remove()
        self.agent_fig = plt.figure("Domain")
        axes = plt.gca()
        self.agent_fig = axes.plot(state[1], state[0], 'k>',
                                   markersize=marker_size)
        plt.draw()
Пример #6
0
 def plot(self, y="return", x="learning_steps", save=False):
     """Plot one recorded result series against another and show it.

     Only basic plotting is offered here; for more advanced plots see
     :py:class:`Tools.Merger.Merger`.

     :param y: Key of ``self.result`` drawn on the y axis.
     :param x: Key of ``self.result`` drawn on the x axis.
     :param save: If true, also save the figure as a PDF named after
         ``self.exp_id`` inside ``self.full_path``.
     """
     axis_labels = rlpy.Tools.results.default_labels
     fig = plt.figure("Performance")
     results = self.result
     x_values, y_values = results[x], results[y]
     plt.plot(x_values, y_values, '-bo', lw=3, markersize=10)
     plt.xlim(0, x_values[-1] * 1.01)

     y_arr = np.array(y_values)
     lowest, highest = y_arr.min(), y_arr.max()
     spread = highest - lowest
     if spread > 0:
         # Pad the y range by 10% of the spread plus a fixed 0.1.
         plt.ylim(lowest - .1 * spread - .1, highest + .1 * spread + .1)

     # Fall back to the raw key when no display label is registered.
     x_text = axis_labels[x] if x in axis_labels else x
     y_text = axis_labels[y] if y in axis_labels else y
     plt.xlabel(x_text, fontsize=16)
     plt.ylabel(y_text, fontsize=16)

     if save:
         file_name = "{:3}-performance.pdf".format(self.exp_id)
         target = os.path.join(self.full_path, file_name)
         fig.savefig(target, transparent=True, pad_inches=.1)
     # Interactive mode is switched off before the figure is shown.
     plt.ioff()
     plt.show()
Пример #7
0
    def showDomain(self, a):
        """Draw the map with the current ally and intruder positions.

        The first ``NUMBER_OF_AGENTS * 2`` state entries are read as ally
        ``(row, column)`` pairs and the remaining entries as intruder
        pairs. Allies are blue circles, intruders gray triangles.

        :param a: Not used by this method.
        """
        s = self.state
        # Address the figure by name so that the map, the markers and the
        # redraw all target the domain figure even when another figure
        # is current.
        fig = plt.figure("Domain")
        if self.domain_fig is None:
            self.domain_fig = plt.imshow(self.map,
                                         cmap='IntruderMonitoring',
                                         interpolation='nearest',
                                         vmin=0,
                                         vmax=3)
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            plt.show()
        if self.ally_fig is not None:
            # Remove the markers drawn by the previous call.
            self.ally_fig.pop(0).remove()
            self.intruder_fig.pop(0).remove()

        s_ally = s[0:self.NUMBER_OF_AGENTS * 2].reshape((-1, 2))
        s_intruder = s[self.NUMBER_OF_AGENTS * 2:].reshape((-1, 2))
        # Column index goes on the x axis, row index on the y axis.
        self.ally_fig = plt.plot(s_ally[:, 1],
                                 s_ally[:, 0],
                                 'bo',
                                 markersize=30.0,
                                 alpha=.7,
                                 markeredgecolor='k',
                                 markeredgewidth=2)
        # The format string carries the marker only: the color comes from
        # the keyword, so a color in the format string would just conflict.
        self.intruder_fig = plt.plot(s_intruder[:, 1],
                                     s_intruder[:, 0],
                                     '>',
                                     color='gray',
                                     markersize=30.0,
                                     alpha=.7,
                                     markeredgecolor='k',
                                     markeredgewidth=2)
        fig.canvas.draw()
        fig.canvas.flush_events()
Пример #8
0
 def showDomain(self, a=0):
     """Show the board as an image and mark the cell chosen by ``a``.

     ``self.state`` is reshaped to ``BOARD_SIZE x BOARD_SIZE``; the marker
     position is the ``(row, column)`` pair ``id2vec`` returns for ``a``.

     :param a: Action id whose cell is marked with a black ``x``.
     """
     board = self.state.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
     a_row, a_col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
     # Select the figure by name: plotting and redrawing must not depend
     # on which figure happens to be current.
     fig = plt.figure("Domain")
     if self.domain_fig is None:
         plt.subplot(111)
         self.domain_fig = plt.imshow(board,
                                      cmap='FlipBoard',
                                      interpolation='nearest',
                                      vmin=0,
                                      vmax=1)
         plt.xticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         plt.yticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         # A single marker is enough on the first call; there is nothing
         # to remove yet.
         self.move_fig = plt.plot(a_col, a_row, 'kx', markersize=30.0)
         plt.show()
     else:
         # Swap the old marker for the new one and refresh the image.
         self.move_fig.pop(0).remove()
         self.move_fig = plt.plot(a_col, a_row, 'kx', markersize=30.0)
         self.domain_fig.set_data(board)
     fig.canvas.draw()
     fig.canvas.flush_events()
Пример #9
0
    def showDomain(self, a=0, s=None):
        """Display the grid map with a marker at the agent's cell.

        :param a: Not used by this method.
        :param s: State to show (``s[0]`` on the y axis, ``s[1]`` on the
            x axis); ``self.state`` is used when omitted.
        """
        if s is None:
            s = self.state

        def place_agent(fmt):
            # Plot the agent marker on the current axes with format fmt.
            return plt.gca().plot(s[1],
                                  s[0],
                                  fmt,
                                  markersize=20.0 - self.COLS)

        needs_setup = self.domain_fig is None
        if needs_setup:
            # agent_fig holds the figure only until the marker is plotted.
            self.agent_fig = plt.figure("Domain")
            self.domain_fig = plt.imshow(self.map,
                                         cmap='GridWorld',
                                         interpolation='nearest',
                                         vmin=0,
                                         vmax=5)
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            self.agent_fig = place_agent('kd')
            plt.show()

        # Take the previous marker off the axes before drawing the new one.
        stale = self.agent_fig.pop(0)
        stale.remove()
        self.agent_fig = plt.figure("Domain")
        self.agent_fig = place_agent('k>')
        plt.draw()
Пример #10
0
 def showDomain(self, a=0):
     """Draw the chain as a row of circles and highlight the current state.

     Every circle is reset to white, the circles listed in
     ``self.GOAL_STATES`` are filled green and the circle of the current
     state is filled black.

     :param a: Not used by this method; kept for signature compatibility.
     """
     s = self.state
     if self.circles is None:
         # Create the axes inside a named figure so that later redraws can
         # address it regardless of which figure is current.
         plt.figure("Domain")
         self.domain_fig = plt.subplot(3, 1, 1)
         # NOTE(review): this selects figure number 1, which presumably
         # is the "Domain" figure only when that was the first figure
         # created -- confirm the intent before changing it.
         plt.figure(1, (self.chainSize * 2 / 10.0, 2))
         self.domain_fig.set_xlim(0, self.chainSize * 2 / 10.0)
         self.domain_fig.set_ylim(0, 2)
         # Slightly larger circle behind the last state, so that the last
         # state appears as a double circle.
         self.domain_fig.add_patch(
             mpatches.Circle((1 / 5.0 + 2 / 10.0 * (self.chainSize - 1),
                              self.Y),
                             self.RADIUS * 1.1,
                             fc="w"))
         self.domain_fig.xaxis.set_visible(False)
         self.domain_fig.yaxis.set_visible(False)
         self.circles = [
             mpatches.Circle((1 / 5.0 + 2 / 10.0 * i, self.Y),
                             self.RADIUS,
                             fc="w") for i in range(self.chainSize)
         ]
         for circle in self.circles:
             self.domain_fig.add_patch(circle)
         # Show once, after all circles exist. Calling show() inside the
         # loop above displayed the figure after the first circle only and
         # repeated the call for every state.
         plt.show()
     for p in self.circles:
         p.set_facecolor('w')
     for p in self.GOAL_STATES:
         self.circles[p].set_facecolor('g')
     self.circles[s].set_facecolor('k')
     fig = plt.figure("Domain")
     fig.canvas.draw()
     fig.canvas.flush_events()
Пример #11
0
 def plot(self, y="return", x="learning_steps", save=False):
     """Draw a simple performance curve from ``self.result``.

     This is deliberately limited; use :py:class:`Tools.Merger.Merger`
     for more advanced plotting of results.

     :param y: Result key plotted on the vertical axis.
     :param x: Result key plotted on the horizontal axis.
     :param save: When true, the figure is also written to a PDF file in
         ``self.full_path`` whose name starts with ``self.exp_id``.
     """
     known_labels = rlpy.Tools.results.default_labels
     figure = plt.figure("Performance")
     data = self.result
     horizontal = data[x]
     vertical = data[y]
     plt.plot(horizontal, vertical, '-bo', lw=3, markersize=10)
     plt.xlim(0, horizontal[-1] * 1.01)

     y_arr = np.array(vertical)
     y_min = y_arr.min()
     y_max = y_arr.max()
     y_range = y_max - y_min
     if y_range > 0:
         # Leave a margin of 10% of the range plus 0.1 on both sides.
         plt.ylim(y_min - .1 * y_range - .1, y_max + .1 * y_range + .1)

     # Keys without a registered display label are used verbatim.
     if x in known_labels:
         x_caption = known_labels[x]
     else:
         x_caption = x
     if y in known_labels:
         y_caption = known_labels[y]
     else:
         y_caption = y
     plt.xlabel(x_caption, fontsize=16)
     plt.ylabel(y_caption, fontsize=16)

     if save:
         pdf_path = os.path.join(
             self.full_path, "{:3}-performance.pdf".format(self.exp_id))
         figure.savefig(pdf_path, transparent=True, pad_inches=.1)
     # Interactive mode is switched off before the figure is shown.
     plt.ioff()
     plt.show()
Пример #12
0
 def showExploration(self):
     """Scatter-plot the visited states with the source and the target.

     Opens a new figure, draws the first two components of every entry of
     ``self.visited_states`` in black, ``self.src`` in red and
     ``self.target`` in blue, then shows the figure.
     """
     plt.figure()
     visited_x = [state[0] for state in self.visited_states]
     visited_y = [state[1] for state in self.visited_states]
     plt.scatter(visited_x, visited_y, color='k')
     # Source and target are single points stored under 'x' and 'y'.
     for point, colour in ((self.src, 'r'), (self.target, 'b')):
         plt.scatter([point['x']], [point['y']], color=colour)
     plt.show()
Пример #13
0
def show_one_variable(variable, vrange, map1="9x9-2PathR1.txt"):
	"""Evaluate ``param_ranges`` for one variable and plot both results.

	``param_ranges`` is called once without ``cur_map`` and once with
	``cur_map=map1``; both results are plotted against ``vrange``.
	Plotting is best effort: an ordinary plotting error is logged and the
	computed results are still returned.

	:param variable: Name of the variable; also used in the plot title.
	:param vrange: Values passed to ``param_ranges`` and used as x axis.
	:param map1: Map passed as ``cur_map`` for the second evaluation.
	:return: Tuple ``(ctrl, y)`` holding the two ``param_ranges`` results.
	"""
	import logging

	x = vrange
	ctrl = param_ranges(variable, vrange)
	y = param_ranges(variable, vrange, cur_map=map1)
	try:
		plt.title(variable + " vs AUC")
		plt.ioff()
		# Label both curves so that legend() has entries to show.
		plt.plot(x, ctrl, label="control")
		plt.plot(x, y, label=map1)
		plt.legend()
		plt.show()
	except Exception:
		# A `return` inside `finally` would also discard
		# KeyboardInterrupt and SystemExit; catch ordinary errors only
		# and report them instead of hiding them.
		logging.getLogger(__name__).exception(
			"Plotting %s vs AUC failed", variable)
	return ctrl, y
Пример #14
0
    def showLearning(self, representation):
        """Plot the value function and greedy policy over the state grid.

        The state space is sampled on a regular grid of
        ``X_discretization`` positions by ``XDot_discretization``
        velocities. For every grid point the best action and the largest
        Q-value are taken from ``representation``.

        :param representation: Object providing ``Qs(s, terminal)`` and
            ``bestAction(s, terminal, actions)``.
        """
        # Rows are indexed by x-dot and columns by x in the loops below,
        # so the arrays must be XDot_discretization x X_discretization.
        # The transposed shape only works when both counts are equal.
        grid_shape = (self.XDot_discretization, self.X_discretization)
        pi = np.zeros(grid_shape, 'uint8')
        V = np.zeros(grid_shape)

        if self.valueFunction_fig is None:
            self.valueFunction_fig = plt.figure("Value Function")
            self.valueFunction_im = plt.imshow(
                V,
                cmap='ValueFunction',
                interpolation='nearest',
                origin='lower',
                vmin=self.MIN_RETURN,
                vmax=self.MAX_RETURN)

            plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
            plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
            plt.xlabel(r"$x$")
            plt.ylabel(r"$\dot x$")

            self.policy_fig = plt.figure("Policy")
            self.policy_im = plt.imshow(
                pi,
                cmap='MountainCarActions',
                interpolation='nearest',
                origin='lower',
                vmin=0,
                vmax=self.actions_num)

            plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
            plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
            plt.xlabel(r"$x$")
            plt.ylabel(r"$\dot x$")
            plt.show()

        for row, xDot in enumerate(np.linspace(self.XDOTMIN, self.XDOTMAX, self.XDot_discretization)):
            for col, x in enumerate(np.linspace(self.XMIN, self.XMAX, self.X_discretization)):
                s = [x, xDot]
                Qs = representation.Qs(s, False)
                As = self.possibleActions()
                pi[row, col] = representation.bestAction(s, False, As)
                V[row, col] = max(Qs)
        self.valueFunction_im.set_data(V)
        self.policy_im.set_data(pi)

        # Make each figure current in turn so that draw() refreshes it.
        self.valueFunction_fig = plt.figure("Value Function")
        plt.draw()
        self.policy_fig = plt.figure("Policy")
        plt.draw()
Пример #15
0
    def showLearning(self, representation):
        """Plot learned policy, reference policy and value function.

        State dimensions 2 and 3 are swept over a 200 x 200 grid spanning
        their ``statespace_limits`` while every other dimension stays at
        zero. At each grid point the greedy action (ties broken at
        random), the action of a ``SwimmerPolicy`` with ``epsilon=0`` and
        the maximum Q-value are recorded and passed to the plot helpers.

        :param representation: Object providing ``Qs(s, terminal)``; it is
            also handed to ``SwimmerPolicy``.
        """
        reference_policy = SwimmerPolicy(
            representation=representation,
            epsilon=0)
        row_dim, col_dim = 2, 3
        resolution = 200
        probe = np.zeros(self.state_space_dims)
        row_low, row_high = (self.statespace_limits[row_dim, 0],
                             self.statespace_limits[row_dim, 1])
        row_values = np.linspace(row_low, row_high, resolution)
        col_low, col_high = (self.statespace_limits[col_dim, 0],
                             self.statespace_limits[col_dim, 1])
        col_values = np.linspace(col_low, col_high, resolution)

        grid_shape = (resolution, resolution)
        pi = np.zeros(grid_shape, 'uint8')
        good_pi = np.zeros(grid_shape, 'uint8')
        V = np.zeros(grid_shape)

        for r, row_value in enumerate(row_values):
            for c, col_value in enumerate(col_values):
                probe[row_dim] = row_value
                probe[col_dim] = col_value
                # Q-values of all actions at the probed state.
                q_values = representation.Qs(probe, False)
                best_q = np.max(q_values)
                # Greedy action, chosen at random among the maximizers.
                maximizers = np.arange(len(q_values))[q_values == best_q]
                pi[r, c] = np.random.choice(maximizers)
                good_pi[r, c] = reference_policy.pi(
                    probe, False, np.arange(self.actions_num))
                # Value of the state under the greedy action.
                V[r, c] = best_q

        # Rows sweep dimension 2 and columns dimension 3, hence the limits.
        limits = dict(ylim=self.statespace_limits[row_dim],
                      xlim=self.statespace_limits[col_dim])
        self._plot_policy(pi, title="Learned Policy", **limits)
        self._plot_policy(good_pi,
                          title="Good Policy",
                          var="good_policy_fig",
                          **limits)
        self._plot_valfun(V, **limits)

        # NOTE(review): if the plot helpers above assign policy_fig and
        # valueFunction_fig, this condition is always false -- confirm.
        figures_missing = (self.policy_fig is None
                           or self.valueFunction_fig is None)
        if figures_missing:
            plt.show()
Пример #16
0
    def showDomain(self, a=0):
        """Draw the chain of states and highlight the current one.

        On the first call the figure is built: one circle per state, a
        second, slightly larger circle behind the last state, and the
        transition arrows drawn through ``fromAtoB``. Every call then
        fills all circles white and the current state's circle black.

        :param a: Not used by this method; kept for signature
            compatibility.
        """
        s = self.state
        s = s[0]
        if self.circles is None:
            fig = plt.figure(1, (self.chainSize * 2, 2))
            ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect=1.)
            ax.set_xlim(0, self.chainSize * 2)
            ax.set_ylim(0, 2)
            # Make the last one double circle
            ax.add_patch(
                mpatches.Circle((1 + 2 * (self.chainSize - 1), self.Y), self.RADIUS * 1.1, fc="w"))
            ax.xaxis.set_visible(False)
            ax.yaxis.set_visible(False)
            self.circles = [mpatches.Circle((1 + 2 * i, self.Y), self.RADIUS, fc="w")
                            for i in range(self.chainSize)]
            for i, circle in enumerate(self.circles):
                ax.add_patch(circle)
                if i != self.chainSize - 1:
                    # Arrow from state i to state i + 1.
                    fromAtoB(
                        1 + 2 * i + self.SHIFT,
                        self.Y + self.SHIFT,
                        1 + 2 * (i + 1) - self.SHIFT,
                        self.Y + self.SHIFT)
                    if i != self.chainSize - 2:
                        # Red arrow back from state i + 1 to state i.
                        fromAtoB(
                            1 + 2 * (i + 1) - self.SHIFT,
                            self.Y - self.SHIFT,
                            1 + 2 * i + self.SHIFT,
                            self.Y - self.SHIFT,
                            'r')
            # The curved red arrow at x = .75 has the same coordinates for
            # every state, so it is drawn once instead of once per circle.
            fromAtoB(
                .75,
                self.Y - 1.5 * self.SHIFT,
                .75,
                self.Y + 1.5 * self.SHIFT,
                'r',
                connectionstyle='arc3,rad=-1.2')
            # Show once, when the figure is complete. Inside the loop the
            # first show() displayed a figure holding only the first circle.
            plt.show()

        for p in self.circles:
            p.set_facecolor('w')
        self.circles[s].set_facecolor('k')
        plt.draw()
Пример #17
0
    def showDomain(self, a):
        """Draw the map with the current ally and intruder positions.

        The first ``NUMBER_OF_AGENTS * 2`` state entries are read as ally
        ``(row, column)`` pairs and the remaining entries as intruder
        pairs. Allies are blue circles, intruders gray triangles.

        :param a: Not used by this method.
        """
        s = self.state
        # Make "Domain" current before any plotting call. Previously it was
        # only re-selected after the markers were plotted, so on later
        # calls they could be added to whichever figure was current.
        fig = plt.figure("Domain")
        if self.domain_fig is None:
            self.domain_fig = plt.imshow(
                self.map,
                cmap='IntruderMonitoring',
                interpolation='nearest',
                vmin=0,
                vmax=3)
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            plt.show()
        if self.ally_fig is not None:
            # Remove the markers drawn by the previous call.
            self.ally_fig.pop(0).remove()
            self.intruder_fig.pop(0).remove()

        s_ally = s[0:self.NUMBER_OF_AGENTS * 2].reshape((-1, 2))
        s_intruder = s[self.NUMBER_OF_AGENTS * 2:].reshape((-1, 2))
        # Column index goes on the x axis, row index on the y axis.
        self.ally_fig = plt.plot(
            s_ally[:, 1],
            s_ally[:, 0],
            'bo',
            markersize=30.0,
            alpha=.7,
            markeredgecolor='k',
            markeredgewidth=2)
        # The format string carries the marker only: the color comes from
        # the keyword, so a color in the format string would just conflict.
        self.intruder_fig = plt.plot(
            s_intruder[:, 1],
            s_intruder[:, 0],
            '>',
            color='gray',
            markersize=30.0,
            alpha=.7,
            markeredgecolor='k',
            markeredgewidth=2)
        fig.canvas.draw()
        fig.canvas.flush_events()
Пример #18
0
    def gridworld_showlearning(self, representation):
        """Show the value function of the wrapped grid domain as an image.

        The image is created in the "Value Function" figure on the first
        call; every call then replaces its data with the result of
        ``self.get_value_function(representation)`` and redraws.

        :param representation: Passed on to ``get_value_function``.
        """
        domain = self.actual_domain
        first_call = self.valueFunction_fig is None
        if first_call:
            plt.figure("Value Function")
            image_options = dict(cmap='ValueFunction',
                                 interpolation='nearest',
                                 vmin=domain.MIN_RETURN,
                                 vmax=domain.MAX_RETURN)
            # The map only serves as a placeholder image of the right
            # size; its data is replaced below.
            self.valueFunction_fig = plt.imshow(domain.map, **image_options)
            plt.xticks(np.arange(domain.COLS), fontsize=12)
            plt.yticks(np.arange(domain.ROWS), fontsize=12)
            plt.show()

        # Make the value-function figure current before redrawing it.
        plt.figure("Value Function")
        values = self.get_value_function(representation)
        self.valueFunction_fig.set_data(values)
        plt.draw()
Пример #19
0
    def plot_trials(self,
                    y="eps_return",
                    x="learning_steps",
                    average=10,
                    save=False):
        """Plot a (optionally smoothed) per-trial series and show it.

        Only basic plotting is offered here; for more advanced plots see
        :py:class:`Tools.Merger.Merger`.

        :param y: Key of ``self.trials`` drawn on the y axis.
        :param x: Key of ``self.trials`` drawn on the x axis.
        :param average: Length of the moving-average window applied to the
            y series; a falsy value disables smoothing. Must be an ``int``.
        :param save: If true, also save the figure as a PDF named after
            ``self.exp_id`` inside ``self.full_path``.
        """
        axis_labels = rlpy.Tools.results.default_labels
        fig = plt.figure("Performance")
        trial_data = self.trials
        y_arr = np.array(trial_data[y])
        if average:
            assert type(average) is int, "Filter length is not an integer!"
            # Moving average: convolve with a uniform window, keeping the
            # output the same length as the input.
            kernel = np.ones(int(average)) / float(average)
            y_arr = np.convolve(y_arr, kernel, 'same')

        x_values = trial_data[x]
        plt.plot(x_values, y_arr, '-bo', lw=3, markersize=10)
        plt.xlim(0, x_values[-1] * 1.01)

        lowest, highest = y_arr.min(), y_arr.max()
        spread = highest - lowest
        if spread > 0:
            # Pad the y range by 10% of the spread plus a fixed 0.1.
            plt.ylim(lowest - .1 * spread - .1, highest + .1 * spread + .1)

        # Fall back to the raw key when no display label is registered.
        x_text = axis_labels[x] if x in axis_labels else x
        y_text = axis_labels[y] if y in axis_labels else y
        plt.xlabel(x_text, fontsize=16)
        plt.ylabel(y_text, fontsize=16)

        if save:
            file_name = "{:3}-trials.pdf".format(self.exp_id)
            target = os.path.join(self.full_path, file_name)
            fig.savefig(target, transparent=True, pad_inches=.1)
        # Interactive mode is switched off before the figure is shown.
        plt.ioff()
        plt.show()
Пример #20
0
    def batchDiscover(self, td_errors, phi, states):
        """Discover features using OMPTD.

        1. Compute, for every remaining feature, the absolute inner
           product of its column of ``self.fullphi`` with ``td_errors``.
        2. Visit the features from highest to lowest relevance.
        3. Move up to ``self.maxBatchDiscovery`` features whose relevance
           is at least ``self.batchThreshold`` from the remaining to the
           selected features.

        :param td_errors: p-by-1 vector, error associated with each state.
        :param phi: p-by-n matrix, vector-valued feature function evaluated
            at each state. Not read by this method.
        :param states: p-by-(statedimension) matrix, each state under
            test; passed to ``calculateFullPhiNormalized``.
        :return: ``True`` if at least one feature was added, else ``False``.
        """
        if len(self.remainingFeatures) == 0:
            # No more features to expand
            return False

        SHOW_RELEVANCES = 0      # Debug switch: plot the sorted relevances
        self.calculateFullPhiNormalized(states)

        relevances = np.zeros(len(self.remainingFeatures))
        for i, f in enumerate(self.remainingFeatures):
            phi_f = self.fullphi[:, f]
            relevances[i] = np.abs(np.dot(phi_f, td_errors))

        if SHOW_RELEVANCES:
            e_vec = relevances.flatten()
            e_vec = e_vec[e_vec != 0]
            e_vec = np.sort(e_vec)
            plt.plot(e_vec, linewidth=3)
            plt.ioff()
            plt.show()
            plt.ion()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]

        # Add top <maxBatchDiscovery> features
        self.logger.debug("OMPTD Batch: Max Relevance = %0.3f" % max_relevance)
        added_feature = False
        to_be_deleted = []  # Record the indices of items to be removed
        # `range` instead of `xrange`: the latter does not exist on Python 3.
        for j in range(min(self.maxBatchDiscovery, len(relevances))):
            max_index = sortedIndices[j]
            f = self.remainingFeatures[max_index]
            relevance = relevances[max_index]
            if relevance >= self.batchThreshold:
                self.logger.debug(
                    'New Feature %d: %s, Relevance = %0.3f' %
                    (self.features_num, str(np.sort(list(self.iFDD.getFeature(f).f_set))), relevances[max_index]))
                to_be_deleted.append(max_index)
                self.selectedFeatures.append(f)
                self.features_num += 1
                added_feature = True
            else:
                # Because the list is sorted, there is no use to look at the
                # others
                break
        # Deletion is deferred so the indices stay valid during the loop.
        self.remainingFeatures = np.delete(self.remainingFeatures, to_be_deleted)
        return added_feature
Пример #21
0
    def showDomain(self, a=0):
        s = self.state
        if self.domain_fig is None:
            self.domain_fig = plt.figure(
                1, (UAVLocation.SIZE * self.dist_between_locations + 1,
                    self.NUM_UAV + 1))
            plt.show()
        plt.clf()
        # Draw the environment
        # Allocate horizontal 'lanes' for UAVs to traverse

        # Formerly, we checked if this was the first time plotting; wedge shapes cannot be removed from
        # matplotlib environment, nor can their properties be changed, without clearing the figure
        # Thus, we must redraw the figure on each timestep
        #        if self.location_rect_vis is None:
        # Figure with x width corresponding to number of location states, UAVLocation.SIZE
        # and rows (lanes) set aside in y for each UAV (NUM_UAV total lanes).
        # Add buffer of 1
        self.subplot_axes = self.domain_fig.add_axes([0, 0, 1, 1],
                                                     frameon=False,
                                                     aspect=1.)
        crashLocationX = 2 * \
            (self.dist_between_locations) * (UAVLocation.SIZE - 1)
        self.subplot_axes.set_xlim(0, 1 + crashLocationX + self.RECT_GAP)
        self.subplot_axes.set_ylim(0, 1 + self.NUM_UAV)
        self.subplot_axes.xaxis.set_visible(False)
        self.subplot_axes.yaxis.set_visible(False)

        # Assign coordinates of each possible uav location on figure
        self.location_coord = [
            0.5 + (self.LOCATION_WIDTH / 2) + (self.dist_between_locations) * i
            for i in range(UAVLocation.SIZE - 1)
        ]
        self.location_coord.append(crashLocationX + self.LOCATION_WIDTH / 2)

        # Create rectangular patches at each of those locations
        self.location_rect_vis = [
            mpatches.Rectangle([0.5 + (self.dist_between_locations) * i, 0],
                               self.LOCATION_WIDTH,
                               self.NUM_UAV * 2,
                               fc='w') for i in range(UAVLocation.SIZE - 1)
        ]
        self.location_rect_vis.append(
            mpatches.Rectangle([crashLocationX, 0],
                               self.LOCATION_WIDTH,
                               self.NUM_UAV * 2,
                               fc='w'))
        [
            self.subplot_axes.add_patch(self.location_rect_vis[i])
            for i in range(4)
        ]
        self.comms_line = [
            lines.Line2D([
                0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * i, 0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * i + self.RECT_GAP
            ], [self.NUM_UAV * 0.5 + 0.5, self.NUM_UAV * 0.5 + 0.5],
                         linewidth=3,
                         color='black',
                         visible=False) for i in range(UAVLocation.SIZE - 2)
        ]
        self.comms_line.append(
            lines.Line2D([
                0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * 2, crashLocationX
            ], [self.NUM_UAV * 0.5 + 0.5, self.NUM_UAV * 0.5 + 0.5],
                         linewidth=3,
                         color='black',
                         visible=False))

        # Create location text below rectangles
        locText = ["Base", "Refuel", "Communication", "Surveillance"]
        self.location_rect_txt = [
            plt.text(0.5 + self.dist_between_locations * i +
                     0.5 * self.LOCATION_WIDTH,
                     -0.3,
                     locText[i],
                     ha='center') for i in range(UAVLocation.SIZE - 1)
        ]
        self.location_rect_txt.append(
            plt.text(crashLocationX + 0.5 * self.LOCATION_WIDTH,
                     -0.3,
                     locText[UAVLocation.SIZE - 1],
                     ha='center'))

        # Initialize list of circle objects

        uav_x = self.location_coord[UAVLocation.BASE]

        # Update the member variables storing all the figure objects
        self.uav_circ_vis = [
            mpatches.Circle((uav_x, 1 + uav_id), self.UAV_RADIUS, fc="w")
            for uav_id in range(0, self.NUM_UAV)
        ]
        self.uav_text_vis = [None for uav_id in range(0, self.NUM_UAV)]  # f**k
        self.uav_sensor_vis = [
            mpatches.Wedge((uav_x + self.SENSOR_REL_X, 1 + uav_id),
                           self.SENSOR_LENGTH, -30, 30)
            for uav_id in range(0, self.NUM_UAV)
        ]
        self.uav_actuator_vis = [
            mpatches.Wedge((uav_x, 1 + uav_id + self.ACTUATOR_REL_Y),
                           self.ACTUATOR_HEIGHT, 60, 120)
            for uav_id in range(0, self.NUM_UAV)
        ]

        # The following was executed when we used to check if the environment needed re-drawing: see above.
        # Remove all UAV circle objects from visualization
        #        else:
        #            [self.uav_circ_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]
        #            [self.uav_text_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]
        #            [self.uav_sensor_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]

        # For each UAV:
        # Draw a circle, with text inside = amt fuel remaining
        # Triangle on top of UAV for comms, black = good, red = bad
        # Triangle in front of UAV for surveillance
        sStruct = self.state2Struct(s)

        for uav_id in range(0, self.NUM_UAV):
            # Assign all the variables corresponding to this UAV for this iteration;
            # this could alternately be done with a UAV class whose objects keep track
            # of these variables.  Elect to use lists here since ultimately the state
            # must be a vector anyway.
            # State index corresponding to the location of this uav
            uav_location = sStruct.locations[uav_id]
            uav_fuel = sStruct.fuel[uav_id]
            uav_sensor = sStruct.sensor[uav_id]
            uav_actuator = sStruct.actuator[uav_id]

            # Assign coordinates on figure where UAV should be drawn
            uav_x = self.location_coord[uav_location]
            uav_y = 1 + uav_id

            # Update plot wit this UAV
            self.uav_circ_vis[uav_id] = mpatches.Circle((uav_x, uav_y),
                                                        self.UAV_RADIUS,
                                                        fc="w")
            self.uav_text_vis[uav_id] = plt.text(uav_x - 0.05, uav_y - 0.05,
                                                 uav_fuel)
            if uav_sensor == SensorState.RUNNING:
                objColor = 'black'
            else:
                objColor = 'red'
            self.uav_sensor_vis[uav_id] = mpatches.Wedge(
                (uav_x + self.SENSOR_REL_X, uav_y),
                self.SENSOR_LENGTH,
                -30,
                30,
                color=objColor)

            if uav_actuator == ActuatorState.RUNNING:
                objColor = 'black'
            else:
                objColor = 'red'
            self.uav_actuator_vis[uav_id] = mpatches.Wedge(
                (uav_x, uav_y + self.ACTUATOR_REL_Y),
                self.ACTUATOR_HEIGHT,
                60,
                120,
                color=objColor)

            self.subplot_axes.add_patch(self.uav_circ_vis[uav_id])
            self.subplot_axes.add_patch(self.uav_sensor_vis[uav_id])
            self.subplot_axes.add_patch(self.uav_actuator_vis[uav_id])

        numHealthySurveil = np.sum(
            np.logical_and(sStruct.locations == UAVLocation.SURVEIL,
                           sStruct.sensor))
        # We have comms coverage: draw a line between comms states to show this
        if (any(sStruct.locations == UAVLocation.COMMS)):
            for i in xrange(len(self.comms_line)):
                self.comms_line[i].set_visible(True)
                self.comms_line[i].set_color('black')
                self.subplot_axes.add_line(self.comms_line[i])
            # We also have UAVs in surveillance; color the comms line black
            if numHealthySurveil > 0:
                self.location_rect_vis[len(self.location_rect_vis) -
                                       1].set_color('green')
        plt.draw()
        sleep(0.5)
Пример #22
0
    def showLearning(self, representation):
        """Draw the value function and the policy arrows of every cell.

        On the first call (``self.valueFunction_fig is None``) the
        "Value Function" figure is created with an image of ``self.map`` and
        four quiver layers, one per action index 0-3 (called up, down, left
        and right by the attribute names).  On every call the image and the
        quivers are refreshed from ``representation``.

        Cell values: ``BLOCKED`` -> 0, ``GOAL`` -> ``MAX_RETURN``, ``PIT`` ->
        ``MIN_RETURN``; ``EMPTY``/``START`` cells get the largest Q-value
        among the actions returned by ``self.possibleActions``.

        :param representation: object providing ``Qs(s, terminal)`` and
            ``bestActions(s, terminal, As)``.  The array returned by ``Qs``
            is indexed by action id.
        """
        if self.valueFunction_fig is None:
            # First call: build the figure, the image and the four quivers.
            plt.figure("Value Function")
            self.valueFunction_fig = plt.imshow(self.map,
                                                cmap='ValueFunction',
                                                interpolation='nearest',
                                                vmin=self.MIN_RETURN,
                                                vmax=self.MAX_RETURN)
            plt.xticks(np.arange(self.COLS), fontsize=12)
            plt.yticks(np.arange(self.ROWS), fontsize=12)

            # Placeholder arrow components / colors; the real values are
            # pushed through set_UVC further down.
            placeholder = np.ones((self.COLS, self.ROWS))
            C = np.zeros((self.COLS, self.ROWS))
            C[0, 0] = 1  # Making sure C has both 0 and 1
            # length of arrow/width of box. Less than 0.5 because each arrow
            # is offset, 0.4 looks nice but could be better/auto generated
            arrow_ratio = 0.4
            Max_Ratio_ArrowHead_to_ArrowLength = 0.25
            ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0

            def add_quiver(row_offset, col_offset, units, scale_units, scale,
                           width):
                # One quiver layer whose anchors are shifted off the cell
                # centers by the given offsets.
                X, Y = np.meshgrid(np.arange(self.ROWS) + row_offset,
                                   np.arange(self.COLS) + col_offset)
                arrows = plt.quiver(Y,
                                    X,
                                    placeholder,
                                    placeholder,
                                    C,
                                    units=units,
                                    cmap='Actions',
                                    scale_units=scale_units,
                                    scale=scale,
                                    width=width)
                arrows.set_clim(vmin=0, vmax=1)
                return arrows

            # Create quivers for each action. 4 in total
            self.upArrows_fig = add_quiver(
                -self.SHIFT, 0, 'y', "height", self.ROWS / arrow_ratio,
                -1 * ARROW_WIDTH)
            self.downArrows_fig = add_quiver(
                self.SHIFT, 0, 'y', "height", self.ROWS / arrow_ratio,
                -1 * ARROW_WIDTH)
            self.leftArrows_fig = add_quiver(
                0, -self.SHIFT, 'x', "width", self.COLS / arrow_ratio,
                ARROW_WIDTH)
            self.rightArrows_fig = add_quiver(
                0, self.SHIFT, 'x', "width", self.COLS / arrow_ratio,
                ARROW_WIDTH)
            plt.show()

        plt.figure("Value Function")
        V = np.zeros((self.ROWS, self.COLS))
        # Boolean 3 dimensional array. The third axis selects the action.
        # This mask is used to see in which cells what actions should exist
        Mask = np.ones((self.COLS, self.ROWS, self.actions_num), dtype='bool')
        arrowSize = np.zeros((self.COLS, self.ROWS, self.actions_num),
                             dtype='float')
        # 0 = suboptimal action, 1 = optimal action
        arrowColors = np.zeros((self.COLS, self.ROWS, self.actions_num),
                               dtype='uint8')
        for r in range(self.ROWS):
            for c in range(self.COLS):
                cell = self.map[r, c]
                if cell == self.BLOCKED:
                    V[r, c] = 0
                if cell == self.GOAL:
                    V[r, c] = self.MAX_RETURN
                if cell == self.PIT:
                    V[r, c] = self.MIN_RETURN
                if cell == self.EMPTY or cell == self.START:
                    s = np.array([r, c])
                    As = self.possibleActions(s)
                    terminal = self.isTerminal(s)
                    Qs = representation.Qs(s, terminal)
                    bestA = representation.bestActions(s, terminal, As)
                    V[r, c] = max(Qs[As])
                    Mask[c, r, As] = False
                    arrowColors[c, r, bestA] = 1

                    for a in As:
                        # Qs is indexed by action id (see max(Qs[As]) above),
                        # so look the value up with the action itself and not
                        # with its position inside As.
                        arrowSize[c, r, a] = linearMap(Qs[a],
                                                       self.MIN_RETURN,
                                                       self.MAX_RETURN, 0, 1)
        # Show Value Function
        self.valueFunction_fig.set_data(V)

        # Show Policy arrows.  Each layer is (quiver, action index,
        # arrow lies along the first set_UVC component?, sign of the length).
        zeros = np.zeros(arrowSize.shape[:2])  # same shape as the masks
        layers = (
            (self.upArrows_fig, 0, False, 1),
            (self.downArrows_fig, 1, False, -1),
            (self.leftArrows_fig, 2, True, -1),
            (self.rightArrows_fig, 3, True, 1),
        )
        for quiver, action, first_component, sign in layers:
            mask = Mask[:, :, action]
            length = np.ma.masked_array(sign * arrowSize[:, :, action],
                                        mask=mask)
            still = np.ma.masked_array(zeros, mask=mask)
            colors = np.ma.masked_array(arrowColors[:, :, action], mask=mask)
            if first_component:
                quiver.set_UVC(length, still, colors)
            else:
                quiver.set_UVC(still, length, colors)
        plt.draw()
Пример #23
0
    def showDomain(self, a=0):
        """Draw the computer network for the current state ``self.state``.

        The first call builds ``self.networkGraph`` (one node per computer,
        one edge per entry of ``self.UNIQUE_EDGES``), lays it out on a circle
        and draws it.  Later calls clear the "Domain" figure and redraw the
        nodes and edges split by whether the computers are ``self.RUNNING``.

        :param a: not used by this method.
        """
        s = self.state
        plt.figure("Domain")

        if self.networkGraph is None:  # or self.networkPos is None:
            self.networkGraph = nx.Graph()
            # Add a node to the network for each (neighbors, status) pair
            for computer_id, _ in enumerate(zip(self.NEIGHBORS, s)):
                self.networkGraph.add_node(computer_id, node_color="w")
            # Add an edge between each pair of neighbors
            for first, second in ((edge[0], edge[1])
                                  for edge in self.UNIQUE_EDGES):
                self.networkGraph.add_edge(first, second, edge_color="k")
            self.networkPos = nx.circular_layout(self.networkGraph)
            nx.draw_networkx_nodes(self.networkGraph,
                                   self.networkPos,
                                   node_color="w")
            # The keyword is ``edge_color``; the former ``edges_color`` is
            # not a parameter of draw_networkx_edges.
            nx.draw_networkx_edges(self.networkGraph,
                                   self.networkPos,
                                   edge_color="k")
            nx.draw_networkx_labels(self.networkGraph, self.networkPos)
            plt.show()
        else:
            plt.clf()
            blackEdges = []
            redEdges = []
            greenNodes = []
            redNodes = []
            for computer_id, (_, compstatus) in enumerate(
                    zip(self.NEIGHBORS, s)):
                if compstatus == self.RUNNING:
                    greenNodes.append(computer_id)
                else:
                    redNodes.append(computer_id)
            # Iterate through all unique edges
            for uniqueEdge in self.UNIQUE_EDGES:
                both_running = (s[uniqueEdge[0]] == self.RUNNING
                                and s[uniqueEdge[1]] == self.RUNNING)
                if both_running:
                    blackEdges.append(uniqueEdge)
                else:  # At least one endpoint is not RUNNING
                    redEdges.append(uniqueEdge)
            # Only draw a group when it is not empty
            if redNodes:
                nx.draw_networkx_nodes(self.networkGraph,
                                       self.networkPos,
                                       nodelist=redNodes,
                                       node_color="r",
                                       linewidths=2)
            if greenNodes:
                nx.draw_networkx_nodes(self.networkGraph,
                                       self.networkPos,
                                       nodelist=greenNodes,
                                       node_color="w",
                                       linewidths=2)
            if blackEdges:
                nx.draw_networkx_edges(self.networkGraph,
                                       self.networkPos,
                                       edgelist=blackEdges,
                                       edge_color="k",
                                       width=2,
                                       style='solid')
            if redEdges:
                # NOTE(review): despite the list name these edges are drawn
                # black and dotted, not red -- confirm this is intended.
                nx.draw_networkx_edges(self.networkGraph,
                                       self.networkPos,
                                       edgelist=redEdges,
                                       edge_color="k",
                                       width=2,
                                       style='dotted')
        nx.draw_networkx_labels(self.networkGraph, self.networkPos)
        canvas = plt.figure("Domain").canvas
        canvas.draw()
        canvas.flush_events()
 def showExploration(self):
     """Scatter-plot the visited states with the source and the target.

     A new figure is opened; the first two components of every entry of
     ``self.visited_states`` are drawn in black, ``self.src`` in red and
     ``self.target`` in blue, and the figure is then shown.
     """
     plt.figure()
     visited_x = [state[0] for state in self.visited_states]
     visited_y = [state[1] for state in self.visited_states]
     plt.scatter(visited_x, visited_y, color='k')
     source = self.src
     plt.scatter([source['x']], [source['y']], color='r')
     goal = self.target
     plt.scatter([goal['x']], [goal['y']], color='b')
     plt.show()
Пример #25
0
    def batchDiscover(self, td_errors, phi, states):
        """Discover new features with iFDD in the batch setting.

        A relevance is accumulated for every pair of features ``(f1, f2)``
        with ``f1 < f2``.  Pairs with non-zero relevance are offered to
        ``self.inspectPair`` from most to least relevant until
        ``self.maxBatchDiscovery`` features were added or the relevance is no
        longer above ``self.batchThreshold``.

        :param td_errors: sequence of length p, one TD-error per sample.
        :param phi: p-by-n array; row ``i`` is the feature vector of sample
            ``i`` and n equals ``self.features_num``.
        :param states: not used by this method.
        :return: ``True`` if at least one feature was added, else ``False``.
        """
        SHOW_PLOT = 0  # Debug switch: plot the sorted relevances
        maxDiscovery = self.maxBatchDiscovery
        n = self.features_num  # number of features
        p = len(td_errors)  # Number of samples
        use_signed_errors = self.iFDDPlus
        counts = np.zeros((n, n))
        relevances = np.zeros((n, n))
        for i in range(p):
            sample = phi[i, :]
            phiphiT = np.outer(sample, sample)
            if use_signed_errors:
                relevances += phiphiT * td_errors[i]
            else:
                relevances += phiphiT * abs(td_errors[i])
            counts += phiphiT
        # Keep only the strict upper triangle: the diagonal and the lower
        # part carry no additional pair information.
        relevances = np.triu(relevances, 1)
        if use_signed_errors:
            # Calculate relevances based on theoretical results of ICML 2013
            # potential submission: |sum of errors| / sqrt(count)
            non_zero_index = np.nonzero(relevances)
            relevances[non_zero_index] = np.divide(
                np.abs(relevances[non_zero_index]),
                np.sqrt(counts[non_zero_index]))
        # Otherwise (Geramifard11_ICML paper) the accumulated absolute errors
        # are used unchanged.

        # Find indexes to non-zero excited pairs
        # F1 and F2 are the parents of the potentials
        (F1, F2) = relevances.nonzero()
        relevances = relevances[F1, F2]
        if len(relevances) == 0:
            # No feature to add
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        if SHOW_PLOT:
            e_vec = relevances.flatten()
            e_vec = e_vec[e_vec != 0]
            e_vec = np.sort(e_vec)
            plt.ioff()
            plt.plot(e_vec, linewidth=3)
            plt.show()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]
        # Add top <maxDiscovery> features
        self.logger.debug(
            "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))
        added_feature = False
        new_features = 0
        for max_index in sortedIndices:
            if new_features >= maxDiscovery:
                break
            relevance = relevances[max_index]
            if not relevance > self.batchThreshold:
                # Because the list is sorted, there is no use to look at the
                # others
                break
            f1 = F1[max_index]
            f2 = F2[max_index]
            if self.inspectPair(f1, f2, np.inf):
                self.logger.debug(
                    'New Feature %d: %s, Relevance = %0.3f' %
                    (self.features_num - 1,
                     self.getStrFeatureSet(self.features_num - 1),
                     relevances[max_index]))
                new_features += 1
                added_feature = True
        # A signal to see if the representation has been expanded or not
        return added_feature
Пример #26
0
 def showDomain(self, a=0):
     """Draw the computer network for the current state ``self.state``.

     The first call builds ``self.networkGraph`` (one node per computer, one
     edge per entry of ``self.UNIQUE_EDGES``), lays it out on a circle and
     draws it.  Later calls clear the current figure and redraw the nodes and
     edges split by whether the computers are ``self.RUNNING``.

     :param a: not used by this method.
     """
     s = self.state
     if self.networkGraph is None:  # or self.networkPos is None:
         self.networkGraph = nx.Graph()
         # Add a node to the network for each (neighbors, status) pair
         for computer_id, _ in enumerate(zip(self.NEIGHBORS, s)):
             self.networkGraph.add_node(computer_id, node_color="w")
         # Add an edge between each pair of neighbors
         for uniqueEdge in self.UNIQUE_EDGES:
             self.networkGraph.add_edge(
                 uniqueEdge[0],
                 uniqueEdge[1],
                 edge_color="k")
         self.networkPos = nx.circular_layout(self.networkGraph)
         nx.draw_networkx_nodes(
             self.networkGraph,
             self.networkPos,
             node_color="w")
         # The keyword is ``edge_color``; the former ``edges_color`` is not
         # a parameter of draw_networkx_edges.
         nx.draw_networkx_edges(
             self.networkGraph,
             self.networkPos,
             edge_color="k")
         nx.draw_networkx_labels(self.networkGraph, self.networkPos)
         plt.show()
     else:
         plt.clf()
         blackEdges = []
         redEdges = []
         greenNodes = []
         redNodes = []
         for computer_id, (_, compstatus) in enumerate(
                 zip(self.NEIGHBORS, s)):
             if compstatus == self.RUNNING:
                 greenNodes.append(computer_id)
             else:
                 redNodes.append(computer_id)
         # Iterate through all unique edges
         for uniqueEdge in self.UNIQUE_EDGES:
             both_running = (s[uniqueEdge[0]] == self.RUNNING
                             and s[uniqueEdge[1]] == self.RUNNING)
             if both_running:
                 blackEdges.append(uniqueEdge)
             else:  # At least one endpoint is not RUNNING
                 redEdges.append(uniqueEdge)
         # Only draw a group when it is not empty
         if redNodes:
             nx.draw_networkx_nodes(
                 self.networkGraph,
                 self.networkPos,
                 nodelist=redNodes,
                 node_color="r",
                 linewidths=2)
         if greenNodes:
             nx.draw_networkx_nodes(
                 self.networkGraph,
                 self.networkPos,
                 nodelist=greenNodes,
                 node_color="w",
                 linewidths=2)
         if blackEdges:
             nx.draw_networkx_edges(
                 self.networkGraph,
                 self.networkPos,
                 edgelist=blackEdges,
                 edge_color="k",
                 width=2,
                 style='solid')
         if redEdges:
             # NOTE(review): despite the list name these edges are drawn
             # black and dotted, not red -- confirm this is intended.
             nx.draw_networkx_edges(
                 self.networkGraph,
                 self.networkPos,
                 edgelist=redEdges,
                 edge_color="k",
                 width=2,
                 style='dotted')
     nx.draw_networkx_labels(self.networkGraph, self.networkPos)
     plt.draw()
Пример #27
0
    def showLearning(self, representation):
        """Draw the value function and the policy arrows of every cell.

        On the first call (``self.valueFunction_fig is None``) the
        "Value Function" figure is created with an image of ``self.map`` and
        four quiver layers, one per action index 0-3 (called up, down, left
        and right by the attribute names).  On every call the image and the
        quivers are refreshed from ``representation``.

        Cell values: ``BLOCKED`` -> 0, ``GOAL`` -> ``MAX_RETURN``, ``PIT`` ->
        ``MIN_RETURN``; ``EMPTY``/``START`` cells get the largest Q-value
        among the actions returned by ``self.possibleActions``.

        :param representation: object providing ``Qs(s, terminal)`` and
            ``bestActions(s, terminal, As)``.  The array returned by ``Qs``
            is indexed by action id.
        """
        if self.valueFunction_fig is None:
            # First call: build the figure, the image and the four quivers.
            plt.figure("Value Function")
            self.valueFunction_fig = plt.imshow(
                self.map,
                cmap='ValueFunction',
                interpolation='nearest',
                vmin=self.MIN_RETURN,
                vmax=self.MAX_RETURN)
            plt.xticks(np.arange(self.COLS), fontsize=12)
            plt.yticks(np.arange(self.ROWS), fontsize=12)

            # Placeholder arrow components / colors; the real values are
            # pushed through set_UVC further down.
            placeholder = np.ones((self.COLS, self.ROWS))
            C = np.zeros((self.COLS, self.ROWS))
            C[0, 0] = 1  # Making sure C has both 0 and 1
            # length of arrow/width of box. Less than 0.5 because each arrow
            # is offset, 0.4 looks nice but could be better/auto generated
            arrow_ratio = 0.4
            Max_Ratio_ArrowHead_to_ArrowLength = 0.25
            ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0

            def add_quiver(row_offset, col_offset, units, scale_units, scale,
                           width):
                # One quiver layer whose anchors are shifted off the cell
                # centers by the given offsets.
                X, Y = np.meshgrid(np.arange(self.ROWS) + row_offset,
                                   np.arange(self.COLS) + col_offset)
                arrows = plt.quiver(
                    Y,
                    X,
                    placeholder,
                    placeholder,
                    C,
                    units=units,
                    cmap='Actions',
                    scale_units=scale_units,
                    scale=scale,
                    width=width)
                arrows.set_clim(vmin=0, vmax=1)
                return arrows

            # Create quivers for each action. 4 in total
            self.upArrows_fig = add_quiver(
                -self.SHIFT, 0, 'y', "height", self.ROWS / arrow_ratio,
                -1 * ARROW_WIDTH)
            self.downArrows_fig = add_quiver(
                self.SHIFT, 0, 'y', "height", self.ROWS / arrow_ratio,
                -1 * ARROW_WIDTH)
            self.leftArrows_fig = add_quiver(
                0, -self.SHIFT, 'x', "width", self.COLS / arrow_ratio,
                ARROW_WIDTH)
            self.rightArrows_fig = add_quiver(
                0, self.SHIFT, 'x', "width", self.COLS / arrow_ratio,
                ARROW_WIDTH)
            plt.show()

        plt.figure("Value Function")
        V = np.zeros((self.ROWS, self.COLS))
        # Boolean 3 dimensional array. The third axis selects the action.
        # This mask is used to see in which cells what actions should exist
        grid_shape = (self.COLS, self.ROWS, self.actions_num)
        Mask = np.ones(grid_shape, dtype='bool')
        arrowSize = np.zeros(grid_shape, dtype='float')
        # 0 = suboptimal action, 1 = optimal action
        arrowColors = np.zeros(grid_shape, dtype='uint8')
        for r in range(self.ROWS):
            for c in range(self.COLS):
                cell = self.map[r, c]
                if cell == self.BLOCKED:
                    V[r, c] = 0
                if cell == self.GOAL:
                    V[r, c] = self.MAX_RETURN
                if cell == self.PIT:
                    V[r, c] = self.MIN_RETURN
                if cell == self.EMPTY or cell == self.START:
                    s = np.array([r, c])
                    As = self.possibleActions(s)
                    terminal = self.isTerminal(s)
                    Qs = representation.Qs(s, terminal)
                    bestA = representation.bestActions(s, terminal, As)
                    V[r, c] = max(Qs[As])
                    Mask[c, r, As] = False
                    arrowColors[c, r, bestA] = 1

                    for a in As:
                        # Qs is indexed by action id (see max(Qs[As]) above),
                        # so look the value up with the action itself and not
                        # with its position inside As.
                        arrowSize[c, r, a] = linearMap(
                            Qs[a],
                            self.MIN_RETURN,
                            self.MAX_RETURN,
                            0,
                            1)
        # Show Value Function
        self.valueFunction_fig.set_data(V)

        # Show Policy arrows.  Each layer is (quiver, action index,
        # arrow lies along the first set_UVC component?, sign of the length).
        zeros = np.zeros(arrowSize.shape[:2])  # same shape as the masks
        layers = (
            (self.upArrows_fig, 0, False, 1),
            (self.downArrows_fig, 1, False, -1),
            (self.leftArrows_fig, 2, True, -1),
            (self.rightArrows_fig, 3, True, 1),
        )
        for quiver, action, first_component, sign in layers:
            mask = Mask[:, :, action]
            length = np.ma.masked_array(
                sign * arrowSize[:, :, action], mask=mask)
            still = np.ma.masked_array(zeros, mask=mask)
            colors = np.ma.masked_array(arrowColors[:, :, action], mask=mask)
            if first_component:
                quiver.set_UVC(length, still, colors)
            else:
                quiver.set_UVC(still, length, colors)
        plt.draw()
Пример #28
0
Файл: PST.py Проект: okkhoy/rlpy
    def showDomain(self, a=0):
        s = self.state
        if self.domain_fig is None:
            plt.figure("Domain")
            self.domain_fig = plt.figure(
                1, (UAVLocation.SIZE * self.dist_between_locations + 1, self.NUM_UAV + 1))
            plt.show()
        plt.clf()
         # Draw the environment
         # Allocate horizontal 'lanes' for UAVs to traverse

        # Formerly, we checked if this was the first time plotting; wedge shapes cannot be removed from
        # matplotlib environment, nor can their properties be changed, without clearing the figure
        # Thus, we must redraw the figure on each timestep
        #        if self.location_rect_vis is None:
        # Figure with x width corresponding to number of location states, UAVLocation.SIZE
        # and rows (lanes) set aside in y for each UAV (NUM_UAV total lanes).
        # Add buffer of 1
        self.subplot_axes = self.domain_fig.add_axes(
            [0, 0, 1, 1], frameon=False, aspect=1.)
        crashLocationX = 2 * \
            (self.dist_between_locations) * (UAVLocation.SIZE - 1)
        self.subplot_axes.set_xlim(0, 1 + crashLocationX + self.RECT_GAP)
        self.subplot_axes.set_ylim(0, 1 + self.NUM_UAV)
        self.subplot_axes.xaxis.set_visible(False)
        self.subplot_axes.yaxis.set_visible(False)

        # Assign coordinates of each possible uav location on figure
        self.location_coord = [0.5 + (old_div(self.LOCATION_WIDTH, 2)) +
                               (self.dist_between_locations) * i for i in range(UAVLocation.SIZE - 1)]
        self.location_coord.append(crashLocationX + old_div(self.LOCATION_WIDTH, 2))

         # Create rectangular patches at each of those locations
        self.location_rect_vis = [mpatches.Rectangle(
            [0.5 + (self.dist_between_locations) * i,
             0],
            self.LOCATION_WIDTH,
            self.NUM_UAV * 2,
            fc='w') for i in range(UAVLocation.SIZE - 1)]
        self.location_rect_vis.append(
            mpatches.Rectangle([crashLocationX,
                                0],
                               self.LOCATION_WIDTH,
                               self.NUM_UAV * 2,
                               fc='w'))
        [self.subplot_axes.add_patch(self.location_rect_vis[i])
         for i in range(4)]
        self.comms_line = [lines.Line2D(
            [0.5 + self.LOCATION_WIDTH + (self.dist_between_locations) * i,
             0.5 + self.LOCATION_WIDTH + (
                 self.dist_between_locations) * i + self.RECT_GAP],
            [self.NUM_UAV * 0.5 + 0.5,
             self.NUM_UAV * 0.5 + 0.5],
            linewidth=3,
            color='black',
            visible=False) for i in range(UAVLocation.SIZE - 2)]
        self.comms_line.append(
            lines.Line2D(
                [0.5 + self.LOCATION_WIDTH + (self.dist_between_locations) * 2,
                 crashLocationX],
                [self.NUM_UAV * 0.5 + 0.5,
                 self.NUM_UAV * 0.5 + 0.5],
                linewidth=3,
                color='black',
                visible=False))

        # Create location text below rectangles
        locText = ["Base", "Refuel", "Communication", "Surveillance"]
        self.location_rect_txt = [plt.text(
            0.5 + self.dist_between_locations * i + 0.5 * self.LOCATION_WIDTH,
            -0.3,
            locText[i],
            ha='center') for i in range(UAVLocation.SIZE - 1)]
        self.location_rect_txt.append(
            plt.text(crashLocationX + 0.5 * self.LOCATION_WIDTH, -0.3,
                     locText[UAVLocation.SIZE - 1], ha='center'))

        # Initialize list of circle objects

        uav_x = self.location_coord[UAVLocation.BASE]

        # Update the member variables storing all the figure objects
        self.uav_circ_vis = [mpatches.Circle(
            (uav_x,
             1 + uav_id),
            self.UAV_RADIUS,
            fc="w") for uav_id in range(0,
                                        self.NUM_UAV)]
        self.uav_text_vis = [None for uav_id in range(0, self.NUM_UAV)]  # f**k
        self.uav_sensor_vis = [mpatches.Wedge(
            (uav_x + self.SENSOR_REL_X,
             1 + uav_id),
            self.SENSOR_LENGTH,
            -30,
            30) for uav_id in range(0,
                                    self.NUM_UAV)]
        self.uav_actuator_vis = [mpatches.Wedge(
            (uav_x,
             1 + uav_id + self.ACTUATOR_REL_Y),
            self.ACTUATOR_HEIGHT,
            60,
            120) for uav_id in range(0,
                                     self.NUM_UAV)]

        # The following was executed when we used to check if the environment needed re-drawing: see above.
                # Remove all UAV circle objects from visualization
        #        else:
        #            [self.uav_circ_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]
        #            [self.uav_text_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]
        #            [self.uav_sensor_vis[uav_id].remove() for uav_id in range(0,self.NUM_UAV)]

        # For each UAV:
        # Draw a circle, with text inside = amt fuel remaining
        # Triangle on top of UAV for comms, black = good, red = bad
        # Triangle in front of UAV for surveillance
        sStruct = self.state2Struct(s)

        for uav_id in range(0, self.NUM_UAV):
            # Assign all the variables corresponding to this UAV for this iteration;
            # this could alternately be done with a UAV class whose objects keep track
            # of these variables.  Elect to use lists here since ultimately the state
            # must be a vector anyway.
            # State index corresponding to the location of this uav
            uav_location = sStruct.locations[uav_id]
            uav_fuel = sStruct.fuel[uav_id]
            uav_sensor = sStruct.sensor[uav_id]
            uav_actuator = sStruct.actuator[uav_id]

            # Assign coordinates on figure where UAV should be drawn
            uav_x = self.location_coord[uav_location]
            uav_y = 1 + uav_id

            # Update plot wit this UAV
            self.uav_circ_vis[uav_id] = mpatches.Circle(
                (uav_x, uav_y), self.UAV_RADIUS, fc="w")
            self.uav_text_vis[uav_id] = plt.text(
                uav_x - 0.05,
                uav_y - 0.05,
                uav_fuel)
            if uav_sensor == SensorState.RUNNING:
                objColor = 'black'
            else:
                objColor = 'red'
            self.uav_sensor_vis[uav_id] = mpatches.Wedge(
                (uav_x + self.SENSOR_REL_X,
                 uav_y),
                self.SENSOR_LENGTH,
                -30,
                30,
                color=objColor)

            if uav_actuator == ActuatorState.RUNNING:
                objColor = 'black'
            else:
                objColor = 'red'
            self.uav_actuator_vis[uav_id] = mpatches.Wedge(
                (uav_x,
                 uav_y + self.ACTUATOR_REL_Y),
                self.ACTUATOR_HEIGHT,
                60,
                120,
                color=objColor)

            self.subplot_axes.add_patch(self.uav_circ_vis[uav_id])
            self.subplot_axes.add_patch(self.uav_sensor_vis[uav_id])
            self.subplot_axes.add_patch(self.uav_actuator_vis[uav_id])

        numHealthySurveil = np.sum(
            np.logical_and(
                sStruct.locations == UAVLocation.SURVEIL,
                sStruct.sensor))
        # We have comms coverage: draw a line between comms states to show this
        if (any(sStruct.locations == UAVLocation.COMMS)):
            for i in range(len(self.comms_line)):
                self.comms_line[i].set_visible(True)
                self.comms_line[i].set_color('black')
                self.subplot_axes.add_line(self.comms_line[i])
            # We also have UAVs in surveillance; color the comms line black
            if numHealthySurveil > 0:
                self.location_rect_vis[
                    len(self.location_rect_vis) - 1].set_color('green')
        plt.figure("Domain").canvas.draw()
        plt.figure("Domain").canvas.flush_events()
        sleep(0.5)
Пример #29
0
    def batchDiscover(self, td_errors, phi, states):
        """Discover new features with iFDD in the batch setting.

        Every pair of features that is active together in at least one
        sample is a candidate.  Candidates are ranked by relevance and the
        most relevant ones are handed to ``self.inspectPair`` until either
        ``self.maxBatchDiscovery`` features have been added or the relevance
        is no longer above ``self.batchThreshold``.

        :param td_errors: length-p sequence (or p-by-1 array) holding the TD
            error observed for each sample.
        :param phi: p-by-n array of feature activations; row ``i`` is the
            feature vector of sample ``i``.  Only the first p rows are read.
        :param states: not used by this method; kept so the signature stays
            unchanged for callers.
        :return: True if at least one feature was added, False otherwise.
        :raises ValueError: if ``phi`` does not provide p rows of
            ``self.features_num`` columns.
        """
        SHOW_PLOT = 0  # Set to 1 to plot the sorted relevances (debug aid)
        maxDiscovery = self.maxBatchDiscovery
        n = self.features_num  # number of features
        p = len(td_errors)  # number of samples
        if p == 0:
            # Without samples there is nothing to combine.
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        samples = np.asarray(phi, dtype=float)[:p]
        if samples.shape != (p, n):
            raise ValueError(
                "phi must provide %d rows of %d features, got shape %s" %
                (p, n, np.shape(phi)))
        weights = np.ravel(np.asarray(td_errors, dtype=float))
        if not self.iFDDPlus:
            # The original iFDD accumulates absolute TD errors; iFDD+ keeps
            # the sign and takes the absolute value of the sum afterwards.
            weights = np.abs(weights)

        # Entry [a, b] sums phi[i, a] * phi[i, b] over all samples i, once
        # weighted by the TD error (relevances) and once unweighted (counts).
        # This equals accumulating np.outer(phi[i], phi[i]) sample by sample.
        relevances = np.dot(samples.T * weights, samples)
        counts = np.dot(samples.T, samples)

        # The matrix is symmetric and a feature paired with itself is
        # useless, so keep only the strict upper triangle (diagonal and
        # lower part are zeroed).
        relevances = np.triu(relevances, 1)
        if self.iFDDPlus:
            # iFDD+ relevance: |sum of TD errors| / sqrt(co-activation count)
            excited = np.nonzero(relevances)
            relevances[excited] = (
                np.abs(relevances[excited]) / np.sqrt(counts[excited]))
        # Otherwise (Geramifard11_ICML paper) the accumulated absolute TD
        # error is used as the relevance directly.

        # F1 and F2 are the parents of the potential features
        (F1, F2) = np.nonzero(relevances)
        relevances = relevances[F1, F2]
        if relevances.size == 0:
            # No feature to add
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        if SHOW_PLOT:
            plt.ioff()
            plt.plot(np.sort(relevances), linewidth=3)
            plt.show()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]
        self.logger.debug(
            "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))

        # Add top <maxDiscovery> features
        added_feature = False
        new_features = 0
        for max_index in sortedIndices:
            if new_features >= maxDiscovery:
                break
            relevance = relevances[max_index]
            if not relevance > self.batchThreshold:
                # Because the candidates are sorted, none of the remaining
                # ones can pass the threshold either.
                break
            f1 = F1[max_index]
            f2 = F2[max_index]
            if self.inspectPair(f1, f2, np.inf):
                # The new feature is logged as index features_num - 1.
                self.logger.debug(
                    'New Feature %d: %s, Relevance = %0.3f' %
                    (self.features_num - 1,
                     self.getStrFeatureSet(self.features_num - 1),
                     relevance))
                new_features += 1
                added_feature = True
        # A signal to see if the representation has been expanded or not
        return added_feature