def show_domain(self, a=0):
    """Render the current block arrangement as a colour-coded grid image.

    Block ``A`` is painted with the value ``A + 1`` because 0 is reserved for
    the white background.  ``self.state[A]`` names the block that ``A`` rests
    on; a block that rests on itself is on the table and is painted in row 0,
    column ``A``.  A block whose support has not been painted yet is deferred
    to the end of the work list.

    :param a: Not used by this method.
    """
    state = self.state
    grid = np.zeros((self.blocks, self.blocks), "uint8")
    pending = np.arange(self.blocks)
    while len(pending):
        block, pending = pending[0], pending[1:]
        support = state[block]
        if support == block:
            # The block sits directly on the table.
            grid[0, block] = block + 1
            continue
        # Locate the supporting block in what has been painted so far.
        rows, cols = findElemArray2D(support + 1, grid)
        if len(rows):
            # Paint this block one row above its support.
            grid[rows + 1, cols] = block + 1
        else:
            # Support not painted yet: retry this block later.
            pending = np.hstack((pending, [block]))

    if self.domain_fig is not None:
        # Figure already exists: swap in the new image and refresh.
        self.domain_fig.set_data(grid)
        plt.figure("Domain").canvas.draw()
        plt.figure("Domain").canvas.flush_events()
        return

    # First call: create the figure and the image artist.
    plt.figure("Domain")
    self.domain_fig = plt.imshow(
        grid, cmap="BlocksWorld", origin="lower", interpolation="nearest"
    )
    ticks = np.arange(self.blocks)
    plt.xticks(ticks, fontsize=FONTSIZE)
    plt.yticks(ticks, fontsize=FONTSIZE)
    plt.axis("off")
    plt.show()
def show_domain(self, a=0):
    """Draw the board and mark the cell addressed by the latest action.

    On the first call the image artist and an initial cross marker are
    created and ``plt.show()`` is invoked.  On every call the existing marker
    is removed, a new one is plotted at the cell addressed by ``a`` and the
    image data is refreshed.

    :param a: Action index, converted to ``(row, col)`` with ``id2vec`` over
        a ``BOARD_SIZE x BOARD_SIZE`` grid.
    """
    side = self.BOARD_SIZE
    board = self.state.reshape((side, side))
    mark_row, mark_col = id2vec(a, [side, side])

    if self.domain_fig is None:
        # First call: build the axes, the image and a first marker.
        self.move_fig = plt.subplot(111)
        self.domain_fig = plt.imshow(
            board, cmap="FlipBoard", interpolation="nearest", vmin=0, vmax=1
        )
        ticks = np.arange(side)
        plt.xticks(ticks, fontsize=FONTSIZE)
        plt.yticks(ticks, fontsize=FONTSIZE)
        self.move_fig = self.move_fig.plot(
            mark_col, mark_row, "kx", markersize=30.0
        )
        plt.show()

    # Replace the previous marker (other marker shapes such as 'D' or 'o'
    # would work as well).
    self.move_fig.pop(0).remove()
    self.move_fig = plt.plot(mark_col, mark_row, "kx", markersize=30.0)
    self.domain_fig.set_data(board)
    plt.draw()
def _plot_impl(self, y="return", x="learning_steps", save=False, show=True):
    """Plot one recorded quantity of this experiment against another.

    :param y: Key into ``self.result`` for the vertical axis.
    :param x: Key into ``self.result`` for the horizontal axis.
    :param save: When true, save the figure as
        ``"{:03}-performance.pdf".format(self.exp_id)`` under
        ``self.full_path``.
    :param show: When true, switch interactive mode off and call
        ``plt.show()``.
    """
    labels = rlpy.tools.results.default_labels
    figure = plt.figure("Performance")
    results = self.result
    xs = results[x]
    ys = results[y]

    plt.plot(xs, ys, lw=2, markersize=4, marker=MARKERS[0])
    # Leave 1% head-room to the right of the last x value.
    plt.xlim(0, xs[-1] * 1.01)

    y_values = np.array(ys)
    lowest = y_values.min()
    highest = y_values.max()
    spread = highest - lowest
    if spread > 0:
        # Pad the y range by 10% of the spread plus a constant 0.1.
        plt.ylim(lowest - 0.1 * spread - 0.1, highest + 0.1 * spread + 0.1)

    # Use the pretty label when one is registered, the raw key otherwise.
    x_text = labels[x] if x in labels else x
    y_text = labels[y] if y in labels else y
    plt.xlabel(x_text, fontsize=16)
    plt.ylabel(y_text, fontsize=16)

    if save:
        file_name = "{:03}-performance.pdf".format(self.exp_id)
        figure.savefig(
            os.path.join(self.full_path, file_name),
            transparent=True,
            pad_inches=0.1,
        )
    if show:
        plt.ioff()
        plt.show()
def show_domain(self, a=0):
    """Draw the chain as a row of circles and highlight the current state.

    The circle patches are created once (when ``self.circles`` is ``None``).
    On every call all circles are reset to white, the circles indexed by
    ``self.GOAL_STATES`` are painted green and the circle indexed by the
    current state is painted black.

    :param a: Not used by this method.
    """
    current = self.state
    fig = plt.figure("FiftyChain")

    if self.circles is None:
        width = self.chain_size * 2 / 10.0
        axes = plt.subplot(3, 1, 1)
        self.domain_fig = axes
        plt.figure(1, (width, 2))
        axes.set_xlim(0, width)
        axes.set_ylim(0, 2)
        # The last state gets an extra, slightly larger ring (double circle).
        outer_ring = mpatches.Circle(
            (0.2 + 2 / 10.0 * (self.chain_size - 1), self.Y),
            self.RADIUS * 1.1,
            fc="w",
        )
        axes.add_patch(outer_ring)
        axes.xaxis.set_visible(False)
        axes.yaxis.set_visible(False)
        self.circles = []
        for index in range(self.chain_size):
            self.circles.append(
                mpatches.Circle(
                    (0.2 + 2 / 10.0 * index, self.Y), self.RADIUS, fc="w"
                )
            )
        for circle in self.circles:
            axes.add_patch(circle)
        plt.show()

    # Recolour: white everywhere, green for goals, black for the agent.
    for circle in self.circles:
        circle.set_facecolor("w")
    for goal in self.GOAL_STATES:
        self.circles[goal].set_facecolor("g")
    self.circles[current].set_facecolor("k")

    canvas = fig.canvas
    canvas.draw()
    canvas.flush_events()
def show_learning(self, representation):
    """Plot the learned policy, a reference policy and the value function.

    The three plots cover a 200 x 200 grid spanning state dimensions 2
    (rows) and 3 (columns) between their ``self.statespace_limits``; all
    other state dimensions are held at zero.

    :param representation: Queried through ``Qs(s, False)`` for the Q-values
        of all actions at a state, and handed to ``SwimmerPolicy`` to build
        the reference ("good") policy.
    """
    reference_policy = SwimmerPolicy(representation=representation, epsilon=0)
    row_dim, col_dim = 2, 3
    resolution = 200

    probe = np.zeros(self.state_space_dims)
    row_values = np.linspace(
        self.statespace_limits[row_dim, 0],
        self.statespace_limits[row_dim, 1],
        resolution,
    )
    col_values = np.linspace(
        self.statespace_limits[col_dim, 0],
        self.statespace_limits[col_dim, 1],
        resolution,
    )

    grid_shape = (resolution, resolution)
    learned = np.zeros(grid_shape, "uint8")
    reference = np.zeros(grid_shape, "uint8")
    values = np.zeros(grid_shape)

    for row, row_value in enumerate(row_values):
        for col, col_value in enumerate(col_values):
            probe[row_dim] = row_value
            probe[col_dim] = col_value
            # Q-values of every action at the probed state.
            q_values = representation.Qs(probe, False)
            best_q = np.max(q_values)
            # Greedy action, ties broken uniformly at random.
            greedy_actions = np.arange(len(q_values))[q_values == best_q]
            learned[row, col] = np.random.choice(greedy_actions)
            reference[row, col] = reference_policy.pi(
                probe, False, np.arange(self.num_actions)
            )
            # State value under the greedy action.
            values[row, col] = best_q

    row_limits = self.statespace_limits[row_dim]
    col_limits = self.statespace_limits[col_dim]
    self._plot_policy(
        learned, title="Learned Policy", ylim=row_limits, xlim=col_limits
    )
    self._plot_policy(
        reference,
        title="Good Policy",
        var="good_policy_fig",
        ylim=row_limits,
        xlim=col_limits,
    )
    self._plot_valfun(values, ylim=row_limits, xlim=col_limits)

    if self.policy_fig is None or self.valueFunction_fig is None:
        plt.show()
def _init_domain_figure(self):
    """Create the figure, axes and artists used to draw the cart and pendulum.

    Stores the figure, axes, pendulum line, cart rectangles and the two text
    artists on ``self`` so later drawing code can update them, then calls
    ``plt.show()``.
    """
    figure = plt.figure("CartPole {}".format(self.NAME))
    axes = figure.add_axes([0, 0, 1, 1], frameon=True, aspect=1.0)
    self.domain_fig = figure
    self.domain_ax = axes

    # Artists: the pendulum arm plus two translucent rectangles for the cart.
    self.pendulum_arm = lines.Line2D(
        [], [], linewidth=self.PEND_WIDTH, color="black"
    )
    self.cart_box = mpatches.Rectangle(
        [0, self.PENDULUM_PIVOT_Y - self.RECT_HEIGHT / 2],
        self.RECT_WIDTH,
        self.RECT_HEIGHT,
        alpha=0.4,
    )
    self.cart_blob = mpatches.Rectangle(
        [0, self.PENDULUM_PIVOT_Y - self.BLOB_WIDTH / 2],
        self.BLOB_WIDTH,
        self.BLOB_WIDTH,
        alpha=0.4,
    )
    axes.add_patch(self.cart_box)
    axes.add_line(self.pendulum_arm)
    axes.add_patch(self.cart_blob)

    # Hatched ground patch.
    ground = mpatches.PathPatch(mpath.Path(self.GROUND_VERTS), hatch="//")
    axes.add_patch(ground)

    left_limit, right_limit = self.POSITION_LIMITS[0], self.POSITION_LIMITS[1]
    self.time_text = axes.text(right_limit, self.LENGTH, "")
    self.reward_text = axes.text(left_limit, self.LENGTH, "")

    # Leave room for the pendulum to swing without being clipped.
    margin = self.LENGTH + 0.5
    limits_are_huge = (
        left_limit < -100 * self.LENGTH or right_limit > 100 * self.LENGTH
    )
    if limits_are_huge:
        # With huge position limits, cap the width so the cart stays visible.
        axes.set_xlim(-margin, margin)
    else:
        axes.set_xlim(left_limit - margin, right_limit + margin)
    axes.set_ylim(-margin, margin)
    axes.set_aspect("equal")
    plt.show()
def show_domain(self, a):
    """Draw the map and the current positions of allies and intruders.

    The map image is created on the first call.  On every call the previous
    ally and intruder markers (if any) are removed and new ones are plotted
    from the current state.  The first ``2 * NUMBER_OF_AGENTS`` state entries
    are read as pairs for the allies and the remaining entries as pairs for
    the intruders; the second element of each pair is plotted on the x axis
    and the first on the y axis.

    :param a: Not used by this method.
    """
    s = self.state
    fig = plt.figure("IntruderMonitoring")
    if self.domain_fig is None:
        # First call: draw the static map once.
        self.domain_fig = plt.imshow(
            self.map,
            cmap="IntruderMonitoring",
            interpolation="nearest",
            vmin=0,
            vmax=3,
        )
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        plt.show()
    if self.ally_fig is not None:
        # Drop the markers drawn on the previous call.
        self.ally_fig.pop(0).remove()
        self.intruder_fig.pop(0).remove()

    split = self.NUMBER_OF_AGENTS * 2
    s_ally = s[:split].reshape((-1, 2))
    s_intruder = s[split:].reshape((-1, 2))
    marker_style = dict(
        markersize=30.0, alpha=0.7, markeredgecolor="k", markeredgewidth=2
    )
    self.ally_fig = plt.plot(s_ally[:, 1], s_ally[:, 0], "bo", **marker_style)
    # The format string carries only the marker: the colour comes from the
    # ``color`` keyword.  Giving a colour in both places ("g>" plus
    # color="gray") makes matplotlib warn about a redundant definition on
    # every frame, while the keyword wins anyway.
    self.intruder_fig = plt.plot(
        s_intruder[:, 1], s_intruder[:, 0], ">", color="gray", **marker_style
    )
    fig.canvas.draw()
    fig.canvas.flush_events()
def batch_discover(self, td_errors, phi, states):
    """
    Discovers features using OMPTD.

    1. Find the index of remaining features in the bag \n
    2. Calculate the inner product of each feature with the TD_Error vector \n
    3. Add the top max_batch_discovery features to the selected features \n

    :param td_errors: p-by-1 vector, error associated with each state
    :param phi: p-by-n matrix, vector-valued feature function evaluated at
        each state. Not read by this method; the feature matrix is taken
        from ``self.fullphi`` after ``calculate_full_phi_normalized(states)``.
    :param states: p-by-(statedimension) matrix, each state under test.

    OUTPUT: Boolean indicating expansion of features
    """
    if len(self.remaining_features) == 0:
        # Nothing left in the bag to expand.
        return False

    self.calculate_full_phi_normalized(states)
    candidates = self.remaining_features

    # Relevance of a candidate = |<feature column, td_errors>|.
    relevances = np.zeros(len(candidates))
    for position, feature in enumerate(candidates):
        column = self.fullphi[:, feature]
        relevances[position] = np.abs(np.dot(column, td_errors))

    if self.SHOW_RELEVANCES:
        nonzero_sorted = relevances.flatten()
        nonzero_sorted = np.sort(nonzero_sorted[nonzero_sorted != 0])
        plt.plot(nonzero_sorted, linewidth=3)
        plt.ioff()
        plt.show()
        plt.ion()

    # Candidate positions ordered from most to least relevant.
    ranking = np.argsort(relevances)[::-1]
    self.logger.debug(
        "OMPTD Batch: Max Relevance = %0.3f" % relevances[ranking[0]]
    )

    accepted_positions = []  # positions to drop from the remaining bag
    budget = min(self.max_batch_discovery, len(relevances))
    for rank in range(budget):
        position = ranking[rank]
        if not relevances[position] >= self.batch_threshold:
            # Sorted order: every later candidate is below threshold too.
            break
        feature = candidates[position]
        self.logger.debug("New Feature %d: %s, Relevance = %0.3f" % (
            self.features_num,
            str(np.sort(list(self.iFDD.getFeature(feature).f_set))),
            relevances[position],
        ))
        accepted_positions.append(position)
        self.selected_features.append(feature)
        self.features_num += 1

    self.remaining_features = np.delete(candidates, accepted_positions)
    return bool(accepted_positions)
def show_domain(self, a=0):
    """Redraw the complete domain figure for the current state.

    The figure is cleared and every artist is rebuilt on each call.  Per the
    original authors' note, wedge shapes cannot be removed from the
    matplotlib environment, nor can their properties be changed, without
    clearing the figure.

    Layout: one white rectangle per location with a text label underneath,
    and one horizontal lane per UAV.  Each UAV is drawn as a circle with its
    fuel value as text, a wedge in front of it for the sensor and a wedge on
    top for the actuator (black when the component state equals ``RUNNING``,
    red otherwise).  The call ends with a 0.5 s ``sleep``.

    :param a: Not used by this method.
    """
    s = self.state
    if self.domain_fig is None:
        plt.figure("Domain")
        # Width follows the number of location states (UAVLocation.SIZE),
        # height gives one lane per UAV; both get a buffer of 1.
        self.domain_fig = plt.figure(
            1,
            (UAVLocation.SIZE * self.dist_between_locations + 1,
             self.NUM_UAV + 1),
        )
        plt.show()
    plt.clf()

    # Axes covering the whole figure, without frame or tick axes.
    self.subplot_axes = self.domain_fig.add_axes(
        [0, 0, 1, 1], frameon=False, aspect=1.0
    )
    crashLocationX = 2 * (self.dist_between_locations) * (
        UAVLocation.SIZE - 1)
    self.subplot_axes.set_xlim(0, 1 + crashLocationX + self.RECT_GAP)
    self.subplot_axes.set_ylim(0, 1 + self.NUM_UAV)
    self.subplot_axes.xaxis.set_visible(False)
    self.subplot_axes.yaxis.set_visible(False)

    # x coordinate at which a UAV is drawn for each possible location.
    self.location_coord = [
        0.5 + self.LOCATION_WIDTH / 2 + (self.dist_between_locations) * i
        for i in range(UAVLocation.SIZE - 1)
    ]
    self.location_coord.append(crashLocationX + self.LOCATION_WIDTH / 2)

    # One rectangular patch per location; the last sits at crashLocationX.
    self.location_rect_vis = [
        mpatches.Rectangle(
            [0.5 + (self.dist_between_locations) * i, 0],
            self.LOCATION_WIDTH,
            self.NUM_UAV * 2,
            fc="w",
        ) for i in range(UAVLocation.SIZE - 1)
    ]
    self.location_rect_vis.append(
        mpatches.Rectangle([crashLocationX, 0],
                           self.LOCATION_WIDTH,
                           self.NUM_UAV * 2,
                           fc="w"))
    # Add every rectangle that was built (previously a hard-coded range(4)
    # inside a list comprehension used only for its side effect).
    for location_rect in self.location_rect_vis:
        self.subplot_axes.add_patch(location_rect)

    # Line segments bridging the gaps between rectangles.  They are created
    # invisible and only added to the axes when there is comms coverage.
    comms_y = [self.NUM_UAV * 0.5 + 0.5, self.NUM_UAV * 0.5 + 0.5]
    self.comms_line = [
        lines.Line2D(
            [
                0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * i,
                0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * i + self.RECT_GAP,
            ],
            list(comms_y),
            linewidth=3,
            color="black",
            visible=False,
        ) for i in range(UAVLocation.SIZE - 2)
    ]
    self.comms_line.append(
        lines.Line2D(
            [
                0.5 + self.LOCATION_WIDTH +
                (self.dist_between_locations) * 2,
                crashLocationX,
            ],
            list(comms_y),
            linewidth=3,
            color="black",
            visible=False,
        ))

    # Location names centred below the rectangles.
    locText = ["Base", "Refuel", "Communication", "Surveillance"]
    self.location_rect_txt = [
        plt.text(
            0.5 + self.dist_between_locations * i +
            0.5 * self.LOCATION_WIDTH,
            -0.3,
            locText[i],
            ha="center",
        ) for i in range(UAVLocation.SIZE - 1)
    ]
    self.location_rect_txt.append(
        plt.text(
            crashLocationX + 0.5 * self.LOCATION_WIDTH,
            -0.3,
            locText[UAVLocation.SIZE - 1],
            ha="center",
        ))

    # Per-UAV artist lists; every slot is filled in the loop below, so no
    # placeholder patches are constructed up front.
    self.uav_circ_vis = [None] * self.NUM_UAV
    self.uav_text_vis = [None] * self.NUM_UAV
    self.uav_sensor_vis = [None] * self.NUM_UAV
    self.uav_actuator_vis = [None] * self.NUM_UAV

    # For each UAV:
    # Draw a circle, with text inside = amt fuel remaining
    # Wedge on top of UAV for the actuator, black = good, red = bad
    # Wedge in front of UAV for the sensor, black = good, red = bad
    sStruct = self.state2Struct(s)
    for uav_id in range(self.NUM_UAV):
        uav_location = sStruct.locations[uav_id]
        uav_fuel = sStruct.fuel[uav_id]
        uav_sensor = sStruct.sensor[uav_id]
        uav_actuator = sStruct.actuator[uav_id]

        # Figure coordinates of this UAV: x from its location, y = its lane.
        uav_x = self.location_coord[uav_location]
        uav_y = 1 + uav_id

        self.uav_circ_vis[uav_id] = mpatches.Circle(
            (uav_x, uav_y), self.UAV_RADIUS, fc="w")
        self.uav_text_vis[uav_id] = plt.text(
            uav_x - 0.05, uav_y - 0.05, uav_fuel)

        sensor_color = (
            "black" if uav_sensor == SensorState.RUNNING else "red"
        )
        self.uav_sensor_vis[uav_id] = mpatches.Wedge(
            (uav_x + self.SENSOR_REL_X, uav_y),
            self.SENSOR_LENGTH,
            -30,
            30,
            color=sensor_color,
        )

        actuator_color = (
            "black" if uav_actuator == ActuatorState.RUNNING else "red"
        )
        self.uav_actuator_vis[uav_id] = mpatches.Wedge(
            (uav_x, uav_y + self.ACTUATOR_REL_Y),
            self.ACTUATOR_HEIGHT,
            60,
            120,
            color=actuator_color,
        )

        self.subplot_axes.add_patch(self.uav_circ_vis[uav_id])
        self.subplot_axes.add_patch(self.uav_sensor_vis[uav_id])
        self.subplot_axes.add_patch(self.uav_actuator_vis[uav_id])

    # Count of UAVs at the surveillance location whose sensor entry is
    # truthy.
    numHealthySurveil = np.sum(
        np.logical_and(sStruct.locations == UAVLocation.SURVEIL,
                       sStruct.sensor))
    # We have comms coverage: draw the lines between locations to show this.
    if any(sStruct.locations == UAVLocation.COMMS):
        for comms_segment in self.comms_line:
            comms_segment.set_visible(True)
            comms_segment.set_color("black")
            self.subplot_axes.add_line(comms_segment)
        # We also have healthy UAVs in surveillance: colour the last
        # location rectangle green.
        if numHealthySurveil > 0:
            self.location_rect_vis[-1].set_color("green")

    plt.figure("Domain").canvas.draw()
    plt.figure("Domain").canvas.flush_events()
    sleep(0.5)
def batch_discover(self, td_errors, phi, states):
    """
    Discovers features using iFDD in batch setting.
    self.batch_threshold is the minimum relevance value for the feature to
    be expanded.

    :param td_errors: p-by-1 (How much error observed for each sample)
    :param phi: p-by-n features corresponding to all samples
        (each row corresponds to one sample).
    :param states: Not used by this method.
    :return: True if at least one candidate pair was accepted by
        ``self.inspectPair``, False otherwise.
    """
    maxDiscovery = self.max_batch_discovery
    n = self.features_num  # number of features
    p = len(td_errors)  # Number of samples

    # Accumulate, over all samples, the pairwise feature co-activations
    # (counts) and the same products weighted by the TD error (relevances).
    # Summing the per-sample outer products phi_i phi_i^T * w_i equals
    # phi^T (phi * w), so two matrix products replace the Python loop.
    counts = np.zeros((n, n))
    relevances = np.zeros((n, n))
    if p > 0:
        errors = np.asarray(td_errors, dtype=float).reshape(-1)
        # iFDD+ keeps the sign of the error, plain iFDD uses its magnitude.
        weights = errors if self.iFDDPlus else np.abs(errors)
        samples = np.asarray(phi, dtype=float)[:p]
        relevances += np.dot(samples.T, samples * weights[:, np.newaxis])
        counts += np.dot(samples.T, samples)

    # Keep only the strict upper triangle: the diagonal pairs a feature with
    # itself and the lower part mirrors the upper one.
    relevances = np.triu(relevances, 1)
    if self.iFDDPlus:
        # Calculate relevances based on theoretical results of ICML 2013
        # potential submission: |sum of errors| / sqrt(co-activation count).
        non_zero_index = np.nonzero(relevances)
        relevances[non_zero_index] = np.divide(
            np.abs(relevances[non_zero_index]),
            np.sqrt(counts[non_zero_index]),
        )
    # Otherwise (Geramifard11_ICML paper) the accumulated absolute errors
    # are used as they are.

    # Find indexes to non-zero excited pairs
    # F1 and F2 are the parents of the potentials
    (F1, F2) = relevances.nonzero()
    relevances = relevances[F1, F2]
    if len(relevances) == 0:
        # No feature to add
        self.logger.debug("iFDD Batch: Max Relevance = 0")
        return False

    if self.debug:
        e_vec = relevances.flatten()
        e_vec = e_vec[e_vec != 0]
        e_vec = np.sort(e_vec)
        plt.ioff()
        plt.plot(e_vec, linewidth=3)
        plt.show()

    # Sort based on relevances
    # We want high to low hence the reverse: [::-1]
    sortedIndices = np.argsort(relevances)[::-1]
    max_relevance = relevances[sortedIndices[0]]
    # Add top <maxDiscovery> features
    self.logger.debug(
        "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))

    added_feature = False
    new_features = 0
    for max_index in sortedIndices:
        if new_features >= maxDiscovery:
            break
        f1 = F1[max_index]
        f2 = F2[max_index]
        relevance = relevances[max_index]
        if relevance > self.batch_threshold:
            if self.inspectPair(f1, f2, np.inf):
                # The log reports index ``features_num - 1`` as the new
                # feature.
                self.logger.debug(
                    "New Feature %d: %s, Relevance = %0.3f" % (
                        self.features_num - 1,
                        self.getStrFeatureSet(self.features_num - 1),
                        relevance,
                    )
                )
                new_features += 1
                added_feature = True
        else:
            # Because the list is sorted, there is no use to look at the
            # others
            break
    # A signal to see if the representation has been expanded or not
    return added_feature
def show_domain(self, a=0):
    """Draw the computer network and the status of every machine.

    The first call builds the graph (one node per computer, one edge per
    entry of ``self.UNIQUE_EDGES``), lays it out on a circle, draws it and
    calls ``plt.show()``.  Later calls clear the figure and redraw it with
    nodes and edges styled by the current state.

    :param a: Not used by this method.
    """
    state = self.state
    plt.figure("Domain")

    if self.networkGraph is None:
        graph = nx.Graph()
        self.networkGraph = graph
        # One node per (neighbour list, status) pair.
        for computer_id, _ in enumerate(zip(self.NEIGHBORS, state)):
            graph.add_node(computer_id, node_color="w")
        # One edge per unique pair of neighbouring computers.
        for edge in self.UNIQUE_EDGES:
            graph.add_edge(edge[0], edge[1], edge_color="k")
        self.networkPos = nx.circular_layout(graph)
        nx.draw_networkx_nodes(graph, self.networkPos, node_color="w")
        nx.draw_networkx_edges(graph, self.networkPos, edge_color="k")
        nx.draw_networkx_labels(graph, self.networkPos)
        plt.show()
    else:
        plt.clf()
        graph, layout = self.networkGraph, self.networkPos

        # Split the computers by whether their status equals RUNNING.
        running_nodes, failed_nodes = [], []
        for computer_id, (_, status) in enumerate(zip(self.NEIGHBORS, state)):
            bucket = running_nodes if status == self.RUNNING else failed_nodes
            bucket.append(computer_id)

        # An edge is healthy only when both of its endpoints are running.
        healthy_edges, degraded_edges = [], []
        for edge in self.UNIQUE_EDGES:
            both_running = (
                state[edge[0]] == self.RUNNING
                and state[edge[1]] == self.RUNNING
            )
            (healthy_edges if both_running else degraded_edges).append(edge)

        # Each group is drawn only when it is non-empty.
        if failed_nodes:
            nx.draw_networkx_nodes(
                graph, layout, nodelist=failed_nodes,
                node_color="r", linewidths=2,
            )
        if running_nodes:
            nx.draw_networkx_nodes(
                graph, layout, nodelist=running_nodes,
                node_color="w", linewidths=2,
            )
        if healthy_edges:
            nx.draw_networkx_edges(
                graph, layout, edgelist=healthy_edges,
                edge_color="k", width=2, style="solid",
            )
        if degraded_edges:
            # Drawn black and dotted (not red) to set them apart.
            nx.draw_networkx_edges(
                graph, layout, edgelist=degraded_edges,
                edge_color="k", width=2, style="dotted",
            )
        nx.draw_networkx_labels(graph, layout)

    canvas = plt.figure("Domain").canvas
    canvas.draw()
    plt.figure("Domain").canvas.flush_events()