def main(self):
    """Render an isotonic-vs-linear regression comparison as a PNG image.

    Reads the first fields registered under the keys ``'x'`` and ``'y'``,
    fits an ``IsotonicRegression`` and a ``LinearRegression`` model to them
    and plots the raw data, both fits, and one vertical segment per sample
    linking the observation to its isotonic estimate.

    Returns:
        The ``StringIO`` buffer into which the PNG was written.
    """
    x_field = self.fields_by_key('x')[0]
    y_field = self.fields_by_key('y')[0]
    x = np.array(self.slice_data(x_field, int))
    y = np.array(self.slice_data(y_field, int))
    n = len(x)
    render = StringIO.StringIO()

    # Fit IsotonicRegression and LinearRegression models
    ir = IsotonicRegression()
    y_ = ir.fit_transform(x, y)

    lr = LinearRegression()
    lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

    # Each segment joins an observation to its isotonic estimate.  It is
    # anchored at the sample's own x value (not its index) so that it lines
    # up with the plotted points for arbitrary x data.
    segments = [[[x[i], y[i]], [x[i], y_[i]]] for i in range(n)]
    lc = LineCollection(segments, zorder=0)
    lc.set_array(np.ones(len(y)))
    lc.set_linewidths(0.5 * np.ones(n))

    fig = plt.figure()
    try:
        plt.plot(x, y, 'r.', markersize=12)
        plt.plot(x, y_, 'g.-', markersize=12)
        plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
        plt.gca().add_collection(lc)
        plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
        plt.title('Isotonic regression')
        fig.savefig(render, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated renders do not accumulate figures.
        plt.close(fig)
    return render
def plot_MDS():
    """Plot the difference matrix as a 2-D point cloud with weighted edges.

    The difference matrix returned by ``fast_generate_diff_matrix`` is
    projected onto its first two principal components and scattered.  Every
    pair of points is joined by a line coloured by the inverse of their
    Euclidean distance (closer pairs get larger values).  A 1-D metric MDS
    embedding of the distances is also computed and rescaled, but it is not
    drawn.
    """
    diff_matrix = fast_generate_diff_matrix()
    X_true = diff_matrix
    similarities = euclidean_distances(diff_matrix)
    seed = 1

    mds = manifold.MDS(n_components=1, max_iter=3000, eps=1e-9,
                       random_state=seed, dissimilarity="precomputed",
                       n_jobs=1)
    pos = mds.fit(similarities).embedding_

    # Rescale the data
    pos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((pos ** 2).sum())

    # Rotate the data.  Only the true positions are rotated: the MDS
    # embedding has a single column, and PCA(n_components=2) rejects input
    # with fewer than two features.
    clf = PCA(n_components=2)
    X_true = clf.fit_transform(X_true)
    n_points = len(X_true)

    plt.figure(1)
    ax = plt.axes([0., 0., 1., 1.])

    plt.scatter(X_true[:, 0], X_true[:, 1], c='r', s=20)
    # The labels must be a sequence; a bare string would be iterated
    # character by character and the entry would read "T".
    plt.legend(('True position',), loc='best')

    # Turn distances into similarities.  Zero distances (the diagonal and
    # coincident points) divide by zero; those entries are reset to 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        similarities = similarities.max() / similarities * 100
    similarities[~np.isfinite(similarities)] = 0

    # Plot the edges: one segment per ordered pair (i, j), in the same
    # row-major order as ``similarities.flatten()``.
    segments = [[X_true[i, :], X_true[j, :]]
                for i in range(n_points) for j in range(n_points)]
    values = np.abs(similarities)
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, values.max()))
    lc.set_array(similarities.flatten())
    lc.set_linewidths(0.5 * np.ones(len(segments)))
    ax.add_collection(lc)

    plt.show()
def visualize(reader, visualization_method, value_column, segment_column):
    """Show the organised data as a dendrogram or as a 2-D scatter graph.

    Args:
        reader: passed through to ``organize_data``.
        visualization_method: ``'hc'`` draws a hierarchical-clustering
            dendrogram; ``'mds'`` draws a PCA projection with pairwise
            edges.  Any other value draws nothing.
        value_column: passed through to ``organize_data``.
        segment_column: passed through to ``organize_data``.
    """
    labels, data = organize_data(reader, visualization_method, value_column,
                                 segment_column)

    if visualization_method == 'hc':
        link = linkage(data)
        dendrogram(link, leaf_label_func=lambda i: labels[i])
        plt.show()

    elif visualization_method == 'mds':
        n = len(labels)
        # Centre on the global mean.  Not done in place, so integer-typed
        # input is promoted to float instead of raising a casting error.
        data = data - data.mean()
        clf = PCA(n_components=2)
        data = clf.fit_transform(data)

        similarities = euclidean_distances(data)

        # Add symmetric noise with a zero diagonal to the similarities
        noise = np.random.rand(n, n)
        noise = noise + noise.T
        noise[np.arange(noise.shape[0]), np.arange(noise.shape[0])] = 0
        similarities += noise

        plt.figure(1)
        ax = plt.axes([0., 0., 1., 1.])

        # Invert distances so that close pairs get large values; zero
        # distances divide by zero and are reset to 0.
        with np.errstate(divide='ignore', invalid='ignore'):
            similarities = similarities.max() / similarities * 100
        similarities[~np.isfinite(similarities)] = 0

        plt.scatter(data[:, 0], data[:, 1], c='r', s=20)
        # A bare string would be iterated per character (label "P").
        plt.legend(('Position',), loc='best')

        # One segment per ordered pair, matching similarities.flatten().
        n_points = len(data)
        segments = [[data[i, :], data[j, :]]
                    for i in range(n_points) for j in range(n_points)]
        values = np.abs(similarities)
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, values.max()))
        lc.set_array(similarities.flatten())
        lc.set_linewidths(0.5 * np.ones(len(segments)))
        ax.add_collection(lc)

        for label, x, y in zip(labels, data[:, 0], data[:, 1]):
            plt.annotate(
                label,
                xy=(x, y), xytext=(-20, 20),
                textcoords='offset points', ha='right', va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='->',
                                connectionstyle='arc3,rad=0'))

        plt.show()
def plotRegression(x, y, y_, lr):
    """Plot data, its isotonic fit and a linear fit, then show the figure.

    Args:
        x: 1-D array of sample positions.
        y: observed values, same length as ``x``.
        y_: isotonic-regression estimates, same length as ``x``.
        lr: fitted regressor; ``lr.predict`` is called with ``x`` as a
            column vector.
    """
    # The sample count was previously read from an undefined name ``n``.
    n = len(y)
    # One vertical segment per sample, from the observation to its isotonic
    # estimate, anchored at the sample's own x position.
    segments = [[[x[i], y[i]], [x[i], y_[i]]] for i in range(n)]
    lc = LineCollection(segments, zorder=0)
    lc.set_array(np.ones(n))
    lc.set_linewidths(0.5 * np.ones(n))

    plt.figure()
    plt.plot(x, y, 'r.', markersize=12)
    plt.plot(x, y_, 'g.-', markersize=12)
    plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
    plt.gca().add_collection(lc)
    plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
    plt.title('Isotonic regression')
    plt.show()
class Tracks(object):
    """Draws storm tracks on an axes as a single ``LineCollection``.

    ``stormdata`` arguments are indexed by the field names ``'track_id'``,
    ``'frame_index'``, ``'xcent'`` and ``'ycent'`` and by integer/boolean
    arrays (e.g. a NumPy structured array).
    """

    def __init__(self, ax, tails=None):
        """Create an empty track collection on *ax*.

        Args:
            ax: axes that will own the line collection.
            tails: if truthy, only the last ``tails`` frames of each track
                are drawn; otherwise the full history is drawn.
        """
        self.tracks = None
        self.tails = tails
        self.initialize_lines(ax)

    @staticmethod
    def create_trackmap(stormdata):
        """Return, for each track id, the row indexes of that track.

        The result has one entry per id in ``0 .. max(track_id)``; each entry
        is an index array sorted by ``frame_index``.  An empty *stormdata*
        yields an empty list.
        """
        track_ids = np.asarray(stormdata['track_id'])
        if track_ids.size == 0:
            # np.max() of an empty array raises; there are simply no tracks.
            return []

        trackmap = []
        for trackid in range(np.max(track_ids) + 1):
            indexes = np.where(track_ids == trackid)[0]
            # Makes sure the track segments are in chronological order
            indexes = indexes[np.argsort(stormdata['frame_index'][indexes])]
            trackmap.append(indexes)
        return trackmap

    def remove_lines(self):
        """Detach the line collection from its axes, if there is one."""
        if self.tracks is not None:
            self.tracks.remove()
            self.tracks = None

    def initialize_lines(self, ax):
        """Replace any existing collection with a fresh, empty one on *ax*."""
        self.remove_lines()
        self.tracks = LineCollection([])
        ax.add_collection(self.tracks)

    def update_lines(self, frame_index, stormdata):
        """Rebuild the segments so they show tracks up to *frame_index*."""
        segments = []
        for indexes in self.create_trackmap(stormdata):
            trackdata = stormdata[indexes]
            trackdata = trackdata[trackdata['frame_index'] <= frame_index]
            if self.tails:
                mask = trackdata['frame_index'] >= (frame_index - self.tails)
                trackdata = trackdata[mask]

            # There must always be something in a track, even if it is NaNs.
            # zip() must be materialised first: on Python 3 it returns an
            # iterator, which is always truthy, so the fallback never fired.
            points = list(zip(trackdata['xcent'], trackdata['ycent']))
            segments.append(points or [(np.nan, np.nan)])

        self.tracks.set_segments(segments)

    def lolite_line(self, indx):
        """Reset track *indx* to a line width of 1."""
        self.hilite_line(indx, 1)

    def hilite_line(self, indx, lw=4):
        """Set the line width of track *indx* to *lw* (no-op for ``None``)."""
        if indx is not None:
            lws = list(self.tracks.get_linewidths())
            n_lines = len(self.tracks.get_segments())
            if lws and len(lws) < n_lines:
                # A collection may carry fewer widths than segments (they
                # are cycled when drawing); expand to one width per segment
                # so that a single track can be addressed.
                lws = [lws[i % len(lws)] for i in range(n_lines)]
            lws[indx] = lw
            self.tracks.set_linewidths(lws)
def multidimensional_scaling(rdm, labels):
    """Embed a dissimilarity matrix in 2-D with MDS and display it.

    Args:
        rdm: square array of precomputed dissimilarities.
        labels: one label per row of ``rdm``; each is drawn next to its
            embedded point.
    """
    # Fit the metric MDS model on the precomputed dissimilarities.
    embedder = MDS(n_components=2, max_iter=3000,
                   dissimilarity='precomputed')
    positions = embedder.fit(rdm).embedding_
    positions /= positions.max()

    # Scatter the embedded points on axes that fill the whole figure.
    figure = plt.figure(1)
    ax = plt.axes([0., 0., 1., 1.])
    plt.scatter(positions[:, 0], positions[:, 1])

    # One edge per ordered pair of points, in the same row-major order as
    # ``rdm.flatten()`` so that every edge receives its own dissimilarity.
    n_points = len(positions)
    segments = []
    for src in range(n_points):
        for dst in range(n_points):
            segments.append([positions[src, :], positions[dst, :]])

    upper = np.abs(rdm).max()
    edges = LineCollection(segments, zorder=0, cmap=plt.cm.YlGnBu,
                           norm=plt.Normalize(0, upper))
    edges.set_array(rdm.flatten())
    edges.set_linewidths(2 * np.ones(len(segments)))
    ax.add_collection(edges)

    # Label every point at its embedded position.
    for row, text in enumerate(labels):
        plt.annotate(text, (positions[row, 0], positions[row, 1]))

    plt.show()
class HoughDemo(ImageProcessDemo):
    """Interactive demo of Hough line and circle detection.

    The image is blurred, run through Canny edge detection and then searched
    for line segments (``cv2.HoughLinesP``) and circles
    (``cv2.HoughCircles``); detections are drawn over the image as matplotlib
    collections.
    """

    TITLE = u"Hough Demo"
    DEFAULT_IMAGE = "stuff.jpg"
    # NOTE(review): "show_blur" is a trait below but is not listed here,
    # unlike "show_canny" -- confirm whether that omission is intended.
    SETTINGS = ["th2", "show_canny", "rho", "theta", "hough_th",
                "minlen", "maxgap", "dp", "mindist", "param2",
                "min_radius", "max_radius", "blur_sigma",
                "linewidth", "alpha", "check_line", "check_circle"]

    check_line = Bool(True)
    check_circle = Bool(True)

    # Gaussian blur parameters
    blur_sigma = Range(0.1, 5.0, 2.0)
    show_blur = Bool(False)

    # Canny parameters
    th2 = Range(0.0, 255.0, 200.0)
    show_canny = Bool(False)

    # HoughLine parameters
    rho = Range(1.0, 10.0, 1.0)
    theta = Range(0.1, 5.0, 1.0)
    hough_th = Range(1, 100, 40)
    minlen = Range(0, 100, 10)
    maxgap = Range(0, 20, 10)

    # HoughCircle parameters
    dp = Range(1.0, 5.0, 1.9)
    mindist = Range(1.0, 100.0, 50.0)
    param2 = Range(5, 100, 50)
    min_radius = Range(5, 100, 20)
    max_radius = Range(10, 100, 70)

    # draw parameters
    linewidth = Range(1.0, 3.0, 1.0)
    alpha = Range(0.0, 1.0, 0.6)

    def control_panel(self):
        """Return the grouped editor items for all tunable parameters."""
        return VGroup(
            Group(
                Item("blur_sigma", label=u"标准方差"),
                Item("show_blur", label=u"显示结果"),
                label=u"高斯模糊参数"
            ),
            Group(
                Item("th2", label=u"阈值2"),
                Item("show_canny", label=u"显示结果"),
                label=u"边缘检测参数"
            ),
            Group(
                Item("rho", label=u"偏移分辨率(像素)"),
                Item("theta", label=u"角度分辨率(角度)"),
                Item("hough_th", label=u"阈值"),
                Item("minlen", label=u"最小长度"),
                Item("maxgap", label=u"最大空隙"),
                label=u"直线检测"
            ),
            Group(
                Item("dp", label=u"分辨率(像素)"),
                Item("mindist", label=u"圆心最小距离(像素)"),
                Item("param2", label=u"圆心检查阈值"),
                Item("min_radius", label=u"最小半径"),
                Item("max_radius", label=u"最大半径"),
                label=u"圆检测"
            ),
            Group(
                Item("linewidth", label=u"线宽"),
                Item("alpha", label=u"alpha"),
                HGroup(
                    Item("check_line", label=u"直线"),
                    Item("check_circle", label=u"圆"),
                ),
                label=u"绘图参数"
            )
        )

    def __init__(self, **kwargs):
        """Register the redraw-triggering traits and create the overlays."""
        super(HoughDemo, self).__init__(**kwargs)
        self.connect_dirty("th2, show_canny, show_blur, rho, theta, hough_th,"
                           "min_radius, max_radius, blur_sigma,"
                           "minlen, maxgap, dp, mindist, param2, "
                           "linewidth, alpha, check_line, check_circle")
        self.lines = LineCollection([], linewidths=2, alpha=0.6)
        self.axe.add_collection(self.lines)
        self.circles = EllipseCollection(
            [], [], [],
            units="xy",
            facecolors="none",
            edgecolors="red",
            linewidths=2,
            alpha=0.6,
            transOffset=self.axe.transData)

        self.axe.add_collection(self.circles)

    def _img_changed(self):
        """Cache a grayscale copy of the newly loaded image."""
        self.img_gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)

    def draw(self):
        """Run blur, edge detection and both Hough transforms, then redraw."""
        img_smooth = cv2.GaussianBlur(self.img_gray, (0, 0),
                                      self.blur_sigma, self.blur_sigma)
        img_edge = cv2.Canny(img_smooth, self.th2 * 0.5, self.th2)

        # Both intermediate images are single-channel grayscale, so they are
        # expanded with GRAY2BGR.  (A Bayer demosaicing code was used for the
        # blurred image before, which treats it as a raw sensor mosaic and
        # produces false colours.)
        if self.show_blur and self.show_canny:
            show_img = cv2.cvtColor(np.maximum(img_smooth, img_edge),
                                    cv2.COLOR_GRAY2BGR)
        elif self.show_blur:
            show_img = cv2.cvtColor(img_smooth, cv2.COLOR_GRAY2BGR)
        elif self.show_canny:
            show_img = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2BGR)
        else:
            show_img = self.img

        if self.check_line:
            theta = self.theta / 180.0 * np.pi
            lines = cv2.HoughLinesP(img_edge,
                                    self.rho, theta, self.hough_th,
                                    minLineLength=self.minlen,
                                    maxLineGap=self.maxgap)

            if lines is not None:
                # OpenCV 2.x returns shape (1, N, 4) and 3.x+ returns
                # (N, 1, 4).  Reshaping the whole array yields N segments of
                # two (x, y) points for either layout; indexing [0] first
                # would keep a single line on 3.x+.
                self.lines.set_segments(lines.reshape(-1, 2, 2))
                self.lines.set_visible(True)
            else:
                self.lines.set_visible(False)
        else:
            self.lines.set_visible(False)

        if self.check_circle:
            # Method 3 is the Hough gradient method (HOUGH_GRADIENT).
            circles = cv2.HoughCircles(img_smooth, 3,
                                       self.dp, self.mindist,
                                       param1=self.th2,
                                       param2=self.param2,
                                       minRadius=self.min_radius,
                                       maxRadius=self.max_radius)

            if circles is not None:
                circles = circles[0]
                # Columns are (x, y, radius); the collection's private size
                # arrays are overwritten directly with the radii.
                self.circles._heights = self.circles._widths = circles[:, 2]
                self.circles.set_offsets(circles[:, :2])
                self.circles._angles = np.zeros(len(circles))
                self.circles._transOffset = self.axe.transData
                self.circles.set_visible(True)
            else:
                self.circles.set_visible(False)
        else:
            self.circles.set_visible(False)

        self.lines.set_linewidths(self.linewidth)
        self.circles.set_linewidths(self.linewidth)
        self.lines.set_alpha(self.alpha)
        self.circles.set_alpha(self.alpha)

        self.draw_image(show_img)
def getStockMarketStructure(symbol_dict):
    """Cluster stocks by co-variation and save a graph of the structure.

    Daily open/close quotes for every symbol are downloaded, a sparse
    inverse covariance is learned from the daily variations, the stocks are
    clustered with affinity propagation and laid out in 2-D with locally
    linear embedding.  The resulting figure is written to an SVG file.

    Args:
        symbol_dict: mapping of ticker symbol to display name.

    Returns:
        The name of the SVG file that was written.
    """
    # Quotes are taken for 2009-01-01 to 2011-01-01, i.e. after the 2008
    # crash (recent enough to include high-tech firms).
    d1 = datetime.datetime(2009, 1, 1)
    d2 = datetime.datetime(2011, 1, 1)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbols, names = np.array(list(symbol_dict.items())).T

    quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True)
              for symbol in symbols]

    # Plain ``float`` replaces the removed ``np.float`` alias (same dtype);
    # the locals are named so they do not shadow the builtin ``open``.
    open_prices = np.array([q.open for q in quotes]).astype(float)
    close_prices = np.array([q.close for q in quotes]).astype(float)

    # The daily variations of the quotes are what carry most information
    variation = close_prices - open_prices

    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    # Visualization
    # ``spectral`` was renamed ``nipy_spectral`` in matplotlib; use whichever
    # this installation provides.
    spectral = getattr(plt.cm, 'nipy_spectral', None)
    if spectral is None:
        spectral = plt.cm.spectral

    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by zero when
    # mapping a label to a colour.
    color_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # recent NumPy releases.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    filename_1 = id_generator() + '.svg'
    plt.savefig(filename_1)

    return filename_1
def dendrogram(self, w=12, h=17, colors=10, color_labels=True,
               weight_nodes=True, annotate=True):
    """
    Draws dendrogram

    :w: figure width, passed to ``figsize``.
    :h: figure height, passed to ``figsize``.
    :colors: Approx. no of color clusters in figure.
    :color_labels: colour each y tick label like the link above its leaf.
    :weight_nodes: redraw the links with per-segment widths obtained from
        ``self.segment_path``.
    :annotate: add the hard-coded topic-group annotations.

    Returns ``(fig, self.ax, R)`` where ``R`` is the dictionary returned by
    ``hierarchymod.dendrogram``.  The figure is also saved as PDF and PNG.
    """
    self.labels = self.parse_topic_labels('labels')
    self.colors = colors
    fig = plt.figure(figsize=(w, h))
    plt.xlabel("Distance")

    R = hierarchymod.dendrogram(
        self.Z,
        orientation='right',
        distance_sort='descending',
        show_leaf_counts=False,
        no_plot=False,
        leaf_label_func=self._labelpicker,
        link_color_func=self._colorpicker)
    self.ax = plt.gca()

    if weight_nodes:
        self.get_node_weights()

        # assumes orientation left or right
        # Each entry becomes [p0, p1, p2, p3, vertex_array, link_index].
        self.lines = []
        for (xline, yline) in zip(R['dcoord'], R['icoord']):
            coords = list(zip(xline, yline))
            self.lines.append(coords)
        for i, line in enumerate(self.lines):
            coord_array = np.array(line, dtype=float)
            line.append(coord_array)
            line.append(R['i_list'][i])

        # Rebuild every existing collection as a LineCollection whose
        # segments carry individual widths.
        old_colls = list(self.ax.collections)
        new_colls = []
        for c in old_colls:
            segments = []
            widths = []
            color = c.get_color()
            for p in c.get_paths():
                for line in self.lines:
                    if np.equal(line[4], p.vertices).all():
                        # Named so the ``w`` (width) parameter is not
                        # shadowed.
                        segs, seg_widths = self.segment_path(p.vertices,
                                                             line[5])
                        segments.extend(segs)
                        widths.extend(seg_widths)
            coll = LineCollection(segments)
            coll.set_color(color)
            coll.set_linewidths(widths)
            new_colls.append(coll)

        # replace old line collections.  Artists are removed one by one
        # because ``ax.collections`` cannot be assigned to in current
        # matplotlib.
        for c in old_colls:
            c.remove()
        for c in new_colls:
            self.ax.add_collection(c)

    if color_labels:
        # Leaves sit at 5, 15, 25, ... on the leaf axis; map each leaf
        # position back to its index and remember the colour of the link
        # that ends there.
        self.cluster_idxs = {}
        for c, pi in zip(R['color_list'], R['icoord']):
            for leg in pi[1:3]:
                i = (leg - 5.0) / 10.0
                if abs(i - int(i)) < 1e-5:
                    self.cluster_idxs[int(i)] = c

        ylbls = self.ax.get_ymajorticklabels()
        for c, y in enumerate(ylbls):
            y.set_color(self.cluster_idxs[c])

        # tempfix: label 11 takes the colour of leaf 12.  Guarded so that a
        # dendrogram with fewer labels does not raise.
        if len(ylbls) > 11 and 12 in self.cluster_idxs:
            ylbls[11].set_color(self.cluster_idxs[12])

    self.ax.set_xlim(left=0.6)

    if annotate:
        # Positions are hard-coded in data coordinates for one specific
        # topic model.
        self.ax.annotate("Fiscal policy\nand corporate finance", (1.12, 43))
        self.ax.annotate("Financial markets", (1.13, 133))
        self.ax.annotate("Entertainment", (1.14, 225))
        self.ax.annotate("Labor market, career \nand organization",
                         (1.03, 295))
        self.ax.annotate("Politics", (1.085, 383))
        self.ax.annotate("Sports", (1.05, 460))
        self.ax.annotate("US & UK", (1.0, 600))
        self.ax.annotate("Industry and trade", (1.08, 770))

    plt.tight_layout()
    fig.savefig(os.path.join(params().paths['lda'],
                             'dendrogram' + str(self.num_topics) + '.pdf'),
                dpi=300)
    fig.savefig(os.path.join(params().paths['lda'],
                             'dendrogram' + str(self.num_topics) + '.png'),
                dpi=300)
    plt.show()
    return fig, self.ax, R
class SunPlotPy(wx.Frame, Spatial, Grid):
    """
    The main frame of the application.

    A wx frame embedding a matplotlib canvas plus controls for choosing the
    variable, time step, vertical layer, colormap and colour limits of the
    plotted data.  The ``Spatial`` / ``Grid`` parts of the object are
    initialised lazily, when a file or grid folder is opened.
    """
    title = 'sunplot(py)'

    # Plotting options
    autoclim = True
    showedges = False
    bgcolor = 'k'
    textcolor = 'w'
    cmap = 'RdBu'
    particlesize = 1.8
    particlecolor = 'm'

    # other flags
    collectiontype = 'cells'
    oldcollectiontype = 'cells'

    # Index of the time step currently selected in the time drop-down.
    tindex = 0
    depthlevs = [0., 10., 100., 200., 300., 400., 500.,
                 1000., 2000., 3000., 4000., 5000]

    _FillValue = 999999

    def __init__(self):
        """Build the frame: menu bar, status bar and main panel."""
        wx.Frame.__init__(self, None, -1, self.title)

        self.create_menu()
        self.create_status_bar()
        self.create_main_panel()

    def create_menu(self):
        """Create the File, Tools and Help menus and bind their handlers."""
        self.menubar = wx.MenuBar()

        ###
        # File Menu
        ###
        menu_file = wx.Menu()
        # Load a hydro output file
        m_expt = menu_file.Append(-1, "&Open file\tCtrl-O", "Open netcdf file")
        self.Bind(wx.EVT_MENU, self.on_open_file, m_expt)

        # Load a grid file
        m_grid = menu_file.Append(-1, "&Load grid\tCtrl-G", "Load SUNTANS grid from folder")
        self.Bind(wx.EVT_MENU, self.on_load_grid, m_grid)

        # Load a particle file
        m_part = menu_file.Append(-1, "&Load PTM file\tCtrl-Shift-P", "Load a PTM file")
        self.Bind(wx.EVT_MENU, self.on_load_ptm, m_part)

        # Save current scene as an animation
        m_anim = menu_file.Append(-1, "&Save animation of current scene\tCtrl-S", "Save animation")
        self.Bind(wx.EVT_MENU, self.on_save_anim, m_anim)

        # Save the current figure
        m_prin = menu_file.Append(-1, "&Print current scene\tCtrl-P", "Save figure")
        self.Bind(wx.EVT_MENU, self.on_save_fig, m_prin)

        menu_file.AppendSeparator()
        # Exit
        m_exit = menu_file.Append(-1, "E&xit\tCtrl-X", "Exit")
        self.Bind(wx.EVT_MENU, self.on_exit, m_exit)

        ###
        # Tools menu
        ###
        menu_tools = wx.Menu()
        m_gridstat = menu_tools.Append(-1, "&Plot grid size statistics", "SUNTANS grid size")
        self.Bind(wx.EVT_MENU, self.on_plot_gridstat, m_gridstat)

        m_countcells = menu_tools.Append(-1, "&Count # grid cells", "Grid cell count")
        self.Bind(wx.EVT_MENU, self.on_count_cells, m_countcells)

        m_overlaybathy = menu_tools.Append(-1, "&Overlay depth contours", "Depth overlay")
        self.Bind(wx.EVT_MENU, self.on_overlay_bathy, m_overlaybathy)

        ###
        # Help Menu
        ###
        menu_help = wx.Menu()
        m_about = menu_help.Append(-1, "&About\tF1", "About the demo")
        self.Bind(wx.EVT_MENU, self.on_about, m_about)

        # Add all of the menu bars
        self.menubar.Append(menu_file, "&File")
        self.menubar.Append(menu_tools, "&Tools")
        self.menubar.Append(menu_help, "&Help")
        self.SetMenuBar(self.menubar)

    def create_main_panel(self):
        """ Creates the main panel with all the controls on it:
             * mpl canvas
             * mpl navigation toolbar
             * Control panel for interaction
        """
        self.panel = wx.Panel(self)

        # Create the mpl Figure and FigCanvas objects.
        # 7x6 inches, 100 dots-per-inch
        self.dpi = 100
        self.fig = Figure((7.0, 6.0), dpi=self.dpi)
        self.canvas = FigCanvas(self.panel, -1, self.fig)

        # Since we have only one plot, we can use add_axes
        # instead of add_subplot, but then the subplot
        # configuration tool in the navigation toolbar wouldn't
        # work.
        self.axes = self.fig.add_subplot(111)

        ########
        # Create widgets
        ########
        self.variable_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a variable...'],
            style=wx.CB_READONLY)
        self.variable_list.Bind(wx.EVT_COMBOBOX, self.on_select_variable)

        self.time_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a time step...'],
            style=wx.CB_READONLY)
        self.time_list.Bind(wx.EVT_COMBOBOX, self.on_select_time)

        self.depthlayer_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a vertical layer...'],
            style=wx.CB_READONLY)
        self.depthlayer_list.Bind(wx.EVT_COMBOBOX, self.on_select_depth)

        self.show_edge_check = wx.CheckBox(self.panel, -1,
                                           "Show Edges",
                                           style=wx.ALIGN_RIGHT)
        self.show_edge_check.Bind(wx.EVT_CHECKBOX, self.on_show_edges)

        if USECMOCEAN:
            cmaps = []
            for cmap in cm.cmapnames:
                cmaps.append(cmap)
                cmaps.append(cmap + '_r')  # Add all reverse map options
        else:
            # Use matplotlib standard.  sorted() works whether keys()
            # returns a list (Python 2) or a view (Python 3).
            cmaps = sorted(matplotlib.cm.datad.keys())
        self.colormap_list = wx.ComboBox(
            self.panel,
            size=(100, -1),
            choices=cmaps,
            style=wx.CB_READONLY)
        self.colormap_list.Bind(wx.EVT_COMBOBOX, self.on_select_cmap)
        self.colormap_label = wx.StaticText(self.panel, -1, "Colormap:")

        self.clim_check = wx.CheckBox(self.panel, -1,
                                      "Manual color limits ",
                                      style=wx.ALIGN_RIGHT)
        self.clim_check.Bind(wx.EVT_CHECKBOX, self.on_clim_check)

        self.climlow = wx.TextCtrl(
            self.panel,
            size=(100, -1),
            style=wx.TE_PROCESS_ENTER)
        self.climlow.Bind(wx.EVT_TEXT_ENTER, self.on_climlow)

        self.climhigh = wx.TextCtrl(
            self.panel,
            size=(100, -1),
            style=wx.TE_PROCESS_ENTER)
        self.climhigh.Bind(wx.EVT_TEXT_ENTER, self.on_climhigh)

        # Labels
        self.variable_label = wx.StaticText(self.panel, -1, "Variable:", size=(200, -1))
        self.time_label = wx.StaticText(self.panel, -1, "Time step:", size=(200, -1))
        self.depth_label = wx.StaticText(self.panel, -1, "Vertical level:", size=(200, -1))

        # Create the navigation toolbar, tied to the canvas
        self.toolbar = NavigationToolbar(self.canvas)

        #########
        # Layout with box sizers
        #########
        self.vbox = wx.BoxSizer(wx.VERTICAL)
        self.vbox.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.vbox.Add(self.toolbar, 0, wx.EXPAND)

        self.vbox.AddSpacer(10)

        flags = wx.ALIGN_LEFT | wx.ALL | wx.ALIGN_CENTER_VERTICAL

        self.hbox0 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox0.Add(self.show_edge_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_label, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_list, 0, border=10, flag=flags)
        self.hbox0.Add(self.clim_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.climlow, 0, border=10, flag=flags)
        self.hbox0.Add(self.climhigh, 0, border=10, flag=flags)

        self.vbox.AddSpacer(5)
        self.hbox1 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox1.Add(self.variable_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.time_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.depth_label, 0, border=10, flag=flags)

        self.vbox.AddSpacer(5)
        self.hbox2 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox2.Add(self.variable_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.time_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.depthlayer_list, 0, border=10, flag=flags)

        self.vbox.Add(self.hbox1, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox2, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox0, 0, flag=wx.ALIGN_LEFT | wx.TOP)

        self.panel.SetSizer(self.vbox)
        self.vbox.Fit(self)

    ##########
    # Private helpers
    ##########
    def _remove_collection(self):
        """Detach the current collection from the axes."""
        try:
            self.collection.remove()
        except (AttributeError, NotImplementedError, ValueError):
            # Fallback for collections that cannot remove themselves: edit
            # the axes' artist list directly, as the code did before.
            self.axes.collections.remove(self.collection)

    @staticmethod
    def _with_extension(path, ext):
        """Return *path*, with *ext* appended unless it already ends in it."""
        if path.lower().endswith(ext):
            return path
        return path + ext

    def _set_manual_clim(self, index, text):
        """Store a colour limit typed by the user as a float."""
        try:
            value = float(text)
        except ValueError:
            self.flash_status_message("Invalid color limit: %s" % text)
            return
        self.clim[index] = value

    ##########
    # Event functions
    ##########
    def create_figure(self):
        """
        Creates the figure
        """
        # Find the colorbar limits if unspecified
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])

        if 'collection' in self.__dict__:
            self._remove_collection()
        else:
            # First call - set the axes limits
            self.axes.set_aspect('equal')
            self.axes.set_xlim(self.xlims)
            self.axes.set_ylim(self.ylims)

        if self.collectiontype == 'cells':
            self.collection = PolyCollection(self.xy, cmap=self.cmap)
            self.collection.set_array(np.array(self.data[:]))
            if not self.showedges:
                self.collection.set_edgecolors(self.collection.to_rgba(np.array((self.data[:]))))
        elif self.collectiontype == 'edges':
            xylines = [self.xp[self.edges], self.yp[self.edges]]
            # list(): on Python 3 zip() is a one-shot iterator, not a
            # sequence of points.
            linesc = [list(zip(xylines[0][ii, :], xylines[1][ii, :]))
                      for ii in range(self.Ne)]
            self.collection = LineCollection(linesc, array=np.array(self.data[:]), cmap=self.cmap)

        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        self.axes.add_collection(self.collection)
        # NOTE: from here on ``self.title`` is the axes title artist, not
        # the window-title string defined on the class.
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.axes.set_xlabel('Easting [m]')
        self.axes.set_ylabel('Northing [m]')

        # create a colorbar
        if 'cbar' not in self.__dict__:
            self.cbar = self.fig.colorbar(self.collection)
        else:
            print('Updating colorbar...')
            # Use on_mappable_changed where the colorbar still provides it,
            # update_normal otherwise.
            refresh = getattr(self.cbar, 'on_mappable_changed', None)
            if refresh is None:
                refresh = self.cbar.update_normal
            refresh(self.collection)

        self.canvas.draw()

    def update_figure(self):
        """Refresh the existing collection with the current data/limits."""
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])
        else:
            self.clim = [float(self.climlow.GetValue()),
                         float(self.climhigh.GetValue())]

        # check whether it is cell or edge type
        if self.hasDim(self.variable, self.griddims['Ne']):
            self.collectiontype = 'edges'
        elif self.hasDim(self.variable, self.griddims['Nc']):
            self.collectiontype = 'cells'

        # Create a new figure if the variable has gone from cell to edge or
        # vice versa
        if not self.collectiontype == self.oldcollectiontype:
            self.create_figure()
            self.oldcollectiontype = self.collectiontype

        self.collection.set_array(np.array(self.data[:]))
        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        # Cells only
        if self.collectiontype == 'cells':
            if not self.showedges:
                self.collection.set_edgecolors(self.collection.to_rgba(np.array((self.data[:]))))
            else:
                self.collection.set_edgecolors('k')
                self.collection.set_linewidths(0.2)

        # Update the title
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)

        # Update the colorbar
        self.cbar.update_normal(self.collection)

        # redraw the figure
        self.canvas.draw()

    def on_pick(self, event):
        """Show the bounding box of the picked artist in a dialog."""
        # The event received here is of the type
        # matplotlib.backend_bases.PickEvent
        #
        # It carries lots of information, of which we're using
        # only a small amount here.
        box_points = event.artist.get_bbox().get_points()
        msg = "You've clicked on a bar with coords:\n %s" % box_points

        dlg = wx.MessageDialog(
            self,
            msg,
            "Click!",
            wx.OK | wx.ICON_INFORMATION)

        dlg.ShowModal()
        dlg.Destroy()

    def on_select_variable(self, event):
        """Load the chosen variable and refresh the layer list and plot."""
        vname = event.GetString()
        self.flash_status_message("Selecting variable: %s" % vname)
        # update the spatial object and load the data
        self.variable = vname
        self.loadData(variable=self.variable)

        # Check if the variable has a depth coordinate
        depthstr = ['']
        # If so populate the vertical layer box
        if self.hasDim(self.variable, self.griddims['Nk']):
            depthstr = ['%3.1f' % self.z_r[k] for k in range(self.Nkmax)]
            depthstr += ['surface', 'seabed']
        elif self.hasDim(self.variable, 'Nkw'):
            depthstr = ['%3.1f' % self.z_w[k] for k in range(self.Nkmax + 1)]

        self.depthlayer_list.SetItems(depthstr)

        # Update the plot
        self.update_figure()

    def on_select_time(self, event):
        """Switch to the chosen time step and redraw."""
        self.tindex = event.GetSelection()
        # Update the object time index and reload the data
        if self.plot_type == 'hydro':
            if not self.tstep == self.tindex:
                self.tstep = self.tindex
                self.loadData()
                self.flash_status_message("Selecting time step: %s..." % event.GetString())

                # Update the plot
                self.update_figure()
        elif self.plot_type == 'particles':
            self.PTM.plot(self.tindex, ax=self.axes,
                          xlims=self.axes.get_xlim(), ylims=self.axes.get_ylim())

            self.canvas.draw()

    def on_select_depth(self, event):
        """Switch to the chosen vertical layer and redraw."""
        kindex = event.GetSelection()
        if not self.klayer[0] == kindex:
            # Check if its the seabed or surface value
            if kindex >= self.Nkmax:
                kindex = event.GetString()
            self.klayer = [kindex]
            self.loadData()
            self.flash_status_message("Selecting depth: %s..." % event.GetString())

            # Update the plot
            self.update_figure()

    def on_open_file(self, event):
        """Ask for hydrodynamic output file(s) and plot the start variable."""
        file_choices = "SUNTANS NetCDF (*.nc)|*.nc*|UnTRIM NetCDF (*.nc)|*.nc*|All Files (*.*)|*.*"

        dlg = wx.FileDialog(
            self,
            message="Open SUNTANS file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_MULTIPLE)

        if dlg.ShowModal() == wx.ID_OK:
            self.plot_type = 'hydro'

            path = dlg.GetPaths()

            # Initialise the class
            if dlg.GetFilterIndex() == 0 or dlg.GetFilterIndex() > 1:  # SUNTANS
                self.flash_status_message("Opening SUNTANS file: %s" % path)
                try:
                    Spatial.__init__(self, path, _FillValue=self._FillValue)
                except Exception:
                    # Retry with the negative fill value.  Exception (not a
                    # bare except) so Ctrl-C / SystemExit still propagate.
                    Spatial.__init__(self, path, _FillValue=-999999)
                startvar = 'dv'
            if dlg.GetFilterIndex() == 1:  # UnTRIM
                self.flash_status_message("Opening UnTRIMS file: %s" % path)
                UNTRIMSpatial.__init__(self, path)
                startvar = 'Mesh2_face_depth'

            # Populate the drop down menus
            vnames = self.listCoordVars()
            self.variable_list.SetItems(vnames)

            # Update the time drop down list
            if 'time' in self.__dict__:
                self.timestr = [datetime.strftime(tt, '%d-%b-%Y %H:%M:%S') for tt in self.time]
            else:
                # Assume that it is a harmonic-type file
                self.timestr = self.nc.Constituent_Names.split()

            self.time_list.SetItems(self.timestr)

            # Draw the depth
            if startvar in vnames:
                self.variable = startvar
                self.loadData()
                self.create_figure()

        dlg.Destroy()

    def on_load_grid(self, event):
        """Ask for a grid folder and draw its mesh."""
        dlg = wx.DirDialog(
            self,
            message="Open SUNTANS grid from folder...",
            defaultPath=os.getcwd(),
            style=wx.DD_DEFAULT_STYLE)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()

            # Initialise the class
            self.flash_status_message("Opening SUNTANS grid from folder: %s" % path)
            Grid.__init__(self, path)

            # Plot the Grid
            if 'collection' in self.__dict__:
                self._remove_collection()

            self.axes, self.collection = self.plotmesh(ax=self.axes, edgecolors='y')

            # redraw the figure
            self.canvas.draw()

        dlg.Destroy()

    def on_load_ptm(self, event):
        """Ask for a particle-tracking file and plot its last time step."""
        file_choices = "PTM NetCDF (*.nc)|*.nc|PTM Binary (*_bin.out)|*_bin.out|All Files (*.*)|*.*"

        # Exactly one file is read, so this is a single-selection dialog;
        # GetPath() must not be called on a wx.FD_MULTIPLE dialog.
        dlg = wx.FileDialog(
            self,
            message="Open PTM file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_OPEN)

        if dlg.ShowModal() == wx.ID_OK:
            self.plot_type = 'particles'
            path = dlg.GetPath()

            # Initialise the class
            if dlg.GetFilterIndex() == 0:  # SUNTANS
                self.flash_status_message("Opening PTM netcdf file: %s" % path)
                self.PTM = PtmNC(path)
            elif dlg.GetFilterIndex() == 1:  # PTM
                self.flash_status_message("Opening PTM binary file: %s" % path)
                self.PTM = PtmBin(path)

            self.Nt = self.PTM.nt

            # Update the time drop down list
            self.timestr = [datetime.strftime(tt, '%d-%b-%Y %H:%M:%S') for tt in self.PTM.time]
            self.time_list.SetItems(self.timestr)

            # Plot the last time step
            if 'xlims' in self.__dict__:
                self.PTM.plot(self.PTM.nt - 1, ax=self.axes, xlims=self.xlims,
                              ylims=self.ylims, color=self.particlecolor,
                              fontcolor='w', markersize=self.particlesize)
            else:
                self.PTM.plot(self.PTM.nt - 1, ax=self.axes, fontcolor='w',
                              color=self.particlecolor, markersize=self.particlesize)
            # redraw the figure
            self.canvas.draw()

        dlg.Destroy()

    def on_show_edges(self, event):
        """Toggle cell-edge drawing and redraw."""
        sender = event.GetEventObject()
        self.showedges = sender.GetValue()

        # Update the figure
        self.update_figure()

    def on_clim_check(self, event):
        """Switch between automatic and manual colour limits."""
        sender = event.GetEventObject()
        if sender.GetValue() == True:
            self.autoclim = False
            self.update_figure()
        else:
            self.autoclim = True

    def on_climlow(self, event):
        """Store the lower colour limit typed by the user."""
        self._set_manual_clim(0, event.GetString())

    def on_climhigh(self, event):
        """Store the upper colour limit typed by the user."""
        self._set_manual_clim(1, event.GetString())

    def on_select_cmap(self, event):
        """Apply the chosen colormap and redraw."""
        self.cmap = event.GetString()
        if USECMOCEAN:
            self.collection.set_cmap(getattr(cm, self.cmap))
        else:
            self.collection.set_cmap(self.cmap)

        # Update the figure
        self.update_figure()

    def on_save_fig(self, event):
        """
        Save a figure of the current scene to a file
        """
        # One extension per wildcard entry, in the same order, so that the
        # selected filter decides the output format.
        file_choices = "PNG (*.png)|*.png|PDF (*.pdf)|*.pdf|JPEG (*.jpg)|*.jpg|EPS (*.eps)|*.eps"
        filters = ['.png', '.pdf', '.jpg', '.eps']

        dlg = wx.FileDialog(
            self,
            message="Save figure to file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
            outfile = self._with_extension(path, ext)

            self.fig.savefig(outfile)

        dlg.Destroy()

    def on_save_anim(self, event):
        """
        Save an animation of the current scene to a file
        """
        file_choices = "Quicktime (*.mov)|*.mov| (*.gif)|*.gif| (*.avi)|*.avi |(*.mp4)|*.mp4 "
        filters = ['.mov', '.gif', '.avi', '.mp4']

        dlg = wx.FileDialog(
            self,
            message="Output animation file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
            outfile = self._with_extension(path, ext)
            self.flash_status_message("Saving animation to file: %s" % outfile)

            # Create the animation
            def initanim():
                if not self.plot_type == 'particles':
                    return (self.title, self.collection)
                else:
                    return (self.PTM.title, self.PTM.p_handle)

            def updateScalar(i):
                if not self.plot_type == 'particles':
                    self.tstep = [i]
                    self.loadData()
                    self.update_figure()
                    return (self.title, self.collection)
                elif self.plot_type == 'particles':
                    self.PTM.plot(i, ax=self.axes,
                                  xlims=self.axes.get_xlim(), ylims=self.axes.get_ylim())
                    return (self.PTM.title, self.PTM.p_handle)

            self.anim = animation.FuncAnimation(self.fig,
                                                updateScalar, init_func=initanim,
                                                frames=self.Nt, interval=50, blit=True)

            if ext == '.gif':
                self.anim.save(outfile, writer='imagemagick', fps=6)
            elif ext == '.mp4':
                print('Saving html5 video...')
                # Ensures html5 compatibility
                self.anim.save(outfile, writer='mencoder', fps=6,
                               bitrate=3600, extra_args=['-ovc', 'x264'])  # mencoder options
            else:
                self.anim.save(outfile, writer='mencoder', fps=6, bitrate=3600)

            # Return the figure back to its status
            del self.anim
            self.tstep = self.tindex
            if not self.plot_type == 'particles':
                self.loadData()
                self.update_figure()

            # Bring up a dialog box
            dlg2 = wx.MessageDialog(self, 'Animation complete.', "Done", wx.OK)
            dlg2.ShowModal()
            dlg2.Destroy()

        dlg.Destroy()

    def on_exit(self, event):
        """Close the application window."""
        self.Destroy()

    def on_about(self, event):
        """Show the About dialog."""
        msg = """ SUNTANS NetCDF visualization tool

            *Author: Matt Rayson
            *Institution: Stanford University
            *Created: October 2013
        """
        dlg = wx.MessageDialog(self, msg, "About", wx.OK)
        dlg.ShowModal()
        dlg.Destroy()

    def on_count_cells(self, eveny):
        """Show the total number of 3-D grid cells in a dialog."""
        msg = "Total 3-D grid cells = %d" % (self.count_cells())
        dlg = wx.MessageDialog(self, msg, "No. cells", wx.OK)
        dlg.ShowModal()
        dlg.Destroy()

    def on_overlay_bathy(self, event):
        """Overlay unfilled depth contours at ``self.depthlevs``."""
        # Plot depth contours
        print('Plotting contours...')
        self.contourf(z=self.dv, clevs=self.depthlevs,
                      ax=self.axes,
                      filled=False, colors='0.5', linewidths=0.5, zorder=1e6)
        print('Done')

    def on_plot_gridstat(self, event):
        """
        Plot the grid size histogram in a new figure
        """
        matplotlib.pyplot.figure()
        self.plothist()
        matplotlib.pyplot.show()

    def create_status_bar(self):
        """Create the frame's status bar."""
        self.statusbar = self.CreateStatusBar()

    def flash_status_message(self, msg, flash_len_ms=1500):
        """Show *msg* in the status bar and clear it after *flash_len_ms*."""
        self.statusbar.SetStatusText(msg)
        self.timeroff = wx.Timer(self)
        self.Bind(
            wx.EVT_TIMER,
            self.on_flash_status_off,
            self.timeroff)
        self.timeroff.Start(flash_len_ms, oneShot=True)

    def on_flash_status_off(self, event):
        """Timer callback: clear the status bar text."""
        self.statusbar.SetStatusText('')
def plot_graph(settings=None, macro_data_z=None, negate_fields=None):
    """Cluster the selected series and draw them as a partial-correlation graph.

    The selected columns are smoothed with a 3-period rolling sum, a sparse
    inverse covariance is estimated with ``covariance.GraphLassoCV``, the
    series are grouped with affinity propagation and positioned in 2-D with
    a spectral embedding.  The figure is written to
    ``figures/macro_graph.png`` and then displayed.

    :param settings: mapping that provides ``'data_fieldnames'`` (node
        labels), ``'data_fields'`` (column keys into ``macro_data_z``) and
        ``'common_start_date'`` (only rows whose index is greater are used).
        Required even though the default is ``None``.
    :param macro_data_z: pandas DataFrame holding the series.  Required even
        though the default is ``None``.
    :param negate_fields: optional column key(s) whose sign is flipped
        before smoothing.
    """
    symbols = np.array(settings['data_fieldnames']).T

    in_range = macro_data_z.index > settings['common_start_date']
    # .copy(): the sign flip below must not write into a slice of the
    # caller's frame.
    graph_data = macro_data_z[in_range][settings['data_fields']].iloc[2:].copy()
    if negate_fields is not None:
        graph_data[negate_fields] = -graph_data[negate_fields]

    # The rolling sum leaves NaN in its first two rows; drop them so the
    # standardisation and the covariance fit only see finite values.
    graph_data = graph_data.rolling(window=3, center=False).sum().dropna()

    ###########################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = np.array(graph_data.values, dtype=float)
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])))

    ###########################################################################
    # Find a low-dimension embedding for visualization: the best position of
    # the nodes on a 2D plane
    node_position_model = manifold.SpectralEmbedding(n_components=2,
                                                     n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
    f1 = 0  # embedding row drawn on the x axis
    f2 = 1  # embedding row drawn on the y axis

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(12, 6))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])

    # Scale the precision matrix by its diagonal to obtain partial
    # correlations.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[f1], embedding[f2], s=100 * d ** 2, c=labels,
                cmap=plt.cm.coolwarm)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[[f1, f2], start], embedding[[f1, f2], stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an edge-less graph
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.coolwarm,
                            norm=plt.Normalize(0, .7 * np.sqrt(values.max())))
        lc.set_array(np.sqrt(values))
        lc.set_linewidths(15 * np.sqrt(values))
        ax.add_collection(lc)

    # 'spectral' was renamed 'nipy_spectral'; use whichever this matplotlib has.
    spectral = getattr(plt.cm, 'nipy_spectral', None) or plt.cm.spectral
    # With a single cluster n_labels is 0; avoid dividing by zero.
    label_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    label_offset = 0.002
    for index, (name, label, coords) in enumerate(
            zip(symbols, labels, embedding.T)):
        x, y = coords[f1], coords[f2]

        # Signed offsets to every other node; the node's own slot is set to
        # 1 so it is never picked as its own nearest neighbour.
        dx = x - embedding[f1]
        dx[index] = 1
        dy = y - embedding[f2]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]

        if this_dx > 0:
            horizontalalignment = 'left'
            x += label_offset
        else:
            horizontalalignment = 'right'
            x -= label_offset
        if this_dy > 0:
            verticalalignment = 'bottom'
            y += label_offset
        else:
            verticalalignment = 'top'
            y -= label_offset

        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=spectral(label / label_scale),
                           alpha=.6))

    # np.ptp() instead of the ndarray.ptp() method, which NumPy 2 removed.
    x_span = np.ptp(embedding[f1])
    y_span = np.ptp(embedding[f2])
    plt.xlim(embedding[f1].min() - .15 * x_span,
             embedding[f1].max() + .10 * x_span)
    plt.ylim(embedding[f2].min() - .03 * y_span,
             embedding[f2].max() + .03 * y_span)

    # Save before show(): once the window is closed the figure is gone and
    # savefig would write an empty canvas.
    plt.savefig('figures/macro_graph.png', facecolor='w', edgecolor='w',
                transparent=True)
    plt.show()
c=labels, cmap=pl.cm.spectral) # Plot the edges start_idx, end_idx = np.where(non_zero) #a sequence of (*line0*, *line1*, *line2*), where:: # linen = (x0, y0), (x1, y1), ... (xm, ym) segments = [[embedding[:, start], embedding[:, stop]] for start, stop in zip(start_idx, end_idx)] values = np.abs(partial_correlations[non_zero]) lc = LineCollection(segments, zorder=0, cmap=pl.cm.hot_r, norm=pl.Normalize(0, .7 * values.max())) lc.set_array(values) lc.set_linewidths(15 * values) ax.add_collection(lc) # Add a label to each node. The challenge here is that we want to # position the labels to avoid overlap with other labels for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)): dx = x - embedding[0] dx[index] = 1 dy = y - embedding[1] dy[index] = 1 this_dx = dx[np.argmin(np.abs(dy))] this_dy = dy[np.argmin(np.abs(dx))] if this_dx > 0: horizontalalignment = 'left' x = x + .002
def StockMarketOLD():
    """Cluster a fixed set of stocks by daily variation and plot the graph.

    Downloads quotes for every symbol between 2005-01-01 and 2009-12-31 with
    ``quotes_historical_yahoo``, estimates a sparse precision matrix with
    ``graphical_lasso``, groups the stocks with affinity propagation and
    draws them on a 2-D locally-linear embedding.

    Side effects: writes ``covariancetable.tex``,
    ``Graphs/StockCluster.pdf`` and ``Graphs/StockCluster.svg``, prints
    progress to stdout and opens a matplotlib window.
    """
    ###########################################################################
    # Retrieve the data from Internet

    # Choose a time period reasonably calm (not too long ago so that we get
    # high-tech firms, and before the 2008 crash)
    d1 = datetime.datetime(2005, 1, 1)
    d2 = datetime.datetime(2009, 12, 31)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbol_dict = {
        'TOT': 'Total',
        'XOM': 'Exxon',
        'CVX': 'Chevron',
        'COP': 'ConocoPhillips',
        'VLO': 'Valero Energy',
        'MSFT': 'Microsoft',
        'IBM': 'IBM',
        'TWX': 'Time Warner',
        'CMCSA': 'Comcast',
        'HPQ': 'HP',
        'AMZN': 'Amazon',
        'TM': 'Toyota',
        'CAJ': 'Canon',
        'MTU': 'Mitsubishi',
        'SNE': 'Sony',
        'HMC': 'Honda',
        'NOC': 'Northrop Grumman',
        'BA': 'Boeing',
        'KO': 'Coca Cola',
        'MMM': '3M',
        'MCD': 'Mc Donalds',
        'MDLZ': 'Kraft Foods',
        'K': 'Kellogg',
        'UN': 'Unilever',
        'MAR': 'Marriott',
        'PG': 'Procter Gamble',
        'CL': 'Colgate-Palmolive',
        'GE': 'General Electrics',
        'WFC': 'Wells Fargo',
        'JPM': 'JPMorgan Chase',
        'AXP': 'American Express',
        'BAC': 'Bank of America',
        'GS': 'Goldman Sachs',
        'AAPL': 'Apple',
        'SAP': 'SAP',
        'CSCO': 'Cisco',
        'TXN': 'Texas Instruments',
        'XRX': 'Xerox',
        'WMT': 'Wal-Mart',
        'WBA': 'Walgreen',
        'HD': 'Home Depot',
        'GSK': 'GlaxoSmithKline',
        'PFE': 'Pfizer',
        'SNY': 'Sanofi-Aventis',
        'NVS': 'Novartis',
        'KMB': 'Kimberly-Clark',
        'R': 'Ryder',
        'GD': 'General Dynamics',
        'RTN': 'Raytheon',
        'CVS': 'CVS',
        'CAT': 'Caterpillar',
        'DD': 'DuPont de Nemours',
        'ORCL': 'Oracle',
        'NVO': 'Novo Nordisk',
        'LLY': 'Eli Lilly and Company',
        'MRK': 'Merck Co',
    }

    symbols, names = np.array(list(symbol_dict.items())).T

    # Row count every symbol is expected to return for the period above.
    expected_rows = 1259
    # Tuple positions read from each quote row; the original code stored
    # them in variables named "open" and "close".
    open_col, close_col = 5, 6

    # Download each symbol exactly once and reuse the rows for both the
    # length check and the price matrices.
    opens = []
    closes = []
    for symbol in symbols:
        print(symbol)
        rows = quotes_historical_yahoo(symbol, d1, d2, True, False)
        if len(rows) != expected_rows:
            print('%s %d' % (symbol, len(rows)))
        opens.append([q[open_col] for q in rows])
        closes.append([q[close_col] for q in rows])

    open_prices = pd.DataFrame(np.array(opens).T)
    close_prices = pd.DataFrame(np.array(closes).T)

    # The daily variations of the quotes are what carry most information
    variation = np.array(close_prices - open_prices)

    ###########################################################################
    # Learn a graphical structure from the correlations
    X = variation.copy()
    cov_rounded = np.round(np.cov(X.T), 3)
    pd.DataFrame(cov_rounded, columns=symbols,
                 index=symbols).to_latex('covariancetable.tex')
    print(np.max(cov_rounded))

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X /= X.std(axis=0)
    # NOTE(review): X is passed as-is; confirm that the graphical_lasso in
    # scope accepts raw observations rather than an empirical covariance.
    covariance_, precision_ = graphical_lasso(X, 0.3)
    print(pd.DataFrame(precision_))

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(covariance_)
    n_labels = labels.max()
    cluster_lines = ['Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i]))
                     for i in range(n_labels + 1)]
    for line in cluster_lines:
        print(line)

    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(20, 16))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    plt.annotate('From %s to %s' % (d1.strftime('%Y-%m-%d'),
                                    d2.strftime('%Y-%m-%d')),
                 xy=(0.11, -0.37), size=25)
    print(X.shape)
    for i, line in enumerate(cluster_lines):
        plt.annotate(line, xy=(-0.43, 0.02 - i * 0.02), size=18)

    # Scale the precision matrix by its diagonal to obtain partial
    # correlations.
    partial_correlations = precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # 'spectral' was renamed 'nipy_spectral'; use whichever this matplotlib has.
    spectral = getattr(plt.cm, 'nipy_spectral', None) or plt.cm.spectral

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=200 * d ** 2, c=labels,
                cmap=spectral)

    # Plot the edges: one ((x0, y0), (x1, y1)) segment per retained pair
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an edge-less graph
        lc = LineCollection(segments, zorder=0, cmap=plt.get_cmap('Greys'),
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by zero.
    label_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        # Signed offsets to every other node; the node's own slot is set to
        # 1 so it is never picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=22,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=spectral(label / label_scale),
                           alpha=.6))

    # np.ptp() instead of the ndarray.ptp() method, which NumPy 2 removed.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .25 * x_span,
             embedding[0].max() + .20 * x_span)
    plt.ylim(embedding[1].min() - .20 * y_span,
             embedding[1].max() + .20 * y_span)

    plt.savefig('Graphs/StockCluster.pdf', bbox_inches='tight')
    plt.savefig('Graphs/StockCluster.svg', bbox_inches='tight')
    plt.show()
def _find_listing(sheet, title):
    """Return ``(code, name)`` of the first sheet row whose second column equals *title*, else ``None``."""
    for row_idx in range(sheet.nrows):
        row = sheet.row_values(row_idx)
        if row[1] == title:
            return '{0:06d}'.format(int(row[0])), row[1]
    return None


def clusterSymbol(dbdf):
    """Cluster the stocks named in ``dbdf['title']`` and sample titles per cluster.

    Each title is normalised (spaces and ``-`` removed, ``&`` replaced by
    ``and``) and looked up in the KOSPI and KOSDAQ symbol workbooks.  Price
    bars for every match are fetched with ``cluster_fetchData``; the daily
    ``Close - Open`` variation is fed to ``covariance.GraphLassoCV`` and
    affinity propagation, and the resulting graph is plotted.

    The module-level ``dflength`` is lowered to the shortest bar history seen
    so far, and all histories are truncated to that length.

    :param dbdf: mapping/DataFrame with a ``'title'`` column of stock names.
    :return: DataFrame with a single ``'title'`` column holding names drawn
        at random from each cluster with more than one member; empty when
        no title could be resolved.
    """
    global dflength

    # NOTE(review): the original wrapped everything below in a ``try:`` with
    # no ``except``/``finally`` anywhere in the function (a syntax error).
    # The dangling ``try`` is dropped, so workbook errors propagate.
    book_kosdaq = xlrd.open_workbook("../../Kosdaq_symbols.xls")
    sheet_kosdaq = book_kosdaq.sheet_by_name('kosdaq')
    book_kospi = xlrd.open_workbook('../../Kospi_Symbols.xls')
    sheet_kospi = book_kospi.sheet_by_name('kospi')

    # Fixed arguments for cluster_fetchData (passed positionally below).
    startdatemode = 2
    dbtradinghist = 'none'
    histmode = 'none'
    plotly = 'plotly'
    stdmode = 'stddb'
    tangentmode = 'tangentdb'
    daych = 0
    runcount = 0
    srcsite = 1  # google (2 would be yahoo)
    writedblog = 'none'
    updbpattern = 'none'
    appenddb = 'none'

    quotes2 = []
    nametitles = []
    codearrs = []

    for title in dbdf['title']:
        title = title.replace(' ', '').replace('&', 'and').replace('-', '')
        print('title %s' % title)

        # KOSPI is searched first, then KOSDAQ; as in the original, a KOSDAQ
        # match overrides a KOSPI match.
        listing = None
        for candidate_market, sheet in ((1, sheet_kospi), (2, sheet_kosdaq)):
            hit = _find_listing(sheet, title)
            if hit is not None:
                print('%s %s' % hit)
                listing = hit
                markettype = candidate_market
        if listing is None:
            continue
        code, name = listing

        try:
            print('found code %s %s' % (code, name))
            bars = cluster_fetchData(
                str(code), markettype, name, 'realtime', 'dbpattern',
                histmode, runcount, srcsite, writedblog, updbpattern,
                appenddb, startdatemode, dbtradinghist, plotly, stdmode,
                'none', daych, tangentmode)
            n_bars = len(bars)
        except Exception as exc:
            # Best effort: one failing symbol must not abort the whole run,
            # but say so instead of hiding it.
            print('skipping %s (%s): %s' % (name, code, exc))
            continue

        if dflength == 0 or dflength > n_bars:
            dflength = n_bars
        quotes2.append(bars)
        nametitles.append(name)
        codearrs.append(code)
        clear_output()

    randomtitles = pd.DataFrame()
    if not quotes2:
        # Nothing resolved: there is nothing to cluster or plot.
        return randomtitles

    # Truncate every history to the common length so the rows stack into a
    # rectangular array.
    trimmed = [q.fillna(0)[:dflength] for q in quotes2]
    open2 = np.array([q['Open'].values for q in trimmed]).astype(float)
    close2 = np.array([q['Close'].values for q in trimmed]).astype(float)
    variation = close2 - open2

    # Keep symbols/names in the same order as the rows of ``variation``.
    # Going through a dict (as before) could reorder or merge entries and
    # attach cluster labels to the wrong stock.
    symbols = np.array(codearrs)
    names = np.array(nametitles)

    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    print('type open %s type close %s type variation %s type X %s'
          % (type(open2), type(close2), type(variation), type(X)))
    print('shape open %s shape close %s shape variation %s shape X %s'
          % (open2.shape, close2.shape, variation.shape, X.shape))
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        members = names[labels == i]
        print('Cluster %i: %s' % ((i + 1), ', '.join(members)))

        # Number of titles drawn depends on the cluster size:
        # 2-3 -> 1, 4-5 -> 2, 6-7 -> 4, 8+ -> 5; singletons are skipped.
        size = len(members)
        if size > 7:
            n_draw = 5
        elif size > 5:
            n_draw = 4
        elif size > 3:
            n_draw = 2
        elif size > 1:
            n_draw = 1
        else:
            n_draw = 0
        if n_draw:
            tmpdf = pd.DataFrame({'title': np.random.choice(members, n_draw)})
            randomtitles = pd.concat([tmpdf, randomtitles])

    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    pl.figure(1, facecolor='w', figsize=(15, 15))
    pl.clf()
    ax = pl.axes([0., 0., 1., 1.])
    pl.axis('off')

    # Scale the precision matrix by its diagonal to obtain partial
    # correlations.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # 'spectral' was renamed 'nipy_spectral'; use whichever this matplotlib has.
    spectral = getattr(pl.cm, 'nipy_spectral', None) or pl.cm.spectral

    # Plot the nodes using the coordinates of our embedding
    pl.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
               cmap=spectral)

    # Plot the edges: one ((x0, y0), (x1, y1)) segment per retained pair
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an edge-less graph
        lc = LineCollection(segments, zorder=0, cmap=pl.cm.hot_r,
                            norm=pl.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by zero.
    label_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        # Signed offsets to every other node; the node's own slot is set to
        # 1 so it is never picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        pl.text(x, y, name, size=10,
                horizontalalignment=horizontalalignment,
                verticalalignment=verticalalignment,
                bbox=dict(facecolor='w',
                          edgecolor=spectral(label / label_scale),
                          alpha=.6))

    # np.ptp() instead of the ndarray.ptp() method, which NumPy 2 removed.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    pl.xlim(embedding[0].min() - .15 * x_span,
            embedding[0].max() + .10 * x_span)
    pl.ylim(embedding[1].min() - .03 * y_span,
            embedding[1].max() + .03 * y_span)

    pl.show()
    return randomtitles
def cluster_data(data):
    """Cluster the columns of *data* and plot them as a partial-correlation graph.

    The transposed values are standardised, a sparse inverse covariance is
    estimated with ``covariance.GraphLassoCV``, clusters are found with
    affinity propagation and printed, and the nodes are drawn on a 2-D
    locally-linear embedding.

    :param data: object with a ``columns`` attribute (used as node names)
        that ``np.array`` can convert to a 2-D numeric array, e.g. a pandas
        DataFrame.
    """
    names = data.columns
    edge_model = covariance.GraphLassoCV()

    # Force a float copy: the in-place division below fails on integer
    # arrays.
    X = np.array(data, dtype=float).T
    X /= X.std(axis=0)
    edge_model.fit(X)

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    # Visualization
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Scale the precision matrix by its diagonal to obtain partial
    # correlations.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # 'spectral' was renamed 'nipy_spectral'; use whichever this matplotlib has.
    spectral = getattr(plt.cm, 'nipy_spectral', None) or plt.cm.spectral

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d**2, c=labels,
                cmap=spectral)

    # Plot the edges: one ((x0, y0), (x1, y1)) segment per retained pair
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an edge-less graph
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by zero.
    label_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        name = str(name)
        if hasattr(name, 'decode'):
            # Python 2 byte string: keep the original UTF-8 round trip.
            # Python 3 str has no decode() and is used unchanged.
            name = name.decode('utf-8').encode('utf-8')

        # Signed offsets to every other node; the node's own slot is set to
        # 1 so it is never picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=spectral(label / label_scale),
                           alpha=.6))

    # np.ptp() instead of the ndarray.ptp() method, which NumPy 2 removed.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
def plotManifoldDistances(self, segments: List[Union[MessageSegment, TypedSegment, RawMessage, Any]],
                          distances: numpy.ndarray,
                          labels: numpy.ndarray, templates: List = None, plotEdges=False, countMarkers=False):
    """
    Plot distances of segments according to (presumably multidimensional) features.
    This function abstracts from the actual feature by directly taking a precomputed similarity matrix and
    arranging the segments relative to each other according to their distances
    using Multidimensional Scaling (MDS). See module `manifold` from package `sklearn`.

    If segments is a list of `TypedSegment` or `MessageSegment`, this function plots the feature values
    of each given segment overlaying each other besides the distances; they are colored according to the
    given labels.

    If there are more than 2000 segments, they are subsampled per label before plotting; the caller's
    `segments` list is not modified.

    >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    >>> from utils.loader import BaseLoader
    >>> from inference.analyzers import Value
    >>>
    >>> bytedata = [
    ...     bytes([1, 2, 3, 4]),
    ...     bytes([ 2, 3, 4]),
    ...     bytes([ 1, 3, 4]),
    ...     bytes([ 2, 4 ]),
    ...     bytes([ 2, 3 ]),
    ...     bytes([20, 30, 37, 50, 69, 2, 30]),
    ...     bytes([ 37, 5, 69 ]),
    ...     bytes([70, 2, 3, 4]),
    ...     bytes([3, 2, 3, 4])
    ...     ]
    >>> messages = [RawMessage(bd) for bd in bytedata]
    >>> specimens = BaseLoader(messages)
    >>> analyzers = [Value(message) for message in messages]
    >>> segments = [TypedSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
    >>> for seg in segments[:4]:
    ...     seg.fieldtype = "ft1"
    >>> for seg in segments[4:6]:
    ...     seg.fieldtype = "ft2"
    >>> for seg in segments[6:]:
    ...     seg.fieldtype = "ft3"
    >>> DistanceCalculator.debug = False
    >>> dc = DistanceCalculator(segments, thresholdFunction=DistanceCalculator.neutralThreshold, thresholdArgs=None)
    Calculated distances for 37 segment pairs in ... seconds.
    >>> dp = DistancesPlotter(specimens, "test", False)
    >>> dp.plotManifoldDistances(segments, dc.distanceMatrix, numpy.array([1,2,3,1,1,0,1,0,2]))
    >>> # comment out writing of file for doctest
    >>> # dp.writeOrShowFigure()

    :param segments: If `segments` is a list of `TypedSegment`s, field types are marked as small markers
        within the label marker. labels containing "Noise" then are not explicitly marked like the other
        labeled segments
    :param distances: The precomputed similarity matrix:
        symmetric matrix, rows/columns in the order of `segments`
    :param labels: Labels of strings (or ints or any other printable type) identifying the cluster for each segment
    :param templates: Templates of clusters to be printed alongside with the feature values.
        CURRENTLY UNTESTED
    :param plotEdges: Plot of edges between each pair of segment markers.
        Caution: Adds n^2 lines which takes very long compared to the scatterplot and
        quickly becomes a huge load especially when rendering the plot as PDF.
    :param countMarkers: add text labels with information at positions with multiple markers
    """
    import math
    from sklearn import manifold
    from sklearn.decomposition import PCA

    # plot configuration
    labsize = 150  # label markers: size factor
    typsize = 30   # type markers: size factor
    # self._cm     # label color map
    fcm = cm.cubehelix  # type color map

    # identify unique labels; numeric labels are sorted numerically with "Noise" first
    allabels = set(labels)
    if all(isinstance(l, numpy.integer) or l.isdigit() for l in allabels if l != "Noise"):
        ulab = sorted(allabels, key=lambda l: -1 if l == "Noise" else int(l))
    else:
        ulab = sorted(allabels)

    # subsample if segment count is larger than maxSamples
    maxSamples = 1000
    originalSegmentCount = len(segments)
    if originalSegmentCount > 2 * maxSamples:
        ratiorev = originalSegmentCount / maxSamples
        step2keep = math.floor(ratiorev)
        lab2idx = dict()
        for idx, lab in enumerate(labels):
            lab2idx.setdefault(lab, list()).append(idx)
        # copy list to remove elements without side-effects
        segments = segments.copy()
        # to save the indices to be removed
        idx2rem = list()
        # determines a subset evenly distributed over all clusters while honoring the ratio to reduce to.
        for lab, ics in lab2idx.items():
            keep = set(ics[::step2keep])
            idx2rem.extend(set(ics) - keep)
        # delete from the back so the remaining indices stay valid
        idx2rem = sorted(idx2rem, reverse=True)
        for idx in idx2rem:
            del segments[idx]
        labels = numpy.delete(labels, idx2rem, 0)
        distances = numpy.delete(numpy.delete(distances, idx2rem, 0), idx2rem, 1)
    else:
        idx2rem = None

    # prepare MDS
    seed = numpy.random.RandomState(seed=3)
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                       dissimilarity="precomputed", n_jobs=1)
    pos = mds.fit(distances).embedding_

    # Rotate the data
    clf = PCA(n_components=2)
    pos = clf.fit_transform(pos)

    fig = self._fig
    axMDS, axSeg = self._axes  # type: plt.Axes, plt.Axes

    if idx2rem is not None:
        axSeg.text(0, -5, 'Subsampled: {} of {} segments'.format(len(segments), originalSegmentCount))

    # omit noise in cluster labels if types are plotted anyway.
    # Build a filtered list instead of calling ulab.remove() inside a loop over ulab:
    # removing while iterating skips the element that follows each removed one.
    if isinstance(segments[0], TypedSegment):
        ulab = [l for l in ulab if not (isinstance(l, str) and "Noise" in l)]
    elif isinstance(segments[0], RawMessage) and segments[0].messageType != "Raw":
        nonNoise = list()
        for l in ulab:
            try:
                if int(l) == -1:
                    continue
            except ValueError:
                pass  # not a problem, just keep the cluster, since its not noise.
            nonNoise.append(l)
        ulab = nonNoise

    # prepare color space
    cIdx = [int(round(each)) for each in numpy.linspace(2, self._cm.N - 2, len(ulab))]

    if templates is None:
        templates = ulab

    # iterate unique labels and scatter plot each of these clusters
    for c, (l, t) in enumerate(zip(ulab, templates)):  # type: int, (Any, Template)
        lColor = self._cm(cIdx[c])
        class_member_mask = (labels == l)
        try:
            x = list(compress(pos[:, 0].tolist(), class_member_mask))
            y = list(compress(pos[:, 1].tolist(), class_member_mask))
            # "If you want to specify the same RGB or RGBA value for all points,
            # use a 2-D array with a single row."
            # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
            axMDS.scatter(x, y, c=colors.to_rgba_array(lColor), alpha=.6,
                          s=labsize,
                          lw=0, label=str(l))
        except IndexError:
            # dump the inputs for debugging, then re-raise with the original traceback
            print(pos)
            print(distances)
            print(segments)
            raise

        if isinstance(t, Template):
            axSeg.plot(t.values, c=lColor, linewidth=4)

    # include field type labels for TypedSegments input
    if isinstance(segments[0], (TypedSegment, RawMessage)):
        if isinstance(segments[0], TypedSegment):
            ftypes = numpy.array([seg.fieldtype for seg in segments])  # PP
        elif isinstance(segments[0], RawMessage) and segments[0].messageType != 'Raw':
            ftypes = numpy.array([msg.messageType for msg in segments])  # PP
        else:
            ftypes = set()
        # identify unique types
        utyp = sorted(set(ftypes))
        # prepare color space
        # noinspection PyUnresolvedReferences
        cIdx = [int(round(each)) for each in numpy.linspace(30, fcm.N - 30, len(utyp))]
        # iterate unique types and scatter plot each of these groups
        for n, ft in enumerate(utyp):  # PP
            fColor = fcm(cIdx[n])
            type_member_mask = (ftypes == ft)
            x = list(compress(pos[:, 0].tolist(), type_member_mask))
            y = list(compress(pos[:, 1].tolist(), type_member_mask))
            # single-row RGBA array: same color for all points (see scatter documentation)
            axMDS.scatter(x, y, c=colors.to_rgba_array(fColor), alpha=1,
                          s=typsize,
                          lw=0, label=str(ft))

            if isinstance(segments[0], TypedSegment):
                for seg in compress(segments, type_member_mask):
                    axSeg.plot(seg.values, c=fColor, alpha=0.05)
    elif isinstance(segments[0], MessageSegment):
        for c, l in enumerate(ulab):
            lColor = self._cm(cIdx[c])
            class_member_mask = (labels == l)
            for seg in compress(segments, class_member_mask):
                axSeg.plot(seg.values, c=lColor, alpha=0.1)
    else:
        axSeg.text(.5, .5, 'nothing to plot\n(message alignment)', horizontalalignment='center')

    # place the label/type legend at the best position
    if isinstance(segments[0], RawMessage):
        axMDS.legend(bbox_to_anchor=(1.04, 1), scatterpoints=1, shadow=False)
        axSeg.patch.set_alpha(0.0)
        axSeg.axis('off')
    else:
        axMDS.legend(scatterpoints=1, loc='best', shadow=False)

    if plotEdges:
        # plotting of edges takes a long time compared to the scatterplot
        # (and especially when rendering the PDF)
        from matplotlib.collections import LineCollection
        # Plot the edges
        lines = [[pos[i, :], pos[j, :]]
                 for i in range(len(pos)) for j in range(len(pos))]
        values = numpy.abs(distances)
        # noinspection PyUnresolvedReferences
        lc = LineCollection(lines,
                            zorder=0, cmap=plt.cm.Blues,
                            norm=plt.Normalize(0, values.max()))
        # lc.set_alpha(.1)
        lc.set_array(distances.flatten())
        lc.set_linewidths(0.5 * numpy.ones(len(segments)))
        axMDS.add_collection(lc)

    if countMarkers:
        # Count markers at identical positions and plot text with information
        # about the markers at this position
        from collections import Counter
        if isinstance(segments[0], TypedSegment):
            coordCounter = Counter(
                [(posX, posY, seg.fieldtype) for seg, lab, posX, posY in zip(
                    segments, labels, pos[:, 0].tolist(), pos[:, 1].tolist())]
            )
        else:
            coordCounter = Counter(
                [(posX, posY, lab) for lab, posX, posY in zip(
                    labels, pos[:, 0].tolist(), pos[:, 1].tolist())]
            )
        for (posX, posY, lab), cnt in coordCounter.items():
            if cnt > 1:
                # offset the text by distance r in a label-dependent direction
                theta = hash(str(lab)) % 360
                r = 1
                posXr = posX + r * math.cos(theta)
                posYr = posY + r * math.sin(theta)
                axMDS.text(posXr, posYr, "{}: {}".format(lab, cnt), withdash=True)

    # Non-interactive backends have no toolbar (it is None); skip the refresh then.
    toolbar = getattr(fig.canvas, 'toolbar', None)
    if toolbar is not None:
        toolbar.update()
def plot_cluster(X, labels, model):
    """Draw the partial-correlation network of the variables in ``X``.

    Nodes are placed with a 2-D locally linear embedding of ``X.T``,
    coloured by ``labels`` and sized from the diagonal of the precision
    matrix.  An edge is drawn for every pair whose absolute partial
    correlation exceeds 0.02.

    :param X: 2-D array; ``X.T`` is embedded, so every column of ``X`` is
        one node.
    :param labels: integer numpy array with one cluster label per node.
    :param model: fitted estimator exposing ``precision_``.

    NOTE(review): the node captions come from ``names``, which is not a
    parameter of this function; it is resolved from the enclosing (module)
    scope and must hold one caption per node.
    """
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Rescale the precision matrix to unit diagonal (partial correlations).
    partial_correlations = model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding.
    # ``spectral`` was renamed ``nipy_spectral`` in matplotlib.
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: one segment ((x0, y0), (x1, y1)) per retained pair.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an empty edge set
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Largest label, used to map labels onto [0, 1]; clamp to avoid 0 / 0
    # when there is a single cluster.
    n_labels = labels.max()
    colour_scale = float(max(n_labels, 1))

    # Add a label to each node.  The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / colour_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
# 第一个元素为非零的数在O轴即竖轴的下标,第二个元素为非零的数在1轴即横轴的下标 # a sequence of (*line0*, *line1*, *line2*), where::linen = (x0, y0), (x1, y1), ... (xm, ym) segments = [[embedding[:, start], embedding[:, stop]] for start, stop in zip(start_idx, end_idx)] #embedding为二维数组 values = np.abs(partial_correlations[non_zero]) # 用non_zero遮罩后的15个元素的数组 lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r, norm=plt.Normalize( 0, .7 * values.max())) # zorder:调整层次,cmap:colormap lc.set_array(values) lc.set_linewidths(6 * values) ax.add_collection(lc) # Add a label to each node. The challenge here is that we want to # position the labels to avoid overlap with other labels for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)): dx = x - embedding[0] dx[index] = 1 dy = y - embedding[1] dy[index] = 1 this_dx = dx[np.argmin(np.abs(dy))] this_dy = dy[np.argmin(np.abs(dx))] if this_dx > 0: horizontalalignment = 'left' x = x + .002 else:
def plots_topography(dpa, ax_dendrogram, ax_project):
    """Plot the cluster topography of ``dpa`` as a dendrogram and a 2-D map.

    :param dpa: object exposing ``labels_`` (cluster index per point),
        ``densities_`` and ``topography_`` (rows ``(i, j, border_density)``).
    :param ax_dendrogram: matplotlib axes receiving the single-linkage
        dendrogram of the clusters.
    :param ax_project: matplotlib axes receiving the MDS projection, where
        marker size scales with the square root of the cluster population
        and line width with the border density between two clusters.
    :return: the tuple ``(ax_dendrogram, ax_project)``.
    """
    Nclus_m = np.max(dpa.labels_) + 1
    cmap = plt.get_cmap('tab10', Nclus_m)

    # Convert from border densities to distances.
    # NOTE(review): assumes topography_ lists every cluster pair exactly
    # once, in condensed-distance order -- otherwise part of Dis stays
    # uninitialised.  Confirm against the producer of topography_.
    nd = int((Nclus_m * Nclus_m - Nclus_m) / 2)
    Dis = np.empty(nd, dtype=float)
    Fmax = max(dpa.densities_)
    Rho_bord = np.zeros((Nclus_m, Nclus_m), dtype=float)
    for nl, row in enumerate(dpa.topography_):
        Rho_bord[row[0]][row[1]] = row[2]
        Rho_bord[row[1]][row[0]] = row[2]
        Dis[nl] = Fmax - row[2]

    # Dendrogram representation; tick labels are coloured per cluster.
    DD = sp.cluster.hierarchy.single(Dis)
    sp.cluster.hierarchy.dendrogram(DD, color_threshold=0,
                                    above_threshold_color='k',
                                    ax=ax_dendrogram)
    for lbl in ax_dendrogram.get_xmajorticklabels():
        lbl.set_color(cmap(int(lbl.get_text())))
        lbl.set_weight('bold')

    # Population of every cluster.
    pop = np.zeros(Nclus_m, dtype=int)
    for cluster_index in dpa.labels_:
        pop[cluster_index] += 1

    # 2D projection of the topography: distance = Fmax - border density,
    # with a zero diagonal.
    d_dis = Fmax - Rho_bord
    np.fill_diagonal(d_dis, 0.)
    model = manifold.MDS(n_components=2, n_jobs=10,
                         dissimilarity='precomputed')
    out = model.fit_transform(d_dis)

    ax_project.yaxis.set_major_locator(plt.NullLocator())
    ax_project.xaxis.set_major_locator(plt.NullLocator())
    sizes = 20. * np.sqrt(pop)
    colours = list(range(Nclus_m))
    ax_project.scatter(out[:, 0], out[:, 1], s=sizes, c=colours, cmap=cmap)
    for i in range(Nclus_m):
        ax_project.annotate(i, (out[i, 0], out[i, 1]))

    # Border densities rescaled to 0..100 drive the colour array.
    d_dis = Rho_bord.copy()
    rr = np.amax(d_dis)
    if rr > 0.:
        d_dis = d_dis / rr * 100.

    # One segment for every ordered pair of clusters (row-major, matching
    # the flattened matrices below).
    n_points = len(out)
    segments = [[out[i, :], out[j, :]]
                for i in range(n_points) for j in range(n_points)]
    values = np.abs(d_dis)
    lc = LineCollection(segments, zorder=0,
                        norm=plt.Normalize(0, values.max()))
    lc.set_array(d_dis.flatten())
    lc.set_edgecolor(np.full(len(segments), 'black'))
    lc.set_facecolor(np.full(len(segments), 'black'))
    lc.set_linewidths(0.2 * Rho_bord.flatten())
    ax_project.add_collection(lc)
    return ax_dendrogram, ax_project
def affinity_propagation_network(X, names=None):
    """Cluster correlated series and draw them as a labelled network.

    A sparse inverse covariance is estimated with a cross-validated
    graphical lasso, the resulting covariance is clustered with affinity
    propagation (cluster contents are printed), and the nodes are drawn on
    a 2-D locally linear embedding with darker edges for stronger partial
    correlations.

    :param X: a pandas DataFrame whose rows are the nodes, or a 2-D array
        whose *columns* are the nodes (a DataFrame is transposed
        internally, an array is used as given).  ``X`` is copied, never
        modified.
    :param names: node captions.  For a DataFrame this may be the name of
        a column to pop and use as captions; when omitted the DataFrame
        index is used.  For an array, omitted names default to
        ``0..n_nodes-1``.

    Very lightly adapted from
    http://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html#example-applications-plot-stock-market-py
    Author: Gael Varoquaux
    License: BSD 3 clause

    The output of the 3 models are combined in a 2D graph where nodes
    represent the variables and:

    * cluster labels define the colour of the nodes
    * the sparse covariance model defines the strength of the edges
    * the 2D embedding positions the nodes in the plane

    Labels are positioned with a heuristic based on the direction of the
    nearest neighbour along each axis, to minimise overlap.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str

    X = X.copy()
    if isinstance(X, pd.DataFrame):
        if isinstance(names, string_types):
            names = X.pop(names)
        elif names is None:
            names = X.index.values
        # DataFrame.as_matrix() was removed from pandas; .values is
        # available on every version.
        X = X.values.T
    # Float array so the in-place standardisation below cannot fail on
    # integer input.
    X = np.asarray(X, dtype=float)
    if names is None:
        # One caption per node, i.e. per column of X.
        names = np.arange(X.shape[1])
    # Array form so that boolean masks can select cluster members.
    names = np.asarray(names)

    ###########################################################################
    # Learn a graphical structure from the correlations
    # GraphLassoCV was renamed GraphicalLassoCV in scikit-learn.
    lasso_cv = (getattr(covariance, 'GraphicalLassoCV', None)
                or covariance.GraphLassoCV)
    edge_model = lasso_cv()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        members = ', '.join(str(member) for member in names[labels == i])
        print('Cluster %i: %s' % ((i + 1), members))

    ###########################################################################
    # Find a low-dimension embedding for visualization.
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Rescale the precision matrix to unit diagonal (partial correlations).
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding.
    # ``spectral`` was renamed ``nipy_spectral`` in matplotlib.
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: one segment ((x0, y0), (x1, y1)) per retained pair.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an empty edge set
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Clamp to avoid 0 / 0 when there is a single cluster.
    colour_scale = float(max(n_labels, 1))

    # Add a label to each node.  The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / colour_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
def plot_market_structure(names, labels, embedding, partial_correlations):
    """Draw a labelled network from a precomputed embedding.

    :param names: node captions, one per node.
    :param labels: integer numpy array with one cluster label per node.
    :param embedding: array of shape ``(2, n_nodes)`` holding the x and y
        coordinate rows of the nodes.
    :param partial_correlations: square precision-like matrix.  It is
        rescaled to unit diagonal on a private copy; the caller's array is
        left untouched.

    Edges are drawn for pairs whose rescaled absolute value exceeds 0.02.
    If the edge values cannot be normalised (no edge passes the
    threshold), the edges fall back to a uniform width.
    """
    import matplotlib.pyplot as plt
    from matplotlib.collections import LineCollection

    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Work on a copy: the in-place scaling below must not leak to the caller.
    partial_correlations = partial_correlations.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding.
    # ``spectral`` was renamed ``nipy_spectral`` in matplotlib.
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: one segment ((x0, y0), (x1, y1)) per retained pair.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    try:
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(8 * values)
    except ValueError:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("Warning: skip line normalization")
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r)
        lc.set_linewidths(1)
    ax.add_collection(lc)

    # Clamp to avoid 0 / 0 when there is a single cluster.
    colour_scale = float(max(labels.max(), 1))

    # Add a label to each node.  The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 color='black',
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / colour_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
    plt.close()
def graphicalAnalysis_plot(d, partial_correlations, my_colors, names, labels,
                           embedding, val_max, title):
    """Draw a correlation network with a colour bar for edge strength.

    :param d: per-node scale factors; marker area is ``500 * d**2``.
    :param partial_correlations: square matrix; pairs whose absolute
        upper-triangle value exceeds 0.02 become edges.
    :param my_colors: colours passed to ``scatter`` for the nodes.
    :param names: node captions, one per node.
    :param labels: integer numpy array with one cluster label per node
        (used for the caption box colour).
    :param embedding: array of shape ``(2, n_nodes)`` with x and y rows.
    :param val_max: value whose 70 % is the upper end of the edge colour
        normalisation.
    :param title: figure title.
    """
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)
    n_labels = labels.max()

    # For correlation network graph
    fig = plt.figure(1, facecolor='w', figsize=(12, 5))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=500 * d ** 2, c=my_colors)

    # Plot the edges: one segment ((x0, y0), (x1, y1)) per retained pair.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * val_max))
    lc.set_array(values)
    # Width grows with strength but is capped at 5.
    lc.set_linewidths(np.minimum(15 * values, 5))
    ax.add_collection(lc)
    axcb = fig.colorbar(lc)
    axcb.set_label('Strength')

    # Clamp to avoid 0 / 0 when there is a single cluster.
    colour_scale = float(max(n_labels, 1))

    # Add a label to each node.  The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / colour_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.title(title)
    plt.show()
def visual_stock_relationship(dataset, edge_model, labels, stock_names):
    """Visualise the clustering result as a labelled network.

    :param dataset: 2-D data array; ``dataset.T`` is embedded, so every
        column is one node.
    :param edge_model: fitted model exposing ``precision_``.
    :param labels: cluster label per node.
    :param stock_names: caption per node.
    :return: None
    """
    # Reduce to two dimensions with a locally linear embedding (LLE).
    #   n_components: target dimensionality
    #   n_neighbors:  number of neighbours searched for every sample
    #   eigen_solver: eigen-decomposition method ('arpack' or 'dense')
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, n_neighbors=6, eigen_solver='dense')
    embedding = node_position_model.fit_transform(dataset.T).T

    # Figure 1 is cleared so the window can be reused across calls.
    pyplot.figure(1, facecolor='w', figsize=(10, 8))
    pyplot.clf()
    axe = pyplot.axes([0., 0., 1., 1.])
    pyplot.axis('off')

    # Partial correlations: precision matrix rescaled to unit diagonal.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / numpy.sqrt(numpy.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, numpy.newaxis]
    # Keep only upper-triangle entries above the threshold.
    non_zero = (numpy.abs(numpy.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes at their embedded coordinates.
    pyplot.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                   cmap=pyplot.cm.nipy_spectral)

    # Plot the edges: numpy.where yields the row/column indices of the
    # retained pairs; each becomes one segment ((x0, y0), (x1, y1)).
    start_idx, end_idx = numpy.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = numpy.abs(partial_correlations[non_zero])
    lc = LineCollection(segments, zorder=0, cmap=pyplot.cm.hot_r,
                        norm=pyplot.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    axe.add_collection(lc)

    # Clamp to avoid 0 / 0 when there is a single cluster.
    n_labels = max(labels)
    colour_scale = float(max(n_labels, 1))

    # Add a label to each node, placed so that labels avoid each other.
    for index, (name, label, (x, y)) in enumerate(
            zip(stock_names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[numpy.argmin(numpy.abs(dy))]
        this_dy = dy[numpy.argmin(numpy.abs(dx))]
        # Push the caption away from that neighbour.
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .001
        else:
            horizontalalignment = 'right'
            x = x - .001
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .001
        else:
            verticalalignment = 'top'
            y = y - .001
        pyplot.text(x, y, name, size=10, fontproperties='SimHei',
                    horizontalalignment=horizontalalignment,
                    verticalalignment=verticalalignment,
                    bbox=dict(facecolor='w',
                              edgecolor=pyplot.cm.nipy_spectral(
                                  label / colour_scale),
                              alpha=.6))

    # Axis limits with a margin around the embedding.
    # numpy.ptp() works on every NumPy version; ndarray.ptp() was removed
    # in NumPy 2.0.
    x_span = numpy.ptp(embedding[0])
    y_span = numpy.ptp(embedding[1])
    pyplot.xlim(embedding[0].min() - .15 * x_span,
                embedding[0].max() + .10 * x_span)
    pyplot.ylim(embedding[1].min() - .03 * y_span,
                embedding[1].max() + .03 * y_span)

    pyplot.show()
# Synthetic data: a logarithmic trend plus uniform integer noise.
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log(1 + np.arange(n))

###############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()
y_ = ir.fit_transform(x, y)

# LinearRegression expects a 2-D design matrix, hence the extra axis.
lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)

###############################################################################
# plot result

# One vertical segment per sample, from the observation to its isotonic fit.
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'g.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
def showCovariances(names, variation):
    """Cluster the series in ``variation`` and draw them as a network.

    A sparse inverse covariance is estimated with a cross-validated
    graphical lasso, the covariance is clustered with affinity propagation
    (cluster contents are printed), and the nodes are drawn on a 2-D
    locally linear embedding with edges for the partial correlations.

    :param names: caption per series, one for every row of ``variation``.
    :param variation: 2-D float array with one row per series; it is
        copied, never modified.
    """
    # Array form so that boolean masks can select cluster members.
    names = np.asarray(names)

    ###########################################################################
    # Learn a graphical structure from the correlations
    # GraphLassoCV was renamed GraphicalLassoCV in scikit-learn.
    lasso_cv = (getattr(covariance, 'GraphicalLassoCV', None)
                or covariance.GraphLassoCV)
    edge_model = lasso_cv()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane.
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Rescale the precision matrix to unit diagonal (partial correlations).
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding.
    # ``spectral`` was renamed ``nipy_spectral`` in matplotlib.
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: one segment ((x0, y0), (x1, y1)) per retained pair.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:  # values.max() raises on an empty edge set
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Clamp to avoid 0 / 0 when there is a single cluster.
    colour_scale = float(max(n_labels, 1))

    # Add a label to each node.  The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Offsets to the node that is closest along the *other* axis.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / colour_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
def visualize(self, cluster=False, savefile=None, doshow=True, seed=None,
              node_labels=None, label_idx=None, mark_nodes=False):
    """
    Visualize the graph structure.

    The node positions are derived from the normalized PMI using the
    t-distributed stochastic neighbors embedding, while the graph edges are
    derived from the normalized PMI values. To reduce clutter, only those
    edges above the 95th percentile of the positive normalized PMI values
    are drawn. The sizes of the nodes are proportional to
    ``self.train_marginal``.

    :param cluster: If true, also cluster the nodes (via ``self.cluster``)
        and color them according to cluster label.
    :param savefile: The name of a file to save the figure to.
    :param doshow: If true, then display the figure.
    :param seed: The seed for the random number generator used by the
        t-distributed stochastic neighbors embedding.
    :param node_labels: A list of strings containing the labels for a set
        of nodes.
    :param label_idx: The indices of the nodes to be labeled.
    :param mark_nodes: If true, also mark the labeled nodes using a large
        green circle.
    :return: The tuple ``(ax, node_positions)``: the matplotlib axes and
        the ``(2, n_nodes)`` array of node coordinates.
    :raises ValueError: If node_labels and label_idx differ in length.
    """
    if node_labels is None:
        node_labels = []
    if label_idx is None:
        label_idx = []
    if len(label_idx) != len(node_labels):
        raise ValueError(
            "Length of node_labels must be the same as label_idx.")

    # use normalized PMI for similarity metric
    similarity = self.pmi / -np.log(self.joint_probs)
    similarity[np.diag_indices_from(similarity)] = 1.0

    # compute the 2-d manifold and the projection of the data onto it.
    # this defines the node positions
    distance = -(similarity - 1.0)  # convert to [-2.0, 0.0] and then make positive
    # init='random' is spelled out: it was the historical default, and a
    # PCA initialisation is rejected together with metric='precomputed'.
    node_position_model = manifold.TSNE(verbose=self.verbose,
                                        metric='precomputed',
                                        learning_rate=100,
                                        random_state=seed,
                                        init='random')
    node_positions = node_position_model.fit_transform(distance).T

    if cluster:
        # also include cluster information in the visualization
        clusters = self.cluster(normalize=True)

    plt.figure(1, facecolor='k', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Plot the nodes using the coordinates of our embedding
    base_symbol_size = (self.train_marginal
                        / float(self.train_marginal.max()) + 0.05)
    if cluster:
        # color nodes by cluster; ``spectral_r`` was renamed
        # ``nipy_spectral_r`` in matplotlib
        plt.scatter(node_positions[0], node_positions[1],
                    s=300 * base_symbol_size, c=clusters,
                    cmap=plt.cm.nipy_spectral_r)
    else:
        # a constant colour needs no colormap
        plt.scatter(node_positions[0], node_positions[1],
                    s=300 * base_symbol_size, c='DodgerBlue')

    # Edges: upper-triangle pairs above the 95th percentile of the
    # positive similarities.
    non_zero = np.triu(similarity, k=1) > np.percentile(
        similarity[similarity > 0], 95.0)
    start_idx, end_idx = np.where(non_zero)

    # one segment ((x0, y0), (x1, y1)) per retained pair
    segments = [[node_positions[:, start], node_positions[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = similarity[non_zero]
    if values.size:  # values.min() raises on an empty edge set
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot,
                            norm=plt.Normalize(values.min(),
                                               np.percentile(values, 95.0)))
        lc.set_array(values)
        lc.set_linewidths(2 * values)
        ax.add_collection(lc)

    # np.ptp() works on every NumPy version; ndarray.ptp() was removed in 2.0.
    x_span = np.ptp(node_positions[0])
    y_span = np.ptp(node_positions[1])

    for label, node_idx in zip(node_labels, label_idx):
        if mark_nodes:
            plt.scatter(node_positions[0, node_idx],
                        node_positions[1, node_idx], s=500, c='Green')
        plt.text(node_positions[0, node_idx] + 0.02 * x_span,
                 node_positions[1, node_idx] + 0.02 * y_span,
                 label, size=20, color='White')

    plt.xlim(node_positions[0].min() - .15 * x_span,
             node_positions[0].max() + .10 * x_span)
    plt.ylim(node_positions[1].min() - .03 * y_span,
             node_positions[1].max() + .03 * y_span)

    if savefile is not None:
        plt.savefig(savefile, facecolor='k', edgecolor='Yellow')

    if doshow:
        plt.show()

    return ax, node_positions
class HoughDemo(ImageProcessDemo):
    """Interactive demo of Hough line and circle detection.

    The image is blurred, edge-detected with Canny, and the detected line
    segments and circles are overlaid on the displayed image through a
    ``LineCollection`` and an ``EllipseCollection`` that are updated on
    every redraw.
    """

    TITLE = u"Hough Demo"
    DEFAULT_IMAGE = "stuff.jpg"
    SETTINGS = ["th2", "show_canny", "rho", "theta", "hough_th",
                "minlen", "maxgap", "dp", "mindist", "param2",
                "min_radius", "max_radius", "blur_sigma",
                "linewidth", "alpha", "check_line", "check_circle"]

    # Which detections are drawn
    check_line = Bool(True)
    check_circle = Bool(True)

    # Gaussian blur parameters
    blur_sigma = Range(0.1, 5.0, 2.0)
    show_blur = Bool(False)

    # Canny parameters
    th2 = Range(0.0, 255.0, 200.0)
    show_canny = Bool(False)

    # HoughLine parameters
    rho = Range(1.0, 10.0, 1.0)
    theta = Range(0.1, 5.0, 1.0)
    hough_th = Range(1, 100, 40)
    minlen = Range(0, 100, 10)
    maxgap = Range(0, 20, 10)

    # HoughCircle parameters
    dp = Range(1.0, 5.0, 1.9)
    mindist = Range(1.0, 100.0, 50.0)
    param2 = Range(5, 100, 50)
    min_radius = Range(5, 100, 20)
    max_radius = Range(10, 100, 70)

    # draw parameters
    linewidth = Range(1.0, 3.0, 1.0)
    alpha = Range(0.0, 1.0, 0.6)

    def control_panel(self):
        """Return the traits UI group holding all parameter editors."""
        return VGroup(
            Group(
                Item("blur_sigma", label=u"标准方差"),
                Item("show_blur", label=u"显示结果"),
                label=u"高斯模糊参数"
            ),
            Group(
                Item("th2", label=u"阈值2"),
                Item("show_canny", label=u"显示结果"),
                label=u"边缘检测参数"
            ),
            Group(
                Item("rho", label=u"偏移分辨率(像素)"),
                Item("theta", label=u"角度分辨率(角度)"),
                Item("hough_th", label=u"阈值"),
                Item("minlen", label=u"最小长度"),
                Item("maxgap", label=u"最大空隙"),
                label=u"直线检测"
            ),
            Group(
                Item("dp", label=u"分辨率(像素)"),
                Item("mindist", label=u"圆心最小距离(像素)"),
                Item("param2", label=u"圆心检查阈值"),
                Item("min_radius", label=u"最小半径"),
                Item("max_radius", label=u"最大半径"),
                label=u"圆检测"
            ),
            Group(
                Item("linewidth", label=u"线宽"),
                Item("alpha", label=u"alpha"),
                HGroup(
                    Item("check_line", label=u"直线"),
                    Item("check_circle", label=u"圆"),
                ),
                label=u"绘图参数"
            )
        )

    def __init__(self, **kwargs):
        """Register the redraw triggers and create the overlay artists."""
        super(HoughDemo, self).__init__(**kwargs)
        self.connect_dirty("th2, show_canny, show_blur, rho, theta, hough_th,"
                           "min_radius, max_radius, blur_sigma,"
                           "minlen, maxgap, dp, mindist, param2, "
                           "linewidth, alpha, check_line, check_circle")
        self.lines = LineCollection([], linewidths=2, alpha=0.6)
        self.axe.add_collection(self.lines)

        self.circles = EllipseCollection(
            [], [], [],
            units="xy",
            facecolors="none",
            edgecolors="red",
            linewidths=2,
            alpha=0.6,
            transOffset=self.axe.transData)
        self.axe.add_collection(self.circles)

    def _img_changed(self):
        """Cache a grayscale copy of the newly assigned image."""
        self.img_gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)

    def draw(self):
        """Run blur, Canny and both Hough transforms, then refresh the view."""
        img_smooth = cv2.GaussianBlur(self.img_gray, (0, 0),
                                      self.blur_sigma, self.blur_sigma)
        img_edge = cv2.Canny(img_smooth, self.th2 * 0.5, self.th2)

        # Every preview is single-channel, so all of them are expanded with
        # the gray-to-BGR conversion (the blur previews previously used a
        # Bayer demosaicing code).
        if self.show_blur and self.show_canny:
            show_img = cv2.cvtColor(np.maximum(img_smooth, img_edge),
                                    cv2.COLOR_GRAY2BGR)
        elif self.show_blur:
            show_img = cv2.cvtColor(img_smooth, cv2.COLOR_GRAY2BGR)
        elif self.show_canny:
            show_img = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2BGR)
        else:
            show_img = self.img

        if self.check_line:
            theta = self.theta / 180.0 * np.pi  # degrees -> radians
            lines = cv2.HoughLinesP(img_edge,
                                    self.rho, theta, self.hough_th,
                                    minLineLength=self.minlen,
                                    maxLineGap=self.maxgap)
            if lines is not None:
                # The result is (1, N, 4) or (N, 1, 4) depending on the
                # OpenCV version; reshape handles both, whereas lines[0]
                # keeps a single line under the latter layout.
                self.lines.set_segments(lines.reshape(-1, 2, 2))
                self.lines.set_visible(True)
            else:
                self.lines.set_visible(False)
        else:
            self.lines.set_visible(False)

        if self.check_circle:
            # 3 is the numeric value of the Hough-gradient method constant.
            circles = cv2.HoughCircles(img_smooth, 3,
                                       self.dp, self.mindist,
                                       param1=self.th2,
                                       param2=self.param2,
                                       minRadius=self.min_radius,
                                       maxRadius=self.max_radius)
            if circles is not None:
                circles = circles[0]  # rows of (x, y, radius)
                # The collection is updated through its private fields.
                self.circles._heights = self.circles._widths = circles[:, 2]
                self.circles.set_offsets(circles[:, :2])
                self.circles._angles = np.zeros(len(circles))
                self.circles._transOffset = self.axe.transData
                self.circles.set_visible(True)
            else:
                self.circles.set_visible(False)
        else:
            self.circles.set_visible(False)

        self.lines.set_linewidths(self.linewidth)
        self.circles.set_linewidths(self.linewidth)
        self.lines.set_alpha(self.alpha)
        self.circles.set_alpha(self.alpha)

        self.draw_image(show_img)
# Load the test samples: column 3 is the feature, column 4 the target.
# The context manager closes the file even if a row fails to parse.
with open('Datatest.csv') as f:
    for row in csv.reader(f):
        diabetes_X_test.append(float(row[3]))
        diabetes_y_test.append(float(row[4]))

# Fit the isotonic model on the training data.
ir = IsotonicRegression()
y_ = ir.fit_transform(diabetes_X_train, diabetes_y_train)

#lr = LinearRegression()
#lr.fit(diabetes_X_train, diabetes_y_train)  # x needs to be 2d for LinearRegression

# Residual segments between each training target and its isotonic fit
# (built but currently not added to the plot, see below).
segments = [[[i, diabetes_y_train[i]], [i, y_[i]]]
            for i in range(len(diabetes_X_train))]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(diabetes_y_train)))
lc.set_linewidths(0.5 * np.ones(len(diabetes_X_train)))

# Predict once and reuse the result for the plot and the printout.
test_prediction = ir.predict(diabetes_X_test)

fig = plt.figure()
#plt.plot(diabetes_X_train, diabetes_y_train, 'r.', markersize=12,color='green')
# The colour is given only through the keyword: a colour letter in the
# format string would be overridden by it and trigger a warning.
plt.plot(diabetes_X_test, diabetes_y_test, '.', markersize=12, color='black')
#plt.plot(diabetes_X_train, y_, 'g.-', markersize=12,color='yellow')
plt.plot(diabetes_X_test, test_prediction, '-', color='red')
#plt.gca().add_collection(lc)

print("a=", diabetes_X_test)
print("a=", test_prediction)

r1 = r2_score(diabetes_y_train, ir.predict(diabetes_X_train),
              multioutput='variance_weighted')
print("r1=", r1)
#r2=r2_score(diabetes_y_test , ir.predict(diabetes_X_test), multioutput='variance_weighted')
#print("r2=",r2)
#plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
def relation_plot(self, df, good_list):
    """Cluster stocks by daily price variation and save a relation graph.

    Args:
        df: table with at least ``code``, ``open`` and ``close`` columns;
            every code in ``good_list`` must have the same number of rows,
            because the per-code series are stacked with ``np.vstack``.
        good_list: sequence of stock codes to analyse.

    Side effects:
        Logs the cluster membership and the industries found in each
        cluster, and writes the figure to ``/tmp/relation.png``.
    """
    close_prices = np.vstack(
        [df[df.code == code].close.tolist() for code in good_list])
    open_prices = np.vstack(
        [df[df.code == code].open.tolist() for code in good_list])
    # the daily variations of the quotes are what carry most information
    variation = (close_prices - open_prices) * 100 / open_prices
    logger.info("get variation succeed")

    # #########################################################################
    # learn a graphical structure from the correlations
    # The estimator was renamed GraphLassoCV -> GraphicalLassoCV in
    # scikit-learn; use whichever name the installed version provides.
    lasso_cls = getattr(covariance, 'GraphicalLassoCV', None)
    if lasso_cls is None:
        lasso_cls = covariance.GraphLassoCV
    edge_model = lasso_cls()
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    logger.info("mode compute succeed")

    # #########################################################################
    # cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    code_list = np.array(good_list)

    # Map every stock code to the name of the industry that lists it.
    industry_dict = dict()
    industry_df_info = IndustryInfo.get()
    # Series.items() replaces the removed Series.iteritems().
    for index, name in industry_df_info.name.items():
        content = industry_df_info.loc[index]['content']
        for code in json.loads(content):
            industry_dict[code] = name

    cluster_dict = dict()
    for i in range(n_labels + 1):
        cluster_dict[i] = code_list[labels == i]
        name_list = [CStockInfo.get(code, 'name') for code in cluster_dict[i]]
        logger.info('cluster code %i: %s' % ((i + 1), ', '.join(name_list)))

    cluster_info = dict()
    for group, _code_list in cluster_dict.items():
        industries = cluster_info.setdefault(group, set())
        for code in _code_list:
            industries.add(industry_dict[code])
        # Report against the current group; the previous code reused the
        # stale loop variable ``i`` and always printed the last cluster id.
        logger.info('cluster inustry %i: %s' %
                    ((group + 1), ', '.join(list(industries))))

    # #########################################################################
    # find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane
    # we use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    # #########################################################################
    # visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d**2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # plot the edges; each segment is ((x0, y0), (x1, y1))
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:
        # values.max() raises on an empty array, so edges are only drawn
        # when at least one partial correlation exceeds the threshold.
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    color_scale = float(max(n_labels, 1))  # avoid 0/0 with a single cluster
    for index, (name, label, (x, y)) in enumerate(
            zip(code_list, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.savefig('/tmp/relation.png', dpi=1000)
def visual_stock_relationship(dataset, edge_model, labels, stock_names):
    """Draw the stock relationship graph and show it on screen.

    Args:
        dataset: 2-D array; its transpose is embedded into two dimensions,
            one point per stock.
        edge_model: fitted estimator exposing ``precision_``; the partial
            correlations derived from it define the edges.
        labels: cluster label of each stock (used for colours).
        stock_names: text drawn next to each node.
    """
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(dataset.T).T

    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d**2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges; each segment is ((x0, y0), (x1, y1))
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:
        # values.max() raises on an empty array, so edges are only drawn
        # when at least one partial correlation exceeds the threshold.
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    n_labels = max(labels)
    color_scale = float(max(n_labels, 1))  # avoid dividing by zero
    for index, (name, label, (x, y)) in enumerate(
            zip(stock_names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .001
        else:
            horizontalalignment = 'right'
            x = x - .001
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .001
        else:
            verticalalignment = 'top'
            y = y - .001
        plt.text(x, y, name, size=10, fontproperties='SimHei',
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.show()
class SunPlotPy(wx.Frame, Spatial, Grid):
    """
    The main frame of the application.

    A wx frame embedding a matplotlib canvas plus the controls used to pick
    a variable, time step, vertical layer, colormap and colour limits.
    Data access is inherited from the project classes ``Spatial`` and
    ``Grid``, which are (re)initialised when a file or grid folder is opened.
    """
    title = 'sunplot(py)'

    # Plotting options
    autoclim = True
    showedges = False
    bgcolor = 'k'
    textcolor = 'w'
    cmap = 'RdBu'
    particlesize = 1.8
    particlecolor = 'm'

    # other flags
    collectiontype = 'cells'
    oldcollectiontype = 'cells'

    # Index of the time step currently selected in the drop-down list
    tindex = 0
    depthlevs = [0., 10., 100., 200., 300., 400., 500.,
                 1000., 2000., 3000., 4000., 5000]

    _FillValue = 999999

    def __init__(self):
        wx.Frame.__init__(self, None, -1, self.title)

        self.create_menu()
        self.create_status_bar()
        self.create_main_panel()

        #self.draw_figure()

    def create_menu(self):
        """Build the File / Tools / Help menus and bind their handlers."""
        self.menubar = wx.MenuBar()

        ###
        # File Menu
        ###
        menu_file = wx.Menu()
        # Load a hydro output file
        m_expt = menu_file.Append(-1, "&Open file\tCtrl-O", "Open netcdf file")
        self.Bind(wx.EVT_MENU, self.on_open_file, m_expt)

        # Load a grid file
        m_grid = menu_file.Append(-1, "&Load grid\tCtrl-G",
                                  "Load SUNTANS grid from folder")
        self.Bind(wx.EVT_MENU, self.on_load_grid, m_grid)

        # Load a particle file
        m_part = menu_file.Append(-1, "&Load PTM file\tCtrl-Shift-P",
                                  "Load a PTM file")
        self.Bind(wx.EVT_MENU, self.on_load_ptm, m_part)

        # Save current scene as an animation
        m_anim = menu_file.Append(
            -1, "&Save animation of current scene\tCtrl-S", "Save animation")
        self.Bind(wx.EVT_MENU, self.on_save_anim, m_anim)

        # Save the current figure
        m_prin = menu_file.Append(-1, "&Print current scene\tCtrl-P",
                                  "Save figure")
        self.Bind(wx.EVT_MENU, self.on_save_fig, m_prin)

        menu_file.AppendSeparator()
        # Exit
        m_exit = menu_file.Append(-1, "E&xit\tCtrl-X", "Exit")
        self.Bind(wx.EVT_MENU, self.on_exit, m_exit)

        ###
        # Tools menu
        ###
        menu_tools = wx.Menu()
        m_gridstat = menu_tools.Append(-1, "&Plot grid size statistics",
                                       "SUNTANS grid size")
        self.Bind(wx.EVT_MENU, self.on_plot_gridstat, m_gridstat)

        m_countcells = menu_tools.Append(-1, "&Count # grid cells",
                                         "Grid cell count")
        self.Bind(wx.EVT_MENU, self.on_count_cells, m_countcells)

        m_overlaybathy = menu_tools.Append(-1, "&Overlay depth contours",
                                           "Depth overlay")
        self.Bind(wx.EVT_MENU, self.on_overlay_bathy, m_overlaybathy)

        ###
        # Help Menu
        ###
        menu_help = wx.Menu()
        m_about = menu_help.Append(-1, "&About\tF1", "About the demo")
        self.Bind(wx.EVT_MENU, self.on_about, m_about)

        # Add all of the menu bars
        self.menubar.Append(menu_file, "&File")
        self.menubar.Append(menu_tools, "&Tools")
        self.menubar.Append(menu_help, "&Help")
        self.SetMenuBar(self.menubar)

    def create_main_panel(self):
        """ Creates the main panel with all the controls on it:
             * mpl canvas
             * mpl navigation toolbar
             * Control panel for interaction
        """
        self.panel = wx.Panel(self)

        # Create the mpl Figure and FigCanvas objects.
        self.dpi = 100
        #self.fig = Figure((7.0, 6.0), dpi=self.dpi,facecolor=self.bgcolor)
        self.fig = Figure((7.0, 6.0), dpi=self.dpi)
        self.canvas = FigCanvas(self.panel, -1, self.fig)

        # Since we have only one plot, we can use add_axes
        # instead of add_subplot, but then the subplot
        # configuration tool in the navigation toolbar wouldn't
        # work.
        self.axes = self.fig.add_subplot(111)
        #SetAxColor(self.axes,self.textcolor,self.bgcolor)

        # Bind the 'pick' event for clicking on one of the bars
        #self.canvas.mpl_connect('pick_event', self.on_pick)

        ########
        # Create widgets
        ########
        self.variable_list = wx.ComboBox(self.panel,
                                         size=(200, -1),
                                         choices=['Select a variable...'],
                                         style=wx.CB_READONLY)
        self.variable_list.Bind(wx.EVT_COMBOBOX, self.on_select_variable)

        self.time_list = wx.ComboBox(self.panel,
                                     size=(200, -1),
                                     choices=['Select a time step...'],
                                     style=wx.CB_READONLY)
        self.time_list.Bind(wx.EVT_COMBOBOX, self.on_select_time)

        self.depthlayer_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a vertical layer...'],
            style=wx.CB_READONLY)
        self.depthlayer_list.Bind(wx.EVT_COMBOBOX, self.on_select_depth)

        self.show_edge_check = wx.CheckBox(self.panel, -1, "Show Edges",
                                           style=wx.ALIGN_RIGHT)
        self.show_edge_check.Bind(wx.EVT_CHECKBOX, self.on_show_edges)

        if USECMOCEAN:
            cmaps = []
            for cmap in cm.cmapnames:
                cmaps.append(cmap)
                cmaps.append(cmap + '_r')  # Add all reverse map options
        else:
            # Use matplotlib standard
            cmaps = list(matplotlib.cm.datad.keys())

        cmaps.sort()
        self.colormap_list = wx.ComboBox(self.panel,
                                         size=(100, -1),
                                         choices=cmaps,
                                         style=wx.CB_READONLY)
        self.colormap_list.Bind(wx.EVT_COMBOBOX, self.on_select_cmap)
        self.colormap_label = wx.StaticText(self.panel, -1, "Colormap:")

        self.clim_check = wx.CheckBox(self.panel, -1, "Manual color limits ",
                                      style=wx.ALIGN_RIGHT)
        self.clim_check.Bind(wx.EVT_CHECKBOX, self.on_clim_check)

        self.climlow = wx.TextCtrl(self.panel,
                                   size=(100, -1),
                                   style=wx.TE_PROCESS_ENTER)
        self.climlow.Bind(wx.EVT_TEXT_ENTER, self.on_climlow)

        self.climhigh = wx.TextCtrl(self.panel,
                                    size=(100, -1),
                                    style=wx.TE_PROCESS_ENTER)
        self.climhigh.Bind(wx.EVT_TEXT_ENTER, self.on_climhigh)

        # Labels
        self.variable_label = wx.StaticText(self.panel, -1, "Variable:",
                                            size=(200, -1))
        self.time_label = wx.StaticText(self.panel, -1, "Time step:",
                                        size=(200, -1))
        self.depth_label = wx.StaticText(self.panel, -1, "Vertical level:",
                                         size=(200, -1))

        # Create the navigation toolbar, tied to the canvas
        self.toolbar = NavigationToolbar(self.canvas)
        #self.toolbar.toolitems[8][3]='my_save_fig'

        #def my_save_fig(self,*args):
        #     print 'saving figure'
        #    return "break"

        #########
        # Layout with box sizers
        #########
        self.vbox = wx.BoxSizer(wx.VERTICAL)
        self.vbox.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.vbox.Add(self.toolbar, 0, wx.EXPAND)
        self.vbox.AddSpacer(10)
        #self.vbox.Add((-1,25))

        flags = wx.ALIGN_LEFT | wx.ALL | wx.ALIGN_CENTER_VERTICAL

        self.hbox0 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox0.Add(self.show_edge_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_label, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_list, 0, border=10, flag=flags)
        self.hbox0.Add(self.clim_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.climlow, 0, border=10, flag=flags)
        self.hbox0.Add(self.climhigh, 0, border=10, flag=flags)

        self.vbox.AddSpacer(5)
        self.hbox1 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox1.Add(self.variable_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.time_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.depth_label, 0, border=10, flag=flags)

        self.vbox.AddSpacer(5)
        self.hbox2 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox2.Add(self.variable_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.time_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.depthlayer_list, 0, border=10, flag=flags)

        self.vbox.Add(self.hbox1, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox2, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox0, 0, flag=wx.ALIGN_LEFT | wx.TOP)

        self.panel.SetSizer(self.vbox)
        self.vbox.Fit(self)

    ##########
    # Event functions
    ##########
    def create_figure(self):
        """
        Creates the figure: builds a new collection for the current data,
        adds it to the axes and creates or refreshes the colorbar.
        """
        # Find the colorbar limits if unspecified
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])

        if 'collection' in self.__dict__:
            # Artist.remove() detaches the old collection from its axes;
            # Axes.collections can no longer be mutated directly in recent
            # matplotlib releases.
            self.collection.remove()
        else:
            # First call - set the axes limits
            self.axes.set_aspect('equal')
            self.axes.set_xlim(self.xlims)
            self.axes.set_ylim(self.ylims)

        if self.collectiontype == 'cells':
            self.collection = PolyCollection(self.xy, cmap=self.cmap)
            self.collection.set_array(np.array(self.data[:]))
            if not self.showedges:
                # Colour the edges like the faces so they are not visible
                self.collection.set_edgecolors(
                    self.collection.to_rgba(np.array((self.data[:]))))
        elif self.collectiontype == 'edges':
            xylines = [self.xp[self.edges], self.yp[self.edges]]
            linesc = [
                list(zip(xylines[0][ii, :], xylines[1][ii, :]))
                for ii in range(self.Ne)
            ]
            self.collection = LineCollection(linesc,
                                             array=np.array(self.data[:]),
                                             cmap=self.cmap)

        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        self.axes.add_collection(self.collection)
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.axes.set_xlabel('Easting [m]')
        self.axes.set_ylabel('Northing [m]')

        # create a colorbar
        if 'cbar' not in self.__dict__:
            self.cbar = self.fig.colorbar(self.collection)
            #SetAxColor(self.cbar.ax.axes,self.textcolor,self.bgcolor)
        else:
            print('Updating colorbar...')
            # Same call update_figure() uses; on_mappable_changed() is not
            # available in current matplotlib.
            self.cbar.update_normal(self.collection)

        self.canvas.draw()

    def update_figure(self):
        """Refresh colours, title and colorbar for the currently loaded data."""
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])
        else:
            self.clim = [float(self.climlow.GetValue()),
                         float(self.climhigh.GetValue())]

        # check whether it is cell or edge type
        if self.hasDim(self.variable, self.griddims['Ne']):
            self.collectiontype = 'edges'
        elif self.hasDim(self.variable, self.griddims['Nc']):
            self.collectiontype = 'cells'

        # Create a new figure if the variable has gone from cell to edge or
        # vice versa
        if not self.collectiontype == self.oldcollectiontype:
            self.create_figure()
            self.oldcollectiontype = self.collectiontype

        self.collection.set_array(np.array(self.data[:]))
        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        # Cells only
        if self.collectiontype == 'cells':
            if not self.showedges:
                self.collection.set_edgecolors(
                    self.collection.to_rgba(np.array((self.data[:]))))
            else:
                self.collection.set_edgecolors('k')
                self.collection.set_linewidths(0.2)

        # Update the title
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)

        # Update the colorbar
        self.cbar.update_normal(self.collection)

        # redraw the figure
        self.canvas.draw()

    def on_pick(self, event):
        # The event received here is of the type
        # matplotlib.backend_bases.PickEvent
        #
        # It carries lots of information, of which we're using
        # only a small amount here.
        box_points = event.artist.get_bbox().get_points()
        msg = "You've clicked on a bar with coords:\n %s" % box_points

        dlg = wx.MessageDialog(self, msg, "Click!",
                               wx.OK | wx.ICON_INFORMATION)
        dlg.ShowModal()
        dlg.Destroy()

    def on_select_variable(self, event):
        """Load the chosen variable and repopulate the vertical layer list."""
        vname = event.GetString()
        self.flash_status_message("Selecting variable: %s" % vname)
        # update the spatial object and load the data
        self.variable = vname
        self.loadData(variable=self.variable)

        # Check if the variable has a depth coordinate
        depthstr = ['']
        # If so populate the vertical layer box
        if self.hasDim(self.variable, self.griddims['Nk']):
            depthstr = ['%3.1f' % self.z_r[k] for k in range(self.Nkmax)]
            depthstr += ['surface', 'seabed']
        elif self.hasDim(self.variable, 'Nkw'):
            depthstr = ['%3.1f' % self.z_w[k] for k in range(self.Nkmax + 1)]

        self.depthlayer_list.SetItems(depthstr)

        # Update the plot
        self.update_figure()

    def on_select_time(self, event):
        """Switch to the selected time step (hydro data or particles)."""
        self.tindex = event.GetSelection()
        # plot_type is only set once a file has been opened; do nothing
        # when the placeholder entry is selected before that.
        plot_type = getattr(self, 'plot_type', None)
        # Update the object time index and reload the data
        if plot_type == 'hydro':
            if not self.tstep == self.tindex:
                self.tstep = self.tindex
                self.loadData()
                self.flash_status_message("Selecting time step: %s..." %
                                          event.GetString())
                # Update the plot
                self.update_figure()
        elif plot_type == 'particles':
            self.PTM.plot(self.tindex, ax=self.axes,
                          xlims=self.axes.get_xlim(),
                          ylims=self.axes.get_ylim())
            self.canvas.draw()

    def on_select_depth(self, event):
        """Switch to the selected vertical layer and redraw."""
        kindex = event.GetSelection()
        if not self.klayer[0] == kindex:
            # Check if its the seabed or surface value
            if kindex >= self.Nkmax:
                kindex = event.GetString()
            self.klayer = [kindex]
            self.loadData()
            self.flash_status_message("Selecting depth: %s..." %
                                      event.GetString())
            # Update the plot
            self.update_figure()

    def on_open_file(self, event):
        """Open one or more NetCDF files and draw the start variable."""
        file_choices = "SUNTANS NetCDF (*.nc)|*.nc*|UnTRIM NetCDF (*.nc)|*.nc*|All Files (*.*)|*.*"

        dlg = wx.FileDialog(self,
                            message="Open SUNTANS file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_MULTIPLE)

        if dlg.ShowModal() == wx.ID_OK:
            self.plot_type = 'hydro'

            path = dlg.GetPaths()
            filter_index = dlg.GetFilterIndex()

            # Initialise the class
            if filter_index == 0 or filter_index > 1:  # SUNTANS
                self.flash_status_message("Opening SUNTANS file: %s" % path)
                try:
                    Spatial.__init__(self, path, _FillValue=self._FillValue)
                except Exception:
                    # Retry with the negative fill value (best effort, as
                    # before, but without swallowing KeyboardInterrupt).
                    Spatial.__init__(self, path, _FillValue=-999999)
                startvar = 'dv'
            if filter_index == 1:  # UnTRIM
                self.flash_status_message("Opening UnTRIMS file: %s" % path)
                #Spatial.__init__(self,path,gridvars=untrim_gridvars,griddims=untrim_griddims)
                UNTRIMSpatial.__init__(self, path)
                startvar = 'Mesh2_face_depth'

            # Populate the drop down menus
            vnames = self.listCoordVars()
            self.variable_list.SetItems(vnames)

            # Update the time drop down list
            if 'time' in self.__dict__:
                self.timestr = [
                    datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
                    for tt in self.time
                ]
            else:
                # Assume that it is a harmonic-type file
                self.timestr = self.nc.Constituent_Names.split()

            self.time_list.SetItems(self.timestr)

            # Draw the depth
            if startvar in vnames:
                self.variable = startvar
                self.loadData()
                self.create_figure()

    def on_load_grid(self, event):
        """Load a SUNTANS grid from a folder and plot its mesh."""
        dlg = wx.DirDialog(self,
                           message="Open SUNTANS grid from folder...",
                           defaultPath=os.getcwd(),
                           style=wx.DD_DEFAULT_STYLE)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()

            # Initialise the class
            self.flash_status_message(
                "Opening SUNTANS grid from folder: %s" % path)
            Grid.__init__(self, path)

            # Plot the Grid
            if 'collection' in self.__dict__:
                # See create_figure(): Axes.collections is read-only in
                # recent matplotlib, so remove through the artist.
                self.collection.remove()

            self.axes, self.collection = self.plotmesh(ax=self.axes,
                                                       edgecolors='y')

            # redraw the figure
            self.canvas.draw()

    def on_load_ptm(self, event):
        """Open a particle tracking (PTM) file and plot its last time step."""
        file_choices = "PTM NetCDF (*.nc)|*.nc|PTM Binary (*_bin.out)|*_bin.out|All Files (*.*)|*.*"

        dlg = wx.FileDialog(self,
                            message="Open PTM file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_MULTIPLE)

        if dlg.ShowModal() == wx.ID_OK:
            self.plot_type = 'particles'
            # The dialog is created with FD_MULTIPLE, for which GetPaths()
            # is the supported accessor; only the first file is used.
            path = dlg.GetPaths()[0]

            filter_index = dlg.GetFilterIndex()
            if filter_index > 1:
                # "All Files": choose the reader from the file name instead
                # of leaving self.PTM unset.
                filter_index = 1 if path.endswith('_bin.out') else 0

            # Initialise the class
            if filter_index == 0:  # netcdf
                self.flash_status_message(
                    "Opening PTM netcdf file: %s" % path)
                self.PTM = PtmNC(path)
            else:  # binary
                self.flash_status_message(
                    "Opening PTM binary file: %s" % path)
                self.PTM = PtmBin(path)

            self.Nt = self.PTM.nt

            # Update the time drop down list
            self.timestr = [
                datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
                for tt in self.PTM.time
            ]
            self.time_list.SetItems(self.timestr)

            # Plot the last time step, reusing the hydro axes limits if a
            # hydro file has been loaded already
            if 'xlims' in self.__dict__:
                self.PTM.plot(self.PTM.nt - 1, ax=self.axes,
                              xlims=self.xlims, ylims=self.ylims,
                              color=self.particlecolor,
                              fontcolor='w', markersize=self.particlesize)
            else:
                self.PTM.plot(self.PTM.nt - 1, ax=self.axes, fontcolor='w',
                              color=self.particlecolor,
                              markersize=self.particlesize)
            # redraw the figure
            self.canvas.draw()

    def on_show_edges(self, event):
        """Toggle drawing of the cell edges."""
        sender = event.GetEventObject()
        self.showedges = sender.GetValue()

        # Update the figure
        self.update_figure()

    def on_clim_check(self, event):
        """Switch between automatic and manual colour limits."""
        sender = event.GetEventObject()
        if sender.GetValue():
            self.autoclim = False
            self.update_figure()
        else:
            self.autoclim = True

    def _set_clim_from_text(self, index, text):
        """Store ``text`` as a float colour limit; report invalid input."""
        try:
            value = float(text)
        except ValueError:
            self.flash_status_message("Invalid color limit: %s" % text)
            return
        if 'clim' not in self.__dict__:
            # No data plotted yet: start from a degenerate range
            self.clim = [value, value]
        self.clim[index] = value

    def on_climlow(self, event):
        """Record the lower colour limit typed by the user."""
        self._set_clim_from_text(0, event.GetString())
        #self.update_figure()

    def on_climhigh(self, event):
        """Record the upper colour limit typed by the user."""
        self._set_clim_from_text(1, event.GetString())
        #self.update_figure()

    def on_select_cmap(self, event):
        """Apply the chosen colormap to the current collection."""
        self.cmap = event.GetString()
        if USECMOCEAN:
            self.collection.set_cmap(getattr(cm, self.cmap))
        else:
            self.collection.set_cmap(self.cmap)

        # Update the figure
        self.update_figure()

    @staticmethod
    def _with_extension(path, ext):
        """Return ``path``, appending ``ext`` unless it already ends with it."""
        if path.lower().endswith(ext):
            return path
        return path + ext

    def on_save_fig(self, event):
        """
        Save a figure of the current scene to a file
        """
        # One filter per wildcard entry, in the same order, so the chosen
        # file type decides the extension.
        file_choices = "PNG (*.png)|*.png|PDF (*.pdf)|*.pdf|JPEG (*.jpg)|*.jpg|EPS (*.eps)|*.eps"
        filters = ['.png', '.pdf', '.jpg', '.eps']

        dlg = wx.FileDialog(self,
                            message="Save figure to file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
            outfile = self._with_extension(path, ext)

            self.fig.savefig(outfile)

    def on_save_anim(self, event):
        """
        Save an animation of the current scene to a file
        """
        file_choices = "Quicktime (*.mov)|*.mov|GIF (*.gif)|*.gif|AVI (*.avi)|*.avi|MP4 (*.mp4)|*.mp4"
        filters = ['.mov', '.gif', '.avi', '.mp4']

        dlg = wx.FileDialog(self,
                            message="Output animation file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
            outfile = self._with_extension(path, ext)

            self.flash_status_message(
                "Saving animation to file: %s" % outfile)

            # Create the animation
            #self.tstep = range(self.Nt) # Use all time steps for animation
            #self.animate(cbar=self.cbar,cmap=self.cmap,\
            #    xlims=self.axes.get_xlim(),ylims=self.axes.get_ylim())
            def initanim():
                if not self.plot_type == 'particles':
                    return (self.title, self.collection)
                else:
                    return (self.PTM.title, self.PTM.p_handle)

            def updateScalar(i):
                if not self.plot_type == 'particles':
                    self.tstep = [i]
                    self.loadData()
                    self.update_figure()
                    return (self.title, self.collection)
                elif self.plot_type == 'particles':
                    self.PTM.plot(i, ax=self.axes,
                                  xlims=self.axes.get_xlim(),
                                  ylims=self.axes.get_ylim())
                    return (self.PTM.title, self.PTM.p_handle)

            self.anim = animation.FuncAnimation(self.fig,
                                                updateScalar,
                                                init_func=initanim,
                                                frames=self.Nt,
                                                interval=50,
                                                blit=True)

            if ext == '.gif':
                self.anim.save(outfile, writer='imagemagick', fps=6)
            elif ext == '.mp4':
                print('Saving html5 video...')
                # Ensures html5 compatibility
                self.anim.save(outfile, writer='mencoder', fps=6,
                               bitrate=3600,
                               extra_args=['-ovc', 'x264'])  # mencoder options
                #bitrate=3600,extra_args=['-vcodec','libx264'])
            else:
                self.anim.save(outfile, writer='mencoder', fps=6,
                               bitrate=3600)

            # Return the figure back to its status
            del self.anim
            self.tstep = self.tindex
            if not self.plot_type == 'particles':
                self.loadData()
                self.update_figure()

            # Bring up a dialog box
            dlg2 = wx.MessageDialog(self, 'Animation complete.', "Done", wx.OK)
            dlg2.ShowModal()
            dlg2.Destroy()

    def on_exit(self, event):
        self.Destroy()

    def on_about(self, event):
        msg = (" SUNTANS NetCDF visualization tool\n"
               "\n"
               "    *Author: Matt Rayson\n"
               "    *Institution: Stanford University\n"
               "    *Created: October 2013\n")
        dlg = wx.MessageDialog(self, msg, "About", wx.OK)
        dlg.ShowModal()
        dlg.Destroy()

    def on_count_cells(self, eveny):
        """Show the total number of 3-D grid cells in a dialog."""
        msg = "Total 3-D grid cells = %d" % (self.count_cells())
        dlg = wx.MessageDialog(self, msg, "No. cells", wx.OK)
        dlg.ShowModal()
        dlg.Destroy()

    def on_overlay_bathy(self, event):
        """Overlay depth contours at the levels in ``depthlevs``."""
        # Plot depth contours
        print('Plotting contours...')
        self.contourf(z=self.dv, clevs=self.depthlevs,
                      ax=self.axes,
                      filled=False, colors='0.5', linewidths=0.5, zorder=1e6)
        print('Done')

    def on_plot_gridstat(self, event):
        """
        Plot the grid size histogram in a new figure
        """
        matplotlib.pyplot.figure()
        self.plothist()
        matplotlib.pyplot.show()

    def create_status_bar(self):
        self.statusbar = self.CreateStatusBar()

    def flash_status_message(self, msg, flash_len_ms=1500):
        """Show ``msg`` in the status bar and clear it after ``flash_len_ms``."""
        self.statusbar.SetStatusText(msg)
        self.timeroff = wx.Timer(self)
        self.Bind(wx.EVT_TIMER, self.on_flash_status_off, self.timeroff)
        self.timeroff.Start(flash_len_ms, oneShot=True)

    def on_flash_status_off(self, event):
        self.statusbar.SetStatusText('')
s=s, lw=0, label='True Position') plt.scatter(pos[:, 0], pos[:, 1], color='turquoise', s=s, lw=0, label='MDS') plt.scatter(npos[:, 0], npos[:, 1], color='darkorange', s=s, lw=0, label='NMDS') plt.legend(scatterpoints=1, loc='best', shadow=False) similarities = similarities.max() / (similarities + EPSILON) * 100 np.fill_diagonal(similarities, 0) # Plot the edges start_idx, end_idx = np.where(pos) # a sequence of (*line0*, *line1*, *line2*), where:: # linen = (x0, y0), (x1, y1), ... (xm, ym) segments = [[X_true[i, :], X_true[j, :]] for i in range(len(pos)) for j in range(len(pos))] values = np.abs(similarities) lc = LineCollection(segments, zorder=0, cmap=plt.cm.Blues, norm=plt.Normalize(0, values.max())) lc.set_array(similarities.flatten()) lc.set_linewidths(np.full(len(segments), 0.5)) ax.add_collection(lc) plt.show()
def plot_market_structure(names, labels, embedding, partial_correlations):
    """Draw the market structure graph and show it on screen.

    Args:
        names: text drawn next to each node.
        labels: array of cluster labels, one per node (``labels.max()`` is
            used to scale the label box colours).
        embedding: array whose rows 0 and 1 hold the x and y coordinates
            of the nodes.
        partial_correlations: square matrix (for example a precision
            matrix); it is normalised by its diagonal on a private copy, so
            the caller's array is left untouched.
    """
    import matplotlib.pyplot as plt
    from matplotlib.collections import LineCollection

    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations.
    # Work on a float copy: the in-place scaling below must not modify the
    # matrix owned by the caller.
    partial_correlations = np.array(partial_correlations, dtype=float)
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    # ("spectral" was renamed "nipy_spectral" in matplotlib)
    plt.scatter(embedding[0], embedding[1], s=100 * d**2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges; each segment is ((x0, y0), (x1, y1))
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    try:
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(8 * values)
    except ValueError:
        # values.max() raises ValueError when no edge passes the threshold
        print("Warning: skip line normalization")
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r)
        lc.set_linewidths(1)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    color_scale = float(labels.max()) or 1.0  # avoid 0/0 with one cluster
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 color='black',
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
    plt.close()
# Project the MDS positions with the already configured PCA model.
npos = clf.fit_transform(pos)

fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])

# Three point clouds: the reference positions plus two shifted copies so
# that overlapping markers stay distinguishable.
offset = 0.2
plt.scatter(X_true[:, 0], X_true[:, 1], c='r', s=20)
plt.scatter(pos[:, 0] + offset, pos[:, 1] + offset, s=20, c='g')
plt.scatter(npos[:, 0] - offset, npos[:, 1] - offset, s=20, c='b')
plt.legend(('True position', 'MDS', 'NMDS'), loc='best')

# Invert the values (small input -> large value); zero inputs produce inf,
# which is reset to 0 afterwards.
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
start_idx, end_idx = np.where(pos)
# a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
segments = []
for i in range(len(pos)):
    for j in range(len(pos)):
        # one segment for every ordered pair of points (i, j)
        segments.append([pos[i, :], pos[j, :]])
values = np.abs(similarities)
edge_norm = plt.Normalize(0, values.max())
lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r, norm=edge_norm)
lc.set_array(similarities.flatten())
lc.set_linewidths(0.5 * np.ones(len(segments)))
ax.add_collection(lc)

plt.show()
def _visualize(self, names, close_prices, open_prices):
    """Cluster stocks by daily price variation and show the relation graph.

    Args:
        names: array of display names, one per row of the price arrays
            (it is indexed with a boolean mask, so it must be an ndarray).
        close_prices: 2-D array of closing prices, one row per stock.
        open_prices: 2-D array of opening prices, same shape.

    Side effects:
        Prints the members of every cluster, changes the global matplotlib
        font settings and opens a figure window.
    """
    # The daily variations of the quotes are what carry most information
    variation = close_prices - open_prices
    # Replace NaN values with 0. The GraphicalLassoCV call below may then
    # emit divide-by-zero RuntimeWarnings, but it still runs through.
    variation[np.isnan(variation)] = 0

    # #########################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphicalLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    # #########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_,
                                             random_state=0)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    # #########################################################################
    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    # #########################################################################
    # Visualization
    # Enable Chinese text
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    plt.figure(1, facecolor='w', figsize=(15, 12))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d**2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges; each segment is ((x0, y0), (x1, y1))
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    if values.size:
        # values.max() raises on an empty array, so edges are only drawn
        # when at least one partial correlation exceeds the threshold.
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    color_scale = float(max(n_labels, 1))  # avoid 0/0 with a single cluster
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
def plot_mds(rdm, level=None):
    """Visualize a representational dissimilarity matrix (RDM) via MDS.

    The RDM is embedded into two dimensions with metric multidimensional
    scaling on the precomputed dissimilarities. Every condition is drawn as
    a labelled point and every pair of conditions is joined by a thin line
    coloured by the rescaled dissimilarity. The figure is displayed with
    ``plt.show()``; nothing is returned.

    Parameters
    ----------
    rdm : str or pandas.DataFrame
        Path to a ``.csv`` file holding the RDM, or the RDM itself as a
        square data frame whose columns are the condition names.
    level : str, optional
        When ``'2nd'``, every entry greater than ``-1.05`` is replaced by
        ``1 - entry`` before the embedding is computed.
    """
    # big kudos to Jona Sassenhagen for doing an amazing job
    # adding condition names and colors to the mds plot

    # import modules and functions
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn import manifold
    from matplotlib.collections import LineCollection

    ## computation/transformation section

    if isinstance(rdm, str):
        # read in the rdm in .csv format, creating a data frame
        df = pd.read_csv(rdm)
        # An index column written by ``DataFrame.to_csv`` is read back as
        # 'Unnamed: 0'. It has to be looked up in (and removed from) the
        # loaded frame, not the path string, or the frame is not square.
        if 'Unnamed: 0' in df:
            del df['Unnamed: 0']
    else:
        # work on a copy so the caller's frame keeps its own index
        df = rdm.copy()

    df.index = df.columns  # set data frame index based on columns

    if level == '2nd':
        df = df.mask(df.values > -1.05, 1 - df.values)

    # fixed seed so that the embedding is reproducible
    seed = 0
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9,
                       random_state=seed, dissimilarity="precomputed",
                       n_jobs=1)

    # apply mds to the dissimilarities
    rdm_mds = mds.fit(df.values).embedding_

    # one row per condition: 2-D coordinates plus the condition name
    df_mds = pd.DataFrame(rdm_mds, index=df.index, columns=["dim1", "dim2"])
    df_mds["cond"] = df_mds.index

    # Rescale the dissimilarities for colouring the edges: column maximum
    # divided by each entry, times 100. Zero entries (e.g. the diagonal)
    # divide to infinity and are reset to 0.
    strengths = pd.DataFrame(df.values)
    strengths = strengths.max() / strengths * 100
    strengths[np.isinf(strengths)] = 0
    strengths = strengths.to_numpy()

    ## plotting section

    sns.set_style("white")

    # x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally, and older releases accept the keyword form as well.
    g = sns.lmplot(x="dim1", y="dim2", hue="cond", data=df_mds,
                   fit_reg=False, legend=False)
    ax = g.ax

    sns.despine(ax=ax, trim=True, left=True, bottom=True)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.grid(False)

    # add condition names to plot, pushed slightly away from the origin
    for dim1, dim2, name in df_mds.values:
        ax.text(dim1 * 1.05, dim2 * 1.05, name)

    # one segment for every ordered pair of points, in the same row-major
    # order as ``strengths.flatten()`` below
    n_points = len(rdm_mds)
    segments = [[rdm_mds[i, :], rdm_mds[j, :]]
                for i in range(n_points) for j in range(n_points)]
    values = np.abs(strengths)

    lc = LineCollection(segments, zorder=0, cmap=plt.cm.Greys,
                        norm=plt.Normalize(0, values.max()))
    lc.set_array(strengths.flatten())
    lc.set_linewidths(0.5)
    ax.add_collection(lc)

    plt.tight_layout()
    plt.show()
def plotManifoldDistances(self, segments: List[Union[MessageSegment, TypedSegment, TypedTemplate, Template, RawMessage, Any]],
                          distances: numpy.ndarray,
                          labels: numpy.ndarray, templates: List = None, plotEdges=False, countMarkers=False):
    # noinspection PyUnresolvedReferences
    """
    Plot distances of segments according to (presumably multidimensional) features.
    This function abstracts from the actual feature by directly taking a precomputed similarity matrix and
    arranging the segments relative to each other according to their distances using
    Multidimensional Scaling (MDS). See module `manifold` from package `sklearn`.

    If segments is a list of `TypedSegment` or `MessageSegment`, this function plots the feature values of each
    given segment overlaying each other besides the distances; they are colored according to the given labels.

    The plot is drawn into the two axes held in `self._axes`; nothing is returned.

    >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    >>> from nemere.utils.loader import BaseLoader
    >>> from nemere.inference.analyzers import Value
    >>>
    >>> bytedata = [
    ...     bytes([1, 2, 3, 4]),
    ...     bytes([   2, 3, 4]),
    ...     bytes([   1, 3, 4]),
    ...     bytes([   2, 4   ]),
    ...     bytes([   2, 3   ]),
    ...     bytes([20, 30, 37, 50, 69, 2, 30]),
    ...     bytes([        37,  5, 69       ]),
    ...     bytes([70, 2, 3, 4]),
    ...     bytes([3, 2, 3, 4])
    ...     ]
    >>> messages  = [RawMessage(bd) for bd in bytedata]
    >>> specimens = BaseLoader(messages)
    >>> analyzers = [Value(message) for message in messages]
    >>> segments  = [TypedSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
    >>> for seg in segments[:4]:
    ...     seg.fieldtype = "ft1"
    >>> for seg in segments[4:6]:
    ...     seg.fieldtype = "ft2"
    >>> for seg in segments[6:]:
    ...     seg.fieldtype = "ft3"
    >>> DistanceCalculator.debug = False
    >>> dc = DistanceCalculator(segments, thresholdFunction=DistanceCalculator.neutralThreshold, thresholdArgs=None)
    Calculated distances for 37 segment pairs in ... seconds.
    >>> dp = DistancesPlotter(specimens, "test", False)
    >>> dp.plotManifoldDistances(segments, dc.distanceMatrix, numpy.array([1,2,3,1,1,0,1,0,2]))
    >>> dp.writeOrShowFigure()  # doctest: +SKIP

    :param segments: If `segments` is a list of `TypedSegment`s, field types are marked as small markers
        within the label marker. labels containing "Noise" then are not explicitly marked like the other labeled
        segments
    :param distances: The precomputed similarity matrix:
        symmetric matrix, rows/columns in the order of `segments`
    :param labels: Labels of strings (or ints or any other printable type) identifying the cluster for each segment
    :param templates: Templates of clusters to be printed alongside with the feature values.
        CURRENTLY UNTESTED
    :param plotEdges: Plot of edges between each pair of segment markers.
        Caution: Adds n^2 lines which takes very long compared to the scatterplot and
        quickly becomes a huge load especially when rendering the plot as PDF.
    :param countMarkers: add text labels with information at positions with multiple markers
    """
    # NOTE(review): input validation by assert is stripped when Python runs with -O.
    assert isinstance(segments, Sequence)
    assert isinstance(distances, numpy.ndarray)
    assert isinstance(labels, numpy.ndarray)
    assert len(segments) == distances.shape[0] == distances.shape[1]

    # first axes receives the MDS scatter plot, second the segment values or the legend
    axMDS, axSeg = self._axes  # type: plt.Axes, plt.Axes
    axMDS.set_aspect('equal', adjustable='datalim')

    # subsample if segment count is larger than maxSamples
    subret = self.subsample(segments, distances, labels)
    if subret:
        # a truthy result replaces all three inputs by their subsampled counterparts
        originalSegmentCount, segments, distances, labels = subret
        if self._plotSegmentValues:
            botlef = (0, -5)
        else:
            botlef = (0.1, 0.1)
        axSeg.text(*botlef, 'Subsampled: {} of {} segments'.format(len(segments), originalSegmentCount))
    # without subsampling, existing values need not to be overwritten

    # 2-D marker position per segment; columns 0 and 1 are read as x and y below
    pos = DistancesPlotter.manifoldPositions(distances)
    # identify unique labels
    ulab = DistancesPlotter.uniqueLabels(labels, segments)
    # without explicit templates, the zip below pairs each unique label with itself
    if templates is None:
        templates = ulab

    # prepare color space: one evenly spaced colormap index per unique label
    cIdx = [int(round(each)) for each in numpy.linspace(2, self.cm.N - 2, len(ulab))]

    # CLUSTERS (large bobbles): iterate unique labels and scatter plot each of these clusters
    for c, (l, t) in enumerate(zip(ulab, templates)):  # type: int, (Any, Template)
        lColor = self.cm(cIdx[c])
        class_member_mask = (labels == l)
        try:
            x = list(compress(pos[:, 0].tolist(), class_member_mask))
            y = list(compress(pos[:, 1].tolist(), class_member_mask))
            # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
            # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
            axMDS.scatter(x, y, c=colors.to_rgba_array(lColor), alpha=.6,
                          s=self.labsize,  # s=s-(c*s/len(ulab)),  #
                          lw=0, label=str(l))
        except IndexError as e:
            # dump the inputs for debugging before propagating the error
            print(pos)
            print(distances)
            print(segments)
            raise e

        # overlay the template's values only if t really is a Template (not a plain label)
        if isinstance(t, Template) and self._plotSegmentValues:
            axSeg.plot(t.values, c=lColor, linewidth=4)

    # GROUND TRUTH (small bobbles): include field type labels for TypedSegments input
    if any(isinstance(seg, (TypedSegment, TypedTemplate, RawMessage)) for seg in segments):
        if any(isinstance(seg, (TypedSegment, TypedTemplate)) for seg in segments):
            # field type per segment; anything untyped falls back to "[unknown]"
            ftypes = numpy.array([seg.fieldtype if isinstance(seg, (TypedSegment, TypedTemplate)) else "[unknown]"
                                  for seg in segments])  # PP
        elif any(isinstance(seg, RawMessage) and seg.messageType != 'Raw' for seg in segments):
            # for messages, the message type takes the role of the field type
            ftypes = numpy.array([msg.messageType if isinstance(msg, RawMessage) and msg.messageType != 'Raw'
                                  else "[unknown]" for msg in segments])  # PP
        else:
            # no type information: the empty set makes the loop over utyp below a no-op
            ftypes = set()
        # identify unique types
        utyp = sorted(set(ftypes))

        # prepare color space
        # (rebinds cIdx; the label-based indices are not read again on this branch)
        cIdx = [int(round(each)) for each in numpy.linspace(30, self.fcm.N - 30, len(utyp))]

        # iterate unique types and scatter plot each of these groups
        for n, ft in enumerate(utyp):  # PP
            fColor = self.fcm(cIdx[n])
            type_member_mask = (ftypes == ft)
            x = list(compress(pos[:, 0].tolist(), type_member_mask))
            y = list(compress(pos[:, 1].tolist(), type_member_mask))
            # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
            # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
            axMDS.scatter(x, y, c=colors.to_rgba_array(fColor), alpha=1,
                          s=self.typsize,
                          lw=0, label=str(ft))

            # only the type of the first segment decides whether values are overlaid
            if isinstance(segments[0], (TypedSegment, TypedTemplate)) and self._plotSegmentValues:
                for seg in compress(segments, type_member_mask):
                    axSeg.plot(seg.values, c=fColor, alpha=0.05)
    elif isinstance(segments[0], MessageSegment) and self._plotSegmentValues:
        # untyped segments: color the value plots by cluster label instead
        for c, l in enumerate(ulab):
            lColor = self.cm(cIdx[c])
            class_member_mask = (labels == l)
            for seg in compress(segments, class_member_mask):
                axSeg.plot(seg.values, c=lColor, alpha=0.1)
    elif self._plotSegmentValues:
        axSeg.text(.5, .5, 'nothing to plot\n(message alignment)', horizontalalignment='center')

    # place the label/type legend in the (otherwise empty) axSeg subfigure
    if isinstance(segments[0], RawMessage) or not self._plotSegmentValues:
        legendHandles, legendLabels = axMDS.get_legend_handles_labels()
        # axMDS.legend(bbox_to_anchor=(1.04,1), scatterpoints=1, shadow=False)
        axSeg.legend(handles=legendHandles, labels=legendLabels, loc='best', scatterpoints=1, shadow=False)
        axSeg.patch.set_alpha(0.0)
        axSeg.axis('off')
    else:
        # place the label/type legend at the best position
        axMDS.legend(scatterpoints=1, loc='best', shadow=False)

    if plotEdges:
        # plotting of edges takes a long time compared to the scatterplot (and especially when rendering the PDF)
        from matplotlib.collections import LineCollection
        # Plot the edges
        # one line per ordered pair of markers, in the row-major order of distances.flatten()
        lines = [[pos[i, :], pos[j, :]]
                 for i in range(len(pos)) for j in range(len(pos))]
        values = numpy.abs(distances)
        # noinspection PyUnresolvedReferences
        lc = LineCollection(lines,
                            zorder=0, cmap=plt.cm.Blues,
                            norm=plt.Normalize(0, values.max()))
        # lc.set_alpha(.1)
        lc.set_array(distances.flatten())
        # NOTE(review): supplies len(segments) widths for len(pos)**2 lines; all widths are
        # equal, so this presumably only matters if Matplotlib does not cycle them - confirm
        lc.set_linewidths(0.5 * numpy.ones(len(segments)))
        axMDS.add_collection(lc)

    if countMarkers:
        # Count markers at identical positions and plot text with information about the markers at this position
        from collections import Counter
        import math
        if isinstance(segments[0], (TypedSegment, TypedTemplate)):
            # TODO for TypedTemplates we rather need to count the number of base segments, so for now this is not accurate
            coordCounter = Counter(
                [(posX, posY, seg.fieldtype) for seg, lab, posX, posY in zip(
                    segments, labels, pos[:, 0].tolist(), pos[:, 1].tolist())]
            )
        else:
            # TODO for Templates we rather need to count the number of base segments, so for now this is not accurate
            coordCounter = Counter(
                [(posX, posY, lab) for lab, posX, posY in zip(
                    labels, pos[:, 0].tolist(), pos[:, 1].tolist())]
            )
        for (posX, posY, lab), cnt in coordCounter.items():
            if cnt > 1:
                # offset the text by distance r in a direction derived from the label's hash, so
                # that different labels at one position do not overlap
                # NOTE(review): theta is in 0..359 but math.cos/math.sin take radians; str hashes
                # are also randomized per interpreter run unless PYTHONHASHSEED is fixed
                theta = hash(str(lab)) % 360
                r = 1
                posXr = posX + r * math.cos(theta)
                posYr = posY + r * math.sin(theta)
                # NOTE(review): `withdash` was, as far as I know, removed from Matplotlib in
                # release 3.3 - confirm against the Matplotlib version this project pins
                axMDS.text(posXr, posYr, "{}: {}".format(lab, cnt), withdash=True)

    # refresh an interactive toolbar, if the canvas has one
    if self._fig.canvas.toolbar is not None:
        self._fig.canvas.toolbar.update()
plt.clf() ax = plt.axes([0., 0., 1., 1.]) plt.axis('off') plt.xlim( embedding[0].min() - .15 * embedding[0].ptp(), embedding[0].max() + .10 * embedding[0].ptp(), ) plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(), embedding[1].max() + .03 * embedding[1].ptp()) lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r, norm=plt.Normalize(0, .7 * values.max())) lc.set_array(values) lc.set_linewidths(0.15 / values) ax.add_collection(lc) if animation: sca = ax.scatter([], [], cmap=plt.cm.spectral) texts = [] def init(): sca.set_offsets([]) return sca, tuple(texts) def animate(i): x = embedding[0][labels <= i] y = embedding[1][labels <= i] s = 100 * d[labels <= i]**2 c = labels[labels <= i]
# Script fragment: scatter the true positions and two embeddings, then draw one
# edge per ordered pair of points, coloured by the rescaled similarity.
# `clf`, `npos`, `pos`, `X_true` and `similarities` are defined outside this fragment.

# re-fits `clf` on npos and keeps the transformed coordinates
npos = clf.fit_transform(npos)

fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])  # axes filling the whole figure

s = 100  # marker size shared by the three point sets
plt.scatter(X_true[:, 0], X_true[:, 1], color='navy', s=s, lw=0,
            label='True Position')
plt.scatter(pos[:, 0], pos[:, 1], color='turquoise', s=s, lw=0, label='MDS')
plt.scatter(npos[:, 0], npos[:, 1], color='darkorange', s=s, lw=0, label='NMDS')
plt.legend(scatterpoints=1, loc='best', shadow=False)

# invert and rescale (maximum / value * 100); zero entries divide to inf and are reset to 0
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
# NOTE(review): start_idx / end_idx are not used anywhere in this fragment
start_idx, end_idx = np.where(pos)
# a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
# every ordered pair (i, j), in the row-major order of similarities.flatten() below
segments = [[X_true[i, :], X_true[j, :]]
            for i in range(len(pos)) for j in range(len(pos))]
values = np.abs(similarities)
lc = LineCollection(segments,
                    zorder=0, cmap=plt.cm.Blues,
                    norm=plt.Normalize(0, values.max()))
lc.set_array(similarities.flatten())
lc.set_linewidths(np.full(len(segments), 0.5))
ax.add_collection(lc)

plt.show()
# Script fragment: fit an isotonic and a linear regression to noisy, log-shaped
# data and plot both fits. `n` is defined outside this fragment.

x = np.arange(n)
rs = check_random_state(0)  # fixed seed, so the noise is reproducible
# integer noise drawn from [-50, 50) added to a 50 * log(1 + x) trend
y = rs.randint(-50, 50, size=(n, )) + 50. * np.log1p(np.arange(n))

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

# one vertical segment per sample, from the observed value to the isotonic fit
# (the index i doubles as the x coordinate because x is arange(n))
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
# NOTE(review): the isotonic fit ('b.-') and the linear fit ('b-') are both blue
plt.plot(x, y_, 'b.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
def plot_environment(self):
    """Draw every link of the network, coloured by its relative queue length.

    Node coordinates and links are read from the OTM scenario reached through
    ``self.otm4rl``. Each link becomes one thick line between its start and
    end node, offset sideways depending on its direction, and is coloured on
    the 'hot' colormap by ``waiting / self.max_queues[link_id]``. The axis
    limits are set to a square window around all drawn links.

    Returns the ``matplotlib.pyplot`` module.
    """
    _fig, ax = plt.subplots()

    def scenario():
        # fetched anew on every use, as the scenario object is not cached here
        return self.otm4rl.otmwrapper.otm.scenario()

    # coordinates of every node, keyed by node id
    node_xy = {}
    for nid in scenario().get_node_ids():
        node = scenario().get_node_with_id(nid)
        node_xy[nid] = {'x': node.getX(), 'y': node.getY()}

    link_lines = []
    fill_ratios = []
    # endpoint coordinates of all links, in drawing order, for the axis limits
    xs = []
    ys = []

    queues = self.otm4rl.get_queues()

    for lid in scenario().get_link_ids():
        link = scenario().get_link_with_id(lid)
        tail = node_xy[link.getStart_node_id()]
        head = node_xy[link.getEnd_node_id()]
        x0, y0 = tail['x'], tail['y']
        x1, y1 = head['x'], head['y']

        # shift the link sideways according to its direction of travel:
        # the horizontal component moves it vertically ...
        run = x1 - x0
        if run > 0:
            y0, y1 = y0 - 150, y1 - 150
        elif run < 0:
            y0, y1 = y0 + 150, y1 + 150

        # ... and the vertical component (after that shift) moves it horizontally
        rise = y1 - y0
        if rise > 0:
            x0, x1 = x0 + 100, x1 + 100
        elif rise < 0:
            x0, x1 = x0 - 100, x1 - 100

        link_lines.append([(x0, y0), (x1, y1)])
        fill_ratios.append(queues[lid]["waiting"] / self.max_queues[lid])
        xs.extend((x0, x1))
        ys.extend((y0, y1))

    # bounding box of all endpoints; infinite when there is no link at all
    minX = min([float('Inf')] + xs)
    maxX = max([-float('Inf')] + xs)
    minY = min([float('Inf')] + ys)
    maxY = max([-float('Inf')] + ys)

    heat = plt.get_cmap('hot')
    collection = LineCollection(link_lines,
                                colors=[heat(ratio) for ratio in fill_ratios])
    collection.set_linewidths(15)
    ax.add_collection(collection)

    # make the visible window square: the longer extent is shown fully and the
    # shorter one is centred within a window of the same size
    dY = maxY - minY
    dX = maxX - minX
    if dY > dX:
        ax.set_ylim((minY, maxY))
        centre = (maxX + minX) / 2
        ax.set_xlim((centre - dY / 2, centre + dY / 2))
    else:
        ax.set_xlim((minX, maxX))
        centre = (maxY + minY) / 2
        ax.set_ylim((centre - dX / 2, centre + dX / 2))

    return plt
# Script fragment: fit an isotonic and a linear regression to noisy, log-shaped
# data and plot both fits. `n` is defined outside this fragment.

x = np.arange(n)
rs = check_random_state(0)  # fixed seed, so the noise is reproducible
# integer noise drawn from [-50, 50) added to a 50 * log(1 + x) trend
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log1p(np.arange(n))

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

# one vertical segment per sample, from the observed value to the isotonic fit
# (the index i doubles as the x coordinate because x is arange(n))
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)   # observations
plt.plot(x, y_, 'g.-', markersize=12)  # isotonic fit
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')  # linear fit
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
def plot():
    """Interactively load a CSV file, cluster its rows and plot them as a graph.

    Prompts on stdin for a CSV file name until the file can be read, fills
    missing values, vectorizes the rows, estimates a sparse precision matrix,
    clusters with affinity propagation, prints the data grouped by cluster and
    finally shows a 2-D embedding whose edges are the partial correlations
    above a threshold. Nothing is returned.

    NOTE(review): this is Python 2 code (``raw_input``, print statements) and
    relies on ``DataFrame.convert_objects``, ``DataFrame.sort`` and
    ``covariance.GraphLassoCV``, which presumably no longer exist in current
    pandas / scikit-learn releases - confirm the versions this is pinned to.
    """
    # Input path of the file
    # keep asking until the file can be opened; only IOError triggers a retry
    while True:
        try:
            path = raw_input('Enter the name (xxx.csv) of your csv file (in the same folder): ')
            dataframe = pd.read_csv(path, delimiter=",", skipinitialspace=True)
            break
        except IOError:
            print 'Cannot find the file. Try again! (You must have this file in the same folder.'

    # Preprocess the data
    # coerce to numeric where possible, then replace missing values by the column mean
    dataframe = dataframe.convert_objects(convert_numeric=True)
    dataframe = dataframe.fillna(dataframe.mean())

    # Vectorize nominal features
    # one dict per row of the data frame
    d = dataframe.T.to_dict().values()
    v = DictVectorizer(sparse=False)
    X = v.fit_transform(d)

    # Eliminate nan values
    X = np.nan_to_num(X)

    # The GraphLasso estimator uses an l1 penalty to enforce sparsity on the precision matrix:
    # the higher its alpha parameter, the more sparse the precision matrix.
    # The corresponding GraphLassoCV object uses cross-validation to automatically set the alpha parameter.
    print 'computing edge model... (large data may take a significant time)'
    edge_model = covariance.GraphLassoCV()
    # fitted on the transpose, so the covariance is between rows (samples) of X
    edge_model.fit(X.T)

    # Affinity Propagation clustering
    print 'computing clustering...'
    clustering = cluster.AffinityPropagation()
    clustering.fit(edge_model.covariance_)
    labels = clustering.labels_

    # Print data based on cluster
    dataframe.insert(loc=0, column="clustering group", value=labels, allow_duplicates=False)
    dataframe.sort(columns="clustering group", inplace=True)
    print '***************Data Cluster**************'
    # lift the row limit so that the whole frame is printed
    pd.set_option('display.max_rows', len(dataframe))
    print(dataframe)
    print '*******************End*******************'

    # Make sure the same scale is used over all features.
    # Because manifold learning methods are based on a nearest-neighbor search,
    # the algorithm may perform poorly otherwise.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # 2-D coordinates for the nodes; transposed so that embedding[0] / embedding[1] are x / y
    node_position_model = manifold.SpectralEmbedding(n_components=2)
    embedding = node_position_model.fit_transform(X).T

    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    # Compute partial correlation according to precision matrix
    partial_correlations = edge_model.precision_.copy()
    # scale rows and columns by 1 / sqrt(diagonal)
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # keep only the strict upper triangle, so that each pair is considered once
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    # NOTE(review): d is 1-D here; whether Normalizer accepts that depends on the sklearn version
    d_new = Normalizer().fit_transform(d)
    plt.scatter(embedding[0], embedding[1], s= 100 * d_new ** 0.5, c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    #a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    # stronger partial correlation -> thicker edge
    lc.set_linewidths(5 * values)
    ax.add_collection(lc)

    # Show the result graph
    plt.show()
# Script fragment: fit an isotonic and a linear regression to noisy, log-shaped
# data and plot both fits. `np` and `plt` are imported outside this fragment.
from matplotlib.collections import LineCollection

from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import check_random_state

n = 100  # number of samples
x = np.arange(n)
rs = check_random_state(0)  # fixed seed, so the noise is reproducible
# integer noise drawn from [-50, 50) added to a 50 * log(1 + x) trend
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log(1 + np.arange(n))

# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# plot result

# one vertical segment per sample, from the observed value to the isotonic fit
# (the index i doubles as the x coordinate because x is arange(n))
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(0.5 * np.ones(n))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)   # observations
plt.plot(x, y_, 'g.-', markersize=12)  # isotonic fit
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')  # linear fit
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
# Script fragment: fit an isotonic and a linear regression on the training
# data, plot both fits and predict the test data with each model.
# `X_train`, `y_train`, `X_test` and `ir` are defined outside this fragment.
x = X_train
y = y_train

y_ = ir.fit_transform(x, y)
# fits `ir` a second time on the same data; kept so that `ir_model` stays bound
ir_model = ir.fit(x, y)

lr = LinearRegression()
lr_model = lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

###############################################################################
# plot result

# One vertical segment per sample, from the observed value to the isotonic fit.
# The segment is anchored at the sample's own x value (not at its index), so it
# lines up with the plotted points for any training data.
segments = [[[xi, yi], [xi, fitted]] for xi, yi, fitted in zip(x, y, y_)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(0.5 * np.ones(len(x)))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=6)   # observations
plt.plot(x, y_, 'g.-', markersize=6)  # isotonic fit
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')  # linear fit
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
#plt.show()

lr_predict = lr.predict(X_test[:, np.newaxis])
# NOTE(review): depending on how `ir` was configured, test values outside the
# training range may come back as NaN here - check its `out_of_bounds` setting
ir_predict = ir_model.predict(X_test)
# Script fragment: scatter the original data and two embeddings, then draw one
# edge per ordered pair of points, coloured by the rescaled similarity.
# `clf`, `pos`, `npos`, `data` and `similarities` are defined outside this fragment.

# each call re-fits `clf` on its argument and keeps the transformed coordinates
pos = clf.fit_transform(pos)

npos = clf.fit_transform(npos)

fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])  # axes filling the whole figure

plt.scatter(data[:, 0], data[:, 1], c='r', s=52)
plt.scatter(pos[:, 0], pos[:, 1], s=52, c='g')
plt.scatter(npos[:, 0], npos[:, 1], s=52, c='b')
#plt.legend(('True position', 'MDS', 'NMDS'), loc='best')

# invert and rescale (maximum / value * 100); zero entries divide to inf and are reset to 0
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
# NOTE(review): start_idx / end_idx are not used anywhere in this fragment
start_idx, end_idx = np.where(pos)
#a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
# every ordered pair (i, j), in the row-major order of similarities.flatten() below
segments = [[data[i, :], data[j, :]]
            for i in range(len(pos)) for j in range(len(pos))]
values = np.abs(similarities)
lc = LineCollection(segments,
                    zorder=0, cmap=plt.cm.hot_r,
                    norm=plt.Normalize(0, values.max()))
lc.set_array(similarities.flatten())
lc.set_linewidths(0.5 * np.ones(len(segments)))
ax.add_collection(lc)

plt.show()
def contingencyTableChi2andPOISpaceStructure(dataBunch, pred, class_mapping, dbLabel):
    """Test cluster/industry independence and plot the POI structure graph.

    Builds a contingency table of industry classes versus predicted clusters,
    runs a chi-square test of independence on it and prints the statistic, the
    p-value and the degrees of freedom. The table is then used to estimate a
    sparse precision matrix, cluster the industry classes with affinity
    propagation and draw them as a graph (nodes: classes, edges: partial
    correlations), following scikit-learn's "Visualizing the stock market
    structure" example:
    http://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html

    :param dataBunch: object whose ``target`` array holds the industry class
        label of every sample
    :param pred: array with the predicted cluster label of every sample
    :param class_mapping: sequence with one entry per industry class; the
        first item of an entry is the class label (convertible to int), the
        last item is the name shown in the plot
    :param dbLabel: iterable of the cluster labels to count; the label -1 is
        skipped (presumably the noise label of the clustering - confirm with
        the caller)
    :return: the contingency table, one row per industry class and one column
        per counted cluster label; every count is incremented by 1
    """
    # combine, per sample, the predicted cluster (first column) and the
    # industry class label (last column)
    mergingData = np.hstack(
        (pred.reshape(-1, 1), dataBunch.target.reshape(-1, 1)))

    # regroup the samples by industry class: one array of rows per class
    class_ids = np.array(class_mapping)[..., 0]
    targetStack = [mergingData[mergingData[..., -1] == int(class_id)]
                   for class_id in class_ids]

    # per industry class, count how many of its samples fall into each cluster;
    # the chi-square test cannot handle zero counts, hence the +1
    clusterFrequency = {}
    for rows in targetStack:
        clusterFrequency[rows[..., -1][0]] = [
            (j, np.sum(rows[..., 0] == int(j)) + 1)
            for j in dbLabel if j != -1]
    # print(clusterFrequency)

    # contingency table for the independence analysis: rows are industry
    # classes, columns are the cluster frequencies
    CTableIdx = np.array(list(clusterFrequency.values()))
    CTable = CTableIdx[..., 1]

    # chi-square statistic g, p-value, degrees of freedom and the expected
    # frequencies (same shape as the table). In the original experiment
    # p = 0.00120633349692 < 0.05, i.e. industry class and cluster are related.
    g, p_value, dof, _expected = chi2_contingency(CTable)
    print(g, p_value, dof)

    # --- spatial distribution structure of the POIs ---

    # A - inverse covariance (precision) matrix. From the scikit-learn docs:
    # the matrix inverse of the covariance matrix, often called the precision
    # matrix, is proportional to the partial correlation matrix. It gives the
    # partial independence relationship: if two features are independent
    # conditionally on the others, the corresponding coefficient in the
    # precision matrix will be zero.
    # See http://scikit-learn.org/stable/modules/covariance.html#sparse-inverse-covariance
    # scikit-learn renamed GraphLassoCV to GraphicalLassoCV; use whichever
    # name the installed version provides.
    lasso_cv = getattr(covariance, 'GraphicalLassoCV', None) or covariance.GraphLassoCV
    edge_model = lasso_cv()
    X = CTable.copy().T
    print(X, X.shape)
    X = X / X.std(axis=0)  # standardize every column
    print(X)
    edge_model.fit(X)
    print("******************************************************************")
    print(edge_model.covariance_.shape)

    # B - affinity propagation clusters the covariance matrix by "message
    # passing" between data points; the number of clusters need not be given
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    print(labels)

    # C - manifold learning can handle non-linear structure in the data, see
    # http://scikit-learn.org/stable/modules/manifold.html#locally-linear-embedding
    # Here it reduces the data to two dimensions, used as x/y node coordinates.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
    print(embedding.shape)

    # --- visualize the POI structure ---
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    # see also http://matplotlib.org/examples/pylab_examples/axis_equal_demo.html
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations. A partial correlation
    # measures how closely two variables are related while the influence of
    # all other variables is held constant; with many interrelated variables
    # it reflects the relation between two of them better than the simple
    # correlation coefficient.
    partial_correlations = edge_model.precision_.copy()
    print(partial_correlations.shape)
    # reciprocal square root of the diagonal, used to scale rows and columns
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # strict upper triangle only, so that every pair is considered once
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding; the cluster label
    # sets the node colour. 'nipy_spectral' is the current name of the
    # colormap formerly available as 'spectral'.
    plt.scatter(embedding[0], embedding[1], s=300 * d**2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: np.where with only a condition returns the row and
    # column indices of the entries that are True
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    # any other colormap from http://matplotlib.org/users/colormaps.html works too
    cm = plt.get_cmap('OrRd')
    lc = LineCollection(segments, zorder=0, cmap=cm,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)  # edge width encodes the edge strength
    ax.add_collection(lc)

    # Add a label (the industry class name) to each node. The challenge here
    # is that we want to position the labels to avoid overlap with other labels.
    names = [i[-1] for i in class_mapping]
    for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)):
        # offsets to all nodes; the node's own entry is set to 1 so that it is
        # not picked as its own nearest neighbour
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # horizontal offset to the vertically closest node, and vice versa
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        # put the label on the side facing away from that neighbour
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / float(n_labels)),
                           alpha=.6))

    # np.ptp returns the range (maximum - minimum) of the values; the function
    # form is used because the ndarray method is gone in NumPy 2
    x_range = np.ptp(embedding[0])
    y_range = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_range,
             embedding[0].max() + .10 * x_range)
    plt.ylim(embedding[1].min() - .03 * y_range,
             embedding[1].max() + .03 * y_range)
    plt.show()

    return CTable
def dailyStockClusters():
    """Cluster stocks by their daily price variation and save a graph of them.

    Reads ``symbol;company name`` lines from ``symbols/companyNames.txt`` in
    the current working directory, downloads one year of daily quotes per
    symbol, learns a sparse graphical structure from the daily
    ``close - open`` variation, clusters the stocks with affinity propagation
    and saves the resulting graph to ``pyTAAA_web/Clustered_companyNames.png``.
    Symbols that cannot be downloaded or whose close prices contain NaN are
    reported on stdout and left out.

    Adopted from the example at
    http://scikit-learn.org/0.14/auto_examples/applications/plot_stock_market.html

    Returns the HTML snippet (heading, description and ``<img>`` tag) that
    embeds the saved figure.

    Raises ``ValueError`` when not a single symbol has a complete quote
    history.
    """
    import datetime
    import os
    import numpy as np
    import pandas.io.data as web
    from pandas import DataFrame
    from matplotlib import pylab as pl
    from matplotlib.collections import LineCollection
    from sklearn import cluster, covariance, manifold

    # The quantity that we use is the daily variation in quote price:
    # quotes that are linked tend to co-fluctuate during a day.

    # Retrieve the data from Internet: one year of history up to today
    today = datetime.datetime.now()
    d2 = datetime.datetime(today.year, today.month, today.day)
    try:
        d1 = d2.replace(year=d2.year - 1)
    except ValueError:
        # today is Feb 29 and the previous year has no such day
        d1 = d2.replace(year=d2.year - 1, day=28)

    # input symbols and company names from text file
    companyName_file = os.path.join(os.getcwd(), "symbols", "companyNames.txt")
    with open(companyName_file, "r") as f:
        companyNames = f.read()
    print("\n\n\n")

    # drop blank lines, e.g. the one produced by a trailing newline
    companyNames = [line for line in companyNames.split("\n") if line]

    companySymbolList = []
    companyNameList = []
    symbol_dict = {}
    for name in companyNames:
        # turn the remains of an HTML-escaped '&amp;' back into a plain '&'
        name = name.replace("amp;", "")
        testsymbol, testcompanyName = name.split(";")
        companySymbolList.append(format(testsymbol, 's'))
        companyNameList.append(format(testcompanyName, 's'))
        if testsymbol != "CASH":
            symbol_dict[testsymbol] = format(testcompanyName, 's')
    print(" ... symbol_dict =  " + str(symbol_dict))

    symbols = companySymbolList[:]
    names = companyNameList[:]

    # download the quotes; a symbol that cannot be fetched is reported and skipped
    all_data = {}
    for ticker in symbols:
        try:
            all_data[ticker] = web.get_data_yahoo(ticker, d1, d2)
        except Exception:
            print("Cant find  " + ticker)

    # build the frames once, after all downloads have finished
    qclose = DataFrame({tic: data['Close'] for tic, data in all_data.items()})
    qopen = DataFrame({tic: data['Open'] for tic, data in all_data.items()})

    # keep only the symbols with a complete close-price history
    symbols_edit = []
    names_edit = []
    for ticker, name in zip(symbols, names):
        if ticker not in qclose or ticker in symbols_edit:
            # never downloaded, already removed, or listed twice
            continue
        if np.isnan(np.array(qclose[ticker])).any():
            print(ticker + "  nans found, ticker removed")
            del qclose[ticker]
            del qopen[ticker]
        else:
            symbols_edit.append(ticker)
            names_edit.append(name)

    if not symbols_edit:
        raise ValueError("no symbol with a complete quote history could be loaded")

    # Put the columns into the order of symbols_edit / names_edit, so that the
    # i-th cluster label and the i-th embedded node belong to names_edit[i].
    qclose = qclose[symbols_edit]
    qopen = qopen[symbols_edit]

    # The daily variations of the quotes are what carry most information
    variation = qclose - qopen
    variation[np.isnan(variation)] = 0.

    ###############################################################################
    # Learn a graphical structure from the correlations
    # scikit-learn renamed GraphLassoCV to GraphicalLassoCV; use whichever
    # name the installed version provides.
    lasso_cv = getattr(covariance, 'GraphicalLassoCV', None) or covariance.GraphLassoCV
    edge_model = lasso_cv()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy()
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print("Cluster " + str(i) + ":")
        for j in range(len(labels)):
            if labels[j] == i:
                print("   ... " + names_edit[j])

    figure7path = 'Clustered_companyNames.png'  # re-set to name without full path
    figure7_htmlText = "\n<br><h3>Daily stock clustering analyis. Based on one year performance correlations.</h3>\n"
    figure7_htmlText = figure7_htmlText + "\nClustering based on daily variation in Nasdaq 100 quotes.\n"
    figure7_htmlText = figure7_htmlText + '''<br><img src="'''+figure7path+'''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    pl.figure(1, facecolor='w', figsize=(10, 8))
    pl.clf()
    ax = pl.axes([0., 0., 1., 1.])
    pl.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    # scale rows and columns by 1 / sqrt(diagonal)
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # strict upper triangle only, so that every pair is considered once
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding; 'nipy_spectral'
    # is the current name of the colormap formerly available as 'spectral'
    pl.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
               cmap=pl.cm.nipy_spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=pl.cm.hot_r,
                        norm=pl.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    # names_edit (not the unfiltered name list) matches labels and embedding.
    for index, (name, label, (x, y)) in enumerate(
            zip(names_edit, labels, embedding.T)):

        # offsets to all nodes; the node's own entry is set to 1 so that it is
        # not picked as its own nearest neighbour
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # horizontal offset to the vertically closest node, and vice versa
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        # put the label on the side facing away from that neighbour
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        pl.text(x, y, name, size=10,
                horizontalalignment=horizontalalignment,
                verticalalignment=verticalalignment,
                bbox=dict(facecolor='w',
                          edgecolor=pl.cm.nipy_spectral(label / float(n_labels)),
                          alpha=.6))

    # np.ptp is the value range (maximum - minimum)
    x_range = np.ptp(embedding[0])
    y_range = np.ptp(embedding[1])
    pl.xlim(embedding[0].min() - .15 * x_range,
            embedding[0].max() + .10 * x_range)
    pl.ylim(embedding[1].min() - .03 * y_range,
            embedding[1].max() + .03 * y_range)

    pl.savefig(os.path.join(os.getcwd(), "pyTAAA_web", "Clustered_companyNames.png"), format='png')

    return figure7_htmlText
def StockMarketOLD():
    """Cluster a fixed basket of stocks by daily price variation and plot it.

    Steps, in order:

    1. Download daily quotes from 2005-01-01 to 2009-12-31 for every symbol
       in ``symbol_dict`` with ``quotes_historical_yahoo`` (one request per
       symbol); print each symbol, and its row count when that differs
       from 1259.
    2. Use close minus open as the daily variation, write the rounded
       covariance matrix to ``covariancetable.tex`` and print its maximum.
    3. Standardize the variations, run ``graphical_lasso`` on them and
       cluster the returned covariance with affinity propagation.
    4. Embed the stocks in 2D with locally linear embedding and draw the
       nodes, the partial-correlation edges and one text label per stock.
    5. Save the figure to ``Graphs/StockCluster.pdf`` and
       ``Graphs/StockCluster.svg`` and show it.

    Takes no arguments and returns None; every result is printed, written
    to disk or displayed.
    """
    ###########################################################################
    # Retrieve the data from Internet

    # Time period covered by the analysis
    d1 = datetime.datetime(2005, 1, 1)
    d2 = datetime.datetime(2009, 12, 31)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbol_dict = {
        'TOT': 'Total',
        'XOM': 'Exxon',
        'CVX': 'Chevron',
        'COP': 'ConocoPhillips',
        'VLO': 'Valero Energy',
        'MSFT': 'Microsoft',
        'IBM': 'IBM',
        'TWX': 'Time Warner',
        'CMCSA': 'Comcast',
        #'CVC': 'Cablevision',
        #'YHOO': 'Yahoo',
        #'DELL': 'Dell',
        'HPQ': 'HP',
        'AMZN': 'Amazon',
        'TM': 'Toyota',
        'CAJ': 'Canon',
        'MTU': 'Mitsubishi',
        'SNE': 'Sony',
        #'F': 'Ford',
        'HMC': 'Honda',
        #'NAV': 'Navistar',
        'NOC': 'Northrop Grumman',
        'BA': 'Boeing',
        'KO': 'Coca Cola',
        'MMM': '3M',
        'MCD': 'Mc Donalds',
        #'PEP': 'Pepsi',
        'MDLZ': 'Kraft Foods',
        'K': 'Kellogg',
        'UN': 'Unilever',
        'MAR': 'Marriott',
        'PG': 'Procter Gamble',
        'CL': 'Colgate-Palmolive',
        'GE': 'General Electrics',
        'WFC': 'Wells Fargo',
        'JPM': 'JPMorgan Chase',
        #'AIG': 'AIG',
        'AXP': 'American Express',
        'BAC': 'Bank of America',
        'GS': 'Goldman Sachs',
        'AAPL': 'Apple',
        'SAP': 'SAP',
        'CSCO': 'Cisco',
        'TXN': 'Texas Instruments',
        'XRX': 'Xerox',
        #'LMT': 'Lookheed Martin',
        'WMT': 'Wal-Mart',
        'WBA': 'Walgreen',
        'HD': 'Home Depot',
        'GSK': 'GlaxoSmithKline',
        'PFE': 'Pfizer',
        'SNY': 'Sanofi-Aventis',
        'NVS': 'Novartis',
        'KMB': 'Kimberly-Clark',
        'R': 'Ryder',
        'GD': 'General Dynamics',
        'RTN': 'Raytheon',
        'CVS': 'CVS',
        'CAT': 'Caterpillar',
        'DD': 'DuPont de Nemours',
        #'GM': 'General Motors',
        #'GOOG' : 'Google',
        'ORCL': 'Oracle',
        'NVO': 'Novo Nordisk',
        'LLY': 'Eli Lilly and Company',
        #'FB':'Facebook',
        'MRK': 'Merck Co',
    }

    # Alternative symbol set (".CO" tickers), kept for reference only:
    # symbol_dict = {
    #     'Danske.CO':'Danske Bank', 'Maersk-B.CO':'Maersk', 'DSV.CO':'DSV',
    #     'FLS.CO':'FLS', 'Gen.CO':'Genmab', 'TDC.CO':'TDC',
    #     'CARL-B.CO':'Carlsberg', 'CHR.CO':'Chr Hansen',
    #     'COLO-B.CO':'Coloplast', 'GN.CO':'GN Store Nord',
    #     'NDA-DKK.co':'Nordea', 'Novo-B.co':'Novo Nordisk',
    #     'NZYM-B.CO':'Novozymes', 'PNDORA.CO':'Pandora', 'Tryg.co':'Tryg',
    #     'VWS.CO':'Vestas', 'WDH.CO':'William Demant', 'G4s.co':'G4S',
    #     'JYSK.CO':'Jyske Bank', 'KBHL.CO':'Kobenhavns Lufthavne',
    #     'RBREW.CO':'Royal Unibrew', 'ROCK-B.CO':'Rockwool',
    #     'SYDB.CO':'Sydbank', 'TOP.CO':'Topdanmark',
    #     'AURI-B.CO':'Auriga', 'Bava.CO':'Bavarian Nordic',
    #     'BO.CO':'Bang Olufsen', 'DFDS.CO':'DFDS', 'DNORD.CO':'DS Norden',
    #     'GES.CO':'Greentech', 'IC.CO':'IC Group', 'JDAN.CO':'Jeudan',
    #     'NKT.CO':'NKT', 'NORDJB.CO':'Nordjyske Bank',
    #     'PAAL-B.CO':'Per Aarslef', 'RILBA.CO':'Ringkobing Landbobank',
    #     'SAS-DKK.CO':'SAS', 'SCHO.CO':'Schouw Co.', 'SIM.CO':'SimCorp',
    #     'Solar-B.co':'Solar B', 'SPNO.CO':'Spar Nord', 'TIV.CO':'Tivoli',
    #     'UIE.CO':'UIE', 'VELO.CO':'Veloxis', 'ZEAL.CO':'Zealand Pharma'
    # }
    # Entries that were already disabled inside that alternative set:
    #     'ALMB.CO':'Alm Brand', 'JUTBK.CO':'Jutlander Bank',
    #     'MATAS.CO':'Matas', 'NNIT.CO':'NNIT', 'ONXEO.CO':'Onxeo',
    #     'OSSR.CO':'Ossur'

    symbols, names = np.array(list(symbol_dict.items())).T

    # Download each symbol exactly once and keep both price columns, instead
    # of issuing a separate request for the length check, the open prices
    # and the close prices.
    # presumably q[5] / q[6] are the open / close fields of a quote record --
    # confirm against quotes_historical_yahoo
    expected_rows = 1259
    open_columns = []
    close_columns = []
    for symbol in symbols:
        print(symbol)
        quotes = quotes_historical_yahoo(symbol, d1, d2, True, False)
        opens = [q[5] for q in quotes]
        closes = [q[6] for q in quotes]
        if len(opens) != expected_rows:
            # Report symbols whose history length differs from the rest
            print('%s %s' % (symbol, len(opens)))
        open_columns.append(opens)
        close_columns.append(closes)

    open_prices = pd.DataFrame(np.array(open_columns).T)
    close_prices = pd.DataFrame(np.array(close_columns).T)

    # The daily variations of the quotes are what carry most information
    variation = np.array(close_prices - open_prices)

    ###########################################################################
    # Learn a graphical structure from the correlations
    #edge_model = covariance.GraphLassoCV()

    X = variation.copy()
    cov_table = np.round(np.cov(X.T), 3)
    pd.DataFrame(cov_table, columns=symbols,
                 index=symbols).to_latex('covariancetable.tex')
    print(np.max(cov_table))

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X /= X.std(axis=0)

    # NOTE(review): X here is the standardized sample matrix. If this is
    # sklearn.covariance.graphical_lasso, its first argument is documented as
    # an empirical covariance matrix -- confirm which function is in scope.
    covariance_, precision_ = graphical_lasso(X, 0.3)
    print(pd.DataFrame(precision_))
    #edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(covariance_)
    n_labels = labels.max()

    # Built once: the same captions are printed now and annotated on the plot
    cluster_captions = [
        'Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i]))
        for i in range(n_labels + 1)
    ]
    for caption in cluster_captions:
        print(caption)

    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best position
    # of the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(20, 16))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    plt.annotate('From %s to %s' % (d1.strftime('%Y-%m-%d'),
                                    d2.strftime('%Y-%m-%d')),
                 xy=(0.11, -0.37), size=25)
    print(X.shape)
    for i, caption in enumerate(cluster_captions):
        plt.annotate(caption, xy=(-0.43, 0.02 - i * 0.02), size=18)

    # Display a graph of the partial correlations
    #partial_correlations = edge_model.precision_.copy()
    partial_correlations = precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # Only the strict upper triangle is kept so each edge is drawn once
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=200 * d**2, c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments, zorder=0, cmap=plt.get_cmap('Greys'),
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        # Offsets to every other node; the node's own slot is set to 1 so
        # it is not picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=22,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / float(n_labels)),
                           alpha=.6))

    plt.xlim(embedding[0].min() - .25 * embedding[0].ptp(),
             embedding[0].max() + .20 * embedding[0].ptp(),)
    plt.ylim(embedding[1].min() - .20 * embedding[1].ptp(),
             embedding[1].max() + .20 * embedding[1].ptp())

    plt.savefig('Graphs/StockCluster.pdf', bbox_inches='tight')
    plt.savefig('Graphs/StockCluster.svg', bbox_inches='tight')
    plt.show()
# Plot the nodes using the coordinates of our embedding plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels, cmap=plt.cm.spectral) # Plot the edges start_idx, end_idx = np.where(non_zero) # a sequence of (*line0*, *line1*, *line2*), where:: # linen = (x0, y0), (x1, y1), ... (xm, ym) segments = [[embedding[:, start], embedding[:, stop]] for start, stop in zip(start_idx, end_idx)] values = np.abs(partial_correlations[non_zero]) lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r, norm=plt.Normalize(0, .7 * values.max())) lc.set_array(values) lc.set_linewidths(15 * values) ax.add_collection(lc) # Add a label to each node. The challenge here is that we want to # position the labels to avoid overlap with other labels for index, (name, label, (x, y)) in enumerate( zip(names, labels, embedding.T)): dx = x - embedding[0] dx[index] = 1 dy = y - embedding[1] dy[index] = 1 this_dx = dx[np.argmin(np.abs(dy))] this_dy = dy[np.argmin(np.abs(dx))] if this_dx > 0: horizontalalignment = 'left'
class SpiroGraph(object):
    '''
    Spirograph drawer with matplotlib slider widgets to change parameters.

    The curve is held in a LineCollection on the main axes. Sliders and
    radio buttons placed underneath it control the parameters:

    R: The radius of the big circle
    r: The radius of the small circle which rolls along the inside of the
       bigger circle
    p: distance from centre of smaller circle to point in the circle where
       the pen hole is.
    tmax: the angle through which the smaller circle is rotated to draw the
          spirograph
    tstep: how often matplotlib plots a point
    a, b, c: parameters of the linewidth equation.

    Constructing an instance builds the figure and, through ``run``, calls
    ``plt.show()``.
    '''

    # kwargs for each of the matplotlib sliders; the order matters because
    # the update methods pick sliders by position (0-4 geometry, 5 colour,
    # 6-8 linewidth parameters)
    slider_kwargs = (
        {'label': 't_max', 'valmin': np.pi, 'valmax': 200 * np.pi,
         'valinit': tmax0, 'valfmt': PiString()},
        {'label': 't_step', 'valmin': 0.01, 'valmax': 10, 'valinit': tstep0},
        {'label': 'R', 'valmin': 1, 'valmax': 200, 'valinit': R0},
        {'label': 'r', 'valmin': 1, 'valmax': 200, 'valinit': r0},
        {'label': 'p', 'valmin': 1, 'valmax': 200, 'valinit': p0},
        {'label': 'colour', 'valmin': 0, 'valmax': 1, 'valinit': 1},
        {'label': 'width_a', 'valmin': 0.5, 'valmax': 10, 'valinit': 1},
        {'label': 'width_b', 'valmin': 0, 'valmax': 10, 'valinit': 0},
        {'label': 'width_c', 'valmin': 0, 'valmax': 10, 'valinit': 0.5})

    # kwargs for the two radio button groups: background colour, then
    # solid/variable line colour
    rbutton_kwargs = (
        {'labels': ('black', 'white'), 'activecolor': 'white', 'active': 0},
        {'labels': ('solid', 'variable'), 'activecolor': 'white',
         'active': 0})

    def __init__(self, colormap, figsize=(7, 10)):
        '''
        Build the figure, the widgets and the initial curve, then start the
        interactive session via ``run``.

        colormap: colormap (name) handed to ScalarMappable for line colours
        figsize: figure size, also written to plt.rcParams['figure.figsize']
        '''
        self.colormap_name = colormap
        self.variable_color = False
        # Use ScalarMappable to map full colormap to range 0 - 1
        self.colormap = ScalarMappable(cmap=colormap)
        self.colormap.set_clim(0, 1)

        # set up main axis onto which to draw spirograph
        self.figsize = figsize
        plt.rcParams['figure.figsize'] = figsize
        self.fig, self.mainax = plt.subplots()
        plt.subplots_adjust(bottom=0.3)
        title = self.mainax.set_title('Spirograph Drawer!',
                                      size=20, color='white')
        self.text = [title, ]

        # set up slider axes: exactly one per slider, stacked upwards from
        # y=0.05 in steps of 0.025. The count comes from slider_kwargs rather
        # than a float-step np.arange, whose length depends on rounding.
        self.slider_axes = [
            plt.axes([0.25, 0.05 + 0.025 * i, 0.65, 0.015])
            for i in range(len(self.slider_kwargs))]
        # same again for radio buttons (from y=0.02 in steps of 0.15)
        self.rbutton_axes = [
            plt.axes([0.025, 0.02 + 0.15 * i, 0.1, 0.15])
            for i in range(len(self.rbutton_kwargs))]

        # use log scale for tstep slider
        self.slider_axes[1].set_xscale('log')

        # turn off frame, ticks and tick labels for all axes
        for ax in chain(self.slider_axes, self.rbutton_axes, [self.mainax, ]):
            ax.axis('off')

        # use axes and kwargs to create list of sliders/rbuttons
        self.sliders = [Slider(ax, **kwargs) for ax, kwargs
                        in zip(self.slider_axes, self.slider_kwargs)]
        self.rbuttons = [RadioButtons(ax, **kwargs) for ax, kwargs
                         in zip(self.rbutton_axes, self.rbutton_kwargs)]
        self.update_figcolors()

        # set up initial line
        self.t = np.arange(0, tmax0, tstep0)
        x, y = spiro_linefunc(self.t, R0, r0, p0)
        self.linecollection = LineCollection(
            segments(x, y),
            linewidths=spiro_linewidths(self.t, a0, b0, c0),
            color=self.colormap.to_rgba(col0))
        self.mainax.add_collection(self.linecollection)

        # creates the plot and connects sliders to various update functions
        self.run()

    def update_figcolors(self, bgcolor='black'):
        '''
        function run by background color radiobutton. Sets all labels,
        text, and sliders to foreground color, all axes to background color
        '''
        fgcolor = 'white' if bgcolor == 'black' else 'black'
        self.fig.set_facecolor(bgcolor)
        self.mainax.set_axis_bgcolor(bgcolor)
        for ax in chain(self.slider_axes, self.rbutton_axes):
            ax.set_axis_bgcolor(bgcolor)

        # set fgcolor elements to black or white, mostly elements of sliders
        for item in chain(map(attrgetter('label'), self.sliders),
                          map(attrgetter('valtext'), self.sliders),
                          map(attrgetter('poly'), self.sliders),
                          self.text,
                          *map(attrgetter('labels'), self.rbuttons)):
            item.set_color(fgcolor)

        self.update_radiobutton_colors()
        plt.draw()

    def _refresh_linewidths(self):
        ''' recompute the linewidths from the a, b, c sliders; no redraw '''
        a, b, c = (s.val for s in self.sliders[6:])
        self.linecollection.set_linewidths(spiro_linewidths(self.t, a, b, c))

    def _refresh_linecolors(self):
        ''' recompute the line colors from the color slider; no redraw '''
        # get current color value (a value between 1 and 0)
        col_val = self.sliders[5].val
        if not self.variable_color:
            # if solid color, convert color value to rgb and set the color
            self.linecollection.set_color(self.colormap.to_rgba(col_val))
            return

        # create values between 0 and 1 for each line segment. When t_step
        # exceeds t_max, self.t is just [0.] and dividing by its maximum
        # would give 0/0 = nan, so fall back to a constant ramp.
        t_peak = self.t.max() if self.t.size else 0
        if t_peak > 0:
            colors = self.t / t_peak + col_val
        else:
            colors = np.zeros(self.t.shape) + col_val
        # use color value to roll colors
        colors[colors > 1] -= 1
        # one vectorised lookup instead of a to_rgba call per element
        self.linecollection.set_color(self.colormap.to_rgba(colors))

    def update_linewidths(self, *args):
        '''
        function run by a, b and c parameter sliders. Sets width of each
        line in linecollection via spiro_linewidths
        '''
        self._refresh_linewidths()
        plt.draw()

    def update_linecolors(self, *args):
        '''
        function run by color slider and indirectly by variable/solid color
        radiobutton. Updates colors of each line in linecollection using the
        set colormap.
        '''
        self._refresh_linecolors()
        plt.draw()

    def update_lineverts(self, *args):
        '''
        function run by R, r, p, tmax and tstep sliders to update line
        vertices
        '''
        tmax, tstep, R, r, p = (s.val for s in self.sliders[:5])
        self.t = np.arange(0, tmax, tstep)
        x, y = spiro_linefunc(self.t, R, r, p)
        self.linecollection.set_verts(segments(x, y))

        # linewidths (and colors in variable mode) are computed from self.t,
        # so they must follow the new t; otherwise the old arrays stay
        # attached to a collection with a different number of segments
        self._refresh_linewidths()
        if self.variable_color:
            self._refresh_linecolors()

        # change axis limits to pad new line nicely
        self.mainax.set(xlim=(min(x) - 5, max(x) + 5),
                        ylim=(min(y) - 5, max(y) + 5))
        plt.draw()

    def update_linecolor_setting(self, val):
        '''
        function run by solid/variable colour radiobutton, alters
        variable_color attribute then calls update_linecolors
        '''
        if val == 'variable':
            self.variable_color = True
        elif val == 'solid':
            self.variable_color = False

        # need to update radiobutton colors here.
        self.update_radiobutton_colors()
        self.update_linecolors()

    def update_radiobutton_colors(self):
        '''
        makes radiobutton colors correct even on a changing axis background
        '''
        bgcolor = self.rbuttons[0].value_selected
        fgcolor = 'white' if bgcolor == 'black' else 'black'
        for i, b in enumerate(self.rbuttons):
            # find out index of the active button
            active_idx = self.rbutton_kwargs[i]['labels'].index(
                b.value_selected)
            # set button colors accordingly; every group has exactly two
            # buttons, so the inactive one is at index 1 - active_idx
            b.circles[1 - active_idx].set_color(bgcolor)
            b.circles[active_idx].set_color(fgcolor)

    def run(self):
        '''
        connect the sliders and radiobuttons to their update functions and
        show the figure
        '''
        verts_func = self.update_lineverts
        colors_func = self.update_linecolors
        widths_func = self.update_linewidths
        # create iterable of slider funcs to zip with sliders
        slider_update_funcs = chain(repeat(verts_func, 5),
                                    [colors_func, ],
                                    repeat(widths_func, 3))
        # set slider on_changed functions
        for s, f in zip(self.sliders, slider_update_funcs):
            s.on_changed(f)

        self.rbuttons[0].on_clicked(self.update_figcolors)
        self.rbuttons[1].on_clicked(self.update_linecolor_setting)
        plt.show()