def nodes_graph_custom(self, datacustom, colnum=0, show=False, printout=True, path='./', name=''):
    """Plot a 2D map with hexagonal nodes coloured by caller-supplied values.

    When ``self.colorEx`` is set, the first three weights of every node are
    used as its colour and ``datacustom`` is ignored. Otherwise the colours
    come from ``datacustom`` and a colorbar is added.

    Args:
        datacustom (np.array): Values used to colour the nodes. A 1-D array
            is used as is; otherwise column ``colnum`` is selected.
        colnum (int): Column of ``datacustom`` to plot (also used in the
            colorbar label).
        show (bool, optional): Choose to display the plot.
        printout (bool, optional): Choose to save the plot to a file.
        path (str, optional): Directory where the image is written.
        name (str, optional): Suffix appended to the title and to the
            output file name.
    """
    centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

    # Figure size is 100 px per node at 72 dpi.
    widthP = 100
    dpi = 72
    xInch = self.netWidth * widthP / dpi
    yInch = self.netHeight * widthP / dpi
    fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

    if self.colorEx:
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24 (it was only ever an alias for float).
        cols = [[float(node.weights[i]) for i in range(3)]
                for node in self.nodeList]
        ax = hx.plot_hex(fig, centers, cols)
        ax.set_title('Node Grid w Color Features', size=80)
        printName = os.path.join(path, 'nodesColors.png')
    else:
        if len(datacustom.shape) == 1:
            cols = datacustom
        else:
            cols = datacustom[:, colnum]
        ax = hx.plot_hex(fig, centers, cols)
        ax.set_title('Node Grid feature' + name, size=80)

        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.0)
        colorbar = plt.colorbar(ax.collections[0], cax=cax)
        colorbar.set_label('Feature #' + str(colnum) + ' value', size=80, labelpad=50)
        colorbar.ax.tick_params(labelsize=60)
        # Creating the colorbar axes changes the current axes; switch back.
        plt.sca(ax)
        printName = os.path.join(path, 'nodesFeatureCustom_' + name + '.png')

    if printout:
        plt.savefig(printName, bbox_inches='tight', dpi=dpi)
    if show:
        plt.show()
    # NOTE: as in the original, the figure is cleared only when it was both
    # shown and saved.
    if show and printout:
        plt.clf()
def nodes_graph(self, colnum=0, show=False, printout=True, path='./', colname=None, cbar=True, cmap_type="Greys"):
    """Plot a 2D map with hexagonal nodes and weights values.

    When ``self.colorEx`` is set, the first three weights of every node are
    used as its colour; otherwise weight ``colnum`` is shown as a colormap.

    Args:
        colnum (int): The index of the weight that will be shown as colormap.
        show (bool, optional): Choose to display the plot.
        printout (bool, optional): Choose to save the plot to a file.
        path (str, optional): Directory where the image is written.
        colname (str, optional): Name of the column to be shown on the map;
            falls back to ``str(colnum)`` when empty or None.
        cbar (bool, optional): Choose to add a colorbar (single-feature
            plot only).
        cmap_type (str, optional): Forwarded to ``hx.plot_hex`` as
            ``cmap_type``.
    """
    if not colname:
        colname = str(colnum)

    centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

    # Figure size is 100 px per node at 72 dpi.
    widthP = 100
    dpi = 72
    xInch = self.netWidth * widthP / dpi
    yInch = self.netHeight * widthP / dpi
    fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

    if self.colorEx:
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24 (it was only ever an alias for float).
        cols = [[float(node.weights[i]) for i in range(3)]
                for node in self.nodeList]
        ax = hx.plot_hex(fig, centers, cols, cmap_type=cmap_type)
        ax.set_title('Node Grid w Color Features', size=80)
        printName = os.path.join(path, 'nodesColors.png')
    else:
        cols = [node.weights[colnum] for node in self.nodeList]
        ax = hx.plot_hex(fig, centers, cols, cmap_type=cmap_type)
        ax.set_title('Node Grid w Feature ' + colname, size=80)

        divider = make_axes_locatable(ax)
        if cbar:
            cax = divider.append_axes("right", size="5%", pad=0.0)
            # Separate local name so the boolean ``cbar`` argument is not
            # rebound to the Colorbar object.
            colorbar = plt.colorbar(ax.collections[0], cax=cax)
            colorbar.set_label(colname, size=80, labelpad=50)
            colorbar.ax.tick_params(labelsize=60)
        # Make the hexagon axes current again after any colorbar creation.
        plt.sca(ax)
        printName = os.path.join(path, 'nodesFeature_' + str(colnum) + '.png')

    if printout:
        plt.savefig(printName, bbox_inches='tight', dpi=dpi)
    if show:
        plt.show()
    # NOTE: as in the original, the figure is cleared only when it was both
    # shown and saved.
    if show and printout:
        plt.clf()
def diff_graph(self, show=False, printout=True, returns=False, path='./'):
    """Plot a 2D map with nodes and weights difference among neighbouring nodes.

    For every node, the value plotted is the sum of ``node.get_distance`` to
    the weights of each other node whose ``get_nodeDistance`` is at most
    1.001.

    Args:
        show (bool, optional): Choose to display the plot.
        printout (bool, optional): Choose to save the plot to a file.
        returns (bool, optional): Choose to return the difference value.
        path (str, optional): Directory where 'nodesDifference.png' is
            written.

    Returns:
        (list): difference value for each node when ``returns`` is set,
        otherwise None.
    """
    # One pass per node: pick the neighbours and accumulate their weight
    # distance directly. The original built a list of neighbour lists and
    # then rebound its name inside the loop that iterated over it.
    diffs = [
        sum(
            node.get_distance(other.weights)
            for other in self.nodeList
            if node != other and node.get_nodeDistance(other) <= 1.001
        )
        for node in self.nodeList
    ]

    if show or printout:
        centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

        # Figure size is 100 px per node at 72 dpi.
        widthP = 100
        dpi = 72
        xInch = self.netWidth * widthP / dpi
        yInch = self.netHeight * widthP / dpi
        fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

        ax = hx.plot_hex(fig, centers, diffs)
        ax.set_title('Nodes Grid w Weights Difference', size=80)

        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.0)
        cbar = plt.colorbar(ax.collections[0], cax=cax)
        cbar.set_label('Weights Difference', size=80, labelpad=50)
        cbar.ax.tick_params(labelsize=60)
        # Creating the colorbar axes changes the current axes; switch back.
        plt.sca(ax)

        printName = os.path.join(path, 'nodesDifference.png')
        if printout:
            plt.savefig(printName, bbox_inches='tight', dpi=dpi)
        if show:
            plt.show()
        # NOTE: as in the original, the figure is cleared only when it was
        # both shown and saved.
        if show and printout:
            plt.clf()

    if returns:
        return diffs
def show_exogenous(self, array, exogenous, colname=None):
    """Plot the per-node mean of an exogenous variable on the grid.

    Every row of ``array`` is assigned to the node returned first by
    ``self.find_bmu``; the matching ``exogenous`` values are averaged per
    node position (positions are grouped after rounding to 2 decimals) and
    drawn as hexagons. Only nodes that received at least one datapoint are
    drawn.

    Args:
        array (np.array): An array containing datapoints to be mapped.
        exogenous (np.array): The value of the variable to be plotted, one
            entry per row of ``array``.
        colname (str, optional): Used as plot title and in the output file
            name ('SOM_<colname>.png'); defaults to 'Exogenous variable' /
            'SOM_exogenous.png'.

    Returns:
        None. The figure is saved and/or shown according to
        ``self.printout`` / ``self.show`` and then cleared.
    """
    # key -> [position, running sum, count]
    # The key is the rounded position itself; the previous numeric hash
    # (x * 1000 + y) could map distinct positions to the same value.
    per_node = {}
    for row, ex in zip(array, exogenous):
        point = self.find_bmu(row)[0].pos
        key = (round(point[0], 2), round(point[1], 2))
        entry = per_node.get(key)
        if entry is None:
            per_node[key] = [np.array(point), ex, 1]
        else:
            # Rebind instead of ``+=`` so an array-valued ``ex`` that is a
            # view into the caller's data is never modified in place.
            entry[1] = entry[1] + ex
            entry[2] += 1

    points = [point for point, _, _ in per_node.values()]
    values = [total / count for _, total, count in per_node.values()]

    printName = os.path.join(
        self.path,
        'SOM_%s.png' % ('exogenous' if colname is None else colname))

    fig = self.get_fig()
    # Fewer than 8 distinct values: treat the variable as a class label and
    # use a qualitative colormap.
    cmap = 'Set3' if len(np.unique(exogenous)) < 8 else self.cmap
    ax = hx.plot_hex(fig, points, values, cmap=cmap, radius=self.radius)
    ax.set_title('Exogenous variable' if colname is None else colname,
                 size=self.text_size)

    if self.bar:
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.0)
        cbar = plt.colorbar(ax.collections[0], cax=cax)
        cbar.ax.tick_params(labelsize=self.text_size)

    # Make the hexagon axes current before flipping the y axis.
    plt.sca(ax)
    plt.gca().invert_yaxis()
    ax.axis('off')

    if self.printout:
        plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
    if self.show:
        plt.show()
    plt.clf()
def diff_graph(self, colors=None, return_ax_only=False):
    """Plot a 2D map with nodes and weights difference among neighbouring nodes.

    The values and node centers come from ``self.get_diffs()``. Figure size,
    colormap, hexagon radius, text size, colorbar, output directory and the
    save/show switches are read from attributes on ``self``.

    Args:
        colors (list, optional): Forwarded to ``hx.plot_hex`` as ``edges``.
            Defaults to an empty list.
        return_ax_only (bool, optional): When set, skip saving, showing and
            clearing the figure and only return the axes.

    Returns:
        The axes object returned by ``hx.plot_hex``.
    """
    # A fresh list per call avoids sharing one mutable default object.
    if colors is None:
        colors = []

    diffs, _, centers = self.get_diffs()

    fig = self.get_fig()
    ax = hx.plot_hex(fig, centers, diffs, edges=colors,
                     radius=self.radius, cmap=self.cmap)
    ax.set_title('Weights Difference', size=self.text_size)

    if self.bar:
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.0)
        cbar = plt.colorbar(ax.collections[0], cax=cax)
        cbar.ax.tick_params(labelsize=self.text_size)
    # Make the hexagon axes current again after any colorbar creation.
    plt.sca(ax)

    printName = os.path.join(self.path, 'SOM_difference.png')

    if not return_ax_only:
        if self.printout:
            plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
        if self.show:
            plt.show()
        # NOTE: as in the original, the figure is cleared only when it was
        # both shown and saved.
        if self.show and self.printout:
            plt.clf()

    return ax
def find_clusters(self, array, type='qthresh', cutoff=5, quant=0.2, percent=0.02, numcl=8,
                  savefile=True, filetype='dat'):
    """Cluster the data in a given array according to the SOM trained map.

    The datapoints are first projected onto the map with ``self.project``
    and the resulting positions are clustered. The clusters can also be
    plotted, depending on ``self.printout`` / ``self.show``.

    Args:
        array (np.array): An array containing datapoints to be clustered.
        type (str, optional): The type of clustering to be applied:
            'qthresh' (quality threshold), 'dpeak' (density peak), or one of
            the sklearn algorithms 'MeanShift', 'DBSCAN', 'KMeans'. The
            sklearn algorithms do not support PBC.
        cutoff (float, optional): Cutoff for the quality threshold algorithm.
            This also doubles as ``eps`` for DBSCAN.
        quant (float, optional): Quantile used to estimate the bandwidth of
            the mean shift algorithm.
        percent (float, optional): Documented as the percentile defining the
            reference distance in density peak clustering.
            NOTE(review): this value is currently not forwarded to
            ``dp.densityPeak``.
        numcl (int, optional): The number of clusters for K-Means clustering.
        savefile (bool, optional): Choose to save the resulting clusters in
            a text file 'SOM_<type>_clusters.<filetype>' under ``self.path``.
        filetype (string, optional): Format of the file where the clusters
            will be saved ('csv' uses commas, anything else uses spaces).

    Returns:
        None. The results are stored on the instance as ``self.clusters``
        (nested list of indices into ``array``), ``self.xc``, ``self.yc``
        and ``self.color``.

    Raises:
        SystemExit: If ``type`` is not a known clustering algorithm.
    """
    # Find the bmu of every datapoint without producing any graph.
    bmuList = self.project(array, return_only=True)
    clusters = []

    if type == 'qthresh':
        # Cluster according to the quality threshold algorithm (slow!).
        clusters = qt.qualityThreshold(bmuList, cutoff, self.PBC,
                                       self.netHeight, self.netWidth)
    elif type == 'dpeak':
        # Cluster according to the density peak algorithm.
        clusters = dp.densityPeak(bmuList, PBC=self.PBC,
                                  netHeight=self.netHeight,
                                  netWidth=self.netWidth)
    elif type in ['MeanShift', 'DBSCAN', 'KMeans']:
        # Cluster according to algorithms implemented in sklearn.
        if self.PBC:
            print(
                "Warning: Only Quality Threshold and Density Peak clustering work with PBC"
            )
        try:
            if type == 'MeanShift':
                bandwidth = cluster.estimate_bandwidth(np.asarray(bmuList),
                                                       quantile=quant,
                                                       n_samples=500)
                cl = cluster.MeanShift(bandwidth=bandwidth,
                                       bin_seeding=True).fit(bmuList)
            if type == 'DBSCAN':
                cl = cluster.DBSCAN(eps=cutoff, min_samples=5).fit(bmuList)
            if type == 'KMeans':
                cl = cluster.KMeans(n_clusters=numcl).fit(bmuList)

            # Group datapoint indices by the label they were assigned.
            clLabs = cl.labels_
            for label in np.unique(clLabs):
                clusters.append(
                    [idx for idx, lab in zip(range(len(bmuList)), clLabs)
                     if lab == label])
        except Exception:
            # Report the exception type, then let it propagate unchanged.
            print(('Unexpected error: ', sys.exc_info()[0]))
            raise
    else:
        sys.exit("Error: unkown clustering algorithm " + type)

    if savefile:
        separator = ',' if filetype == 'csv' else ' '
        out_path = os.path.join(self.path,
                                'SOM_' + type + '_clusters.' + filetype)
        # The context manager closes the file even if a write fails.
        with open(out_path, 'w') as out:
            for members in clusters:
                out.write(''.join(str(idx) + separator for idx in members))
                out.write('\n')

    xc, yc, color = [], [], []
    n_clusters = len(clusters)
    for i, members in enumerate(clusters):
        for c in members:
            # again, invert y and x to be consistent with the previous maps
            xc.append(bmuList[int(c)][0])
            yc.append(self.netHeight - bmuList[int(c)][1])
            color.append(i / n_clusters)

    if self.printout or self.show:
        printName = os.path.join(self.path, 'SOM_' + type + '_clusters.png')

        fig = self.get_fig()
        ax = hx.plot_hex(fig, list(zip(xc, yc)), color, cmap='Set3',
                         radius=self.radius)
        ax.set_title('Clusters', size=self.text_size)
        plt.sca(ax)
        plt.gca().invert_yaxis()
        ax.axis('off')

        if self.printout:
            plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
        if self.show:
            plt.show()
        plt.clf()

    self.clusters, self.xc, self.yc, self.color = clusters, xc, yc, color
def nodes_graph(self, colnum=0, colname=None, colors=None, return_ax_only=False):
    """Plot a 2D map with hexagonal nodes and weights values.

    When ``self.colorEx`` is set, the first three weights of every node are
    used as its colour; otherwise weight ``colnum`` is shown as a colormap.
    Figure size, hexagon radius, text size, colorbar, output directory and
    the save/show switches are read from attributes on ``self``.

    Args:
        colnum (int): The index of the weight that will be shown as colormap.
        colname (str, optional): Name of the column to be shown on the map;
            falls back to ``str(colnum)`` when empty or None. Used as title,
            colorbar label and in the output file name.
        colors (list, optional): Forwarded to ``hx.plot_hex`` as ``edges``
            in the single-feature plot. Defaults to an empty list.
        return_ax_only (bool, optional): When set, skip saving, showing and
            clearing the figure and only return the axes.

    Returns:
        The axes object returned by ``hx.plot_hex``.
    """
    # A fresh list per call avoids sharing one mutable default object.
    if colors is None:
        colors = []
    # From here on ``colname`` is never None, so no further None checks are
    # needed for the title or the file name.
    if not colname:
        colname = str(colnum)

    centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]
    fig = self.get_fig()

    if self.colorEx:
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24 (it was only ever an alias for float).
        cols = [[float(node.weights[i]) for i in range(3)]
                for node in self.nodeList]
        ax = hx.plot_hex(fig, centers, cols, radius=self.radius,
                         cmap=self.cmap)
        ax.set_title('Node Grid with Color Features', size=self.text_size)
        printName = os.path.join(self.path, 'nodesColors.png')
    else:
        cols = [node.weights[colnum] for node in self.nodeList]
        # NOTE(review): unlike the colour branch, ``cmap=self.cmap`` is not
        # passed here -- confirm whether that is intended.
        ax = hx.plot_hex(fig, centers, cols, edges=colors,
                         radius=self.radius)
        ax.set_title(str(colname), size=self.text_size)

        if self.bar:
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.0)
            cbar = plt.colorbar(ax.collections[0], cax=cax)
            cbar.set_label(colname, size=self.text_size, labelpad=10)
            cbar.ax.tick_params(labelsize=self.text_size)
        # Make the hexagon axes current again after any colorbar creation.
        plt.sca(ax)
        printName = os.path.join(self.path, 'SOM_' + str(colname) + '.png')

    if not return_ax_only:
        if self.printout:
            plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
        if self.show:
            plt.show()
        # NOTE: as in the original, the figure is cleared only when it was
        # both shown and saved.
        if self.show and self.printout:
            plt.clf()

    return ax
def projectsample(self, array, colnum=-1, name="", normbybmu=False, show=False, printout=True, path='./', meanmatrix=None, clim=3):
    """Accumulate per-datapoint values onto the SOM nodes and optionally plot them.

    Each value of ``array`` is added to the node given by the matching entry
    of ``self.bmuList`` (which must have been filled beforehand). The plot,
    when requested, shows the accumulated values minus the values
    accumulated the same way from ``meanmatrix``.

    Args:
        array (np.array): Values to project, one per datapoint. For a 2-D
            array, column ``colnum`` is used.
        colnum (int): Column of a 2-D ``array`` to project.
        name (str, optional): Plot title and prefix of the output file name
            ('<name>centeredSampleProj.png').
        normbybmu (bool, optional): Divide each node's accumulated value by
            the number of entries of ``self.bmuList`` that point to it.
        show (bool, optional): Choose to display the plot.
        printout (bool, optional): Choose to save the plot to a file.
        path (str, optional): Directory where the image is written.
        meanmatrix (sequence, optional): Per-datapoint reference values
            subtracted (after accumulation) in the plot only. Defaults to
            no reference.
        clim (float, optional): Currently unused.

    Returns:
        (np.array): accumulated value for each node, or 0 when
        ``self.bmuList`` is empty.
    """
    if len(self.bmuList) == 0:
        print("load setBMUtable before")
        return 0

    # A fresh value per call avoids sharing one mutable default object.
    if meanmatrix is None:
        meanmatrix = []

    # Builtin float replaces the np.float alias, which was removed in
    # NumPy 1.24 (it was only ever an alias for float).
    n_nodes = len(self.nodeList)
    proj = np.zeros(n_nodes, dtype=float)
    projmean = np.zeros(n_nodes, dtype=float)

    if len(array.shape) == 2:
        array = array[:, colnum]

    has_mean = len(meanmatrix) != 0
    for i, val in enumerate(array):
        bmu = self.bmuList[i]
        proj[bmu] += val
        if has_mean:
            projmean[bmu] += meanmatrix[i]

    # normalize by the number of datapoints mapped to each node
    if normbybmu:
        uniqubmu, countbmu = np.unique(self.bmuList, return_counts=True)
        # Nodes without datapoints keep a divisor of 1.
        allcountbmu = np.ones(proj.shape)
        allcountbmu[uniqubmu] = countbmu
        proj = proj / allcountbmu

    if show or printout:
        centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

        # Figure size is 100 px per node at 72 dpi.
        widthP = 100
        dpi = 72
        xInch = self.netWidth * widthP / dpi
        yInch = self.netHeight * widthP / dpi
        fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

        # Visualize the deviation from the reference projection.
        ax = hx.plot_hex(fig, centers, proj - projmean)
        ax.set_title(name)

        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.0)
        cbar = plt.colorbar(ax.collections[0], cax=cax)
        cbar.set_label('Weights Difference')
        # Creating the colorbar axes changes the current axes; switch back.
        plt.sca(ax)

        printName = os.path.join(path, name + 'centeredSampleProj.png')
        if printout:
            plt.savefig(printName, bbox_inches='tight', dpi=dpi)
        if show:
            plt.show()
        if show and printout:
            plt.clf()
        # Close exactly the figure created above so repeated calls do not
        # accumulate open figures.
        plt.close(fig)

    return proj