def __init__(self, image_height, image_width, show_display=True):
    """Store the image geometry and build the heat/cluster drawers.

    :param image_height: stored unchanged on ``self.image_height``
    :param image_width: stored unchanged on ``self.image_width``
    :param show_display: stored on ``self.show_display``; when truthy,
        ``_init_heatmap_display`` is called at the end of construction
    """
    self.image_height, self.image_width = image_height, image_width
    self.show_display = show_display

    # Accumulated state and its tuning constants.
    self.heatmaps = []
    self.object_history = []
    self.max_n_heatmaps = 2
    self.heatmap_threshold = 75

    # Heat drawer: black -> red -> yellow over [0, 255], not in place.
    heat_color = DynamicColor(
        cmap=cmap_builder('black', 'red', 'yellow'),
        value_range=[0, 255],
        colorbar=Colorbar(ticks=np.array([0, 255]),
                          pos=np.array([0.03, 0.96]),
                          size=np.array([0.3, 0.01])))
    self.heat_drawer = Drawer(
        bbox_settings=BBoxSettings(color=heat_color), inplace=False)

    # Cluster drawer: yellow -> lime -> cyan over [0, 20], in place.
    cluster_color = DynamicColor(
        cmap=cmap_builder('yellow', 'lime (w3c)', 'cyan'),
        value_range=[0, 20],
        colorbar=Colorbar(ticks=np.array([0, 10, 20]),
                          pos=np.array([0.03, 0.90]),
                          size=np.array([0.3, 0.01])))
    self.cluster_drawer = Drawer(
        bbox_settings=BBoxSettings(color=cluster_color), inplace=True)

    if self.show_display:
        self._init_heatmap_display()
def get_html_table(self, collapse_table=False, clip_threshold=2,
                   index=False, header=True, escape=False):
    """Return an HTML table for the reports.

    The table is built from a copy of ``self.df`` restricted to
    ``self.colnames_subset``; FDR values below 0.01 are displayed as the
    string ``'<0.01'``.

    :param collapse_table: forwarded to ``HTMLTable.to_html``
    :param clip_threshold: threshold passed to ``add_bgcolor`` for the
        columns coloured in ``clip`` mode
    :param index: forwarded to ``HTMLTable.to_html``
    :param header: forwarded to ``HTMLTable.to_html``
    :param escape: forwarded to ``HTMLTable.to_html``
    :return: whatever ``HTMLTable.to_html`` returns
    """
    cmap_clip = cmap_builder('#ffffff', '#0070FF')
    cmap_absmax = cmap_builder('green', 'white', 'red')

    # Work on a copy: the FDR column is relabelled below and self.df must
    # stay numeric.
    # NOTE(review): assumes self.colnames_subset contains every column
    # referenced below (links and coloured columns) -- confirm.
    df = self.df[self.colnames_subset].copy()
    colname = 'ANOVA_FEATURE_FDR'
    df.loc[df[colname] < 0.01, colname] = '<0.01'

    # Render the edited copy (previously self.df was passed, so the
    # '<0.01' relabelling above never reached the output).
    html = HTMLTable(df, 'notused')

    # Those columns should be links
    for this in ['FEATURE', 'DRUG_ID', 'ASSOC_ID']:
        html.add_href(this)

    for this in ['FEATURE_IC50_effect_size', 'FEATURE_neg_Glass_delta',
                 'FEATURE_pos_Glass_delta']:
        html.add_bgcolor(this, cmap_clip, mode='clip',
                         threshold=clip_threshold)

    # normalise data and annotate with color
    html.add_bgcolor('FEATURE_delta_MEAN_IC50', cmap_absmax, mode='absmax')

    html.df.columns = [x.replace("_", " ") for x in html.df.columns]
    return html.to_html(escape=escape, header=header, index=index,
                        collapse_table=collapse_table, justify='center')
def get_html_table(self, collapse_table=False, clip_threshold=2,
                   index=False, header=True, escape=False, add_href=True):
    """Return an HTML table for the reports

    :param collapse_table: forwarded to ``HTMLTable.to_html``
    :param clip_threshold: threshold used for the ``clip``-coloured columns
    :param index: forwarded to ``HTMLTable.to_html``
    :param header: forwarded to ``HTMLTable.to_html``
    :param escape: forwarded to ``HTMLTable.to_html``
    :param add_href: add href to the FEATURE, DRUG ID and ASSOC ID
    """
    clip_colors = cmap_builder('#ffffff', '#0070FF')
    absmax_colors = cmap_builder('green', 'white', 'red')

    # Work on a copy since the content is edited for display purposes.
    fdr = 'ANOVA_FEATURE_FDR'
    table_data = self.df[self.colnames_subset].copy()
    table_data.loc[table_data[fdr] < 0.01, fdr] = '<0.01'

    # Strip the "a" from the association identifiers so that the column
    # sorts numerically; the "a" prefix is put back in the link below.
    def _numeric_id(assoc_id):
        return int(str(assoc_id).replace("a", ""))

    table_data.ASSOC_ID = table_data.ASSOC_ID.apply(_numeric_id)

    html = HTMLTable(table_data, 'notused')

    if add_href:
        html.add_href("FEATURE")
        # for the next two, url acts as a prefix
        html.add_href("ASSOC_ID", url="a", suffix=".html")
        html.add_href("DRUG_ID", url="drug_", suffix=".html")

    clipped_columns = ('FEATURE_IC50_effect_size',
                       'FEATURE_neg_Glass_delta',
                       'FEATURE_pos_Glass_delta')
    for column in clipped_columns:
        html.add_bgcolor(column, clip_colors, mode='clip',
                         threshold=clip_threshold)

    # normalise data and annotate with color
    html.add_bgcolor('FEATURE_delta_MEAN_IC50', absmax_colors, mode='absmax')

    html.df.columns = [name.replace("_", " ") for name in html.df.columns]
    rendered = html.to_html(escape=escape, header=header, index=index,
                            collapse_table=collapse_table, justify='center')
    return rendered
def __init__(self, grid_generator, force_train=False, use_cache=True,
             show_display=True):
    """Load or train the classifier and prepare cache, thresholds, drawer.

    :param grid_generator: stored on ``self.grid_generator``
    :param force_train: when truthy, the classifier is trained and stored
        even if one is already available
    :param use_cache: when truthy, a ``ClassifierCache`` is created on
        ``self.cache``
    :param show_display: when truthy, ``_init_display`` is called at the
        end of construction
    """
    self.grid_generator = grid_generator
    self.use_cache = use_cache
    self.show_display = show_display

    # The availability check always runs first, as in the original
    # condition; training happens when forced or when nothing is stored.
    available = self._classifier_available()
    if force_train or not available:
        self._train_classifier()
        self._store_classifier()
    else:
        self._load_classifier()

    if self.use_cache:
        self.cache = ClassifierCache(classifier_path=classifier_path)

    # Thresholds come from names defined outside this method.
    self.low_threshold = cache_threshold
    self.medium_threshold = cluster_threshold
    self.high_threshold = tracking_threshold

    # yellow -> lime -> cyan over [0.5, 1.0], not drawn in place.
    color = DynamicColor(
        cmap=cmap_builder('yellow', 'lime (w3c)', 'cyan'),
        value_range=[0.5, 1.0],
        colorbar=Colorbar(ticks=np.array([0.5, 0.75, 1.0]),
                          pos=np.array([0.03, 0.97]),
                          size=np.array([0.3, 0.01])))
    self.drawer = Drawer(bbox_settings=BBoxSettings(color=color),
                         inplace=False)

    if self.show_display:
        self._init_display()
def plot_confusion(self, species=None, cmap=None, tight_layout=False):
    """Show a confusion matrix as an image.

    :param species: key into ``self.confusion``; when None,
        ``self.confusion_all`` is shown instead and no title is set
    :param cmap: colormap given to ``imshow``; when None a
        white/blue/darkblue map is built with ``colormap.cmap_builder``
    :param tight_layout: when truthy, ``pylab.tight_layout()`` is called
        after plotting (it used to be ignored when *species* was None)
    """
    if cmap is None:
        import colormap
        cmap = colormap.cmap_builder('white', 'blue', 'darkblue')

    from biokit import imshow

    if species is not None:
        imshow(self.confusion[species], cmap=cmap)
        pylab.title(species, fontsize=20)
    else:
        imshow(self.confusion_all, cmap=cmap)

    # Honour the flag for both the per-species and the global matrix.
    if tight_layout:
        pylab.tight_layout()
def __init__(self, show_display=True):
    """Initialise tracking state, parameters and the two drawers.

    :param show_display: stored on ``self.show_display``; when truthy,
        ``_init_display`` is called during construction
    """
    self.tracked_objects = []
    self.show_display = show_display
    self._init_params()

    # Raw tracks: yellow -> lime -> cyan over [0, 65], drawn in place.
    raw_color = DynamicColor(
        cmap=cmap_builder('yellow', 'lime (w3c)', 'cyan'),
        value_range=[0, 65],
        colorbar=Colorbar(ticks=np.array([0, 30, 60]),
                          pos=np.array([0.03, 0.96]),
                          size=np.array([0.3, 0.01])))
    self.raw_tracks_drawer = Drawer(
        bbox_settings=BBoxSettings(color=raw_color), inplace=True)

    # Matches: static (0, 0, 0) colour with a border thickness of 2.
    match_settings = BBoxSettings(color=StaticColor((0, 0, 0)),
                                  border_thickness=2)
    self.match_drawer = Drawer(bbox_settings=match_settings, inplace=True)

    if self.show_display:
        self._init_display()

    self.first_frame = True
def _set_default_cmap(self):
    """Set ``self.cm`` to the default '#AA0000' / white / darkblue map."""
    default_colors = ('#AA0000', 'white', 'darkblue')
    self.cm = cmap_builder(*default_colors)
def plot(self, fig=None, grid=True, rotation=30, lower=None, upper=None,
         shrink=0.9, axisbg='white', colorbar=True, label_color='black',
         fontsize='small', edgecolor='black', method='ellipse',
         order_method='complete', order_metric='euclidean', cmap=None,
         ax=None, binarise_color=False, figsize=None):
    """Plot the correlation matrix from the content of :attr:`df` (dataframe)

    By default, the correlation is shown on the upper and lower triangle
    and is symmetric with respect to the diagonal. The symbols are
    ellipses. The symbols can be changed to e.g. rectangle. The symbols
    are shown on upper and lower sides but you could choose a symbol for
    the upper side and another for the lower side using the **lower** and
    **upper** parameters.

    :param fig: Create a new figure by default. If an instance of an
        existing figure is provided, the corrplot is overlaid on the
        figure provided. Can also be the number of the figure.
    :param grid: add grid (defaults to grey color). You can set it to
        False or a color.
    :param rotation: rotate labels on y-axis
    :param lower: if set to a valid method, plots the data on the lower
        left triangle
    :param upper: if set to a valid method, plots the data on the upper
        triangle
    :param float shrink: maximum space used (in percent) by a symbol.
        If negative values are provided, the absolute value is taken.
        If greater than 1, the symbols will overlap.
    :param axisbg: color of the background (defaults to white).
    :param colorbar: add the colorbar (defaults to True).
    :param str label_color: (defaults to black).
    :param fontsize: size of the fonts, defaults to 'small'.
    :param edgecolor: stored on ``self.edgecolor`` (defaults to black).
    :param method: shape to be used in 'ellipse', 'square', 'rectangle',
        'color', 'text', 'circle', 'number', 'pie'.
    :param order_method: see :meth:`order`.
    :param order_metric: see :meth:`order`.
    :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
        'jet', or 'copper'). Default is red/white/blue colors.
    :param ax: a matplotlib axes.
    :param binarise_color: stored on ``self.binarise_color`` before the
        patches are added.
    :param figsize: gives that parameter to the new created figure
    :return: ax (matplotlib axes)

    The colorbar can be tuned with the parameters stored in
    :attr:`params`.

    Here is an example. See notebook for other examples::

        c = corrplot.Corrplot(dataframe)
        c.plot(cmap=('Orange', 'white', 'green'))
        c.plot(method='circle')
        c.plot(colorbar=False, shrink=.8, upper='circle' )

    """
    # default
    if cmap is not None:
        try:
            if isinstance(cmap, str):
                self.cm = cmap_builder(cmap)
            else:
                self.cm = cmap_builder(*cmap)
        except Exception:
            # best effort: an unusable cmap falls back on the default one
            print("incorrect cmap. Use default one")
            self._set_default_cmap()
    else:
        self._set_default_cmap()

    self.shrink = abs(shrink)
    self.fontsize = fontsize
    self.edgecolor = edgecolor

    df = self.order(method=order_method, metric=order_metric)

    # figure can be a number or an instance; otherwise creates it
    params = dict(facecolor=axisbg)
    if isinstance(fig, int):
        # an int is already the figure number (it has no .number)
        params["num"] = fig
    elif fig is not None:
        params["num"] = fig.number
    else:
        params["num"] = None
    if figsize is not None:
        params["figsize"] = figsize
    fig = plt.figure(**params)

    # do we have an axes to plot the data in ?
    if ax is None:
        # NOTE(review): `axisbg` was superseded by `facecolor` in later
        # matplotlib releases -- confirm the supported version.
        ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
    else:
        # if so, clear the axes. Colorbar cannot be removed easily.
        plt.sca(ax)
        ax.clear()

    # subplot resets the bg color, let us set it again
    fig.set_facecolor(axisbg)

    width, height = df.shape
    labels = (df.columns)

    # add all patches to the figure
    # TODO check value of lower and upper
    if upper is None and lower is None:
        mode = 'method'
    elif upper and lower:
        mode = 'both'
    elif lower is not None:
        mode = 'lower'
    elif upper is not None:
        mode = 'upper'

    self.binarise_color = binarise_color
    if mode == 'upper':
        self._add_patches(df, upper, 'upper', ax, diagonal=True)
    elif mode == 'lower':
        self._add_patches(df, lower, 'lower', ax, diagonal=True)
    elif mode == 'method':
        self._add_patches(df, method, 'both', ax, diagonal=True)
    elif mode == 'both':
        self._add_patches(df, upper, 'upper', ax, diagonal=False)
        self._add_patches(df, lower, 'lower', ax, diagonal=False)

    # shift the limits to englobe the patches correctly
    ax.set_xlim(-0.5, width - .5)
    ax.set_ylim(-0.5, height - .5)

    # set xticks/xlabels on top
    ax.xaxis.tick_top()
    xtickslocs = np.arange(len(labels))
    ax.set_xticks(xtickslocs)
    ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                       fontsize=fontsize, ha='left')

    ax.invert_yaxis()
    ytickslocs = np.arange(len(labels))
    ax.set_yticks(ytickslocs)
    ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
    plt.tight_layout()

    if grid is not False:
        if grid is True:
            grid = 'grey'
        for i in range(0, width):
            ratio1 = float(i) / width
            ratio2 = float(i + 2) / width
            # TODO 1- set axis off
            # 2 - set xlabels along the diagonal
            # set colorbar either on left or bottom
            if mode == 'lower':
                plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
            if mode == 'upper':
                plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
            if mode in ['method', 'both']:
                plt.axvline(i + .5, color=grid)
                plt.axhline(i + .5, color=grid)

        # can probably be simplified
        if mode == 'lower':
            plt.axvline(-.5, ymin=0, ymax=1, color='grey')
            plt.axvline(width - .5, ymin=0, ymax=1. / width,
                        color='grey', lw=2)
            plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
            plt.xticks([])
            for i in range(0, width):
                plt.text(i, i - .6, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
                plt.text(-.6, i, labels[i], fontsize=fontsize,
                         color=label_color, rotation=0,
                         horizontalalignment='right')
            plt.axis('off')
        # can probably be simplified
        elif mode == 'upper':
            plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
            plt.axvline(-.5, ymin=1 - 1. / width, ymax=1,
                        color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(width - .5, xmin=1 - 1. / width, xmax=1,
                        color='grey', lw=2)
            plt.yticks([])
            for i in range(0, width):
                plt.text(-.6 + i, i, labels[i], fontsize=fontsize,
                         color=label_color, horizontalalignment='right',
                         rotation=0)
                plt.text(i, -.5, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
            plt.axis('off')

    # set all ticks length to zero
    ax = plt.gca()
    ax.tick_params(axis='both', which='both', length=0)

    if colorbar:
        N = self.params['colorbar.N'] + 1
        assert N >= 2
        cb = plt.gcf().colorbar(
            self.collection,
            orientation=self.params['colorbar.orientation'],
            shrink=self.params['colorbar.shrink'],
            boundaries=np.linspace(0, 1, N),
            ticks=[0, .25, 0.5, 0.75, 1])
        cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
        # make sure it goes from -1 to 1 even though actual values may not
        # reach that range
        cb.set_clim(0, 1)
    return ax
def plot(self, num=1, grid=True, rotation=30, colorbar_width=10,
         lower=None, upper=None, shrink=0.9, axisbg='white', colorbar=True,
         label_color='black', fontsize='small', edgecolor='black',
         method='ellipse', order=None, cmap=None):
    """Plot the correlation matrix from the content of :attr:`df` (dataframe)

    :param num: number of the matplotlib figure to draw in; that figure
        is cleared first
    :param grid: add grid (defaults to True)
    :param rotation: rotate labels on y-axis
    :param colorbar_width: currently unused
    :param lower: if set to a valid method, plots the data on the lower
        left triangle
    :param upper: if set to a valid method, plots the data on the upper
        triangle
    :param shrink: stored on ``self.shrink``
    :param axisbg: color of the background (defaults to white)
    :param colorbar: add the colorbar (defaults to True)
    :param label_color: color of the labels (defaults to black)
    :param fontsize: size of the fonts (defaults to 'small')
    :param edgecolor: stored on ``self.edgecolor``
    :param method: shape to be used in 'ellipse', 'square', 'rectangle',
        'color', 'text', 'circle', 'number', 'pie'.
    :param order: if set to 'hclust', the dataframe is reordered with
        :meth:`order`; otherwise :attr:`df` is used as is
    :param cmap: a valid cmap name, or a sequence of colors, accepted by
        ``cmap_builder``. The default colormap is used otherwise.
    :return: ax (matplotlib axes)

    Here are some examples provided that the data is created and passed
    to c::

        c = corrplot.Corrplot(dataframe)
        c.plot(cmap=('Orange', 'white', 'green'))
        c.plot(method='circle')
        c.plot(colorbar=False, shrink=.8, upper='circle' )

    """
    # default
    if cmap is not None:
        try:
            if isinstance(cmap, str):
                self.cm = cmap_builder(cmap)
            else:
                self.cm = cmap_builder(*cmap)
        except Exception:
            # best effort: an unusable cmap falls back on the default one
            print("incorrect cmap. Use default one")
            self._set_default_cmap()
    else:
        self._set_default_cmap()

    self.shrink = shrink
    self.fontsize = fontsize
    self.edgecolor = edgecolor

    if order == 'hclust':
        df = self.order(method='hclust')
    else:
        df = self.df

    # Select the requested figure first, then clear *that* figure
    # (plt.clf() before plt.figure() cleared whichever figure happened
    # to be current instead).
    fig = plt.figure(num=num, facecolor=axisbg)
    fig.clf()
    # NOTE(review): `axisbg` was superseded by `facecolor` in later
    # matplotlib releases -- confirm the supported version.
    ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
    # subplot resets the bg color, let us set it again
    fig.set_facecolor(axisbg)

    width, height = df.shape
    labels = (df.columns)

    # add all patches to the figure
    # TODO check value of lower and upper
    if upper is None and lower is None:
        mode = 'method'
    elif upper and lower:
        mode = 'both'
    elif lower is not None:
        mode = 'lower'
    else:
        # only reachable when upper is not None
        mode = 'upper'

    if mode == 'upper':
        self._add_patches(df, upper, 'upper', ax, diagonal=True)
    elif mode == 'lower':
        self._add_patches(df, lower, 'lower', ax, diagonal=True)
    elif mode == 'method':
        self._add_patches(df, method, 'both', ax, diagonal=True)
    elif mode == 'both':
        self._add_patches(df, upper, 'upper', ax, diagonal=False)
        self._add_patches(df, lower, 'lower', ax, diagonal=False)

    # shift the limits to englobe the patches correctly
    ax.set_xlim(-0.5, width - .5)
    ax.set_ylim(-0.5, height - .5)

    # set xticks/xlabels on top
    ax.xaxis.tick_top()
    xtickslocs = np.arange(len(labels))
    ax.set_xticks(xtickslocs)
    ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                       fontsize=fontsize, ha='left')

    ax.invert_yaxis()
    ytickslocs = np.arange(len(labels))
    ax.set_yticks(ytickslocs)
    ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
    plt.tight_layout()

    if grid is True:
        for i in range(0, width):
            ratio1 = float(i) / width
            ratio2 = float(i + 2) / width
            # TODO 1- set axis off
            # 2 - set xlabels along the diagonal
            # set colorbar either on left or bottom
            if mode == 'lower':
                plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color='grey')
                plt.axhline(i + .5, xmin=0, xmax=ratio2, color='grey')
            if mode == 'upper':
                plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color='grey')
                plt.axhline(i + .5, xmin=ratio1, xmax=1, color='grey')
            if mode in ['method', 'both']:
                plt.axvline(i + .5, color='grey')
                plt.axhline(i + .5, color='grey')

        # can probably be simplified
        if mode == 'lower':
            plt.axvline(-.5, ymin=0, ymax=1, color='grey')
            plt.axvline(width - .5, ymin=0, ymax=1. / width,
                        color='grey', lw=2)
            plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
            plt.xticks([])
            for i in range(0, width):
                plt.text(i, i - .6, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
                plt.text(-.6, i, labels[i], fontsize=fontsize,
                         color=label_color, rotation=0,
                         horizontalalignment='right')
            plt.axis('off')
        # can probably be simplified
        elif mode == 'upper':
            plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
            plt.axvline(-.5, ymin=1 - 1. / width, ymax=1,
                        color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(width - .5, xmin=1 - 1. / width, xmax=1,
                        color='grey', lw=2)
            plt.yticks([])
            for i in range(0, width):
                plt.text(-.6 + i, i, labels[i], fontsize=fontsize,
                         color=label_color, horizontalalignment='right',
                         rotation=0)
                plt.text(i, -.5, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
            plt.axis('off')

    # set all ticks length to zero
    ax = plt.gca()
    ax.tick_params(axis='both', which='both', length=0)

    if colorbar:
        N = self.params['colorbar.N']
        cb = plt.gcf().colorbar(
            self.collection,
            orientation=self.params['colorbar.orientation'],
            shrink=.9,
            boundaries=np.linspace(0, 1, N),
            ticks=[0, .25, 0.5, 0.75, 1])
        cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
        # make sure it goes from -1 to 1 even though actual values may not
        # reach that range
        cb.set_clim(0, 1)
    return ax
def plot(self, num=1, cmap=None, colorbar=True, vmin=None, vmax=None,
         colorbar_position='right', gradient_span='None',
         figsize=(12, 8), fontsize=None):
    """Plot the heatmap together with its dendrograms.

    :param num: number of the figure to draw in (the figure is cleared)
    :param cmap: colormap; ``self.params.cmap`` is used when None. The
        value is passed through ``colormap.cmap_builder`` when possible
        and kept unchanged otherwise.
    :param colorbar: add the color legend when truthy
    :param vmin: lower bound of the color normalisation
    :param vmax: upper bound of the color normalisation
    :param colorbar_position: 'right' or 'top left'
    :param gradient_span: one of 'min_to_max_centered', 'only_max',
        'only_min'; these override *vmin* / *vmax* from the data. Any
        other value leaves *vmin* / *vmax* untouched.
    :param figsize: size given to the figure
    :param fontsize: if set, overrides the automatically chosen font size
    :return: dictionary of the axes/colorbar objects created
    :raises ValueError: if *colorbar_position* is not supported

    Using as input::

        df = pd.DataFrame({'A':[1,0,1,1],
                           'B':[.9,0.1,.6,1],
                           'C':[.5,.2,0,1],
                           'D':[.5,.2,0,1]})

    we can plot the heatmap + dendogram as follows::

        h = Heatmap(df)
        h.plot(vmin=0, vmax=1.1)

    .. plot::
        :include-source:
        :width: 80%

        from sequana.viz import heatmap
        df = heatmap.get_heatmap_df()
        h = heatmap.Heatmap(df)
        h.category_column['A'] = 1
        h.category_column['C'] = 1
        h.category_column['D'] = 2
        h.category_column['B'] = 2
        h.plot()

    """
    # save all parameters in a dict
    layout = {}

    if cmap is None:
        cmap = self.params.cmap
    try:
        cmap = colormap.cmap_builder(cmap)
    except Exception:
        # deliberate best effort: keep cmap as provided when it cannot
        # be built
        pass

    # keep track of row and column names for later.
    row_header = self.frame.index
    column_header = self.frame.columns

    # FIXME something clever for the fontsize
    # elif chain: the successive plain `if`s used to overwrite the
    # smaller sizes, so large frames always ended up with size 8.
    largest = max(len(row_header), len(column_header))
    if largest > 100:
        matplotlib.rcParams['font.size'] = 6
    elif largest > 50:
        matplotlib.rcParams['font.size'] = 7
    elif largest > 30:
        matplotlib.rcParams['font.size'] = 8
    else:
        matplotlib.rcParams['font.size'] = 12
    if fontsize:
        matplotlib.rcParams['font.size'] = fontsize

    # scaling min/max range
    self.gradient_span = gradient_span  # 'only_max'
    # min_to_max, min_to_max_centered, only_max, only_min
    if self.gradient_span == 'min_to_max_centered':
        vmax = self.frame.max().max()
        vmin = self.frame.min().min()
        vmax = max([vmax, abs(vmin)])
        vmin = vmax * -1
    if self.gradient_span == 'only_max':
        vmin = 0
        vmax = self.frame.max().max()
    if self.gradient_span == 'only_min':
        vmin = self.frame.min().min()
        vmax = 0
    norm = matplotlib.colors.Normalize(vmin, vmax)

    # Scale the figure window size #
    fig = pylab.figure(num=num, figsize=figsize)
    fig.clf()

    # LAYOUT --------------------------------------------------
    # ax1 (dendrogram 1) on the left of the heatmap
    [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6]
    width_between_ax1_axr = 0.004

    # distance between the top color bar axis and the matrix
    height_between_ax1_axc = 0.004

    # Sufficient size to show
    color_bar_w = 0.015

    # axr, placement of row side colorbar
    # second to last controls the width of the side color bar - 0.015 when
    # showing
    [axr_x, axr_y, axr_w, axr_h] = [0.31, 0.1, color_bar_w, 0.6]
    axr_x = ax1_x + ax1_w + width_between_ax1_axr
    axr_y = ax1_y
    axr_h = ax1_h
    width_between_axr_axm = 0.004

    # axc, placement of column side colorbar #
    # last one controls the height of the top color bar - 0.015 when
    # showing
    [axc_x, axc_y, axc_w, axc_h] = [0.4, 0.63, 0.5, color_bar_w]
    axc_x = axr_x + axr_w + width_between_axr_axm
    axc_y = ax1_y + ax1_h + height_between_ax1_axc
    height_between_axc_ax2 = 0.004

    # axm, placement of heatmap for the data matrix # why larger than 1?
    [axm_x, axm_y, axm_w, axm_h] = [0.4, 0.9, 2.5, 0.5]
    axm_x = axr_x + axr_w + width_between_axr_axm
    axm_y = ax1_y
    axm_h = ax1_h
    axm_w = axc_w

    # ax2 (dendrogram 2), on the top of the heatmap #
    [ax2_x, ax2_y, ax2_w, ax2_h] = [0.3, 0.72, 0.6, 0.15]
    ax2_x = axr_x + axr_w + width_between_axr_axm
    ax2_y = (ax1_y + ax1_h + height_between_ax1_axc + axc_h +
             height_between_axc_ax2)
    ax2_w = axc_w

    # axcb - placement of the color legend #
    if colorbar_position == 'top left':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09]
    elif colorbar_position == 'right':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6]
    else:
        raise ValueError("'top left' or 'right' accepted for now")

    # COMPUTATION DENDOGRAM 1 -------------------------------------
    if self.column_method:
        Y = self.linkage(self.frame.transpose(), self.column_method,
                         self.column_metric)
        ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)

        # p=30, truncate_mode=None, color_threshold=None, get_leaves=True,
        # orientation='top labels=None, count_sort=False,
        # distance_sort=False, show_leaf_counts=True, no_plot=False,
        # no_labels=False, leaf_font_size=None, leaf_rotation=None,
        # leaf_label_func=None, show_contracted=False,
        # link_color_func=None, ax=None, above_threshold_color='b'
        #
        # color_threshold=0 and above_threshold_color='k' colors all
        # dendogram into black
        Z = hierarchy.dendrogram(Y, color_threshold=0,
                                 above_threshold_color="k",
                                 distance_sort="descending")
        ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)

        ax2.set_xticks([])
        ax2.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx2 = Z['leaves']
        self.frame = self.frame.iloc[:, idx2]
        # reorder the flat cluster to match the order of the leaves the
        # dendrogram
        ind2 = ind2[idx2]
        layout['dendogram2'] = ax2
    else:
        idx2 = range(self.frame.shape[1])

    # COMPUTATION DENDOGRAM 2 ---------------------------------
    if self.row_method:
        Y = self.linkage(self.frame, self.row_method, self.row_metric)

        ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
        Z = hierarchy.dendrogram(Y, orientation='right',
                                 color_threshold=0,
                                 above_threshold_color="k",
                                 distance_sort="descending")
        ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)

        ax1.set_xticks([])
        ax1.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx1 = Z['leaves']
        self.frame = self.frame.iloc[idx1, :]
        # reorder the flat cluster to match the order of the leaves the
        # dendrogram
        ind1 = ind1[idx1]
        layout['dendogram1'] = ax1
    else:
        idx1 = range(self.frame.shape[0])

    # HEATMAP itself
    axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
    axm.imshow(self.frame, aspect='auto', origin='lower',
               interpolation='None', cmap=cmap, norm=norm)
    axm.set_xticks([])
    axm.set_yticks([])
    layout['heatmap'] = axm

    # TEXT
    # row_header/column_header hold the original order, hence the lookup
    # through idx1/idx2.
    for i in range(self.frame.shape[0]):
        axm.text(self.frame.shape[1] - 0.5, i,
                 '  ' + str(row_header[idx1[i]]),
                 verticalalignment="center")

    for i in range(self.frame.shape[1]):
        axm.text(i, -0.9, ' ' + str(column_header[idx2[i]]),
                 rotation=90, verticalalignment="top",
                 horizontalalignment="center")

    # CATEGORY column ------------------------------
    if self.category_column:
        axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])

        category_col = [
            self.category_column[self.df.columns[i]] for i in idx2
        ]

        dc = np.array(category_col, dtype=int)
        # use the list just built: ind2 only exists when a column
        # dendrogram was computed
        dc.shape = (1, len(category_col))
        cmap_c = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
        axc.set_xticks([])
        axc.set_yticks([])
        layout['category_column'] = axc

    # CATEGORY row -------------------------------
    if self.category_row:
        axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
        # self.category_row must be a dictionary with names as found in
        # the index of the dataframe.
        category_row = [self.category_row[self.df.index[i]] for i in idx1]

        dr = np.array(category_row, dtype=int)
        dr.shape = (len(category_row), 1)
        cmap_r = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
        axr.set_xticks([])
        axr.set_yticks([])
        layout['category_row'] = axr

    # COLORBAR ----------------------
    if colorbar:
        axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                            frame_on=False)
        if colorbar_position == 'right':
            orientation = 'vertical'
        else:
            orientation = 'horizontal'
        cb = matplotlib.colorbar.ColorbarBase(ax=axcb, cmap=cmap,
                                              norm=norm,
                                              orientation=orientation)
        layout['colorbar'] = cb
        layout['colorbar_scalablemap'] = axcb

    # could be useful
    self.d = {'ordered': self.frame.copy(), 'rorder': idx1,
              'corder': idx2}

    return layout
def add_features(self):
    """Write the feature/drug summaries and fill the drug/feature tables.

    Two CSV files (``features_summary.csv`` and ``drugs_summary.csv``)
    are written under ``self.directory``/OUTPUT. When ``self.report.df``
    is not empty, ``self.jinja['drug_table']`` and
    ``self.jinja['feature_table']`` are filled with HTML tables whose
    ``hits`` column counts the rows of the significant set per drug and
    per feature.
    """
    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = [
            "{}-{}".format(x, get_name(x)) for x in df_drugs.index
        ]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # nothing to tabulate without associations
    if len(self.report.df) == 0:
        return

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        # Select the column before averaging so that unrelated
        # (possibly non-numeric) columns are never aggregated.
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # legacy pandas without sort_values
        # note double brackets for python3.3
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    counts = [len(groups.groups[k]) for k in df.DRUG_ID]
    df['Number of associations computed'] = counts

    groups = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [
        len(groups[drug]) if drug in groups else 0
        for drug in df['DRUG_ID'].values
    ]

    # add another set of drug_id but sorted in alpha numerical order
    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))

    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # legacy pandas without sort_values
        df.sort('FEATURE', inplace=True)

    groups = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [
        len(groups[feature]) if feature in groups else 0
        for feature in df['FEATURE'].values
    ]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE', url="associations/", suffix=".html")
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)
def plot(self, num=1, cmap="heat", colorbar=True, vmin=None, vmax=None,
         colorbar_position='right', gradient_span='None'):
    """Plot the heatmap together with its dendrograms.

    :param num: number of the figure to draw in (the figure is cleared)
    :param cmap: colormap name given to ``colormap.cmap_builder``
    :param colorbar: add the color legend when truthy
    :param vmin: lower bound of the color normalisation
    :param vmax: upper bound of the color normalisation
    :param colorbar_position: 'right' or 'top left'
    :param gradient_span: None is default in R. One of
        'min_to_max_centered' (symmetric range around 0; bounds left to
        None are taken from the data), 'only_max', 'only_min'. Any other
        value leaves *vmin* / *vmax* untouched.
    :return: dictionary of the axes/colorbar objects created
    :raises ValueError: if a colorbar is requested with an unsupported
        *colorbar_position*

    Using::

        df = pd.DataFrame({'A':[1,0,1,1],
                           'B':[.9,0.1,.6,1],
                           'C':[.5,.2,0,1],
                           'D':[.5,.2,0,1]})

    and ::

        h = Heatmap(df)
        h.plot(vmin=0, vmax=1.1)

    we seem to get the same as in R with ::

        df = data.frame(A=c(1,0,1,1), B=c(.9,.1,.6,1), C=c(.5,.2,0,1), D=c(.5,.2,0,1))
        heatmap((as.matrix(df)), scale='none')

    .. todo:: right now, the order of cols and rows is random somehow.
        could be ordered like in heatmap (r) by mean of the row and col
        or with a set of vector for col and rows.

        heatmap((as.matrix(df)), Rowv=c(3,2), Colv=c(1), scale='none')

    gives same as::

        df = get_heatmap_df()
        h = heatmap.Heatmap(df)
        h.plot(vmin=-0, vmax=1.1)

    """
    # save all parameters in a dict
    layout = {}

    cmap = colormap.cmap_builder(cmap)

    # keep track of row and column names for later.
    row_header = self.frame.index
    column_header = self.frame.columns

    # FIXME something clever for the fontsize
    # elif chain: with two plain `if`s the size 6 was always overwritten
    # by 7 for frames larger than 100.
    largest = max(len(row_header), len(column_header))
    if largest > 100:
        matplotlib.rcParams['font.size'] = 6
    elif largest > 50:
        matplotlib.rcParams['font.size'] = 7
    else:
        matplotlib.rcParams['font.size'] = 12

    # scaling min/max range
    self.gradient_span = gradient_span  # 'only_max'
    # min_to_max, min_to_max_centered, only_max, only_min
    if self.gradient_span == 'min_to_max_centered':
        # bounds left to None come from the data (abs(None) used to
        # raise a TypeError with the default arguments)
        if vmax is None:
            vmax = self.frame.max().max()
        if vmin is None:
            vmin = self.frame.min().min()
        vmax = max([vmax, abs(vmin)])
        vmin = vmax * -1
    if self.gradient_span == 'only_max':
        vmin = 0
        vmax = self.frame.max().max()
    if self.gradient_span == 'only_min':
        vmin = self.frame.min().min()
        vmax = 0
    norm = matplotlib.colors.Normalize(vmin, vmax)

    # Scale the figure window size #
    fig = pylab.figure(num=num, figsize=(12, 8))
    fig.clf()

    # LAYOUT --------------------------------------------------
    # ax1 (dendrogram 1) on the left of the heatmap
    [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6]
    width_between_ax1_axr = 0.004

    # distance between the top color bar axis and the matrix
    height_between_ax1_axc = 0.004

    # Sufficient size to show
    color_bar_w = 0.015

    # axr, placement of row side colorbar
    # second to last controls the width of the side color bar - 0.015 when
    # showing
    [axr_x, axr_y, axr_w, axr_h] = [0.31, 0.1, color_bar_w, 0.6]
    axr_x = ax1_x + ax1_w + width_between_ax1_axr
    axr_y = ax1_y
    axr_h = ax1_h
    width_between_axr_axm = 0.004

    # axc, placement of column side colorbar #
    # last one controls the height of the top color bar - 0.015 when
    # showing
    [axc_x, axc_y, axc_w, axc_h] = [0.4, 0.63, 0.5, color_bar_w]
    axc_x = axr_x + axr_w + width_between_axr_axm
    axc_y = ax1_y + ax1_h + height_between_ax1_axc
    height_between_axc_ax2 = 0.004

    # axm, placement of heatmap for the data matrix # why larger than 1?
    [axm_x, axm_y, axm_w, axm_h] = [0.4, 0.9, 2.5, 0.5]
    axm_x = axr_x + axr_w + width_between_axr_axm
    axm_y = ax1_y
    axm_h = ax1_h
    axm_w = axc_w

    # ax2 (dendrogram 2), on the top of the heatmap #
    [ax2_x, ax2_y, ax2_w, ax2_h] = [0.3, 0.72, 0.6, 0.15]
    ax2_x = axr_x + axr_w + width_between_axr_axm
    ax2_y = (ax1_y + ax1_h + height_between_ax1_axc + axc_h +
             height_between_axc_ax2)
    ax2_w = axc_w

    # axcb - placement of the color legend #
    if colorbar_position == 'top left':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09]
    elif colorbar_position == 'right':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6]
    elif colorbar:
        # fail early with a clear message instead of a NameError when
        # the legend axes are created further down
        raise ValueError("'top left' or 'right' accepted for now")

    # COMPUTATION DENDOGRAM 1 -------------------------------------
    if self.column_method:
        Y = self.get_linkage(self.frame.transpose(), self.column_method,
                             self.column_metric)
        ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)
        Z = hierarchy.dendrogram(Y)
        ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)
        ax2.set_xticks([])
        ax2.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx2 = Z['leaves']
        self.frame = self.frame.iloc[:, idx2]
        # reorder the flat cluster to match the order of the leaves the
        # dendrogram
        ind2 = ind2[idx2]
        layout['dendogram2'] = ax2
    else:
        idx2 = range(self.frame.shape[1])

    # COMPUTATION DENDOGRAM 2 ---------------------------------
    if self.row_method:
        Y = self.get_linkage(self.frame, self.row_method, self.row_metric)
        ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
        Z = hierarchy.dendrogram(Y, orientation='right')
        ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)
        ax1.set_xticks([])
        ax1.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx1 = Z['leaves']
        self.frame = self.frame.iloc[idx1, :]
        # reorder the flat cluster to match the order of the leaves the
        # dendrogram
        ind1 = ind1[idx1]
        layout['dendogram1'] = ax1
    else:
        idx1 = range(self.frame.shape[0])

    # HEATMAP itself
    axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
    axm.imshow(self.frame, aspect='auto', origin='lower',
               interpolation='None', cmap=cmap, norm=norm)
    axm.set_xticks([])
    axm.set_yticks([])
    layout['heatmap'] = axm

    # TEXT
    # row_header/column_header hold the original order, hence the lookup
    # through idx1/idx2.
    for i in range(self.frame.shape[0]):
        axm.text(self.frame.shape[1] - 0.5, i,
                 '  ' + str(row_header[idx1[i]]),
                 verticalalignment="center")

    for i in range(self.frame.shape[1]):
        axm.text(i, -0.9, ' ' + str(column_header[idx2[i]]),
                 rotation=90, verticalalignment="top",
                 horizontalalignment="center")

    # CATEGORY column ------------------------------
    if self.column_method:
        axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])
        cmap_c = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        dc = np.array(ind2, dtype=int)
        dc.shape = (1, len(ind2))
        axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
        axc.set_xticks([])
        axc.set_yticks([])
        layout['category_column'] = axc

    # CATEGORY row -------------------------------
    if self.row_method:
        axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
        dr = np.array(ind1, dtype=int)
        dr.shape = (len(ind1), 1)
        cmap_r = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
        axr.set_xticks([])
        axr.set_yticks([])
        layout['category_row'] = axr

    # COLORBAR ----------------------
    if colorbar:
        axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                            frame_on=False)
        if colorbar_position == 'right':
            orientation = 'vertical'
        else:
            orientation = 'horizontal'
        cb = matplotlib.colorbar.ColorbarBase(axcb, cmap=cmap, norm=norm,
                                              orientation=orientation)
        layout['colorbar'] = cb

    return layout
def _build_colormap(color1, color2, color3):
    """Build a colormap from the three given colors.

    :param color1: first color, given as a string
    :param color2: second color, given as a string
    :param color3: third color, given as a string
    :return: the object returned by ``cmap_builder`` for these three colors
    """
    # The colors are forwarded in the order received; previously the
    # arguments were ignored and a fixed blue/orange/green map was returned.
    return cmap_builder(color1, color2, color3)
def _create_report(self, onweb=True):
    """Fill ``self.jinja`` with the main report sections and save files.

    The following ``self.jinja`` entries are set: ``summary``, ``volcano``,
    ``manova``, ``drug_not_tested``, ``drug_table``, ``feature_table``,
    ``cosmic_table``, ``ic50_file``, ``gf_file``, ``drug_decode`` and
    ``settings``.

    Files written: the feature and drug summaries and the full results
    (CSV, under ``OUTPUT``), the IC50 matrix, genomic features, drug decode
    table and settings (under ``INPUT``) and a ``code/rerun.py`` script.

    :param onweb: not used by this method.
    """
    # A summary table
    diag = self.report.diagnostics()
    # NOTE(review): this table is rebound further down before being used.
    table = HTMLTable(diag, 'summary')

    summary_lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            # rows with neither text nor value act as separators
            summary_lines.append('----<br/>')
        else:
            summary_lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = ''.join(summary_lines)

    print('Creating volcano plots')
    # this can be pretty slow. so keep only 1000 most relevant
    # values and 1000 random ones to get an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = """
        <h3></h3>
        <a href="volcano_all_js.html">
            <img alt="volcano plot for all associations"
                src="volcano_all_js.png">
        </a>
        <br/>
        <p> A javascript version is available
            <a href="volcano_all_js.html">here</a> (
            or click on the image).</p>
    """

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = """
    There were %(N)s significant associations found.
    All significant associations have been gatherered
    in the following link: <br/><a href="manova.html">manova results</a>.
    """ % {'N': N}

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # Listing the drugs that could not be analysed is currently disabled;
    # the entry is still set, to an empty string.
    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
    except AttributeError:
        # fall back to the legacy ``sort`` API when ``sort_values`` is
        # unavailable (note double brackets for python3.3)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    counts = [len(groups.groups[k]) for k in df.DRUG_ID]
    df['Number of associations computed'] = counts

    # number of significant associations per drug (0 when the drug has none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    # add another set of drug_id but sorted in alpha numerical order
    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # legacy pandas API
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]
    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA
    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'
    filename = 'ANOVA_input.csv'
    filename = os.sep.join([input_dir, filename])
    self.report.gdsc.ic50.to_csv(filename)
    # the template gets the path relative to the report directory
    filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
    self.jinja['ic50_file'] = filename

    # the genomic features, which may be the default version
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    html = """Saved <a href="INPUT/genomic_features.csv">Genomic
    Features</a> file<br/> (possibly the default
    version)."""
    self.jinja['gf_file'] = html

    # Always save DRUG_DECODE file even if empty
    # It may be be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s">
        json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join([self.settings.directory, 'OUTPUT',
                            'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that reloads the saved inputs and regenerates the report.
    code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv'}

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)
def plot(self, num=1, cmap=None, colorbar=True, vmin=None,
         vmax=None, colorbar_position='right', gradient_span='None'):
    """Draw the clustered heatmap of ``self.frame`` with its dendrograms.

    ``self.frame`` is reordered in place according to the dendrogram
    leaves, and the final ordering is stored in ``self.d``.

    :param num: figure number passed to ``pylab.figure``
    :param cmap: colormap (or argument accepted by
        ``colormap.cmap_builder``); defaults to ``self.params.cmap``
    :param colorbar: draw the colorbar when equal to True
    :param vmin: lower bound of the color normalisation
    :param vmax: upper bound of the color normalisation
    :param colorbar_position: ``'right'`` or ``'top left'``
    :param gradient_span: one of ``'min_to_max_centered'``, ``'only_max'``,
        ``'only_min'``; any other value (default ``'None'``, as in R)
        leaves *vmin* and *vmax* untouched
    :return: dictionary of the created axes/artists with keys among
        ``dendogram1``, ``dendogram2``, ``heatmap``, ``category_column``,
        ``category_row`` and ``colorbar``
    :raises ValueError: if *colorbar_position* is not supported

    Using::

        df = pd.DataFrame({'A':[1,0,1,1],
                           'B':[.9,0.1,.6,1],
                           'C':[.5,.2,0,1],
                           'D':[.5,.2,0,1]})

    and ::

        h = Heatmap(df)
        h.plot(vmin=0, vmax=1.1)

    we seem to get the same as in R with ::

        df = data.frame(A=c(1,0,1,1), B=c(.9,.1,.6,1), C=c(.5,.2,0,1), D=c(.5,.2,0,1))
        heatmap((as.matrix(df)), scale='none')

    .. todo:: right now, the order of cols and rows is random somehow.
        could be ordered like in heatmap (R) by mean of the row and col
        or with a set of vector for col and rows.

        heatmap((as.matrix(df)), Rowv=c(3,2), Colv=c(1), scale='none')

        gives same as::

            df = get_heatmap_df()
            h = heatmap.Heatmap(df)
            h.plot(vmin=-0, vmax=1.1)

    """
    # save all parameters in a dict
    layout = {}

    if cmap is None:
        cmap = self.params.cmap
    try:
        cmap = colormap.cmap_builder(cmap)
    except Exception:
        # best effort: keep cmap as provided if it cannot be built
        pass

    # keep track of row and column names for later.
    row_header = self.frame.index
    column_header = self.frame.columns

    # FIXME something clever for the fontsize
    # The branches are exclusive so that the smallest font is really used
    # for the largest matrices.
    largest = max(len(row_header), len(column_header))
    if largest > 100:
        matplotlib.rcParams['font.size'] = 6
    elif largest > 50:
        matplotlib.rcParams['font.size'] = 7
    else:
        matplotlib.rcParams['font.size'] = 12

    # scaling min/max range
    # min_to_max, min_to_max_centered, only_max, only_min
    self.gradient_span = gradient_span

    if self.gradient_span == 'min_to_max_centered':
        # missing bounds are taken from the data so that the default
        # vmin/vmax (None) can be centered as well
        if vmin is None:
            vmin = self.frame.min().min()
        if vmax is None:
            vmax = self.frame.max().max()
        vmax = max([vmax, abs(vmin)])
        vmin = vmax * -1
    if self.gradient_span == 'only_max':
        vmin = 0
        vmax = self.frame.max().max()
    if self.gradient_span == 'only_min':
        vmin = self.frame.min().min()
        vmax = 0
    norm = matplotlib.colors.Normalize(vmin, vmax)

    # Scale the figure window size
    fig = pylab.figure(num=num, figsize=(12, 8))
    fig.clf()

    # LAYOUT --------------------------------------------------
    # ax1 (dendrogram 1) on the left of the heatmap
    [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6]
    width_between_ax1_axr = 0.004
    # distance between the top color bar axis and the matrix
    height_between_ax1_axc = 0.004
    # Sufficient size to show
    color_bar_w = 0.015

    # axr, placement of row side colorbar, right of dendrogram 1
    axr_w = color_bar_w
    axr_x = ax1_x + ax1_w + width_between_ax1_axr
    axr_y = ax1_y
    axr_h = ax1_h
    width_between_axr_axm = 0.004

    # axc, placement of column side colorbar, above the heatmap
    axc_w = 0.5
    axc_h = color_bar_w
    axc_x = axr_x + axr_w + width_between_axr_axm
    axc_y = ax1_y + ax1_h + height_between_ax1_axc
    height_between_axc_ax2 = 0.004

    # axm, placement of heatmap for the data matrix
    axm_x = axr_x + axr_w + width_between_axr_axm
    axm_y = ax1_y
    axm_h = ax1_h
    axm_w = axc_w

    # ax2 (dendrogram 2), on the top of the heatmap
    ax2_h = 0.15
    ax2_x = axr_x + axr_w + width_between_axr_axm
    ax2_y = (ax1_y + ax1_h + height_between_ax1_axc + axc_h
             + height_between_axc_ax2)
    ax2_w = axc_w

    # axcb - placement of the color legend
    if colorbar_position == 'top left':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09]
    elif colorbar_position == 'right':
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6]
    else:
        raise ValueError("'top left' or 'right' accepted for now")

    # COMPUTATION DENDOGRAM 1 -------------------------------------
    if self.column_method:
        Y = self.linkage(self.frame.transpose(), self.column_method,
                         self.column_metric)
        ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)
        Z = hierarchy.dendrogram(Y)
        ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)
        ax2.set_xticks([])
        ax2.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx2 = Z['leaves']
        self.frame = self.frame.iloc[:, idx2]
        # reorder the flat cluster to match the order of the leaves of
        # the dendrogram
        ind2 = ind2[idx2]
        layout['dendogram2'] = ax2
    else:
        idx2 = range(self.frame.shape[1])

    # COMPUTATION DENDOGRAM 2 ---------------------------------
    if self.row_method:
        Y = self.linkage(self.frame, self.row_method, self.row_metric)
        ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
        Z = hierarchy.dendrogram(Y, orientation='right')
        ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                  self.cluster_criterion)
        ax1.set_xticks([])
        ax1.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual
        # matrix data
        idx1 = Z['leaves']
        self.frame = self.frame.iloc[idx1, :]
        # reorder the flat cluster to match the order of the leaves of
        # the dendrogram
        ind1 = ind1[idx1]
        layout['dendogram1'] = ax1
    else:
        idx1 = range(self.frame.shape[0])

    # HEATMAP itself
    axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
    axm.imshow(self.frame, aspect='auto', origin='lower',
               interpolation='None', cmap=cmap, norm=norm)
    axm.set_xticks([])
    axm.set_yticks([])
    layout['heatmap'] = axm

    # TEXT
    # The headers were saved before the reordering, hence the lookup
    # through the leaves order.
    n_columns = self.frame.shape[1]
    for i in range(self.frame.shape[0]):
        axm.text(n_columns - 0.5, i, ' ' + str(row_header[idx1[i]]),
                 verticalalignment="center")

    for i in range(n_columns):
        axm.text(i, -0.9, ' ' + str(column_header[idx2[i]]), rotation=90,
                 verticalalignment="top", horizontalalignment="center")

    # CATEGORY column ------------------------------
    if self.category_column:
        axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])
        cmap_c = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        category_col = [self.category_column[self.df.columns[i]]
                        for i in idx2]
        dc = np.array(category_col, dtype=int)
        # shaped from the categories themselves: the flat clusters (ind2)
        # only exist when the columns are clustered
        dc.shape = (1, len(category_col))
        axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
        axc.set_xticks([])
        axc.set_yticks([])
        layout['category_column'] = axc

    # CATEGORY row -------------------------------
    if self.category_row:
        axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
        # self.category_row must be a dictionary with names as found in
        # the columns of the dataframe.
        # NOTE(review): row positions (idx1) are looked up in
        # self.df.columns -- confirm this is intended rather than the
        # index.
        category_row = [self.category_row[self.df.columns[i]]
                        for i in idx1]
        dr = np.array(category_row, dtype=int)
        dr.shape = (len(category_row), 1)
        cmap_r = matplotlib.colors.ListedColormap(
            self.params.col_side_colors)
        axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
        axr.set_xticks([])
        axr.set_yticks([])
        layout['category_row'] = axr

    # COLORBAR ----------------------
    if colorbar == True:
        axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                            frame_on=False)
        if colorbar_position == 'right':
            orientation = 'vertical'
        else:
            orientation = 'horizontal'
        cb = matplotlib.colorbar.ColorbarBase(axcb, cmap=cmap, norm=norm,
                                              orientation=orientation)
        layout['colorbar'] = cb

    # could be useful
    self.d = {'ordered': self.frame.copy(), 'rorder': idx1,
              'corder': idx2}

    return layout
def _create_report(self, onweb=True):
    """Fill ``self.jinja`` with the main report sections and save files.

    The following ``self.jinja`` entries are set: ``summary``, ``volcano``,
    ``manova``, ``drug_not_tested``, ``drug_table``, ``feature_table``,
    ``cosmic_table``, ``ic50_file``, ``gf_file``, ``drug_decode`` and
    ``settings``.

    Files written: the feature and drug summaries and the full results
    (CSV, under ``OUTPUT``), the IC50 matrix, genomic features, drug decode
    table and settings (under ``INPUT``) and a ``code/rerun.py`` script.

    :param onweb: not used by this method.
    """
    # A summary table
    diag = self.report.diagnostics()
    # NOTE(review): this table is rebound further down before being used.
    table = HTMLTable(diag, 'summary')

    summary_lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            # rows with neither text nor value act as separators
            summary_lines.append('----<br/>')
        else:
            summary_lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = ''.join(summary_lines)

    print('Creating volcano plots')
    # this can be pretty slow. so keep only 1000 most relevant
    # values and 1000 random ones to get an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = """
        <h3></h3>
        <a href="volcano_all_js.html">
            <img alt="volcano plot for all associations"
                src="volcano_all_js.png">
        </a>
        <br/>
        <p> A javascript version is available
            <a href="volcano_all_js.html">here</a> (
            or click on the image).</p>
    """

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = """
    There were %(N)s significant associations found.
    All significant associations have been gatherered
    in the following link: <br/><a href="manova.html">manova results</a>.
    """ % {
        'N': N
    }

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # Listing the drugs that could not be analysed is currently disabled;
    # the entry is still set, to an empty string.
    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
    except AttributeError:
        # fall back to the legacy ``sort`` API when ``sort_values`` is
        # unavailable (note double brackets for python3.3)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    counts = [len(groups.groups[k]) for k in df.DRUG_ID]
    df['Number of associations computed'] = counts

    # number of significant associations per drug (0 when the drug has none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [
        len(significant[drug]) if drug in significant else 0
        for drug in df['DRUG_ID'].values
    ]

    # add another set of drug_id but sorted in alpha numerical order
    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits',
                      mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False,
                                             header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # legacy pandas API
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [
        len(significant[feature]) if feature in significant else 0
        for feature in df['FEATURE'].values
    ]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits',
                      mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False,
                                                header=True,
                                                index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]
    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA
    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'
    filename = 'ANOVA_input.csv'
    filename = os.sep.join([input_dir, filename])
    self.report.gdsc.ic50.to_csv(filename)
    # the template gets the path relative to the report directory
    filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
    self.jinja['ic50_file'] = filename

    # the genomic features, which may be the default version
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    html = """Saved <a href="INPUT/genomic_features.csv">Genomic
    Features</a> file<br/> (possibly the default
    version)."""
    self.jinja['gf_file'] = html

    # Always save DRUG_DECODE file even if empty
    # It may be be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s">
        json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join(
        [self.settings.directory, 'OUTPUT', 'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that reloads the saved inputs and regenerates the report.
    code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv'
    }

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)
def plot(self, num=1, cmap=None, colorbar=True, figsize=(12, 8),
         fontsize=None):
    """Draw the dendrogram of ``self.frame`` with an optional category strip.

    ``self.frame`` is reordered in place to follow the dendrogram leaves.

    :param num: figure number passed to ``pylab.figure``
    :param cmap: colormap (or argument accepted by
        ``colormap.cmap_builder``); defaults to ``self.params.cmap``.
        It is resolved but not used for drawing in this method.
    :param colorbar: not used by this method
    :param figsize: figure size passed to ``pylab.figure``
    :param fontsize: if set, overrides the font size otherwise chosen from
        the number of rows

    Using as input::

        df = pd.DataFrame({'A':[1,0,1,1],
                           'B':[.9,0.1,.6,1],
                           'C':[.5,.2,0,1],
                           'D':[.5,.2,0,1]})

    .. plot::
        :include-source:
        :width: 80%

        from sequana.viz import heatmap
        df = heatmap.get_heatmap_df()
        h = heatmap.Heatmap(df)
        h.category_row['A'] = 1
        h.category_row['C'] = 1
        h.category_row['D'] = 2
        h.category_row['B'] = 2
        h.plot()

    """
    if cmap is None:
        cmap = self.params.cmap
    try:
        cmap = colormap.cmap_builder(cmap)
    except Exception:
        # best effort: keep cmap as provided if it cannot be built
        pass

    # keep track of row names for later.
    header = self.frame.index

    # FIXME something clever for the fontsize
    # The branches are exclusive so that the smaller fonts are really used
    # for the larger data sets.
    n_rows = len(header)
    if n_rows > 100:
        matplotlib.rcParams['font.size'] = 6
    elif n_rows > 50:
        matplotlib.rcParams['font.size'] = 7
    elif n_rows > 30:
        matplotlib.rcParams['font.size'] = 8
    else:
        matplotlib.rcParams['font.size'] = 12

    if fontsize:
        matplotlib.rcParams['font.size'] = fontsize

    # Scale the figure window size
    fig = pylab.figure(num=num, figsize=figsize)
    fig.clf()

    Y = self.linkage(self.frame, self.method, self.metric)
    Z = hierarchy.dendrogram(Y, orientation='right', color_threshold=0,
                             above_threshold_color="k",
                             distance_sort="descending")
    ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                              self.cluster_criterion)

    # apply the clustering for the array-dendrograms to the actual matrix
    # data
    idx1 = Z['leaves']

    # Rearrange the data frame in the order of the dendrogram
    self.frame = self.frame.iloc[idx1, :]

    # relabel the existing y ticks with the reordered row names
    ticks = pylab.yticks()[0]
    pylab.yticks(ticks, self.frame.index)
    pylab.tight_layout()

    # reorder the flat cluster to match the order of the leaves of the
    # dendrogram (not used further in this method)
    ind1 = ind1[idx1]

    if self.category:
        gca = pylab.gca()
        # the two corner points of the current axes position
        first_corner, second_corner = gca.get_position().get_points()
        f = pylab.gcf()
        # narrow strip starting at the first corner, as tall as the axes
        ax = f.add_axes([first_corner[0], first_corner[1], 0.02,
                         second_corner[1] - first_corner[1]])

        # NOTE(review): categories follow self.df.index, not the reordered
        # self.frame.index -- confirm this matches the dendrogram order.
        category = [self.category[x] for x in self.df.index]

        dr = np.array(category, dtype=int)
        dr.shape = (len(category), 1)
        cmap_r = matplotlib.colors.ListedColormap(self.params.side_colors)
        ax.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
        ax.set_xticks([])
        ax.set_yticks([])
def add_features(self):
    """Save the feature/drug summaries and build the drug and feature tables.

    The feature and drug summaries are written as CSV files under
    ``OUTPUT``. ``self.jinja['drug_not_tested']`` is always set (empty
    string). When ``self.report.df`` is not empty,
    ``self.jinja['drug_table']`` and ``self.jinja['feature_table']`` are
    filled with HTML tables holding, for each drug or feature, the number
    of significant associations (``hits``).
    """
    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = ["{}-{}".format(x, get_name(x))
                          for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # nothing to tabulate without results
    if len(self.report.df) == 0:
        return

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
    except AttributeError:
        # fall back to the legacy ``sort`` API when ``sort_values`` is
        # unavailable (note double brackets for python3.3)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    counts = [len(groups.groups[k]) for k in df.DRUG_ID]
    df['Number of associations computed'] = counts

    # number of significant associations per drug (0 when the drug has none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    # add another set of drug_id but sorted in alpha numerical order
    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # legacy pandas API
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE', url="associations/", suffix=".html")
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)
def add_bgcolor(self, colname, cmap='copper', mode='absmax', threshold=2):
    """Change column content into HTML paragraphs with a background color

    The column ``colname`` of ``self.df`` is replaced in place by strings
    of the form ``<p style="background-color:...">value</p>``. Nothing is
    done if the column is empty.

    :param colname: name of the column to colorise
    :param cmap: a colormap (matplotlib) or created using colormap package
        (from pypi).
    :param mode: type of normalisation in 'absmax', 'max', 'clip'
        (see details below). With any other value the data are passed to
        the colormap without normalisation.
    :param threshold: used if mode is set to 'clip'

    Colormaps have values between 0 and 1 so we need to normalise the data
    between 0 and 1. There are 3 modes to normalise the data so far.

    If mode is set to 'absmax', negative and positive values are
    expected to be found in a range from -inf to inf. Values are
    scaled in between [0,1] with X' = (X / M + 1) / 2 where M is the
    absolute maximum. Ideally a colormap should be made of 3 colors, the
    first color used for negative values, the second for zeros and third
    color for positive values.

    If mode is set to 'clip', values are clipped to a max value (parameter
    *threshold*) and values are normalised by that same threshold.

    If mode is set to 'max', values are normalised by the max.

    """
    try:
        # if a cmap is provided, it may be just a known cmap name
        cmap = cmap_builder(cmap)
    except Exception:
        # best effort: keep cmap as provided if it cannot be built
        pass

    data = self.df[colname].values
    if len(data) == 0:
        return

    if mode == 'clip':
        data = [min(x, threshold) / float(threshold) for x in data]
    elif mode == 'absmax':
        M = max([abs(data.min()), abs(data.max())])
        if M != 0:
            data = (data / M + 1) / 2.
        else:
            # every value is zero: use the mid-point of the colormap, as
            # the formula above would give for X = 0
            data = [0.5] * len(data)
    elif mode == 'max':
        if data.max() != 0:
            data = data / float(data.max())

    # the expected RGB values for a given data point
    rgbcolors = [cmap(x)[0:3] for x in data]
    hexcolors = [rgb2hex(*x, normalised=True) for x in rgbcolors]

    # need to read original data again
    data = self.df[colname].values

    # need to set precision since this is going to be a text not a number
    # so pandas will not use the precision for those cases:
    def prec(x):
        try:
            # this may fail if for instance x is nan or inf
            return easydev.precision(x, self.pd_options['precision'])
        except Exception:
            return x

    data = [prec(x) for x in data]
    html_formatter = '<p style="background-color:{0}">{1}</p>'
    self.df[colname] = [html_formatter.format(x, y)
                        for x, y in zip(hexcolors, data)]
def plot(self, fig=None, grid=True, rotation=30, lower=None, upper=None,
         shrink=0.9, axisbg='white', colorbar=True, label_color='black',
         fontsize='small', edgecolor='black', method='ellipse',
         order_method='complete', order_metric='euclidean', cmap=None,
         ax=None, binarise_color=False):
    """plot the correlation matrix from the content of :attr:`df`
    (dataframe)

    By default, the correlation is shown on the upper and lower triangle
    and is symmetric wrt to the diagonal. The symbols are ellipses. The
    symbols can be changed to e.g. rectangle. The symbols are shown on
    upper and lower sides but you could choose a symbol for the upper
    side and another for the lower side using the **lower** and
    **upper** parameters.

    :param fig: Create a new figure by default. If an instance of an
        existing figure is provided, the corrplot is overlayed on the
        figure provided. Can also be the number of the figure.
    :param grid: add grid (Defaults to grey color). You can set it to
        False or a color.
    :param rotation: rotate labels on y-axis
    :param lower: if set to a valid method, plots the data on the lower
        left triangle
    :param upper: if set to a valid method, plots the data on the upper
        right triangle
    :param float shrink: maximum space used (in percent) by a symbol.
        If negative values are provided, the absolute value is taken.
        If greater than 1, the symbols will overlap.
    :param axisbg: color of the background (defaults to white).
    :param colorbar: add the colorbar (defaults to True).
    :param str label_color: (defaults to black).
    :param fontsize: size of the fonts defaults to 'small'.
    :param edgecolor: stored in :attr:`edgecolor` (defaults to black).
    :param method: shape to be used in 'ellipse', 'square', 'rectangle',
        'color', 'text', 'circle', 'number', 'pie'.
    :param order_method: see :meth:`order`.
    :param order_metric: see :meth:`order`.
    :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
        'jet', or 'copper'), or a sequence of colors. Default is
        red/white/blue colors. If the colormap cannot be built, a
        message is printed and the default one is used.
    :param ax: a matplotlib axes. If provided, it is cleared first.
    :param binarise_color: stored in :attr:`binarise_color` before the
        patches are added.

    The colorbar can be tuned with the parameters stored in
    :attr:`params`.

    Here is an example. See notebook for other examples::

        c = corrplot.Corrplot(dataframe)
        c.plot(cmap=('Orange', 'white', 'green'))
        c.plot(method='circle')
        c.plot(colorbar=False, shrink=.8, upper='circle' )

    """
    # default
    if cmap is not None:
        try:
            if isinstance(cmap, str):
                self.cm = cmap_builder(cmap)
            else:
                self.cm = cmap_builder(*cmap)
        except Exception:
            print("incorrect cmap. Use default one")
            self._set_default_cmap()
    else:
        self._set_default_cmap()

    self.shrink = abs(shrink)
    self.fontsize = fontsize
    self.edgecolor = edgecolor

    df = self.order(method=order_method, metric=order_metric)

    # figure can be a number or an instance; otherwise creates it
    if isinstance(fig, int):
        fig = plt.figure(num=fig, facecolor=axisbg)
    elif fig is not None:
        fig = plt.figure(num=fig.number, facecolor=axisbg)
    else:
        fig = plt.figure(num=None, facecolor=axisbg)

    # do we have an axes to plot the data in ?
    if ax is None:
        # NOTE(review): the ``axisbg`` keyword was replaced by
        # ``facecolor`` in later matplotlib releases -- confirm against
        # the matplotlib version this project supports.
        ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
    else:
        # if so, clear the axes. Colorbar cannot be removed easily.
        plt.sca(ax)
        ax.clear()

    # subplot resets the bg color, let us set it again
    fig.set_facecolor(axisbg)

    width, height = df.shape
    labels = df.columns

    # add all patches to the figure
    # TODO check value of lower and upper
    if upper is None and lower is None:
        mode = 'method'
    elif upper and lower:
        mode = 'both'
    elif lower is not None:
        mode = 'lower'
    else:
        # lower is None here, hence upper is necessarily not None
        mode = 'upper'

    self.binarise_color = binarise_color

    if mode == 'upper':
        self._add_patches(df, upper, 'upper', ax, diagonal=True)
    elif mode == 'lower':
        self._add_patches(df, lower, 'lower', ax, diagonal=True)
    elif mode == 'method':
        self._add_patches(df, method, 'both', ax, diagonal=True)
    elif mode == 'both':
        self._add_patches(df, upper, 'upper', ax, diagonal=False)
        self._add_patches(df, lower, 'lower', ax, diagonal=False)

    # shift the limits to englobe the patches correctly
    ax.set_xlim(-0.5, width - .5)
    ax.set_ylim(-0.5, height - .5)

    # set xticks/xlabels on top
    ax.xaxis.tick_top()
    xtickslocs = np.arange(len(labels))
    ax.set_xticks(xtickslocs)
    ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                       fontsize=fontsize, ha='left')

    ax.invert_yaxis()
    ytickslocs = np.arange(len(labels))
    ax.set_yticks(ytickslocs)
    ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
    plt.tight_layout()

    if grid is not False:
        if grid is True:
            grid = 'grey'
        for i in range(0, width):
            ratio1 = float(i) / width
            ratio2 = float(i + 2) / width
            # TODO 1- set axis off
            # 2 - set xlabels along the diagonal
            # set colorbar either on left or bottom
            if mode == 'lower':
                plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
            if mode == 'upper':
                plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
            if mode in ['method', 'both']:
                plt.axvline(i + .5, color=grid)
                plt.axhline(i + .5, color=grid)

        # NOTE(review): the frame/label section below is kept under the
        # grid branch -- confirm the nesting against the upstream file.
        # can probably be simplified
        if mode == 'lower':
            plt.axvline(-.5, ymin=0, ymax=1, color='grey')
            plt.axvline(width - .5, ymin=0, ymax=1. / width,
                        color='grey', lw=2)
            plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
            plt.xticks([])
            # the axis is switched off below, so labels are drawn by
            # hand: along the diagonal and on the left side
            for i in range(0, width):
                plt.text(i, i - .6, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
                plt.text(-.6, i, labels[i], fontsize=fontsize,
                         color=label_color, rotation=0,
                         horizontalalignment='right')
            plt.axis('off')
        # can probably be simplified
        elif mode == 'upper':
            plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
            plt.axvline(-.5, ymin=1 - 1. / width, ymax=1,
                        color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(width - .5, xmin=1 - 1. / width, xmax=1,
                        color='grey', lw=2)
            plt.yticks([])
            # the axis is switched off below, so labels are drawn by
            # hand: along the diagonal and on the top side
            for i in range(0, width):
                plt.text(-.6 + i, i, labels[i], fontsize=fontsize,
                         color=label_color, horizontalalignment='right',
                         rotation=0)
                plt.text(i, -.5, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
            plt.axis('off')

    # set all ticks length to zero
    ax = plt.gca()
    ax.tick_params(axis='both', which='both', length=0)

    if colorbar:
        from mpl_toolkits.axes_grid1 import make_axes_locatable
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.00)

        N = self.params['colorbar.N'] + 1
        assert N >= 2
        cb = plt.gcf().colorbar(
            self.collection, cax=cax,
            orientation=self.params['colorbar.orientation'],
            # shrink=self.params['colorbar.shrink'],
            boundaries=np.linspace(0, 1, N),
            ticks=[0, .25, 0.5, 0.75, 1])
        # ticks are placed in [0, 1] but labelled as correlations
        cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
        # make sure it goes from -1 to 1 even though actual values may
        # not reach that range
        # NOTE(review): ``Colorbar.set_clim`` was deprecated and later
        # removed in recent matplotlib releases -- confirm against the
        # supported version.
        cb.set_clim(0, 1)