def test_rotate_axis0_input(self):
    """With rotate=True and axis=0, reordered_ind equals the reference leaves."""
    plotter_kws = dict(self.default_kws, rotate=True, axis=0)
    plotter = mat._DendrogramPlotter(self.df_norm.T, **plotter_kws)

    npt.assert_array_equal(plotter.reordered_ind, self.x_norm_leaves)
def test_fastcluster_other_method(self):
    """The plotter's linkage for method='average' matches fastcluster's."""
    import fastcluster

    plotter_kws = dict(self.default_kws, method='average')
    expected_linkage = fastcluster.linkage(
        self.x_norm.T,
        method='average',
        metric='euclidean',
    )

    plotter = mat._DendrogramPlotter(self.x_norm, **plotter_kws)

    npt.assert_array_equal(plotter.linkage, expected_linkage)
def test_label_false(self):
    """With label=False, all ticks and tick labels are empty and axis labels blank."""
    plotter_kws = dict(self.default_kws, label=False)
    plotter = mat._DendrogramPlotter(self.df_norm, **plotter_kws)

    # Tick positions and tick labels must all be empty lists.
    for attr in ('xticks', 'yticks', 'xticklabels', 'yticklabels'):
        assert getattr(plotter, attr) == []

    # Axis labels must be empty strings.
    for attr in ('xlabel', 'ylabel'):
        assert getattr(plotter, attr) == ""
def test_rotate_input(self):
    """With rotate=True, the labels appear on the y axis instead of the x axis."""
    plotter_kws = dict(self.default_kws, rotate=True)
    plotter = mat._DendrogramPlotter(self.df_norm, **plotter_kws)

    # The stored array/data are the transpose of the input frame.
    expected_values = np.asarray(self.df_norm)
    npt.assert_array_equal(plotter.array.T, expected_values)
    pdt.assert_frame_equal(plotter.data.T, self.df_norm)

    # Tick labels: none on x, reordered leaves on y.
    npt.assert_array_equal(plotter.xticklabels, [])
    npt.assert_array_equal(plotter.yticklabels, self.df_norm_leaves)

    assert plotter.xlabel == ''
    assert plotter.ylabel == 'letters'
def test_linkage_scipy(self):
    """_calculate_linkage_scipy matches a linkage computed directly with scipy."""
    from scipy.spatial import distance
    from scipy.cluster import hierarchy

    plotter = mat._DendrogramPlotter(self.x_norm, **self.default_kws)
    actual = plotter._calculate_linkage_scipy()

    # Reference: pairwise distances of the transposed data, then linkage.
    metric = self.default_kws['metric']
    method = self.default_kws['method']
    pairwise = distance.pdist(self.x_norm.T, metric=metric)
    expected = hierarchy.linkage(pairwise, method=method)

    npt.assert_array_equal(actual, expected)
def test_df_input(self):
    """DataFrame input: data, linkage, dendrogram and labels match the references."""
    plotter = mat._DendrogramPlotter(self.df_norm, **self.default_kws)

    # The stored array/data are the transpose of the input frame.
    npt.assert_array_equal(plotter.array.T, np.asarray(self.df_norm))
    pdt.assert_frame_equal(plotter.data.T, self.df_norm)

    npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
    assert plotter.dendrogram == self.x_norm_dendrogram

    # x tick labels are the column names in leaf order; y has none.
    column_names = np.asarray(self.df_norm.columns)
    expected_xticklabels = column_names[self.x_norm_leaves]
    npt.assert_array_equal(plotter.xticklabels, expected_xticklabels)
    npt.assert_array_equal(plotter.yticklabels, [])

    assert plotter.xlabel == 'letters'
    assert plotter.ylabel == ''
def test_ndarray_input(self):
    """ndarray input: data is wrapped in a DataFrame and labels are leaf indices."""
    plotter = mat._DendrogramPlotter(self.x_norm, **self.default_kws)

    # The stored array/data are the transpose of the input array.
    npt.assert_array_equal(plotter.array.T, self.x_norm)
    expected_frame = pd.DataFrame(self.x_norm)
    pdt.assert_frame_equal(plotter.data.T, expected_frame)

    npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
    assert plotter.dendrogram == self.x_norm_dendrogram

    # Both the reordering and the x tick labels equal the reference leaves.
    npt.assert_array_equal(plotter.reordered_ind, self.x_norm_leaves)
    npt.assert_array_equal(plotter.xticklabels, self.x_norm_leaves)
    npt.assert_array_equal(plotter.yticklabels, [])

    assert plotter.xlabel is None
    assert plotter.ylabel == ''
def test_axis0_input(self):
    """With axis=0 on the transposed frame, data is stored untransposed."""
    plotter_kws = dict(self.default_kws, axis=0)
    transposed = self.df_norm.T
    plotter = mat._DendrogramPlotter(transposed, **plotter_kws)

    npt.assert_array_equal(plotter.array, np.asarray(self.df_norm.T))
    pdt.assert_frame_equal(plotter.data, self.df_norm.T)

    npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
    assert plotter.dendrogram == self.x_norm_dendrogram

    npt.assert_array_equal(plotter.xticklabels, self.df_norm_leaves)
    npt.assert_array_equal(plotter.yticklabels, [])

    assert plotter.xlabel == 'letters'
    assert plotter.ylabel == ''
def test_df_multindex_input(self):
    """MultiIndex levels are joined with '-' in tick labels and the axis label."""
    multi_index = pd.MultiIndex.from_tuples(
        [("A", 1), ("B", 2), ("C", 3), ("D", 4)],
        names=["letter", "number"],
    )
    multi_index.name = "letter-number"

    frame = self.df_norm.copy()
    frame.index = multi_index

    plotter_kws = dict(self.default_kws, label=True)
    plotter = mat._DendrogramPlotter(frame.T, **plotter_kws)

    # Expected labels, permuted into the plotter's leaf order.
    joined_labels = ["A-1", "B-2", "C-3", "D-4"]
    expected_xticklabels = [joined_labels[i] for i in plotter.reordered_ind]

    npt.assert_array_equal(plotter.xticklabels, expected_xticklabels)
    npt.assert_array_equal(plotter.yticklabels, [])
    assert plotter.xlabel == "letter-number"
def test_custom_linkage(self):
    """A linkage passed via the 'linkage' kwarg is used as-is by the plotter."""
    # Prefer fastcluster when it is importable; otherwise fall back to scipy.
    try:
        import fastcluster

        custom_linkage = fastcluster.linkage_vector(
            self.x_norm, method='single', metric='euclidean')
    except ImportError:
        pairwise = distance.pdist(self.x_norm, metric='euclidean')
        custom_linkage = hierarchy.linkage(pairwise, method='single')

    expected_dendrogram = hierarchy.dendrogram(
        custom_linkage, no_plot=True, color_threshold=-np.inf)

    plotter_kws = dict(self.default_kws, linkage=custom_linkage)
    plotter = mat._DendrogramPlotter(self.df_norm, **plotter_kws)

    npt.assert_array_equal(plotter.linkage, custom_linkage)
    assert plotter.dendrogram == expected_dendrogram
def heatmap(df, corr_types=None, map_type='zaric', ax=None, face_color=None,
            annot=None, cbar=True, cbar_kws=None, mask=None,
            row_cluster=False, row_cluster_metric='euclidean',
            row_cluster_method='average', row_cluster_linkage=None,
            col_cluster=False, col_cluster_metric='euclidean',
            col_cluster_method='average', col_cluster_linkage=None,
            **kwargs):
    """
    Plots a heatmap.

    Arguments:
        df: The dataframe to plot.
        corr_types: Optionally specify correlation type using a dataframe of
            CorrType enums for each entry (can be obtained from the corr
            function). When specified, numeric correlations are plotted using
            different markers (only used for map_type 'zaric').
        map_type: One of 'zaric', 'standard', 'dendrograms':
            * 'zaric' (default): a special heatmap, where magnitude is
              indicated by size of the elements as well as their colour.
            * 'standard': a standard heatmap plotted using sns.heatmap;
            * 'dendrograms': a heatmap with dendrograms, using sns.clustermap.
        ax: The matplotlib axis to use for the plotting (not supported for
            map_type 'dendrograms'). Defaults to the current axis.
        face_color: The face colour of the heatmap axes. For map_type
            'standard' and 'dendrograms' it defaults to 'black'; for 'zaric'
            it is forwarded unchanged to the zaric plotting routine.
        annot: Whether to also annotate the squares with numbers (defaults to
            True for map_type 'standard' and 'dendrograms'; for 'zaric'
            annotations are currently not displayed).
        cbar: Whether to include a colorbar.
        cbar_kws: Additional kwargs to use when plotting the colorbar.
        mask: An array or a dataframe that indicates whether a value should be
            masked out (True) or displayed (False).
        row_cluster: Whether to use hierarchical clustering to reorder the
            rows. Ignored for map_type 'dendrograms'.
        row_cluster_metric: The metric to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        row_cluster_method: The method to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        row_cluster_linkage: The linkage to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster: Whether to use hierarchical clustering to reorder the
            cols. Ignored for map_type 'dendrograms'.
        col_cluster_metric: The metric to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster_method: The method to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster_linkage: The linkage to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        **kwargs: Any remaining kwargs are passed to the plotting function.
            This includes 'square' (whether equal aspect ratio should be used
            for the axes): it defaults to True for map_type 'standard' and is
            never forwarded for map_type 'dendrograms'.

    Raises:
        ValueError: If 'ax' is given together with map_type 'dendrograms', or
            if map_type is not one of the supported values.
    """
    use_dendrograms = map_type == 'dendrograms'

    if use_dendrograms:
        # sns.clustermap builds its own figure, so a caller-supplied axis
        # cannot be honoured.
        if ax is not None:
            raise ValueError("Argument 'ax' is not supported for map_type == 'dendrograms'.")
    elif ax is None:
        ax = plt.gca()

    # Work on plain arrays so that the positional reordering below applies
    # regardless of whether an array or a dataframe was passed in.
    if mask is not None:
        mask = np.asarray(mask)
    if corr_types is not None:
        corr_types = np.asarray(corr_types)

    if row_cluster and not use_dendrograms:
        row_ind = _DendrogramPlotter(
            df, axis=0, metric=row_cluster_metric,
            method=row_cluster_method, linkage=row_cluster_linkage,
            label=False, rotate=False
        ).reordered_ind

        # Positional selection: same result as reindexing by label, but it
        # also works when the index contains duplicate labels.
        df = df.iloc[row_ind]
        if mask is not None:
            mask = mask[row_ind, :]
        if corr_types is not None:
            corr_types = corr_types[row_ind, :]

    if col_cluster and not use_dendrograms:
        col_ind = _DendrogramPlotter(
            df, axis=1, metric=col_cluster_metric,
            method=col_cluster_method, linkage=col_cluster_linkage,
            label=False, rotate=False
        ).reordered_ind

        df = df.iloc[:, col_ind]
        if mask is not None:
            mask = mask[:, col_ind]
        if corr_types is not None:
            corr_types = corr_types[:, col_ind]

    if map_type == "zaric":
        # One (index label, column label) pair per cell, in the same
        # row-major order as df.values.reshape(-1).
        pairs = np.asarray(list(itertools.product(df.index, df.columns)))
        # NOTE(review): x holds the index (row) labels and y the column
        # labels, while x_order is df.columns and y_order is df.index.
        # Confirm this matches what _zaric_heatmap expects.
        x = pairs[:, 0]
        y = pairs[:, 1]
        v = df.values.reshape(-1)
        m = mask.reshape(-1) if mask is not None else None

        # Flag the cells holding numeric-vs-numeric correlations.
        circ = np.zeros(len(x))
        if corr_types is not None:
            circ[np.where(corr_types.reshape(-1) == CorrType.num_vs_num)] = True

        # Caller-supplied kwargs take precedence over the defaults.
        zaric_kwargs = dict(color=v, size=np.abs(v), circular=circ)
        zaric_kwargs.update(kwargs)

        _zaric_heatmap(
            x, y,
            ax=ax,
            face_color=face_color,
            cbar=cbar,
            cbar_kws=cbar_kws,
            mask=m,
            x_order=df.columns,
            y_order=df.index,
            **zaric_kwargs
        )

        ax.set_xlabel(df.columns.name)
        ax.set_ylabel(df.index.name)

    elif map_type in ('standard', 'dendrograms'):
        if annot is None:
            annot = True
        if face_color is None:
            face_color = 'black'

        # Caller-supplied kwargs take precedence over the defaults.
        plot_kwargs = dict(center=0, square=True, linewidths=1, annot=annot)
        plot_kwargs.update(kwargs)

        if use_dendrograms:
            # 'square' is never forwarded to clustermap.
            del plot_kwargs['square']
            grid = sns.clustermap(df, cbar=cbar, cbar_kws=cbar_kws,
                                  mask=mask, **plot_kwargs)
            # Take the heatmap axes from the returned grid rather than by
            # position in the figure: the position shifts when extra axes
            # (e.g. row/column colours) are present.
            ax = grid.ax_heatmap
        else:
            sns.heatmap(df, ax=ax, cbar=cbar, cbar_kws=cbar_kws,
                        mask=mask, **plot_kwargs)

        ax.set_facecolor(face_color)
        ax.xaxis.set_tick_params(rotation=45)
        plt.setp(ax.get_xticklabels(), rotation_mode="anchor",
                 horizontalalignment="right")

    else:
        raise ValueError("Unknown map_type '{}'.".format(map_type))