def plot(df, idx, weights, mappers=None, cols=4):
    """
    Plot bar plots for all columns in the DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame object with the data.

    idx : iterable
        An iterable containing the indices of selected participants.

    weights : dict {column: float}
        Weighting over dataframe columns. Only used to build default
        mappers (via ``construct_mappers(df, weights)``) when `mappers`
        is None.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        One panel is drawn per key of this dictionary.

    cols : int, optional
        Number of panels per row in the subplot grid (default 4).

    Returns
    -------
    axes : list
        The axes objects returned by ``plot_fractions``, one per plotted
        column, in the iteration order of `mappers`.
    """
    if mappers is None:
        mappers = construct_mappers(df, weights)

    columns = list(mappers.keys())
    ncolumns = len(columns)

    # Ceiling division, kept as a plain int: Figure.add_subplot requires
    # integer grid dimensions and rejects the float np.floor would give.
    rows, remainder = divmod(ncolumns, cols)
    rows = int(rows) + (1 if remainder else 0)

    fig = plt.figure(figsize=(4 * cols, 3 * rows))

    axes = []
    for i, c in enumerate(columns):
        ax = fig.add_subplot(rows, cols, i + 1)
        ax, _ = plot_fractions(df[c], idx, c, mappers[c], ax=ax)
        axes.append(ax)

    fig.tight_layout()

    return axes
def plot_triangle(df, weights, mappers=None, cmap="YlGnBu", bins=30,
                  prefac=10., cat_type="box", cont_type="hist", fig=None):
    """
    Make a triangle plot of all the relevant columns in the DataFrame.

    The diagonal panels show the univariate distribution of each column,
    the panels below the diagonal show the bivariate distribution of each
    pair of columns, and the panels above the diagonal are left blank.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    weights : dict {column: float}
        Weighting over dataframe columns. Only used to build default
        mappers (via ``construct_mappers(df, weights)``) when `mappers`
        is None.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        Each mapper must be an ObjectMapper (treated as categorical) or
        a ContinuousMapper (treated as continuous).

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    bins : int
        The number of bins for the histogram.

    prefac : float
        A pre-factor steering the shading of the bubbles

    cat_type : {"box" | "strip" | "swarm" | "violin" | "categorical"}
        The type of plot for any plot including both categorical and
        continuous data.

    cont_type : str
        The type of plot to produce for pairs of continuous columns;
        passed through unchanged to ``plot_correlation``. Earlier
        documentation lists a kernel density estimate ("kde") and a
        scatter plot ("scatter"); the default here is "hist".

    fig : matplotlib.Figure object
        A Figure object to plot in. If None, a new figure is created.

    Returns
    -------
    fig : matplotlib.Figure object
        The Figure object

    axes : numpy.ndarray
        A 2-D array of shape (nkeys, nkeys) holding the axes of every
        panel, where nkeys is the number of mappers.

    Raises
    ------
    TypeError
        If a mapper is neither an ObjectMapper nor a ContinuousMapper.
    """
    # if mappers are None, construct them with some default settings
    if mappers is None:
        mappers = construct_mappers(df, weights)

    # the keys, sorted so that the panel order is deterministic
    keys = np.sort(list(mappers.keys()))

    # the number of panels needed along each side of the grid
    nkeys = len(keys)

    # determine the type of each column from its mapper
    all_types = []
    for k in keys:
        if isinstance(mappers[k], ObjectMapper):
            all_types.append("categorical")
        elif isinstance(mappers[k], ContinuousMapper):
            all_types.append("continuous")
        else:
            raise TypeError("Data type not recognized!")

    # construct the figure
    if fig is None:
        # squeeze=False guarantees a 2-D array even for a single key, so
        # the axes[i, j] indexing below always works.
        fig, axes = plt.subplots(nkeys, nkeys,
                                 figsize=(4 * nkeys, 3 * nkeys),
                                 squeeze=False)
    else:
        axes = [fig.add_subplot(nkeys, nkeys, k)
                for k in range(1, nkeys * nkeys + 1)]
        axes = np.array(axes).reshape((nkeys, nkeys))

    for i, kx in enumerate(keys):
        for j, ky in enumerate(keys):
            xtype = all_types[i]
            ytype = all_types[j]
            ax = axes[i, j]

            # upper triangle: print white space
            if i < j:
                for side in ("right", "top", "left", "bottom"):
                    ax.spines[side].set_visible(False)
                # Axes.set_axis_bgcolor was replaced by set_facecolor and
                # later removed; fall back only on old Matplotlib versions.
                if hasattr(ax, "set_facecolor"):
                    ax.set_facecolor("white")
                else:
                    ax.set_axis_bgcolor("white")
                ax.set_xlabel("")
                ax.set_ylabel("")
                ax.axis("off")
                continue

            # diagonal: plot the univariate distribution
            elif i == j:
                axes[i, j] = plot_distribution(df, kx,
                                               xmapper=mappers[kx],
                                               xtype=xtype, ax=ax,
                                               cmap=cmap, bins=bins)

            # lower triangle: plot the bivariate distributions
            # (the column key is plotted on x, the row key on y)
            else:
                axes[i, j] = plot_correlation(df, ky, kx,
                                              xmapper=mappers[ky],
                                              ymapper=mappers[kx],
                                              ax=ax, cmap=cmap,
                                              xtype=ytype, ytype=xtype,
                                              prefac=prefac,
                                              cat_type=cat_type,
                                              cont_type=cont_type)

            # only the bottom row keeps its x tick labels and x label
            if i < nkeys - 1:
                axes[i, j].set_xticklabels([])
                axes[i, j].set_xlabel("")
            else:
                for label in axes[i, j].get_xticklabels():
                    label.set_rotation(45)

            # only the first column keeps its y tick labels; diagonal
            # panels keep their y label
            if j > 0:
                axes[i, j].set_yticklabels([])
                if i != j:
                    axes[i, j].set_ylabel("")

    # lay out the figure being drawn into, which is not necessarily
    # pyplot's current figure when `fig` was supplied by the caller
    fig.tight_layout()

    return fig, axes