def plot_activity_matrix(df, cmap, normalized=False, annotate=True, out_path='', title=''):
    """
    Plot activity matrix showing area of land transitioning between land-use types

    :param df: matrix of land-area transitions (FROM types as rows, TO types as columns)
    :param cmap: colormap for the heatmap
    :param normalized: if True, values are fractions and are rescaled to the 0-100 range
    :param annotate: if True, annotate each heatmap cell with its value
    :param out_path: path of the output figure
    :param title: plot title
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)
    formatter = tkr.ScalarFormatter(useMathText=True)

    # The normalized scale runs from 0 to 100 and does not need scientific
    # notation, so only enable it for raw (non-normalized) areas
    if not normalized:
        formatter.set_scientific(True)
        formatter.set_powerlimits((-2, 2))

    df = df * 100.0 if normalized else df * 1.0
    vmin = math.ceil(np.nanmin(df))
    vmax = math.ceil(np.nanmax(df))  # maximum value on colorbar

    ax = sns.heatmap(df, cbar_kws={'format': formatter}, cmap=cmap,
                     linewidths=.5, linecolor='lightgray', annot=annotate,
                     fmt='.2g', annot_kws={'size': 6}, vmin=vmin, vmax=vmax)
    # for annotation of heat map cells, use: annot=True, fmt='g', annot_kws={'size': 6}
    # ax.invert_yaxis()
    ax.set_ylabel('FROM')
    ax.set_xlabel('TO')
    ax.set_title(title)

    locs, labels = plt.xticks()
    plt.setp(labels, rotation=0)
    locs, labels = plt.yticks()
    plt.setp(labels, rotation=0)

    plt.savefig(out_path, dpi=constants.DPI)
    plt.close()

    # Revert matplotlib params
    sns.reset_orig()
    set_matplotlib_params()
    get_colors(palette='tableau')
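# A minimal usage sketch for plot_activity_matrix. The 3x3 matrix, land-use
# class names, and output path below are hypothetical; `logger`,
# `constants.DPI`, `set_matplotlib_params`, and `get_colors` are assumed to
# come from this module's own imports.
def _demo_plot_activity_matrix():
    import numpy as np
    import pandas as pd
    classes = ['Cropland', 'Pasture', 'Forest']  # hypothetical land-use types
    transitions = pd.DataFrame(np.random.rand(3, 3),
                               index=classes, columns=classes)
    plot_activity_matrix(transitions, cmap='YlGnBu', normalized=True,
                         annotate=True, out_path='activity_matrix.png',
                         title='Land-use transitions (fraction of area)')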
def plot_qq(clf, X, y, figsize=(7, 7)):
    """Generate a Q-Q plot (a.k.a. normal quantile plot).

    Parameters
    ----------
    clf : sklearn.linear_model
        A scikit-learn linear model classifier with a `predict()` method.
    X : numpy.ndarray
        Training data used to fit the classifier.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (7, 7).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.
    """
    # Ensure we only plot residuals using classifiers we have tested
    assert isinstance(clf, _utils.supported_linear_models), (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    residuals = stats.residuals(clf, X, y, r_type='raw')
    prob_plot = sm.ProbPlot(residuals, scipy.stats.t, fit=True)

    # Set plot style
    sns.set_style("darkgrid")
    sns.set(font_scale=1.2)

    # Generate plot
    try:
        # The Q-Q plot doesn't respond to figure size, so prep a figure first
        fig, ax = plt.subplots(figsize=figsize)
        prob_plot.qqplot(line='45', ax=ax)
        plt.title("Normal Quantile Plot")
        plt.xlabel("Theoretical Standardized Residuals")
        plt.ylabel("Actual Standardized Residuals")
        plt.show()
    except:
        raise  # Re-raise the exception
    finally:
        sns.reset_orig()
    return fig
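# A minimal usage sketch for plot_qq on synthetic data, assuming
# sklearn.linear_model.LinearRegression is among
# `_utils.supported_linear_models`:
def _demo_plot_qq():
    import numpy as np
    from sklearn.linear_model import LinearRegression
    rng = np.random.RandomState(0)
    X = rng.normal(size=(200, 3))                    # synthetic design matrix
    y = X @ np.array([2.0, -1.0, 0.5]) + rng.normal(size=200)  # linear signal + noise
    clf = LinearRegression().fit(X, y)
    return plot_qq(clf, X, y, figsize=(7, 7))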
def plots():
    '''Plots results from the csv table into 4 figures, each with two
    vertically stacked panels.'''
    sns.reset_orig()

    # Shared legend handles
    data1 = mlines.Line2D([], [], color='grey', marker='*', label="Field",
                          linestyle='', markersize=12)
    data2 = mlines.Line2D([], [], color='k', marker='*', label="Standard",
                          linestyle='', markersize=12)
    data3 = mlines.Line2D([], [], color='#0000b3', marker='o',
                          label=r"Extremely Blue $(\Delta J-K_{s})\geq 2 \sigma$",
                          linestyle='', markersize=8)
    data4 = mlines.Line2D([], [], color='#0080ff', marker='o',
                          label=r"Bluer than avg $(\Delta J-K_{s})< 2 \sigma$",
                          linestyle='', markersize=8)
    data5 = mlines.Line2D([], [], color='#b30000', marker='o',
                          label=r"Extremely Red $(\Delta J-K_{s})\geq 2 \sigma$",
                          linestyle='', markersize=8)
    data6 = mlines.Line2D([], [], color='#ff5600', marker='o',
                          label=r"Redder than avg $(\Delta J-K_{s})< 2 \sigma$",
                          linestyle='', markersize=8)
    data7 = mlines.Line2D([], [], color='white', marker='^',
                          label="Young or Subdwarf", linestyle='', markersize=8)
    field_handles = (data1, data2, data3, data4, data5, data6)
    field_labels = ("Field", "Standard",
                    r"Extremely Blue $(\Delta J-K_{s})\geq 2 \sigma$",
                    r"Bluer than avg $(\Delta J-K_{s})< 2 \sigma$",
                    r"Extremely Red $(\Delta J-K_{s})\geq 2 \sigma$",
                    r"Redder than avg $(\Delta J-K_{s})< 2 \sigma$")

    # LMIN/LMAX vs J-K
    plt.figure(figsize=(13, 9))
    plt.subplots_adjust(hspace=0.001)
    ax1 = plt.subplot(211)
    ax2 = plt.subplot(212, sharex=ax1)
    for n in range(len(names)):
        ax1.errorbar(JK_dev[n], lmin[n], xerr=JK_dev_unc[n], yerr=lmin_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2)
        ax1.scatter(JK_dev[n], lmin[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n])
        ax2.errorbar(JK_dev[n], lmax[n], xerr=JK_dev_unc[n], yerr=lmax_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2)
        ax2.scatter(JK_dev[n], lmax[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n])
    ax1.set_xlabel(r"$J-K-(J-K_{s})_{avg}$")
    ax1.set_ylabel(r"Local Minimum ($\lambda$)")
    ax1.legend(field_handles, field_labels, fontsize=11, loc=3, numpoints=1)
    # bbox_to_anchor=(.9, 1.25)
    ax2.set_xlabel(r"$J-K-(J-K_{s})_{avg}$")
    ax2.set_ylabel(r"Local Maximum ($\lambda$)")
    ax2.set_xlim(-1, .8)
    ax2.set_ylim(1.23, 1.33)
    ax2.legend((data7,), ("Young or Subdwarf",), fontsize=11, loc=4,
               numpoints=1)
    plt.setp(ax1.get_xticklabels(), visible=False)

    # LMIN/LMAX vs H-K
    plt.figure(figsize=(13, 9))
    plt.subplots_adjust(hspace=0.001)
    ax3 = plt.subplot(211)
    ax4 = plt.subplot(212, sharex=ax3)
    for n in range(len(names)):
        ax3.errorbar(HK_dev[n], lmin[n], xerr=HK_dev_unc[n], yerr=lmin_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        ax3.scatter(HK_dev[n], lmin[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n], zorder=1)
        ax4.errorbar(HK_dev[n], lmax[n], xerr=HK_dev_unc[n], yerr=lmax_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        ax4.scatter(HK_dev[n], lmax[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n], zorder=1)
    ax3.set_xlabel(r"$H-K-(H-K_{s})_{avg}$")
    ax3.set_ylabel(r"Local Minimum ($\lambda$)")
    ax3.set_ylim(1.145, 1.195)
    ax4.set_xlabel(r"$H-K-(H-K_{s})_{avg}$")
    ax4.set_ylabel(r"Local Maximum ($\lambda$)")
    ax4.set_ylim(1.24, 1.315)
    ax4.set_xlim(-.4, .7)
    # Both legends sit on the bottom panel; a second legend() call replaces
    # the first, so re-add the first one via add_artist
    field_legend = ax4.legend(field_handles, field_labels, fontsize=11,
                              loc=3, numpoints=1)
    ax4.add_artist(field_legend)
    ax4.legend((data7,), ("Young or Subdwarf",), fontsize=11, loc=4,
               numpoints=1)
    plt.setp(ax3.get_xticklabels(), visible=False)

    # LMIN/LMAX vs J-H
    plt.figure(figsize=(13, 9))
    plt.subplots_adjust(hspace=0.001)
    ax5 = plt.subplot(211)
    ax6 = plt.subplot(212, sharex=ax5)
    for n in range(len(names)):
        ax5.errorbar(JH_dev[n], lmin[n], xerr=JH_dev_unc[n], yerr=lmin_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        ax5.scatter(JH_dev[n], lmin[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n], zorder=1)
        ax6.errorbar(JH_dev[n], lmax[n], xerr=JH_dev_unc[n], yerr=lmax_unc[n],
                     fmt='none', alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        ax6.scatter(JH_dev[n], lmax[n], alpha=0.9, s=marker_size[n],
                    c=color_value[n], marker=marker_value[n], zorder=1)
    ax5.set_xlabel(r"$J-H-(J-H)_{avg}$")
    ax5.set_ylabel(r"Local Minimum ($\lambda$)")
    ax5.set_ylim(1.145, 1.195)
    field_legend = ax5.legend(field_handles, field_labels, fontsize=11,
                              loc=4, numpoints=1)
    ax5.add_artist(field_legend)
    ax5.legend((data7,), ("Young or Subdwarf",), fontsize=11, loc=3,
               numpoints=1)
    ax6.set_xlabel(r"$J-H-(J-H)_{avg}$")
    ax6.set_ylabel(r"Local Maximum ($\lambda$)")
    ax6.set_ylim(1.24, 1.325)
    ax6.set_xlim(-1, 1.5)
    plt.setp(ax5.get_xticklabels(), visible=False)

    # LMIN/LMAX vs spectral type
    plt.figure(figsize=(13, 9))
    plt.subplots_adjust(hspace=0.001)
    P1 = plt.subplot(211)
    P2 = plt.subplot(212, sharex=P1)
    for n in range(len(names)):
        P1.errorbar(opt_spt[n], lmin[n], yerr=lmin_unc[n], fmt='none',
                    alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        P1.scatter(opt_spt[n], lmin[n], alpha=0.9, s=marker_size[n],
                   c=color_value[n], marker=marker_value[n], zorder=1)
        P2.errorbar(opt_spt[n], lmax[n], yerr=lmax_unc[n], fmt='none',
                    alpha=0.5, ecolor='k', elinewidth=2, zorder=-1)
        P2.scatter(opt_spt[n], lmax[n], alpha=0.9, s=marker_size[n],
                   c=color_value[n], marker=marker_value[n], zorder=1)
    labels = ['', 'L0', 'L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7', 'L8', 'L9']
    P1.set_xlabel("Spectral Type")
    P1.set_ylabel(r"Local Minimum ($\lambda$)")
    P1.set_xticks(np.arange(9, 20, 1))
    P1.set_xticklabels(labels)
    P1.set_ylim(1.14, 1.2)
    field_legend = P1.legend(field_handles, field_labels, fontsize=11,
                             loc=3, numpoints=1)
    P1.add_artist(field_legend)
    P1.legend((data7,), ("Young or Subdwarf",), fontsize=11, loc=4,
              numpoints=1)
    P2.set_xlabel("Spectral Type")
    P2.set_ylabel(r"Local Maximum ($\lambda$)")
    # P2.set_xticks(np.arange(9, 20, 1))  # shared with P1 via sharex
    P2.set_xticklabels(labels)
    P2.set_ylim(1.23, 1.325)
    plt.setp(P1.get_xticklabels(), visible=False)
def plot_scree(clf_pca, xlim=[-1, 10], ylim=[-0.1, 1.0], required_var=0.90,
               figsize=(10, 5)):
    """Create side-by-side scree plots for analyzing variance of principal
    components from PCA.

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    xlim : list
        X-axis range. If `required_var` is supplied, the maximum x-axis value
        will automatically be set so that the required variance line is
        visible on the plot. Defaults to [-1, 10].
    ylim : list
        Y-axis range. Defaults to [-0.1, 1.0].
    required_var : float, int, None
        A value of variance to distinguish on the scree plot. Set to None to
        not include on the plot. Defaults to 0.90.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 5).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.
    """
    # Ensure we have a PCA model
    assert isinstance(clf_pca, decomposition.PCA), (
        "Models of type {0} are not supported. Only models of type "
        "sklearn.decomposition.PCA are supported.".format(type(clf_pca)))

    # Extract variances from the model
    variances = clf_pca.explained_variance_ratio_

    # Set plot style and scale up font size
    sns.set_style("whitegrid")
    sns.set(font_scale=1.2)

    # Set up figure and generate subplots
    try:
        fig = plt.figure('scree', figsize=figsize)

        # First subplot: variance explained per component
        plt.subplot(1, 2, 1)
        plt.xlabel("Component Number")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(variances, marker='o', linestyle='--')

        # Second subplot: cumulative sum of variances explained
        cumsum = np.cumsum(variances)
        plt.subplot(1, 2, 2)
        plt.xlabel("Number of Components")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(cumsum, marker='o', linestyle='--')

        # Add marker for required variance line
        if required_var is not None:
            required_var_components = np.argmax(cumsum >= required_var) + 1
            # Update xlim if it is too small to see the marker
            if xlim[1] <= required_var_components:
                plt.xlim([xlim[0], required_var_components + 1])
            # Add the marker and legend to the plot
            plt.axvline(x=required_var_components, c='r', linestyle='dashed',
                        label="> {0:.0f}% Var. Explained: {1} "
                              "components".format(required_var * 100,
                                                  required_var_components))
            legend = plt.legend(loc='lower right', frameon=True)
            legend.get_frame().set_facecolor('#FFFFFF')
        plt.show()
    except:
        raise  # Re-raise the exception
    finally:
        sns.reset_orig()
    return fig
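# A minimal usage sketch for plot_scree on a fitted PCA model; the iris data
# is just a stand-in for real training data:
def _demo_plot_scree():
    from sklearn import datasets, decomposition
    X = datasets.load_iris().data
    clf_pca = decomposition.PCA().fit(X)
    return plot_scree(clf_pca, required_var=0.95)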
def plot_pca_pairs(clf_pca, x_train, y=None, n_components=3, diag='kde',
                   cmap=None, figsize=(10, 10)):
    """Create pairwise plots of principal components from x data.

    Colors the components according to the `y` values.

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    x_train : numpy.ndarray
        Training data used to fit `clf_pca`, either scaled or un-scaled,
        depending on how `clf_pca` was fit.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    n_components : int
        Desired number of principal components to plot. Defaults to 3.
    diag : str
        Type of plot to display on the diagonals. Default is 'kde'.

        * 'kde': density curves
        * 'hist': histograms
    cmap : str
        A string representation of a Seaborn color map. See available maps:
        https://stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 10).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.
    """
    if y is not None:
        assert y.shape[0] == x_train.shape[0], (
            "Dimensions of y {0} do not match dimensions of x_train {1}".format(
                y.shape[0], x_train.shape[0]))

    # Obtain the projections of x_train
    x_projection = clf_pca.transform(x_train)

    # Create a data frame to hold the projections of n_components PCs
    col_names = ["PC{0}".format(i + 1) for i in range(n_components)]
    df = pd.DataFrame(x_projection[:, 0:n_components], columns=col_names)

    # Generate the plot
    cmap = "Greys" if cmap is None else cmap
    color = "#55A969" if y is None else y
    sns.set_style("white", {"axes.linewidth": "0.8", "image.cmap": cmap})
    sns.set_context("notebook")

    try:
        # Create a figure instance with a subplot and populate the subplot
        # with the scatter matrix. This is needed to access the figure
        # properties later to increase distance between subplots; otherwise,
        # pandas creates its own figure with a tight layout.
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(1, 1, 1)
        from pandas.plotting import scatter_matrix  # pandas >= 0.20
        axes = scatter_matrix(df, ax=ax, alpha=0.7, figsize=figsize,
                              diagonal=diag, marker='o', c=color,
                              density_kwds={'c': '#6283B9'},
                              hist_kwds={'facecolor': '#5A76A4',
                                         'edgecolor': '#3D3D3D'})

        # Increase space between subplots
        fig.subplots_adjust(hspace=0.1, wspace=0.1)

        # Loop through subplots and remove top and right axes
        for ax in np.ravel(axes):
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
        plt.show()
    except:
        raise  # Re-raise the exception
    finally:
        sns.reset_orig()
    return fig
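# A minimal usage sketch for plot_pca_pairs with synthetic data; the class
# labels here are hypothetical and only used to color the points:
def _demo_plot_pca_pairs():
    import numpy as np
    from sklearn import decomposition
    rng = np.random.RandomState(0)
    X = rng.normal(size=(150, 5))     # synthetic features
    y = rng.randint(0, 3, size=150)   # hypothetical class labels
    clf_pca = decomposition.PCA(n_components=3).fit(X)
    return plot_pca_pairs(clf_pca, X, y=y, n_components=3, diag='hist')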
def plot_residuals(clf, X, y, r_type='standardized', figsize=(10, 8)):
    """Plot residuals of a linear model.

    Parameters
    ----------
    clf : sklearn.linear_model
        A scikit-learn linear model classifier with a `predict()` method.
    X : numpy.ndarray
        Training data used to fit the classifier.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    r_type : str
        Type of residuals to return: 'raw', 'standardized', 'studentized'.
        Defaults to 'standardized'.

        * 'raw' will return the raw residuals.
        * 'standardized' will return the standardized residuals, also known
          as internally studentized residuals, which is calculated as the
          residuals divided by the square root of MSE (or the STD of the
          residuals).
        * 'studentized' will return the externally studentized residuals,
          which is calculated as the raw residuals divided by
          sqrt(LOO-MSE * (1 - leverage_score)).
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 8).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.
    """
    # Ensure we only plot residuals using classifiers we have tested
    assert isinstance(clf, _utils.supported_linear_models), (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    # Get residuals or standardized residuals
    resids = stats.residuals(clf, X, y, r_type)
    predictions = clf.predict(X)

    # Prepare the y-axis label, depending on which type of residuals is used
    y_label = {'raw': 'Residuals',
               'standardized': 'Standardized Residuals',
               'studentized': 'Studentized Residuals'}

    # Set plot style
    sns.set_style("whitegrid")
    sns.set_context("talk")  # Increase font size on plot

    # Generate residual plot
    try:
        fig = plt.figure('residuals', figsize=figsize)
        plt.scatter(predictions, resids, s=14, c='gray', alpha=0.7)
        plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max(),
                   linestyle='dotted')
        plt.title("Residuals Plot")
        plt.xlabel("Predictions")
        plt.ylabel(y_label[r_type])
        plt.show()
    except:
        raise  # Re-raise the exception
    finally:
        sns.reset_orig()  # Always reset back to default matplotlib styles
    return fig
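# A minimal usage sketch for plot_residuals on synthetic data, again assuming
# LinearRegression is among `_utils.supported_linear_models`:
def _demo_plot_residuals():
    import numpy as np
    from sklearn.linear_model import LinearRegression
    rng = np.random.RandomState(1)
    X = rng.normal(size=(100, 2))
    y = X @ np.array([3.0, -1.5]) + rng.normal(size=100)
    clf = LinearRegression().fit(X, y)
    return plot_residuals(clf, X, y, r_type='standardized')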