def distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, xlabel=None, ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        Observed data.
    bins : argument for matplotlib hist(), or None
        Specification of hist bins, or None to use Freedman-Diaconis rule.
    hist : bool, default True
        Whether to plot a (normed) histogram.
    kde : bool, default True
        Whether to plot a gaussian kernel density estimate.
    rug : bool, default False
        Whether to draw a rugplot on the support axis.
    fit : random variable object
        An object with `fit` method, returning a tuple that can be
        passed to a `pdf` method as positional arguments following a
        grid of values to evaluate the pdf on.
    {hist, kde, rug, fit}_kws : dictionaries
        Keyword arguments for underlying plotting functions. The
        dictionaries are copied, so the caller's objects are not modified.
        In `fit_kws`, the keys `npts` and `support_thresh` are consumed
        here and handed to the support-grid helper instead of the plot call.
    color : matplotlib color, optional
        Color to plot everything but the fitted curve in.
    vertical : bool, default False
        If True, observed values are on y-axis.
    xlabel : string, False, or None
        Name for the x axis label. If None, will try to get it from
        a.name; if False, do not set the x label.
    ax : matplotlib axis, optional
        If provided, plot on this axis.

    Returns
    -------
    ax : matplotlib axis
        The axis that was drawn on.

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the axis: an explicit string always wins, None
    # falls back to the data's own name, and False disables labelling.
    label_x = bool(xlabel)
    if xlabel is None and hasattr(a, "name"):
        xlabel = a.name
        if xlabel is not None:
            label_x = True

    # Make a a 1-d array
    a = np.asarray(a).squeeze()

    # Handle dictionary defaults. Copies are taken because keys are popped
    # below and the caller's dictionaries must stay intact.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, and then
    # immediately removing, a throwaway line.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # Use the shared Freedman-Diaconis helper instead of repeating
            # the formula here, and coerce to a whole number because a
            # bin *count* cannot be fractional.
            n_bins = _freedman_diaconis_bins(a)
            # A zero IQR can make the rule's result non-finite; fall back
            # to a fixed 10 bins in that case.
            bins = int(n_bins) if np.isfinite(n_bins) else 10
        hist_alpha = hist_kws.pop("alpha", 0.4)
        orientation = "horizontal" if vertical else "vertical"
        hist_color = hist_kws.pop("color", color)
        # NOTE(review): newer matplotlib releases spell this `density=`;
        # `normed` is kept to match the version this file was written
        # against -- confirm before upgrading.
        ax.hist(a, bins, normed=True, color=hist_color, alpha=hist_alpha,
                orientation=orientation, **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        npts = fit_kws.pop("npts", 1000)
        support_thresh = fit_kws.pop("support_thresh", 1e-4)
        params = fit.fit(a)

        def pdf(x):
            return fit.pdf(x, *params)

        x = _kde_support(a, pdf, npts, support_thresh)
        y = pdf(x)
        if vertical:
            # Observations live on the y-axis, so swap the coordinates.
            x, y = y, x
        ax.plot(x, y, color=fit_color, **fit_kws)

    if label_x:
        ax.set_xlabel(xlabel)

    return ax
def _freedman_diaconis_bins(a): """Calculate number of hist bins using Freedman-Diaconis rule.""" # From http://stats.stackexchange.com/questions/798/ a = np.asarray(a) h = 2 * moss.iqr(a) / (len(a) ** (1 / 3)) return np.ceil((a.max() - a.min()) / h)
def distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, legend=False, xlabel=None, ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        observed data
    bins : argument for matplotlib hist(), or None
        specification of bins or None to use Freedman-Diaconis rule
    hist : bool, default True
        whether to plot a (normed) histogram
    kde : bool, default True
        whether to plot a gaussian kernel density estimate
    rug : bool, default False
        whether to draw a rugplot on the support axis
    fit : random variable object
        object with `fit` method returning a tuple that can be
        passed to a `pdf` method as positional arguments following an
        array of values to evaluate the pdf at; its `name` attribute, when
        present, is used as the default legend label of the fitted curve
    {hist, kde, rug, fit}_kws : dictionaries
        keyword arguments for underlying plotting functions; the
        dictionaries are copied, so the caller's objects are not modified.
        A `label` given in `kde_kws` or `fit_kws` takes precedence over
        the automatic one. In `fit_kws`, the keys `npts` and
        `support_thresh` are consumed here and handed to the support-grid
        helper instead of the plot call
    color : matplotlib color, optional
        color to plot everything but the fitted curve in
    vertical : bool, default False
        if True, observed values are on y-axis
    legend : bool, default False
        if True, add a legend to the plot with what the plotted lines are
    xlabel : string, False, or None
        name for the x axis label. if None, will try to get it from a.name
        if False, do not set the x label
    ax : matplotlib axis, optional
        if provided, plot on this axis

    Returns
    -------
    ax : matplotlib axis
        the axis that was drawn on

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the axis: an explicit string always wins, None
    # falls back to the data's own name, and False disables labelling.
    label_x = bool(xlabel)
    if xlabel is None and hasattr(a, "name"):
        xlabel = a.name
        if xlabel is not None:
            label_x = True

    # Make a a 1-d array
    a = np.asarray(a).squeeze()

    # Handle dictionary defaults. Copies are taken because keys are popped
    # or added below and the caller's dictionaries must stay intact.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, and then
    # immediately removing, a throwaway line.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # Use the shared Freedman-Diaconis helper instead of repeating
            # the formula here, and coerce to a whole number because a
            # bin *count* cannot be fractional.
            n_bins = _freedman_diaconis_bins(a)
            # A zero IQR can make the rule's result non-finite; fall back
            # to a fixed 10 bins in that case.
            bins = int(n_bins) if np.isfinite(n_bins) else 10
        hist_alpha = hist_kws.pop("alpha", 0.4)
        orientation = "horizontal" if vertical else "vertical"
        hist_color = hist_kws.pop("color", color)
        # NOTE(review): newer matplotlib releases spell this `density=`;
        # `normed` is kept to match the version this file was written
        # against -- confirm before upgrading.
        ax.hist(a, bins, normed=True, color=hist_color, alpha=hist_alpha,
                orientation=orientation, **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        # Only supply the legend label when the caller did not pick one.
        kde_kws.setdefault("label", "kde")
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        npts = fit_kws.pop("npts", 1000)
        support_thresh = fit_kws.pop("support_thresh", 1e-4)
        params = fit.fit(a)

        def pdf(x):
            return fit.pdf(x, *params)

        x = _kde_support(a, pdf, npts, support_thresh)
        y = pdf(x)
        if vertical:
            # Observations live on the y-axis, so swap the coordinates.
            x, y = y, x
        # Label the curve with the distribution's name, unless the caller
        # chose a label or the object does not expose a name at all.
        if "label" not in fit_kws:
            fit_label = getattr(fit, "name", None)
            if fit_label is not None:
                fit_kws["label"] = fit_label
        ax.plot(x, y, color=fit_color, **fit_kws)

    if legend:
        ax.legend(loc="best")

    if label_x:
        ax.set_xlabel(xlabel)

    return ax
def distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, axlabel=None, ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        Observed data.
    bins : argument for matplotlib hist(), or None
        Specification of hist bins, or None to use Freedman-Diaconis rule.
    hist : bool, default True
        Whether to plot a (normed) histogram.
    kde : bool, default True
        Whether to plot a gaussian kernel density estimate.
    rug : bool, default False
        Whether to draw a rugplot on the support axis.
    fit : random variable object
        An object with `fit` method, returning a tuple that can be
        passed to a `pdf` method as positional arguments following a
        grid of values to evaluate the pdf on.
    {hist, kde, rug, fit}_kws : dictionaries
        Keyword arguments for underlying plotting functions. The
        dictionaries are copied, so the caller's objects are not modified.
        In `fit_kws`, the keys `gridsize`, `cut` and `clip` are consumed
        here and handed to the support-grid helper instead of the plot call.
    color : matplotlib color, optional
        Color to plot everything but the fitted curve in.
    vertical : bool, default False
        If True, observed values are on y-axis.
    axlabel : string, False, or None
        Name for the support axis label. If None, will try to get it
        from a.name; if False, do not set a label.
    ax : matplotlib axis, optional
        If provided, plot on this axis.

    Returns
    -------
    ax : matplotlib axis
        The axis that was drawn on.

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the support axis: an explicit string always wins,
    # None falls back to the data's own name, and False disables labelling.
    label_ax = bool(axlabel)
    if axlabel is None and hasattr(a, "name"):
        axlabel = a.name
        if axlabel is not None:
            label_ax = True

    # Make a a 1-d array
    a = np.asarray(a).squeeze()

    # Handle dictionary defaults. Copies are taken because keys are popped
    # below and the caller's dictionaries must stay intact.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, and then
    # immediately removing, a throwaway line.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # Use the shared Freedman-Diaconis helper instead of repeating
            # the formula here, and coerce to a whole number because a
            # bin *count* cannot be fractional.
            n_bins = _freedman_diaconis_bins(a)
            # A zero IQR can make the rule's result non-finite; fall back
            # to a fixed 10 bins in that case.
            bins = int(n_bins) if np.isfinite(n_bins) else 10
        hist_alpha = hist_kws.pop("alpha", 0.4)
        orientation = "horizontal" if vertical else "vertical"
        hist_color = hist_kws.pop("color", color)
        # NOTE(review): newer matplotlib releases spell this `density=`;
        # `normed` is kept to match the version this file was written
        # against -- confirm before upgrading.
        ax.hist(a, bins, normed=True, color=hist_color, alpha=hist_alpha,
                orientation=orientation, **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        gridsize = fit_kws.pop("gridsize", 500)
        cut = fit_kws.pop("cut", 3)
        clip = fit_kws.pop("clip", (-np.inf, np.inf))
        # The evaluation grid is derived from the data and a Scott
        # bandwidth, before the distribution itself is fitted.
        bw = sm.nonparametric.bandwidths.bw_scott(a)
        x = _kde_support(a, bw, gridsize, cut, clip)
        params = fit.fit(a)

        def pdf(x):
            return fit.pdf(x, *params)

        y = pdf(x)
        if vertical:
            # Observations live on the y-axis, so swap the coordinates.
            x, y = y, x
        ax.plot(x, y, color=fit_color, **fit_kws)

    if label_ax:
        # The label follows the observations to whichever axis holds them.
        if vertical:
            ax.set_ylabel(axlabel)
        else:
            ax.set_xlabel(axlabel)

    return ax
def _freedman_diaconis_bins(a): """Calculate number of hist bins using Freedman-Diaconis rule.""" # From http://stats.stackexchange.com/questions/798/ a = np.asarray(a) h = 2 * moss.iqr(a) / (len(a)**(1 / 3)) return np.ceil((a.max() - a.min()) / h)