def histplot1d_part(ax, x, prob=None, N_bins='knuth', histrange=None,
                    x_lim=None, y_lim=None):
    '''Draw a 1D step histogram on ``ax`` and shade its confidence intervals.

    Variant of ``histplot1d`` that draws on a caller-supplied axes, for use
    with subplots.

    Parameters
    ----------
    ax : axes-like
        Object providing ``hist``, ``plot``, ``bar``, ``set_xlim`` and
        ``set_ylim``.
    x : array_like
        Data to histogram.
    prob : array_like or None
        Passed to ``ax.hist`` as ``weights``.
    N_bins : int or 'knuth'
        Number of bins, or ``'knuth'`` to take the bin count from
        ``de.knuth_bin_width``.
    histrange : tuple or None
        Passed to ``ax.hist`` as ``range``.
    x_lim, y_lim : tuple or None
        Axis limits applied when not None.

    Returns
    -------
    loc, ll_68, ul_68, ll_95, ul_95
        Location from ``biweightLoc`` and the interval limits returned by
        ``bcpcl(loc, x_binned, 1)`` and ``bcpcl(loc, x_binned, 2)``.
    '''
    # Only compare against the string when N_bins really is a string, so an
    # array of bin edges does not trigger an element-wise comparison.
    if isinstance(N_bins, str) and N_bins == 'knuth':
        _, knuth_edges = de.knuth_bin_width(x, return_bins=True)
        N_bins = knuth_edges.size - 1

    hist, binedges, _ = ax.hist(
        x, bins=N_bins, histtype='step', weights=prob, range=histrange,
        color='k', linewidth=1)

    # The location/confidence routines cannot take weighted data, so rebuild
    # a pseudo-sample from the (weighted) histogram: every bin contributes
    # its centre repeated "bin height" times.  matplotlib returns float bin
    # heights, which np.ones() rejects as a length, so truncate to integers
    # explicitly and let np.repeat do the expansion.
    hist = np.asarray(hist)
    binedges = np.asarray(binedges)
    counts = hist.astype(int)
    centers = (binedges[:-1] + binedges[1:]) / 2
    x_binned = np.repeat(centers, counts)

    loc = biweightLoc(x_binned)
    ll_68, ul_68 = bcpcl(loc, x_binned, 1)
    ll_95, ul_95 = bcpcl(loc, x_binned, 2)

    # Find the bin each statistic falls into so that the location line only
    # extends to the top of the histogram.
    loc_ix = find_bin_ix(binedges, loc)
    ll_68_ix = find_bin_ix(binedges, ll_68)
    ul_68_ix = find_bin_ix(binedges, ul_68)
    ll_95_ix = find_bin_ix(binedges, ll_95)
    ul_95_ix = find_bin_ix(binedges, ul_95)

    ax.plot((loc, loc), (0, hist[loc_ix - 1]), ls='--', lw=1, color="k")

    width = binedges[ll_68_ix + 1] - binedges[ll_68_ix]
    # align='edge' anchors each bar at the bin's left edge; the matplotlib
    # default is 'center', which would shift the shading by half a bin.
    for i in range(ll_68_ix, ul_68_ix):
        ax.bar(binedges[i], hist[i], width, lw=0, color="b", alpha=.6,
               align='edge')
    for i in range(ll_95_ix, ul_95_ix):
        ax.bar(binedges[i], hist[i], width, lw=0, color="b", alpha=.3,
               align='edge')

    if x_lim is not None:
        ax.set_xlim(x_lim)
    if y_lim is not None:
        ax.set_ylim(y_lim)
    return loc, ll_68, ul_68, ll_95, ul_95
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specified how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed
    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())
    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.
    *args, **kwargs :
        forwarded to ``ax.hist`` after ``x``, ``bins`` and ``range``; they
        are described in pylab.hist().

    Returns
    -------
    Whatever ``ax.hist`` returns.

    Raises
    ------
    ValueError
        If `bins` is a string that is not one of the recognised codes.
    """
    x = np.asarray(x)
    ax = kwargs.pop('ax') if 'ax' in kwargs else plt.gca()

    # Only strings select a bin-finding rule.  Testing ``bins in [...]`` on an
    # array of bin edges would compare element-wise and raise, so the rule
    # lookup is restricted to real strings.
    if isinstance(bins, str):
        rule = bins
        if rule not in ('blocks', 'knuth', 'knuths', 'scott', 'scotts',
                        'freedman', 'freedmans'):
            raise ValueError("unrecognized bin code: '%s'" % rule)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            x = x[(x >= range[0]) & (x <= range[1])]

        if rule == 'blocks':
            bins = bayesian_blocks(x)
        elif rule in ('knuth', 'knuths'):
            _, bins = knuth_bin_width(x, True)
        elif rule in ('scott', 'scotts'):
            _, bins = scotts_bin_width(x, True)
        else:
            _, bins = freedman_bin_width(x, True)

    # Extra positional arguments used to be dropped silently; pass them on.
    return ax.hist(x, bins, range, *args, **kwargs)
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specified how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed
    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())
    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.
    *args, **kwargs :
        passed through to ``ax.hist`` following ``x``, ``bins`` and
        ``range``; they are described in pylab.hist().

    Returns
    -------
    The return value of ``ax.hist``.

    Raises
    ------
    ValueError
        If `bins` is a string other than the codes listed above (the plural
        spellings 'knuths', 'scotts' and 'freedmans' are accepted too).
    """
    x = np.asarray(x)

    if 'ax' in kwargs:
        ax = kwargs.pop('ax')
    else:
        ax = plt.gca()

    # A membership test such as ``bins in ['blocks', ...]`` breaks when bins
    # is an array of edges (element-wise comparison), so rule handling is
    # entered only for genuine strings.
    wants_rule = isinstance(bins, str)

    if wants_rule and bins not in ('blocks', 'knuth', 'knuths', 'scott',
                                   'scotts', 'freedman', 'freedmans'):
        raise ValueError("unrecognized bin code: '%s'" % bins)

    if wants_rule:
        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            inside = (x >= range[0]) & (x <= range[1])
            x = x[inside]

        if bins == 'blocks':
            bins = bayesian_blocks(x)
        elif bins in ('knuth', 'knuths'):
            bins = knuth_bin_width(x, True)[1]
        elif bins in ('scott', 'scotts'):
            bins = scotts_bin_width(x, True)[1]
        else:
            bins = freedman_bin_width(x, True)[1]

    # Forward extra positional arguments instead of discarding them.
    return ax.hist(x, bins, range, *args, **kwargs)
def _size_bins(self, hist, bin_tool, **kwargs):
    """Wrapper for astroML routines to choose optimal bin widths.

    Parameters
    ----------
    hist : array_like
        Data handed to the selected ``density_estimation`` routine.
    bin_tool : str or int
        ``'freedman'``, ``'scotts'``, ``'knuth'`` or ``'blocks'`` selects a
        binning rule; a Python or NumPy integer is returned unchanged.
        Anything else logs a warning and falls back to the
        Freedman-Diaconis rule.
    **kwargs :
        Forwarded to ``density_estimation.bayesian_blocks`` only.

    Returns
    -------
    The bins produced by the selected routine, or `bin_tool` itself when it
    is an integer.
    """
    # isinstance covers every NumPy integer width (not just int32/int64);
    # bool is excluded because it was never treated as a bin count.
    if isinstance(bin_tool, (int, np.integer)) and not isinstance(bin_tool, bool):
        return bin_tool

    if bin_tool == 'freedman':
        _, bins = density_estimation.freedman_bin_width(hist, return_bins=True)
    elif bin_tool == 'scotts':
        _, bins = density_estimation.scotts_bin_width(hist, return_bins=True)
    elif bin_tool == 'knuth':
        _, bins = density_estimation.knuth_bin_width(hist, return_bins=True,
                                                     disp=False)
    elif bin_tool == 'blocks':
        bins = density_estimation.bayesian_blocks(hist, **kwargs)
    else:
        self.logger.warning("Unrecognized bin_tool option. Using Freedman-Diaconis rule.")
        _, bins = density_estimation.freedman_bin_width(hist, return_bins=True)
    return bins
def get_knuth_bins(x, bounds=None):
    '''
    Code for finding the 'ideal' bin width for plotting histograms.
    The 'best' width is then converted to the nearest 'OK' value
    (ie. a multiple of 0.1, 0.25, 0.5 or 1)

    Inputs:
    -------
    x: data that we wish to bin (any array-like; lists and tuples are
       converted to an array).
    bounds: upper and lower bounds to the data. If None, then the limits
            of the x data are used. Default is None.

    Returns:
    --------
    dx: the Knuth bin width rounded to the nearest 'OK' value
    bins: bin edges with spacing dx, spanning the bounds rounded outwards
          to multiples of dx.
    '''
    ok_widths = (0.1, 0.25, 0.5, 1)

    # Accept plain sequences as well as arrays: the masking and copy below
    # both need an ndarray.
    x = np.asarray(x)

    if bounds is not None:
        x_mask = (x >= bounds[0]) & (x <= bounds[1])
        x_plot = x[x_mask]
    else:
        x_plot = x.copy()
        bounds = (np.min(x_plot), np.max(x_plot))

    dx = knuth_bin_width(x_plot)

    # Scale the candidate widths into the decade that contains dx.
    n_zeros = math.floor(math.log10(dx))
    ok_widths_modified = [w * 10**(n_zeros + 1) for w in ok_widths]
    dx_modified = find_nearest_value(ok_widths_modified, dx)

    bin_range = (dx_modified * (math.floor(bounds[0] / dx_modified)),
                 dx_modified * (math.ceil(bounds[1] / dx_modified)))
    # The 1% overshoot makes np.arange include the upper edge.
    bins = np.arange(bin_range[0], bin_range[1] + dx_modified * 0.01,
                     dx_modified)
    return dx_modified, bins
def get_knuth_bins(x, bounds=None):
    '''
    Pick histogram bins from Knuth's rule, snapped to a "round" width.

    The width from ``knuth_bin_width`` is replaced by the nearest candidate
    (0.1, 0.25, 0.5 or 1, scaled by a power of ten) as chosen by
    ``find_nearest_value``.

    Inputs:
    -------
    x: data that we wish to bin.
    bounds: (lower, upper) limits; data outside them is ignored. If None,
            the minimum and maximum of x are used. Default is None.

    Returns:
    --------
    dx: the snapped bin width
    bins: bin edges built with np.arange at spacing dx.
    '''
    if bounds is None:
        selected = x.copy()
        bounds = (np.min(selected), np.max(selected))
    else:
        keep = (x >= bounds[0]) & (x <= bounds[1])
        selected = x[keep]

    raw_width = knuth_bin_width(selected)

    # Candidate widths live in the decade that contains the raw width.
    exponent = math.floor(math.log10(raw_width))
    candidates = [w * 10**(exponent + 1) for w in (0.1, 0.25, 0.5, 1)]
    step = find_nearest_value(candidates, raw_width)

    # Round the bounds outwards to whole multiples of the step.
    first_edge = step * (math.floor(bounds[0] / step))
    last_edge = step * (math.ceil(bounds[1] / step))

    # Small overshoot so that arange keeps the last edge.
    edges = np.arange(first_edge, last_edge + step * 0.01, step)
    return step, edges
def test_knuth_bin_width(N=10000, rseed=0):
    """Check the number of bin edges Knuth's rule returns for normal data.

    The expected value (59 edges) is pinned for the default ``N`` and
    ``rseed``.
    """
    # Seed from the parameter; it used to be ignored in favour of a literal 0.
    np.random.seed(rseed)
    X = np.random.normal(size=N)
    dx, bins = knuth_bin_width(X, return_bins=True)
    assert_allclose(len(bins), 59)
def test_knuth_bin_width(N=10000, rseed=0):
    """Knuth's rule on ``N`` standard-normal samples must give 59 bin edges.

    The pinned count corresponds to the default arguments.
    """
    # Honour the rseed argument instead of a hard-coded seed of 0.
    np.random.seed(rseed)
    X = np.random.normal(size=N)
    _, bins = knuth_bin_width(X, return_bins=True)
    assert_allclose(len(bins), 59)
def test_knuth_bin_width(N=10000, rseed=0):
    """Check the number of bin edges Knuth's rule returns for normal data.

    ``knuth_bin_width`` is called inside
    ``catch_warnings(AstroMLDeprecationWarning)``.  The expected value
    (59 edges) is pinned for the default ``N`` and ``rseed``.
    """
    # Seed from the parameter; it used to be ignored in favour of a literal 0.
    np.random.seed(rseed)
    X = np.random.normal(size=N)
    with catch_warnings(AstroMLDeprecationWarning):
        dx, bins = knuth_bin_width(X, return_bins=True)
    assert_allclose(len(bins), 59)
def test_knuth_bin_width(N=10000, rseed=0):
    """Knuth's rule on ``N`` standard-normal samples must give 59 bin edges.

    The binning call runs under ``catch_warnings(AstroMLDeprecationWarning)``;
    the pinned count corresponds to the default arguments.
    """
    # Honour the rseed argument instead of a hard-coded seed of 0.
    np.random.seed(rseed)
    X = np.random.normal(size=N)
    with catch_warnings(AstroMLDeprecationWarning):
        _, bins = knuth_bin_width(X, return_bins=True)
    assert_allclose(len(bins), 59)
def histogram(self, data, bin_width='knuth', weights=None, density=None,
              norm=None, ax=None, **kwargs):
    """
    Plots a histogram.

    Parameters
    ----------
    data : list or array
        Data to plot.
    bin_width : {'knuth', 'bayesian'} or float, optional
        Automatically determine the bin width using Knuth's rule
        (2006physics...5197K), with Bayesian blocks (2013ApJ...764..167S),
        or manually, choosing a floating point value.
    weights : array, optional
        An array of weights, of the same shape as `data`. Each value in
        `data` only contributes its associated weight towards the bin
        count (instead of 1). If `density` is True, the weights are
        normalized, so that the integral of the density over the range
        remains 1.
    density : bool, optional
        If False, the result will contain the number of samples in each
        bin. If True, the result is the value of the probability *density*
        function at the bin, normalised such that the *integral* over the
        range is 1. Note that the sum of the histogram values will not be
        equal to 1 unless bins of unity width are chosen; it is not a
        probability *mass* function.
    norm : int or float
        Custom normalisation; when truthy, the histogram values are divided
        by it.
    ax : `matplotlib.axes.Axes`, optional
        Axes instance. Defaults to ``self.axes[0]``.
    **kwargs
        Forwarded to ``ax.plot``.

    Raises
    ------
    TypeError
        If `ax` is given but is not a `matplotlib.axes.Axes`.
    ValueError
        If `bin_width` is neither a number nor a recognised rule name.
    """
    # Axes instance:
    if ax is None:
        ax = self.axes[0]
    elif not isinstance(ax, Axes):
        raise TypeError('ax must be of type `matplotlib.axes.Axes`')

    # Convert any sequence (list, tuple, ...) to an array:
    data = np.asarray(data)

    if bin_width == 'knuth':
        _, bins = knuth_bin_width(data, return_bins=True)
    elif bin_width == 'bayesian':
        bins = bayesian_blocks(data)
    elif isinstance(bin_width, (int, float)):
        bins = np.arange(data.min(), data.max(), bin_width)
    else:
        raise ValueError('bin_width must be a number, or one of'
                         '(`knuth`, `bayesian`)')

    # Ensure padding with empty bins: one extra edge below and two above,
    # spaced by the smallest existing bin width.
    dx = np.diff(bins).min()
    bins = np.pad(bins, (1, 2), mode='linear_ramp',
                  end_values=(bins[0] - dx, bins[-1] + 2 * dx))

    # Calculate histogram:
    histogram, bins = np.histogram(data, bins, weights=weights,
                                   density=density)

    if norm:
        # Unweighted counts are integers; an in-place ``/=`` cannot store the
        # float result back into them, so build a new array.
        histogram = histogram / norm

    # Plot data at the bin centres:
    ax.plot(bins[:-1] + np.diff(bins) / 2, histogram,
            drawstyle='steps-mid', **kwargs)
def knuth_n_bins(data):
    """Return the data span divided by the Knuth bin width, rounded up.

    The result comes from ``np.ceil`` and is therefore a float.
    """
    width = knuth_bin_width(data)
    upper = np.max(data)
    lower = np.min(data)
    span = upper - lower
    return np.ceil(span / width)
def histogram(self, data, bin_width='knuth', weights=None, density=None,
              norm=None, ax=None, **kwargs):
    """
    Plots a histogram.

    Parameters
    ----------
    data : list or array
        Data to plot.
    bin_width : {'knuth', 'bayesian'} or float, optional
        Automatically determine the bin width using Knuth's rule
        (2006physics...5197K), with Bayesian blocks (2013ApJ...764..167S),
        or manually, choosing a floating point value.
    weights : array, optional
        An array of weights, of the same shape as `data`. Each value in
        `data` only contributes its associated weight towards the bin
        count (instead of 1). If `density` is True, the weights are
        normalized, so that the integral of the density over the range
        remains 1.
    density : bool, optional
        If False, the result will contain the number of samples in each
        bin. If True, the result is the value of the probability *density*
        function at the bin, normalised such that the *integral* over the
        range is 1. Note that the sum of the histogram values will not be
        equal to 1 unless bins of unity width are chosen; it is not a
        probability *mass* function.
    norm : int or float
        Custom normalisation; a truthy value divides the histogram values.
    ax : `matplotlib.axes.Axes`, optional
        Axes instance. When omitted, ``self.axes[0]`` is used.
    **kwargs
        Passed on to ``ax.plot``.

    Raises
    ------
    TypeError
        If `ax` is supplied and is not a `matplotlib.axes.Axes`.
    ValueError
        If `bin_width` is not a number, ``'knuth'`` or ``'bayesian'``.
    """
    # Axes instance:
    if ax is None:
        ax = self.axes[0]
    elif not isinstance(ax, Axes):
        raise TypeError('ax must be of type `matplotlib.axes.Axes`')

    # Accept any sequence, not only lists:
    data = np.asarray(data)

    if bin_width == 'knuth':
        bins = knuth_bin_width(data, return_bins=True)[1]
    elif bin_width == 'bayesian':
        bins = bayesian_blocks(data)
    elif isinstance(bin_width, (int, float)):
        bins = np.arange(data.min(), data.max(), bin_width)
    else:
        raise ValueError('bin_width must be a number, or one of'
                         '(`knuth`, `bayesian`)')

    # Ensure padding with empty bins (one edge before, two after), using the
    # narrowest bin as the step:
    step = np.diff(bins).min()
    bins = np.pad(bins, (1, 2), mode='linear_ramp',
                  end_values=(bins[0] - step, bins[-1] + 2 * step))

    # Calculate histogram:
    histogram, bins = np.histogram(
        data, bins, weights=weights, density=density)

    if norm:
        # Integer counts cannot be divided in place (the float result does
        # not fit the integer array), so allocate the normalised values.
        histogram = histogram / norm

    # Plot data at bin centres:
    centres = bins[:-1] + np.diff(bins) / 2
    ax.plot(centres, histogram, drawstyle='steps-mid', **kwargs)
def _scaled_poisson_errors(bin_content, bin_content_raw, suppress_zero):
    """Return ``[err_low, err_hi]`` from ``poisson_error``, rescaled by content/raw."""
    intervals = [poisson_error(count, suppress_zero)
                 for count in bin_content_raw]
    err_low = np.asarray([iv[0] for iv in intervals], dtype=float)
    err_hi = np.asarray([iv[1] for iv in intervals], dtype=float)
    # NOTE(review): bins with zero raw content give 0/0 here, so their errors
    # become NaN - confirm whether that is intended.
    err_scale = bin_content / bin_content_raw
    err_low *= err_scale
    err_hi *= err_scale
    return [err_low, err_hi]


def hist(x, bins=10, fitness='events', gamma=None, p0=0.05, errorbars=None,
         suppress_zero=False, *args, **kwargs):
    """Enhanced histogram, based on `hist` function from astroML.

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specified how bins are computed, additional scaling, errorbar,
    and plotting methods are introduced.  All other kwargs can be used as in
    `pylab.hist()`.

    Parameters
    ----------
    x : array_like
        Array of data to be histogrammed
    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
    fitness : str
        Param used for Bayesian Blocks binning.
    gamma : Number
        Param used for Bayesian Blocks binning, ignored if `p0` is present
    p0 : Number
        Fake rate value for Bayesian Blocks binning, supersedes `gamma`
    errorbars : bool (optional)
        If truthy, draw error bars built from ``poisson_error``.
    suppress_zero : bool (optional)
        Passed to ``poisson_error`` as its second argument.
    ax : Axes instance (optional)
        Specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.
    scale : Number or str (optional)
        If Number, all bin contents are multiplied by the given value.
        If str:
        'binwidth' : every bin content is divided by the bin width.
        Any other value is ignored with a warning.
    normed / density : bool (optional)
        Normalise the histogram; `density` takes precedence when both are
        given.
    *args :
        Accepted for signature compatibility; not used.
    **kwargs :
        Overloaded kwargs variants:
        histtype:
        'marker' : plot the bin contents as markers, centered on the
        bin centers.  If this method is chosen, all additional kwargs
        for `pylab.plot()` can be used.
        Other keyword arguments are described in `pylab.hist()`.

    Returns
    -------
    bin_content : array or list
        The (scaled, when `scale` is applied) bin contents.
    bins : array
        The bin edges.
    vis_objects :
        What ``ax.hist`` / ``ax.plot`` returned, or None when only error
        bars were drawn in marker mode.

    Raises
    ------
    ValueError
        If `bins` is an unrecognised string.
    """
    # do initial checks and set-up for overloaded arguments
    x = np.asarray(x)

    # Only a string selects a binning rule.  ``bins in [...]`` on an array of
    # edges would compare element-wise and raise.
    bins_by_name = isinstance(bins, str)

    if bins_by_name and "weights" in kwargs:
        warnings.warn(
            "weights argument is not supported for this binning method: it will be ignored."
        )
        kwargs.pop('weights')

    ax = kwargs.pop('ax') if 'ax' in kwargs else plt.gca()

    if bins_by_name:
        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        requested_range = kwargs.get('range')
        if requested_range is not None:
            x = x[(x >= requested_range[0]) & (x <= requested_range[1])]

        if bins in ('block', 'blocks'):
            bins = bayesian_blocks(t=x, fitness=fitness, p0=p0, gamma=gamma)
        elif bins in ('knuth', 'knuths'):
            _, bins = knuth_bin_width(x, True, disp=False)
        elif bins in ('scott', 'scotts'):
            _, bins = scotts_bin_width(x, True)
        elif bins in ('freedman', 'freedmans'):
            _, bins = freedman_bin_width(x, True)
        else:
            raise ValueError("unrecognized bin code: '%s'" % bins)

    scale = kwargs.pop('scale', None)

    if scale and "stacked" in kwargs:
        warnings.warn(
            "scaling is not currently supported for stacked histograms: scaling will be ignored."
        )
        scale = None

    # Reject unsupported scale values once, up front.  The message is a single
    # formatted string: extra positional arguments to warnings.warn are taken
    # as the warning category, which raised TypeError.
    if scale and scale != 'binwidth' and not isinstance(scale, Number):
        warnings.warn(
            "scale argument value `%s` not supported: it will be ignored." % (scale,))
        scale = None

    if 'histtype' in kwargs and kwargs['histtype'] == 'marker':
        marker = kwargs.pop('histtype')
    else:
        marker = None

    # `normed` is this function's historical spelling; `density` is accepted
    # too and wins when both are supplied.
    normed = kwargs.pop('normed', False)
    normed = kwargs.pop('density', normed)

    hrange = kwargs.pop('range', None)

    # generate histogram-like object
    vis_objects = None
    vis_objects_err = None

    if marker:
        markerstyle = kwargs.pop('marker', '.')
        linestyle = kwargs.pop('linestyle', '')

        bin_content, bins = np.histogram(x, bins, density=normed, range=hrange)
        bin_content = np.asarray(bin_content, dtype=float)

        if normed:
            bin_content_raw, _ = np.histogram(x, bins, density=False,
                                              range=hrange)
            bin_content_raw = np.asarray(bin_content_raw)
        else:
            bin_content_raw = bin_content

        width = (bins[1:] - bins[:-1])
        bin_centers = bins[:-1] + width * 0.5
        bin_error = _scaled_poisson_errors(bin_content, bin_content_raw,
                                           suppress_zero)

        if errorbars:
            vis_objects_err = ax.errorbar(bin_centers, bin_content,
                                          linestyle=linestyle,
                                          marker=markerstyle,
                                          yerr=bin_error, **kwargs)
        else:
            vis_objects = ax.plot(bin_centers, bin_content,
                                  linestyle=linestyle, marker=markerstyle,
                                  **kwargs)

        kwargs.pop('color', None)

    else:
        # Current matplotlib only understands `density`; the old `normed`
        # keyword is no longer accepted by Axes.hist.
        bin_content, bins, vis_objects = ax.hist(x, bins, range=hrange,
                                                 density=normed, **kwargs)
        kwargs.pop('color', None)

        bin_content = np.asarray(bin_content, dtype=float)

        if normed:
            bin_content_raw, _ = np.histogram(x, bins, density=False,
                                              range=hrange)
            bin_content_raw = np.asarray(bin_content_raw)
        else:
            bin_content_raw = bin_content

        bin_error = _scaled_poisson_errors(bin_content, bin_content_raw,
                                           suppress_zero)

        width = (bins[1:] - bins[:-1])
        bin_centers = bins[:-1] + width * 0.5

        vis_color = vis_objects[0].get_facecolor()
        if 'histtype' in kwargs:
            if kwargs['histtype'] == 'step':
                vis_color = vis_objects[0].get_edgecolor()
            kwargs.pop('histtype')

        # These were meant for the histogram itself, not for the error bars.
        for consumed in ('weights', 'label', 'linewidth'):
            kwargs.pop(consumed, None)

        if errorbars:
            vis_objects_err = ax.errorbar(bin_centers, bin_content,
                                          linestyle='', marker='.',
                                          yerr=bin_error, linewidth=2,
                                          color=vis_color, **kwargs)

    # perform any scaling if necessary, including redrawing of the scaled
    # objects
    bin_content_scaled = None

    if scale:
        n_bins = len(bin_content)

        def rescale(value, i):
            # 'binwidth' divides by the width of bin i; a number multiplies.
            if scale == 'binwidth':
                return value / (bins[i + 1] - bins[i])
            return value * scale

        bin_content_scaled = []

        if vis_objects is not None:
            first = vis_objects[0]

            if isinstance(first, matplotlib.patches.Rectangle):
                # Bar histogram: one rectangle per bin.
                for i in range(n_bins):
                    bin_content_scaled.append(rescale(bin_content[i], i))
                    plt.setp(vis_objects[i], 'height',
                             rescale(vis_objects[i].get_height(), i))

            elif isinstance(first, matplotlib.patches.Polygon):
                # Step histogram: each bin owns two consecutive outline
                # vertices, starting at index 1.
                xy = first.get_xy()
                for i in range(n_bins):
                    value = rescale(bin_content[i], i)
                    bin_content_scaled.append(value)
                    xy[2 * i + 1, 1] = value
                    xy[2 * i + 2, 1] = value
                plt.setp(first, 'xy', xy)

            elif hasattr(first, 'set_ydata'):
                # Marker histogram drawn with ax.plot: previously left
                # unscaled, with an empty list returned as the bin contents.
                for i in range(n_bins):
                    bin_content_scaled.append(rescale(bin_content[i], i))
                first.set_ydata(np.asarray(bin_content_scaled))

        if vis_objects_err is not None:
            # Contents may already have been scaled alongside the bars above.
            need_content = len(bin_content_scaled) != n_bins
            for i in range(n_bins):
                if need_content:
                    bin_content_scaled.append(rescale(bin_content[i], i))
                bin_error[0][i] = rescale(bin_error[0][i], i)
                bin_error[1][i] = rescale(bin_error[1][i], i)

            bin_content_scaled = np.asarray(bin_content_scaled)
            lower = bin_content_scaled - bin_error[0]
            upper = bin_content_scaled + bin_error[1]

            vis_objects_err[0].set_ydata(bin_content_scaled)

            # Cap lines are only present when a cap size is in effect;
            # indexing them unconditionally raised IndexError otherwise.
            caplines = vis_objects_err[1]
            if len(caplines) >= 2:
                caplines[0].set_ydata(lower)
                caplines[1].set_ydata(upper)

            segments = vis_objects_err[2][0].get_segments()
            for i in range(len(bin_content_scaled)):
                segments[i][0][1] = lower[i]
                segments[i][1][1] = upper[i]
            vis_objects_err[2][0].set_segments(segments)

        ax.relim()
        ax.autoscale_view(False, False, True)

    if bin_content_scaled is None:
        bc = bin_content
    else:
        bc = bin_content_scaled

    return bc, bins, vis_objects
def knuth_n_bins(data):
    """Number of Knuth-width bins needed to cover the range of ``data``.

    Computed as ``ceil((max - min) / knuth_bin_width(data))``; the value is
    whatever ``np.ceil`` returns (a float, not an int).
    """
    bin_width = knuth_bin_width(data)
    data_range = np.max(data) - np.min(data)
    n_bins = np.ceil(data_range / bin_width)
    return n_bins