def estimate_conditional_quants(self, probs=None):
    """
    In each direction, compute the conditional quantiles for _out | _in and
    the integrated quantile score of every test sample.

    Requires that test values have already been generated using
    `generate_conditional_sampling` (``self.te_data_is_set`` must be true).

    Parameters
    ----------
    probs : array_like, optional
        Probabilities at which the quantiles are estimated.  Defaults to
        ``0.1, 0.2, ..., 0.9``.

    Notes
    -----
    Nothing is returned; the results are stored on the instance:

    * ``te_quants_causal`` / ``te_quants_anticausal`` -- lists holding the
      Harrell-Davis quantiles of each entry of ``te_Y_hat`` / ``te_X_hat``.
    * ``te_qscores_causal`` / ``te_qscores_anticausal`` -- arrays holding,
      per entry, the ``qscore`` curve integrated over ``probs``.
    """
    assert self.te_data_is_set, (
        "test data not set; run generate_conditional_sampling first"
    )
    if probs is None:
        probs = np.arange(1, 10) / 10.0

    # `simps` was removed in SciPy 1.14 in favour of `simpson`, whose `x`
    # argument is keyword-only there.  Prefer the new name, fall back to the
    # old one, and always pass `x` by keyword so both spellings work.
    # NOTE(review): assumes `itg` is `scipy.integrate` -- confirm.
    integrate = getattr(itg, "simpson", None) or itg.simps

    self.te_quants_causal = [
        hdquantiles(_y_hat, prob=probs) for _y_hat in self.te_Y_hat
    ]
    self.te_quants_anticausal = [
        hdquantiles(_x_hat, prob=probs) for _x_hat in self.te_X_hat
    ]

    # Each sample is paired with the quantiles computed from it just above.
    self.te_qscores_causal = np.array([
        integrate(qscore(_quants, probs, _y_hat), x=probs)
        for _quants, _y_hat in zip(self.te_quants_causal, self.te_Y_hat)
    ])
    self.te_qscores_anticausal = np.array([
        integrate(qscore(_quants, probs, _x_hat), x=probs)
        for _quants, _x_hat in zip(self.te_quants_anticausal, self.te_X_hat)
    ])
def test_relperm_xy():
    """
    Smoke test: run the simple relative-permeability computation on a
    ``cube_network(10)`` network and print the maximum, the minimum and the
    1% / 50% / 99% Harrell-Davis quantiles of ``network.tubes.k_w``.
    """
    network = cube_network(10)
    relperm_calculator = SimpleRelPermComputer(network)
    relperm_calculator.compute()

    # Single-argument print(...) calls give the same output under Python 2
    # and are valid syntax under Python 3, unlike the print statement.
    k_w = network.tubes.k_w
    print(np.max(k_w))
    print(np.min(k_w))
    print(hdquantiles(k_w, prob=[0.01, 0.5, 0.99]))
def test_hdquantiles(self):
    # Harrell-Davis quantiles and their standard deviations are compared
    # with reference values, first on the flat sample and then column by
    # column on the same sample viewed as a 10 x 10 array.
    sample = self.data
    quartiles = [0.25, 0.5, 0.75]

    # Probabilities 0 and 1.
    bounds = ms.hdquantiles(sample, [0.0, 1.0])
    assert_almost_equal(bounds, [0.006514031, 0.995309248])

    # Quartile estimates.
    estimates = ms.hdquantiles(sample, quartiles)
    assert_almost_equal(estimates, [0.253210762, 0.512847491, 0.762232442])

    # Their standard deviations, checked to 4 decimals only.
    deviations = ms.hdquantiles_sd(sample, quartiles)
    assert_almost_equal(deviations, [0.03786954, 0.03805389, 0.03800152], 4)

    # Along axis 0 every column must match the 1-D result for that column;
    # the first and the last column are checked.
    grid = np.array(sample).reshape(10, 10)
    by_column = ms.hdquantiles(grid, quartiles, axis=0)
    for col in (0, -1):
        assert_almost_equal(by_column[:, col],
                            ms.hdquantiles(grid[:, col], quartiles))

    # Same comparison with the variance estimate requested as well.
    by_column_var = ms.hdquantiles(grid, quartiles, axis=0, var=True)
    for col in (0, -1):
        assert_almost_equal(by_column_var[..., col],
                            ms.hdquantiles(grid[:, col], quartiles, var=True))
def test_hdquantiles(self):
    # Reference check of the Harrell-Davis estimators on a fixed sample of
    # 100 values, flat first and then reshaped to 10 x 10.
    values = [
        0.706560797, 0.727229578, 0.990399276, 0.927065621, 0.158953014,
        0.887764025, 0.239407086, 0.349638551, 0.972791145, 0.149789972,
        0.936947700, 0.132359948, 0.046041972, 0.641675031, 0.945530547,
        0.224218684, 0.771450991, 0.820257774, 0.336458052, 0.589113496,
        0.509736129, 0.696838829, 0.491323573, 0.622767425, 0.775189248,
        0.641461450, 0.118455200, 0.773029450, 0.319280007, 0.752229111,
        0.047841438, 0.466295911, 0.583850781, 0.840581845, 0.550086491,
        0.466470062, 0.504765074, 0.226855960, 0.362641207, 0.891620942,
        0.127898691, 0.490094097, 0.044882048, 0.041441695, 0.317976349,
        0.504135618, 0.567353033, 0.434617473, 0.636243375, 0.231803616,
        0.230154113, 0.160011327, 0.819464108, 0.854706985, 0.438809221,
        0.487427267, 0.786907310, 0.408367937, 0.405534192, 0.250444460,
        0.995309248, 0.144389588, 0.739947527, 0.953543606, 0.680051621,
        0.388382017, 0.863530727, 0.006514031, 0.118007779, 0.924024803,
        0.384236354, 0.893687694, 0.626534881, 0.473051932, 0.750134705,
        0.241843555, 0.432947602, 0.689538104, 0.136934797, 0.150206859,
        0.474335206, 0.907775349, 0.525869295, 0.189184225, 0.854284286,
        0.831089744, 0.251637345, 0.587038213, 0.254475554, 0.237781276,
        0.827928620, 0.480283781, 0.594514455, 0.213641488, 0.024194386,
        0.536668589, 0.699497811, 0.892804071, 0.093835427, 0.731107772,
    ]
    probs = [0.25, 0.5, 0.75]

    # Probabilities 0 and 1 are expected to return the smallest and the
    # largest value of the sample.
    assert_almost_equal(ms.hdquantiles(values, [0.0, 1.0]),
                        [0.006514031, 0.995309248])

    expected_quartiles = [0.253210762, 0.512847491, 0.762232442]
    assert_almost_equal(ms.hdquantiles(values, probs), expected_quartiles)

    # Standard deviations are only compared to 4 decimals.
    expected_sd = [0.03786954, 0.03805389, 0.03800152]
    assert_almost_equal(ms.hdquantiles_sd(values, probs), expected_sd, 4)

    # 2-D input: the axis=0 result must agree, column by column, with the
    # 1-D computation (first and last columns are checked).
    table = np.array(values).reshape(10, 10)
    checked_columns = (0, -1)

    per_column = ms.hdquantiles(table, probs, axis=0)
    for j in checked_columns:
        column_result = ms.hdquantiles(table[:, j], probs)
        assert_almost_equal(per_column[:, j], column_result)

    per_column = ms.hdquantiles(table, probs, axis=0, var=True)
    for j in checked_columns:
        column_result = ms.hdquantiles(table[:, j], probs, var=True)
        assert_almost_equal(per_column[..., j], column_result)
def test_hdquantiles(self):
    # Compare hdquantiles / hdquantiles_sd with stored reference numbers,
    # then verify that the axis-aware call agrees with per-column calls.
    observations = self.data
    probs = [0.25, 0.5, 0.75]
    edge_columns = (0, -1)

    extremes = ms.hdquantiles(observations, [0.0, 1.0])
    assert_almost_equal(extremes, [0.006514031, 0.995309248])

    quartile_values = ms.hdquantiles(observations, probs)
    assert_almost_equal(quartile_values,
                        [0.253210762, 0.512847491, 0.762232442])

    # Third positional argument: number of decimals to compare.
    quartile_sd = ms.hdquantiles_sd(observations, probs)
    assert_almost_equal(quartile_sd, [0.03786954, 0.03805389, 0.03800152], 4)

    matrix = np.array(observations).reshape(10, 10)

    # Quantiles only.
    along_rows = ms.hdquantiles(matrix, probs, axis=0)
    for column in edge_columns:
        reference = ms.hdquantiles(matrix[:, column], probs)
        assert_almost_equal(along_rows[:, column], reference)

    # Quantiles together with their variance estimate.
    along_rows = ms.hdquantiles(matrix, probs, axis=0, var=True)
    for column in edge_columns:
        reference = ms.hdquantiles(matrix[:, column], probs, var=True)
        assert_almost_equal(along_rows[..., column], reference)
def test_hdquantiles(self):
    # Fixed sample of 100 values used as reference input for the
    # Harrell-Davis estimators.
    raw = [
        0.706560797, 0.727229578, 0.990399276, 0.927065621,
        0.158953014, 0.887764025, 0.239407086, 0.349638551,
        0.972791145, 0.149789972, 0.936947700, 0.132359948,
        0.046041972, 0.641675031, 0.945530547, 0.224218684,
        0.771450991, 0.820257774, 0.336458052, 0.589113496,
        0.509736129, 0.696838829, 0.491323573, 0.622767425,
        0.775189248, 0.641461450, 0.118455200, 0.773029450,
        0.319280007, 0.752229111, 0.047841438, 0.466295911,
        0.583850781, 0.840581845, 0.550086491, 0.466470062,
        0.504765074, 0.226855960, 0.362641207, 0.891620942,
        0.127898691, 0.490094097, 0.044882048, 0.041441695,
        0.317976349, 0.504135618, 0.567353033, 0.434617473,
        0.636243375, 0.231803616, 0.230154113, 0.160011327,
        0.819464108, 0.854706985, 0.438809221, 0.487427267,
        0.786907310, 0.408367937, 0.405534192, 0.250444460,
        0.995309248, 0.144389588, 0.739947527, 0.953543606,
        0.680051621, 0.388382017, 0.863530727, 0.006514031,
        0.118007779, 0.924024803, 0.384236354, 0.893687694,
        0.626534881, 0.473051932, 0.750134705, 0.241843555,
        0.432947602, 0.689538104, 0.136934797, 0.150206859,
        0.474335206, 0.907775349, 0.525869295, 0.189184225,
        0.854284286, 0.831089744, 0.251637345, 0.587038213,
        0.254475554, 0.237781276, 0.827928620, 0.480283781,
        0.594514455, 0.213641488, 0.024194386, 0.536668589,
        0.699497811, 0.892804071, 0.093835427, 0.731107772,
    ]
    quartile_probs = [0.25, 0.5, 0.75]

    # Flat sample: extremes, quartiles, and quartile standard deviations
    # (the latter compared to 4 decimals).
    result = ms.hdquantiles(raw, [0.0, 1.0])
    assert_almost_equal(result, [0.006514031, 0.995309248])

    result = ms.hdquantiles(raw, quartile_probs)
    assert_almost_equal(result, [0.253210762, 0.512847491, 0.762232442])

    result = ms.hdquantiles_sd(raw, quartile_probs)
    assert_almost_equal(result, [0.03786954, 0.03805389, 0.03800152], 4)

    # 10 x 10 view: results along axis 0 must equal the 1-D results of the
    # first and the last column, without and with the variance estimate.
    square = np.array(raw).reshape(10, 10)
    first, last = square[:, 0], square[:, -1]

    result = ms.hdquantiles(square, quartile_probs, axis=0)
    assert_almost_equal(result[:, 0], ms.hdquantiles(first, quartile_probs))
    assert_almost_equal(result[:, -1], ms.hdquantiles(last, quartile_probs))

    result = ms.hdquantiles(square, quartile_probs, axis=0, var=True)
    assert_almost_equal(result[..., 0],
                        ms.hdquantiles(first, quartile_probs, var=True))
    assert_almost_equal(result[..., -1],
                        ms.hdquantiles(last, quartile_probs, var=True))
def test_hdquantiles_sd(self):
    # hdquantiles_sd is an efficient jackknife estimator of the standard
    # error; verify it against a plain, easy-to-read jackknife built here.
    sample = self.data
    hd_std_errs = ms.hdquantiles_sd(sample)

    # Jackknife standard error, Introduction to the Bootstrap Eq. 11.5
    n = len(sample)

    # Row i of the (n, n-1) array is the sample with observation i left out.
    keep = np.logical_not(np.eye(n))
    leave_one_out = np.broadcast_to(sample, (n, n))[keep].reshape(n, n-1)

    # Quantiles of every leave-one-out sample, and their spread.
    jdist = ms.hdquantiles(leave_one_out, axis=1)
    deviations = jdist - np.mean(jdist, axis=0)
    jstd = ((n-1)/n * np.sum(deviations**2, axis=0))**.5

    assert_almost_equal(hd_std_errs, jstd)

    # Test actual values for good measure
    assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])

    # Smallest sample for which a jackknife is defined.
    two_data_points = ms.hdquantiles_sd([1, 2])
    assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])
def calculate_RCIW_MJ_HD(data, prob=0.5, alpha=0.01, axis=None):
    """
    Return the relative confidence-interval width (in percent) of a quantile.

    The quantile is estimated with the Harrell-Davis estimator
    (``hdquantiles``) and its standard error with the Maritz-Jarrett
    estimator (``mjci``).  The interval is ``quantile +/- z * std_error``
    with ``z`` the normal quantile for the requested ``alpha``; both bounds
    are clipped at zero, and the width is divided by the quantile (rounded
    to 5 decimals) and multiplied by 100.

    :param data: sequence of observations
    :param prob: quantile to evaluate (the first returned quantile is used)
    :param alpha: significance level; ``alpha`` and ``1 - alpha`` are
        treated the same because the smaller of the two is used
    :param axis: forwarded unchanged to ``hdquantiles`` and ``mjci``
    :return: ``-1`` when ``data`` has fewer than two elements; ``0.0`` when
        the data have zero variance, when the rounded quantile is zero, or
        when the Maritz-Jarrett estimate cannot be computed; otherwise the
        relative confidence-interval width in percent
    """
    if len(data) < 2:
        return -1
    if variance(data) == 0.0:
        return 0.0

    alpha = min(alpha, 1 - alpha)
    z = norm.ppf(1 - alpha / 2.)

    xq = hdquantiles(data, prob, axis=axis)
    med = round(xq[0], 5)
    if med == 0:
        # A zero quantile would make the relative width undefined.
        return 0.0

    # Best effort: a failing standard-error estimate yields 0.0.  Only
    # ordinary errors are swallowed, so KeyboardInterrupt and SystemExit
    # still propagate.
    try:
        smj = mjci(data, prob, axis=axis)
    except Exception:
        return 0.0

    # Clip both interval bounds at zero before measuring the width.
    ci_lower = (xq - z * smj)[0]
    ci_lower = 0 if ci_lower < 0 else ci_lower
    ci_upper = (xq + z * smj)[0]
    ci_upper = 0 if ci_upper < 0 else ci_upper

    rciw = ((ci_upper - ci_lower) / med) * 100
    return rciw
def whiskerbox(
    series,
    fsp=None,
    positions=None,
    mode="mquantiles",
    width=0.8,
    wisk=None,
    plot_mean=False,
    logscale=None,
    color=None,
    outliers=None,
):
    """
    Draws a whisker plot.
    The bottom and top of the boxes correspond to the lower and upper
    quartiles respectively (25th and 75th percentiles).

    Parameters
    ----------
    series : Sequence
        Input data. If the sequence is 2D, each column is assumed to
        represent a different variable.
    fsp : :class:`Subplot`
        Subplot where to draw the data. If None, uses the current axes.
    positions : {None, sequence}, optional
        Positions along the x-axis. If None, use a scale from 1 to the
        number of columns.
    mode : {'mquantiles', 'hdquantiles'}, optional
        Type of algorithm used to compute the quantiles.
        If 'hdquantiles', use the Harrell-Davis estimators
        (``mstats.hdquantiles``); any other value uses the classical form
        (``mstats.mquantiles``).
    width : float, optional
        Width of the boxes. The whisker caps span half of that width.
    wisk : {None, float}, optional
        Whiskers size, as a multiplier of the inter-quartile range.
        If None, the whiskers are drawn between the 5th and 95th
        percentiles.
    plot_mean : {False, True}, optional
        Whether to overlay the mean on the box.
    logscale : {None, bool}, optional
        If true, the y-axis of the subplot is switched to a log scale.
    color : {None, string}, optional
        Color of the main box.
    outliers : {dictionary}, optional
        Options for plotting outliers. By default (or if empty), the
        dictionary is
        ``dict(marker='x', ms=4, mfc='#999999', mec='#999999', ls='')``

    Returns
    -------
    bars
        Whatever ``fsp.bar`` returns for the boxes.
    """
    outliers = outliers or dict(marker="x", ms=4, mfc="#999999", mec="#999999", ls="")
    if fsp is None:
        fsp = pyplot.gca()
    # The private `_hold` flag only exists on old Matplotlib axes. Where it
    # is missing, behave as if hold were on and keep what is already drawn,
    # instead of failing with an AttributeError.
    if not getattr(fsp, "_hold", True):
        fsp.cla()
    # Make sure the series is a masked array
    series = ma.array(series, copy=False, subok=False)
    # Reshape the series to 2D, one variable per column
    if series.ndim < 2:
        series = series.reshape(-1, 1)
    elif series.ndim > 2:
        series = np.swapaxes(series, 1, -1).reshape(-1, series.shape[1])
    if positions is None:
        positions = np.arange(1, series.shape[1] + 1)
    else:
        # Needed for the arithmetic on `positions` when placing the boxes.
        positions = np.asarray(positions)
    # Get the quantiles: 5%, lower quartile, median, upper quartile, 95%
    plist = [0.05, 0.25, 0.5, 0.75, 0.95]
    if mode == "hdquantiles":
        # hdquantiles is applied column by column (series is always 2D here)
        (qb, ql, qm, qh, qt) = ma.apply_along_axis(mstats.hdquantiles, 0, series, plist)
    else:
        (qb, ql, qm, qh, qt) = mstats.mquantiles(series, plist, axis=0)
    # Get the heights, bottoms, and whiskers positions
    heights = qh - ql
    bottoms = ql
    if wisk is not None:
        hival = qh + wisk * heights
        loval = ql - wisk * heights
    else:
        (hival, loval) = (qt, qb)
    # Plot the whiskers and outliers, one column at a time
    for i, pos, xh, xl in np.broadcast(np.arange(len(positions)), positions, hival, loval):
        x = series[:, i]
        # High extreme: largest value within the upper limit
        wisk_h = x[(x <= xh).filled(False)]
        if len(wisk_h) == 0:
            wisk_h = qh[i]
        else:
            wisk_h = max(wisk_h)
        # Low extreme: smallest value within the lower limit
        wisk_l = x[(x >= xl).filled(False)]
        if len(wisk_l) == 0:
            wisk_l = ql[i]
        else:
            wisk_l = min(wisk_l)
        fsp.plot((pos, pos), (wisk_l, wisk_h), dashes=(1, 1), c="k", zorder=1)
        fsp.plot((pos - 0.25 * width, pos + 0.25 * width), (wisk_l, wisk_l), "-", c="k")
        fsp.plot((pos - 0.25 * width, pos + 0.25 * width), (wisk_h, wisk_h), "-", c="k")
        # Outliers, if any: values beyond either limit. One-sided outliers
        # must be drawn too, hence `or` rather than `and`.
        flh = x[(x > xh).filled(False)].view(ndarray)
        fll = x[(x < xl).filled(False)].view(ndarray)
        if len(flh) > 0 or len(fll) > 0:
            fsp.plot([pos] * (len(flh) + len(fll)), np.r_[flh, fll], **outliers)
        # Plot the median
        fsp.plot((pos - 0.5 * width, pos + 0.5 * width), (qm[i], qm[i]), ls="-", c="k", lw=1.2, zorder=99)
        # Plot the mean
        if plot_mean:
            fsp.plot(
                (pos - 0.5 * width, pos + 0.5 * width),
                (x.mean(), x.mean()),
                ls=":",
                dashes=(1, 1),
                c="#000000",
                lw=1.1,
                zorder=99,
            )
    # Plot the boxes. The x values are the left edges of the boxes, so the
    # alignment is stated explicitly: Matplotlib >= 2.0 centers bars by
    # default, which would shift every box by half a width.
    bars = fsp.bar(
        positions - 0.5 * width,
        heights,
        width=width,
        bottom=bottoms,
        color=color,
        yerr=None,
        xerr=None,
        ecolor="k",
        capsize=3,
        zorder=50,
        align="edge",
    )
    if logscale:
        fsp.set_yscale("log")
    return bars
def whiskerbox(series, fsp=None, positions=None, mode='mquantiles', width=0.8,
               wisk=None, plot_mean=False, logscale=None, color=None,
               outliers=None):
    """
    Draws a whisker plot.
    The bottom and top of the boxes correspond to the lower and upper
    quartiles respectively (25th and 75th percentiles).

    Parameters
    ----------
    series : Sequence
        Input data. If the sequence is 2D, each column is assumed to
        represent a different variable.
    fsp : :class:`Subplot`
        Subplot where to draw the data. If None, uses the current axes.
    positions : {None, sequence}, optional
        Positions along the x-axis. If None, use a scale from 1 to the
        number of columns.
    mode : {'mquantiles', 'hdquantiles'}, optional
        Type of algorithm used to compute the quantiles.
        If 'hdquantiles', use the Harrell-Davis estimators
        (``mstats.hdquantiles``); any other value uses the classical form
        (``mstats.mquantiles``).
    width : float, optional
        Width of the boxes. The whisker caps span half of that width.
    wisk : {None, float}, optional
        Whiskers size, as a multiplier of the inter-quartile range.
        If None, the whiskers are drawn between the 5th and 95th
        percentiles.
    plot_mean : {False, True}, optional
        Whether to overlay the mean on the box.
    logscale : {None, bool}, optional
        If true, the y-axis of the subplot is switched to a log scale.
    color : {None, string}, optional
        Color of the main box.
    outliers : {dictionary}, optional
        Options for plotting outliers. By default (or if empty), the
        dictionary is
        ``dict(marker='x', ms=4, mfc='#999999', mec='#999999', ls='')``

    Returns
    -------
    bars
        Whatever ``fsp.bar`` returns for the boxes.
    """
    outliers = outliers or dict(
        marker='x',
        ms=4,
        mfc='#999999',
        mec='#999999',
        ls='',
    )
    if fsp is None:
        fsp = pyplot.gca()
    # The private `_hold` flag only exists on old Matplotlib axes. Where it
    # is missing, behave as if hold were on and keep what is already drawn,
    # instead of failing with an AttributeError.
    if not getattr(fsp, '_hold', True):
        fsp.cla()
    # Make sure the series is a masked array
    series = ma.array(series, copy=False, subok=False)
    # Reshape the series to 2D, one variable per column
    if series.ndim < 2:
        series = series.reshape(-1, 1)
    elif series.ndim > 2:
        series = np.swapaxes(series, 1, -1).reshape(-1, series.shape[1])
    if positions is None:
        positions = np.arange(1, series.shape[1] + 1)
    else:
        # Needed for the arithmetic on `positions` when placing the boxes.
        positions = np.asarray(positions)
    # Get the quantiles: 5%, lower quartile, median, upper quartile, 95%
    plist = [0.05, 0.25, 0.5, 0.75, 0.95]
    if mode == 'hdquantiles':
        # hdquantiles is applied column by column (series is always 2D here)
        (qb, ql, qm, qh, qt) = ma.apply_along_axis(mstats.hdquantiles, 0,
                                                   series, plist)
    else:
        (qb, ql, qm, qh, qt) = mstats.mquantiles(series, plist, axis=0)
    # Get the heights, bottoms, and whiskers positions
    heights = qh - ql
    bottoms = ql
    if wisk is not None:
        hival = qh + wisk * heights
        loval = ql - wisk * heights
    else:
        (hival, loval) = (qt, qb)
    # Plot the whiskers and outliers, one column at a time
    for i, pos, xh, xl in np.broadcast(np.arange(len(positions)), positions,
                                       hival, loval):
        x = series[:, i]
        # High extreme: largest value within the upper limit
        wisk_h = x[(x <= xh).filled(False)]
        if len(wisk_h) == 0:
            wisk_h = qh[i]
        else:
            wisk_h = max(wisk_h)
        # Low extreme: smallest value within the lower limit
        wisk_l = x[(x >= xl).filled(False)]
        if len(wisk_l) == 0:
            wisk_l = ql[i]
        else:
            wisk_l = min(wisk_l)
        fsp.plot((pos, pos), (wisk_l, wisk_h), dashes=(1, 1), c='k', zorder=1)
        fsp.plot((pos - 0.25 * width, pos + 0.25 * width), (wisk_l, wisk_l),
                 '-', c='k')
        fsp.plot((pos - 0.25 * width, pos + 0.25 * width), (wisk_h, wisk_h),
                 '-', c='k')
        # Outliers, if any: values beyond either limit. One-sided outliers
        # must be drawn too, hence `or` rather than `and`.
        flh = x[(x > xh).filled(False)].view(ndarray)
        fll = x[(x < xl).filled(False)].view(ndarray)
        if len(flh) > 0 or len(fll) > 0:
            fsp.plot([pos] * (len(flh) + len(fll)), np.r_[flh, fll],
                     **outliers)
        # Plot the median
        fsp.plot((pos - 0.5 * width, pos + 0.5 * width), (qm[i], qm[i]),
                 ls='-', c='k', lw=1.2, zorder=99)
        # Plot the mean
        if plot_mean:
            fsp.plot((pos - 0.5 * width, pos + 0.5 * width),
                     (x.mean(), x.mean()),
                     ls=':', dashes=(1, 1), c='#000000', lw=1.1, zorder=99)
    # Plot the boxes. The x values are the left edges of the boxes, so the
    # alignment is stated explicitly: Matplotlib >= 2.0 centers bars by
    # default, which would shift every box by half a width.
    bars = fsp.bar(positions - 0.5 * width, heights, width=width,
                   bottom=bottoms, color=color, yerr=None, xerr=None,
                   ecolor='k', capsize=3, zorder=50, align='edge')
    if logscale:
        fsp.set_yscale('log')
    return bars